1. 首页
  2. 自学中心
  3. 软件
  4. SAS

运用import过程进行SAS数据导入完全实用教程

运用import过程进行SAS数据导入完全实用教程

转载请注明出处:http://blog.sina.com.cn/s/blog_5d3b177c0100e7fa.html

1 单个规范格式文件导入。

对单个文件进行导入是我们遇到最多的情况,主要有以下几种:

1.1 对指定分隔符(’|’,’ ’,’!’,’ab’等)数据的导入,这里以’!’为例delimiter=’!’进行说明:

data _null_;

  file ‘c:\temp\pipefile.txt’;

  put”X1!X2!X3!X4″;

  put “11!22!.! “;

  put “111!.!333!apple”;

run;

导入程序:

proc import

  datafile=’c:\temp\pipefile.txt’

  out=work.test

  dbms=dlm

  replace;

delimiter=’!’;

GUESSINGROWS=2000;

DATAROW=2;

  getnames=yes;

run;

注意GUESSINGROWS的值阈为1 到 3276

1.2 对CSV格式的数据进行导入:

data _null_;

  file ‘c:\temp\csvfile.csv’;

  put “Fruit1,Fruit2,Fruit3,Fruit4”;

  put “apple,banana,coconut,date”;

  put “apricot,berry,crabapple,dewberry”;

run;

导入程序:

proc import

  datafile=’c:\temp\csvfile.csv’

  out=work.fruit

  dbms=csv

  replace;

run;

1.3 对tab分隔数据的导入:

data _null_;

  file ‘c:\temp\tabfile.txt’;

  put “cereal” “09”x “eggs” “09”x “bacon”;

  put “muffin” “09”x “berries” “09”x “toast”;

run;

proc import

  datafile=’c:\temp\tabfile.txt’

  out=work.breakfast

  dbms=tab

  replace;

  getnames=no;

run;

1.4 对dbf数据库数据进行导入:

proc import datafile=”/myfiles/mydata.dbf”

              out=sasuser.mydata

              dbms=dbf

              replace;

run;

 

1.5对excel数据进行导入:

PROC IMPORT OUT= hospital1

            DATAFILE= ” C:\My Documents\Excel Files\Hospital1.xls “

            DBMS=EXCEL REPLACE;

     SHEET=”Sheet1$”;

     GETNAMES=YES;

     MIXED=NO;

     SCANTEXT=YES;

     USEDATE=YES;

     SCANTIME=YES;

RUN;

1.6对access数据进行导入:

PROC IMPORT DBMS=ACCESS TABLE=”customers” OUT=sasuser.cust;

     DATABASE=”c:\demo\customers.mdb”;

     UID=”bob”;

     PWD=”cat”;                                WGDB=”c:\winnt\system32\system.mdb”; 

RUN;

proc print data=sasuser.cust;

run;

 

1.7 import过程步中,dbms选项汇总:

Identifier Input Data Source Extension Host Availability
ACCESS Microsoft Access 2000 or 2002 table .mdb Microsoft Windows *
ACCESS97 Microsoft Access 97 table .mdb Microsoft Windows *
ACCESS2000 Microsoft Access 2000 table .mdb Microsoft Windows *
ACCESS2002 Microsoft Access 2002 table .mdb Microsoft Windows *
ACCESSCS Microsoft Access table .mdb UNIX
CSV delimited file (comma-separated values) .csv OpenVMS Alpha, UNIX, Microsoft Windows
DBF dBASE 5.0, IV, III+, and III files .dbf UNIX, Microsoft Windows
DLM delimited file (default delimiter is a blank) .* OpenVMS Alpha, UNIX, Microsoft Windows
EXCEL Excel 2000 or 2002 spreadsheet .xls Microsoft Windows *
EXCEL4 Excel 4.0 spreadsheet .xls Microsoft Windows
EXCEL5 Excel 5.0 or 7.0 (95) spreadsheet .xls Microsoft Windows
EXCEL97 Excel 97 or 7.0 (95) spreadsheet .xls Microsoft Windows *
EXCEL2000 Excel 2000 spreadsheet .xls Microsoft Windows *
EXCELCS Excel spreadsheet .xls UNIX
JMP JMP table .jmp UNIX, Microsoft Windows
PCFS Files on PC server .* UNIX
TAB delimited file (tab-delimited values) .txt OpenVMS Alpha, UNIX, Microsoft Windows
WK1 Lotus 1-2-3 Release 2 spreadsheet .wk1 Microsoft Windows
WK3 Lotus 1-2-3 Release 3 spreadsheet .wk3 Microsoft Windows
WK4 Lotus 1-2-3 Release 4 or 5 spreadsheet .wk4 Microsoft Windows

2 导入一个文件夹下的所有文件的数据。

2.1下面的代码导入一个文件夹下的所有文件的数据,要使用本代码需注意几点:首先,这个文件夹下的数据文件必须是同一类型分隔的数据,比如例子中都是tab分隔的txt文件,当然也可以对本代码进行改进,例如中间的proc import的dbms改为excel,就可以导入excel文件了。其次,本代码直接将文件名作为SAS数据集的名字,因此文件名必须是英文,且满足SAS命名规则。

%macro directory(dir=);

%let rs=%sysfunc(filename(filref,&dir));

%let did=%sysfunc(dopen(&filref));

%let nobs=%sysfunc(dnum(&did));

%do i=1 %to &nobs.;

    %let name=%qscan(%qsysfunc(dread(&did,&i)),1,.);

    %let ext=%qscan(%qsysfunc(dread(&did,&i)),-1,.);

    proc import out=&name. datafile=”&dir.\&name..&ext” dbms=tab replace;

        getnames=no;

        datarow=1;

    run;

%end;

%let rc=%sysfunc(dclose(&did));

%mend;

%directory(dir=C:\PRIVATE);

如果要将数据集进行汇总到一张表,或者则可以直接将proc import out=&name中的&name改为a&i,然后对所有的a:数据集进行set操作。除此之外,我们还可以对&ext进行设置来达到读取指定文件格式的数据。

2.2 这里运用pipe读取到文件名称,再读取数据。首先建立三个数据集:

data _null_;

  file ‘c:\junk\extfile1.txt’;

  put “05JAN2001 6 W12301 1.59 9.54”;

  put “12JAN2001 3 P01219 2.99 8.97”;

run;

data _null_;

  file ‘c:\junk\extfile2.txt’;

  put “02FEB2001 1 P01219 2.99 2.99”;

  put “05FEB2001 3 A00901 1.99 5.97”;

  put “07FEB2001 2 C21135 3.00 6.00”;

run;

data _null_;

  file ‘c:\junk\extfile3.txt’;

  put “06MAR2001 4 A00101 3.59 14.36”;

  put “12MAR2001 2 P01219 2.99 5.98”;

run    

filename blah pipe ‘dir C:\Junk /b’;

 

data _null_;

  infile blah truncover end=last;

  length fname $20;

  input fname;

  i+1;

  call symput(‘fname’||trim(left(put(i,8.))),scan(trim(fname),1,’.’));

  call symput(‘pext’||trim(left(put(i,8.))),trim(fname));

  if last then call symput(‘total’,trim(left(put(i,8.))));

run;

%macro test;

  %do i=1 %to &total;

     proc import datafile=”c:\Junk\&&pext&i”

                 out=work.&&fname&i

                 dbms=dlm replace;

     delimiter=’ ‘;

     getnames=no ;

     run;

     proc print data=work.&&fname&i;;

     title &&fname&i;

     run;

  %end;

%mend;

%test;

这里,如果要导入指定文件类型的数据,例如txt,则只需要将filename blah pipe ‘dir C:\Junk /b’;改为filename blah pipe ‘dir C:\Junk.*.txt /b’;即可。

 

除了用filename blah pipe ‘dir C:\Junk.*.txt /b’;得到指定类型的文件名,我们还可以%sysexec dir *.xls /b/o:n > flist.txt;来将xls文件输出到指定的文件中,供读取操作用。这个将在下面的内容作介绍。

 

 

3 导入excel表中的所有sheet的数据,并将其汇总到一个数据表中。

3.1 Excel表是sas导入导出最多的数据表之一,本例中,我们将导入一个excel中的不同的数据

%let dir=C:\ExcelFiles;      

%macro ReadXls (inf);

libname excellib excel “&dir.\&inf”;

proc sql noprint;

    create table sheetname as

    select tranwrd(memname, “””, “‘”) as sheetname

    from sashelp.vstabvw

    where libname=”EXCELLIB”;

    select count(DISTINCT sheetname) into :cnt_sht

    from sheetname;

    select DISTINCT sheetname into :sheet1 – :sheet%left(&cnt_sht)

    from sheetname;

quit;

libname excellib clear;

%do i=1 %to &cnt_sht;

 proc import datafile=”&dir.\&inf”

       out=sheet&i replace;

       sheet=”&&sheet&i”;

       getnames=yes;

       mixed=yes;      

 run;

proc append base=master data=sheet&i force;

   run;

%end;

%mend ReadXls;

%ReadXls(all1.xls);

这样,我们可以通过%ReadXls(all2.xls); %ReadXls(all3.xls);等来得到多个excel文件的所有数据集。

3.2 我们可以结合3.1和2.1或2.2的方法来读取多个文件中的多个表。这里再介绍一种新的读取多个文件的方法:

options noxwait;

%macro ReadXls (dir=);

%sysexec cd &dir; %sysexec dir *.xls /b/o:n > flist.txt;

 

data _indexfile;

    length filen $200;

    infile “&dir./flist.txt”;

    input filen $;

run;

 

proc sql noprint;

    select count(filen) into :cntfile from _indexfile;

    %if &cntfile>=1 %then %do;

    select filen into :filen1-:filen%left(&cntfile)

    from _indexfile;

    %end;

quit;

 

 

%do i=1 %to &cntfile;

libname excellib excel “&dir.\&&filen&i”; 

proc sql noprint;

    create table sheetname as

    select tranwrd(memname, “””, “‘”) as sheetname

    from sashelp.vstabvw

    where libname=”EXCELLIB”;

    select count(DISTINCT sheetname) into :cnt_sht

    from sheetname;

    select DISTINCT sheetname into :sheet1 – :sheet%left(&cnt_sht)

    from sheetname;

quit;

%do j=1 %to &cnt_sht;

     proc import datafile=”&dir.\&&filen&i”

          out=sheet&j replace;

          sheet=”&&sheet&j”;

          getnames=yes;

          mixed=yes;

     run;

data sheet&j;

   length _excelfilename $100 _sheetname $32;

   set sheet&j;

   _excelfilename=”&&filen&z”;

   _sheetname=”&&sheet&j”;

run;

proc append base=master data=sheet&j force;

run;

%end;

libname excellib clear; 

%end;

%mend ReadXls;

%readxls (dir=C:\ExcelFiles);

 

4 从多个文件夹下读取多个数据。

直接给源代码吧。

%macro etl(ds, ds2,path);

data &ds &ds2;

LENGTH DateTime 8

UserName $ 20

Submit $ 10

SentNumber $ 11

IP $ 15

MessageID $ 15

SendingMode $ 6

Contents $ 160 ;

 

%let filrf=mydir;

%let rc=%sysfunc(filename(filrf,”&path”));

%let did=%sysfunc(dopen(&filrf));

%let memcount=%sysfunc(dnum(&did));

%do i=1 %to &memcount;

AccountNum+1;

%let counter = AccountNum;

%let username&i=%sysfunc(dread(&did,&i));

 

%let filref=mydir2;

%let file=%sysfunc(filename(filref,”&path\&&username&i”));

%let op=%sysfunc(dopen(&filref));

%let flcount=%sysfunc(dnum(&op));

 

filename FT77F001 “D:\SMSGatewayData2\USERS\&&username&i\*.log”;

%do j=1 %to &flcount;

%let trans&j=%sysfunc(dread(&op,&j));

%put ‘&&username&i = ‘ &&username&i ‘&&trans&j= ‘ &&trans&j ‘&flcount = ‘ &flcount ‘&filref = ‘ &filref ‘&filrf = ‘ &filrf;

 

infile FT77F001 filename=filename eov=eov end = done length=L DSD;

INPUT DateTime : ANYDTDTM19.

UserName $

Submit $

SentNumber $

IP $

MessageID $

SendingMode $

Contents $;

output;

%end;

%end;

run;

%mend;

%etl(sms2, sms,D:\SMSGatewayData2\USERS)

这里就不做测试了,原理其实跟3.2差不多,就是将目录也参数化。

5 参考文献:

A Case Study of Importing Multiple Worksheet Files

http://www2.sas.com/proceedings/sugi31/034-31.pdf

Reading multiple files with PROC IMPORT 

http://support.sas.com/kb/24/707.html

How to Invoke PROC IMPORT to read a CSV, TAB or Delimited File  

http://ftp.sas.com/techsup/download/sample/datastep/import.html

Use PROC IMPORT to read a CSV, TAB or delimited file.sas

Reading multiple files with PROC IMPORT.sas

directory_import_dbms.sas

proc_import_dbf.html

http://ftp.sas.com/

原创文章,作者:xsmile,如若转载,请注明出处:http://www.17bigdata.com/%e8%bf%90%e7%94%a8import%e8%bf%87%e7%a8%8b%e8%bf%9b%e8%a1%8csas%e6%95%b0%e6%8d%ae%e5%af%bc%e5%85%a5%e5%ae%8c%e5%85%a8%e5%ae%9e%e7%94%a8%e6%95%99%e7%a8%8b/

联系我们

在线咨询:点击这里给我发消息

邮件:23683716@qq.com

跳至工具栏