Macros for SAS Application Developers
https://github.com/sasjs/core
Loading...
Searching...
No Matches
mp_csv2ds.sas
Go to the documentation of this file.
1/**
2 @file mp_csv2ds.sas
3 @brief Efficient import of arbitrary CSV using a dataset as template
4 @details Used to import relevant columns from a large CSV using
5 a dataset to provide the types and lengths. Assumes that a header
6 row is provided, and datarows start on line 2. Extra columns in
7 both the CSV and base dataset are ignored.
8
9 Usage:
10
11 filename mycsv temp;
12 data _null_;
13 file mycsv;
14 put 'name,age,nickname';
15 put 'John,48,Jonny';
16 put 'Jennifer,23,Jen';
17 run;
18
19 %mp_csv2ds(inref=mycsv,outds=myds,baseds=sashelp.class)
20
21
22 @param [in] inref= (0) Fileref to the CSV
23 @param [out] outds= (0) Output ds (lib.ds format)
24 @param [in] view= (NO) Set to YES or NO to determine whether the output
25 should be a view or not. Default is NO (not a view).
26 @param [in] baseds= (0)
27 Template dataset on which to create the input statement.
28 Is used to determine types, lengths, and any informats.
29
30 @version 9.2
31 @author Allan Bowe
32
33 <h4> SAS Macros </h4>
34 @li mp_abort.sas
35 @li mf_existds.sas
36
37**/
38
39%macro mp_csv2ds(inref=0,outds=0,baseds=0,view=NO);
40
41%mp_abort(iftrue=( &inref=0 )
42 ,mac=&sysmacroname
43 ,msg=%str(the INREF variable must be provided)
44)
45%mp_abort(iftrue=( %superq(outds)=0 )
46 ,mac=&sysmacroname
47 ,msg=%str(the OUTDS variable must be provided)
48)
49%mp_abort(iftrue=( &baseds=0 )
50 ,mac=&sysmacroname
51 ,msg=%str(the BASEDS variable must be provided)
52)
53%mp_abort(iftrue=( %mf_existds(&baseds)=0 )
54 ,mac=&sysmacroname
55 ,msg=%str(the BASEDS dataset (&baseds) needs to be assigned, and to exist)
56)
57
58/* count rows */
59%local hasheader; %let hasheader=0;
60data _null_;
61 if _N_ > 1 then do;
62 call symputx('hasheader',1,'l');
63 stop;
64 end;
65 infile &inref;
66 input;
67run;
68%mp_abort(iftrue=( &hasheader=0 )
69 ,mac=&sysmacroname
70 ,msg=%str(No header row in &inref)
71)
72
73/* get the variables in the CSV */
74data _data_;
75 infile &inref;
76 input;
77 length name $32;
78 do i=1 to countc(_infile_,',')+1;
79 name=upcase(scan(_infile_,i,','));
80 output;
81 end;
82 stop;
83run;
84%local csv_vars;%let csv_vars=&syslast;
85
86/* get the variables in the dataset */
87proc contents noprint data=&baseds
88 out=_data_ (keep=name type length format: informat);
89run;
90%local base_vars; %let base_vars=&syslast;
91
92proc sql undo_policy=none;
93create table &csv_vars as
94 select a.*
95 ,b.type
96 ,b.length
97 ,b.format
98 ,b.formatd
99 ,b.formatl
100 ,b.informat
101 from &csv_vars a
102 left join &base_vars b
103 on a.name=upcase(b.name)
104 order by i;
105
106/* prepare the input statement */
107%local instat dropvars;
108data _null_;
109 set &syslast end=last;
110 length in dropvars $32767;
111 retain in dropvars;
112 if missing(type) then do;
113 informat='$1.';
114 dropvars=catx(' ',dropvars,name);
115 end;
116 else if missing(informat) then do;
117 if type=1 then informat='best.';
118 else informat=cats('$',length,'.');
119 end;
120 else informat=cats(informat,'.');
121 in=catx(' ',in,name,':',informat);
122 if last then do;
123 call symputx('instat',in,'l');
124 call symputx('dropvars',dropvars,'l');
125 end;
126run;
127
128/* import the CSV */
129data &outds
130 %if %upcase(&view)=YES %then %do;
131 /view=&outds
132 %end;
133 ;
134 infile &inref dsd firstobs=2;
135 input &instat;
136 %if %length(&dropvars)>0 %then %do;
137 drop &dropvars;
138 %end;
139run;
140
141%mend mp_csv2ds;