Macros for SAS Application Developers
https://github.com/sasjs/core
Loading...
Searching...
No Matches
mp_ds2cards.sas
Go to the documentation of this file.
1/**
2 @file
3 @brief Create a CARDS file from a SAS dataset.
4 @details Uses dataset attributes to convert all data into datalines.
5 Running the generated file will rebuild the original dataset. Includes
6 support for large decimals, binary data, PROCESSED_DTTM columns, and
7 alternative encoding. If the input dataset is empty, the cards file will
8 still be created.
9
10 Additional support to generate a random sample and max rows.
11
12 Usage:
13
14 %mp_ds2cards(sashelp.class
15 , tgt_ds=work.class
16 , cards_file= "C:\temp\class.sas"
17 , showlog=NO
18 , maxobs=5
19 )
20
21 TODO:
22 - labelling the dataset
23 - explicity setting a unix LF
24 - constraints / indexes etc
25
26 @param [in] base_ds Should be two level - eg work.blah. This is the table
27 that is converted to a cards file.
28 @param [in] tgt_ds= Table that the generated cards file would create.
29 Optional - if omitted, will be same as BASE_DS.
30 @param [out] cards_file= ("%sysfunc(pathname(work))/cardgen.sas") Location in
31 which to write the (.sas) cards file
32 @param [in] maxobs= (max) To limit output to the first <code>maxobs</code>
33 observations, enter an integer here.
34 @param [in] random_sample= (NO) Set to YES to generate a random sample of
35 data. Can be quite slow.
36 @param [in] showlog= (YES) Whether to show generated cards file in the SAS
37 log. Valid values:
38 @li YES
39 @li NO
40 @param [in] outencoding= Provide encoding value for file statement (eg utf-8)
41 @param [in] append= (NO) If NO then will rebuild the cards file if it already
42 exists, otherwise will append to it. Used by the mp_lib2cards.sas macro.
43
44 <h4> Related Macros </h4>
45 @li mp_lib2cards.sas
46 @li mp_ds2inserts.sas
47 @li mp_mdtablewrite.sas
48
49 @version 9.2
50 @author Allan Bowe
51 @cond
52**/
53
54%macro mp_ds2cards(base_ds, tgt_ds=
55 ,cards_file="%sysfunc(pathname(work))/cardgen.sas"
56 ,maxobs=max
57 ,random_sample=NO
58 ,showlog=YES
59 ,outencoding=
60 ,append=NO
61)/*/STORE SOURCE*/;
62%local i setds nvars;
63
64%if not %sysfunc(exist(&base_ds)) %then %do;
65 %put %str(WARN)ING: &base_ds does not exist;
66 %return;
67%end;
68
69%if %index(&base_ds,.)=0 %then %let base_ds=WORK.&base_ds;
70%if (&tgt_ds = ) %then %let tgt_ds=&base_ds;
71%if %index(&tgt_ds,.)=0 %then %let tgt_ds=WORK.%scan(&base_ds,2,.);
72%if ("&outencoding" ne "") %then %let outencoding=encoding="&outencoding";
73%if ("&append" = "" or "&append" = "NO") %then %let append=;
74%else %let append=mod;
75
76/* get varcount */
77%let nvars=0;
78proc sql noprint;
79select count(*) into: nvars from dictionary.columns
80 where upcase(libname)="%scan(%upcase(&base_ds),1)"
81 and upcase(memname)="%scan(%upcase(&base_ds),2)";
82%if &nvars=0 %then %do;
83 %put %str(WARN)ING: Dataset &base_ds has no variables, will not be converted.;
84 %return;
85%end;
86
87/* get indexes */
88proc sort
89 data=sashelp.vindex(
90 where=(upcase(libname)="%scan(%upcase(&base_ds),1)"
91 and upcase(memname)="%scan(%upcase(&base_ds),2)")
92 )
93 out=_data_;
94 by indxname indxpos;
95run;
96
97%local indexes;
98data _null_;
99 set &syslast end=last;
100 if _n_=1 then call symputx('indexes','(index=(','l');
101 by indxname indxpos;
102 length vars $32767 nom uni $8;
103 retain vars;
104 if first.indxname then do;
105 idxcnt+1;
106 nom='';
107 uni='';
108 vars=name;
109 end;
110 else vars=catx(' ',vars,name);
111 if last.indxname then do;
112 if nomiss='yes' then nom='/nomiss';
113 if unique='yes' then uni='/unique';
114 call symputx('indexes'
115 ,catx(' ',symget('indexes'),indxname,'=(',vars,')',nom,uni)
116 ,'l');
117 end;
118 if last then call symputx('indexes',cats(symget('indexes'),'))'),'l');
119run;
120
121
122data;run;
123%let setds=&syslast;
124proc sql
125%if %datatyp(&maxobs)=NUMERIC %then %do;
126 outobs=&maxobs;
127%end;
128 ;
129 create table &setds as select * from &base_ds
130%if &random_sample=YES %then %do;
131 order by ranuni(42)
132%end;
133 ;
134reset outobs=max;
135create table datalines1 as
136 select name,type,length,varnum,format,label from dictionary.columns
137 where upcase(libname)="%upcase(%scan(&base_ds,1))"
138 and upcase(memname)="%upcase(%scan(&base_ds,2))";
139
140/**
141 Due to long decimals cannot use best. format
142 So - use bestd. format and then use character functions to strip trailing
143 zeros, if NOT an integer or missing!! Cannot use int() as it upsets
144 note2err when there are missings.
145 resolved code = ifc( mod(coalesce(VARIABLE,0),1)=0
146 ,put(VARIABLE,best32.)
147 ,substrn(put(VARIABLE,bestd32.),1
148 ,findc(put(VARIABLE,bestd32.),'0','TBK')));
149**/
150
151data datalines_2;
152 format dataline $32000.;
153 set datalines1 (where=(upcase(name) not in
154 ('PROCESSED_DTTM','VALID_FROM_DTTM','VALID_TO_DTTM')));
155 if type='num' then dataline=
156 cats('ifc(mod(coalesce(',name,',0),1)=0
157 ,put(',name,',best32.-l)
158 ,substrn(put(',name,',bestd32.-l),1
159 ,findc(put(',name,',bestd32.-l),"0","TBK")))');
160 /**
161 * binary data must be converted, to store in text format. It is identified
162 * by the presence of the $HEX keyword in the format.
163 */
164 else if upcase(format)=:'$HEX' then
165 dataline=cats('put(trim(',name,'),',format,')');
166 /**
167 * There is no easy way to store line breaks in a cards file.
168 * To discuss this, use: https://github.com/sasjs/core/issues/80
169 * Removing all nonprintables with kw (keep writeable)
170 */
171 else dataline=cats('compress(',name,', ,"kw")');
172run;
173
174proc sql noprint;
175select dataline into: datalines separated by ',' from datalines_2;
176
177%local
178 process_dttm_flg
179 valid_from_dttm_flg
180 valid_to_dttm_flg
181;
182%let process_dttm_flg = N;
183%let valid_from_dttm_flg = N;
184%let valid_to_dttm_flg = N;
185data _null_;
186 set datalines1 ;
187/* build attrib statement */
188 if type='char' then type2='$';
189 if strip(format) ne '' then format2=cats('format=',format);
190 if strip(label) ne '' then label2=cats('label=',quote(trim(label)));
191 str1=catx(' ',(put(name,$33.)||'length=')
192 ,put(cats(type2,length),$7.)||format2,label2);
193
194
195/* Build input statement */
196 if upcase(format)=:'$HEX' then type3=':'!!format;
197 else if type='char' then type3=':$char.';
198 str2=put(name,$33.)||type3;
199
200
201 if(upcase(name) = "PROCESSED_DTTM") then
202 call symputx("process_dttm_flg", "Y", "L");
203 if(upcase(name) = "VALID_FROM_DTTM") then
204 call symputx("valid_from_dttm_flg", "Y", "L");
205 if(upcase(name) = "VALID_TO_DTTM") then
206 call symputx("valid_to_dttm_flg", "Y", "L");
207
208
209 call symputx(cats("attrib_stmt_", put(_N_, 8.)), str1, "L");
210 call symputx(cats("input_stmt_", put(_N_, 8.))
211 , ifc(upcase(name) not in
212 ('PROCESSED_DTTM','VALID_FROM_DTTM','VALID_TO_DTTM'), str2, ""), "L");
213run;
214
215data _null_;
216 file &cards_file. &outencoding lrecl=32767 termstr=nl &append;
217 length __attrib $32767;
218 if _n_=1 then do;
219 put '/**';
220 put ' @file';
221 put " @brief Datalines for %upcase(%scan(&base_ds,2)) dataset";
222 put " @details Generated by %nrstr(%%)mp_ds2cards()";
223 put " Source: https://github.com/sasjs/core";
224 put ' @cond ';
225 put '**/';
226 put "data &tgt_ds &indexes;";
227 put "attrib ";
228 %do i = 1 %to &nvars;
229 __attrib=symget("attrib_stmt_&i");
230 put __attrib;
231 %end;
232 put ";";
233
234 %if &process_dttm_flg. eq Y %then %do;
235 put 'retain PROCESSED_DTTM %sysfunc(datetime());';
236 %end;
237 %if &valid_from_dttm_flg. eq Y %then %do;
238 put 'retain VALID_FROM_DTTM &low_date;';
239 %end;
240 %if &valid_to_dttm_flg. eq Y %then %do;
241 put 'retain VALID_TO_DTTM &high_date;';
242 %end;
243 if __nobs=0 then do;
244 put 'call missing(of _all_);/* avoid uninitialised notes */';
245 put 'stop;';
246 put 'run;';
247 end;
248 else do;
249 put "infile cards dsd;";
250 put "input ";
251 %do i = 1 %to &nvars.;
252 %if(%length(&&input_stmt_&i..)) %then
253 put " &&input_stmt_&i..";
254 ;
255 %end;
256 put ";";
257 put 'missing a b c d e f g h i j k l m n o p q r s t u v w x y z _;';
258 put "datalines4;";
259 end;
260 end;
261 set &setds end=__lastobs nobs=__nobs;
262/* remove all formats for write purposes - some have long underlying decimals */
263 format _numeric_ best30.29;
264 length __dataline $32767;
265 __dataline=catq('cqtmb',&datalines);
266 put __dataline;
267 if __lastobs then do;
268 put ';;;;';
269 put 'run;';
270 put '/** @endcond **/';
271 stop;
272 end;
273run;
274proc sql;
275 drop table &setds;
276quit;
277
278%if &showlog=YES %then %do;
279 data _null_;
280 infile &cards_file lrecl=32767;
281 input;
282 put _infile_;
283 run;
284%end;
285
286%put NOTE: CARDS FILE SAVED IN:;
287%put NOTE-;%put NOTE-;
288%put NOTE- %sysfunc(dequote(&cards_file.));
289%put NOTE-;%put NOTE-;
290%mend mp_ds2cards;
291/** @endcond **/