Macros for SAS Application Developers
https://github.com/sasjs/core
Loading...
Searching...
No Matches
mp_ds2squeeze.sas
Go to the documentation of this file.
1/**
2 @file
3 @brief Create a smaller version of a dataset, without data loss
4 @details This macro will scan the input dataset and create a new one, that
5 has the minimum variable lengths needed to store the data without data loss.
6
7 Inspiration was taken from [How to Reduce the Disk Space Required by a
8 SASĀ® Data Set](https://www.lexjansen.com/nesug/nesug06/io/io18.pdf) by
9 Selvaratnam Sridharma. The end of the referenced paper presents a macro named
10 "squeeze", hence the nomenclature.
11
12 Usage:
13
14 data big;
15 length my big $32000;
16 do i=1 to 1e4;
17 my=repeat('oh my',100);
18 big='dawg';
19 special=._;
20 output;
21 end;
22 run;
23
24 %mp_ds2squeeze(work.big,outds=work.smaller)
25
26 The following will also be printed to the log (exact values may differ
27 depending on your OS and COMPRESS settings):
28
29 > MP_DS2SQUEEZE: work.big was 625MB
30
31 > MP_DS2SQUEEZE: work.smaller is 5MB
32
33 @param [in] libds The library.dataset to be squeezed
34 @param [out] outds= (work.mp_ds2squeeze) The squeezed dataset to create
35 @param [in] mdebug= (0) Set to 1 to enable DEBUG messages
36
37 <h4> SAS Macros </h4>
38 @li mf_getfilesize.sas
39 @li mf_getuniquefileref.sas
40 @li mf_getuniquename.sas
41 @li mp_getmaxvarlengths.sas
42
43 <h4> Related Programs </h4>
44 @li mp_ds2squeeze.test.sas
45
46 @version 9.3
47 @author Allan Bowe
48**/
49
50%macro mp_ds2squeeze(
51 libds,
52 outds=work.mp_ds2squeeze,
53 mdebug=0
54)/*/STORE SOURCE*/;
55%local dbg source;
56%if &mdebug=1 %then %do;
57 %put &sysmacroname entry vars:;
58 %put _local_;
59%end;
60%else %do;
61 %let dbg=*;
62 %let source=/source2;
63%end;
64
65%local optval ds fref startsize;
66%let ds=%mf_getuniquename();
67%let fref=%mf_getuniquefileref();
68%let startsize=%mf_getfilesize(libds=&libds,format=yes);
69
70%mp_getmaxvarlengths(&libds,outds=&ds)
71
72data _null_;
73 set &ds end=last;
74 file &fref;
75 /* grab the types */
76 retain dsid;
77 if _n_=1 then dsid=open("&libds",'is');
78 if dsid le 0 then do;
79 msg=sysmsg();
80 put msg=;
81 stop;
82 end;
83 type=vartype(dsid,varnum(dsid, name));
84 if last then rc=close(dsid);
85 /* write out the length statement */
86 if _n_=1 then put 'length ';
87 length len $6;
88 if type='C' then do;
89 if maxlen=0 then len='$1';
90 else len=cats('$',maxlen);
91 end;
92 else do;
93 if maxlen=0 then len='3';
94 else len=cats(maxlen);
95 end;
96 put ' ' name ' ' len;
97 if last then put ';';
98run;
99
100/* configure varlenchk - as we are explicitly shortening the variables */
101%let optval=%sysfunc(getoption(varlenchk));
102options varlenchk=NOWARN;
103
104data &outds;
105 %inc &fref &source;
106 set &libds;
107run;
108
109options varlenchk=&optval;
110
111%if &mdebug=0 %then %do;
112 proc sql;
113 drop table &ds;
114 filename &fref clear;
115%end;
116
117%put &sysmacroname: &libds was &startsize;
118%put &sysmacroname: &outds is %mf_getfilesize(libds=&outds,format=yes);
119
120%mend mp_ds2squeeze;