Wednesday, January 23, 2008

Subsetting an IBM dataset based on information in an outside file without affecting the integrity of the data layout

/**** A WAY TO SUBSET YOUR IBM FILE SO THAT THE INTEGRITY OF THE IBM FILE STRUCTURE IS
RETAINED FOR FURTHER PROCESSING. THE TRICKY PART IS THAT THE SUBSETTING IS BASED ON ANOTHER
FILE PRODUCED BY A COMPLEX SUBSETTING PROCESS.

In this example, the subsetting IDs are in YS.active_high (N=1429), which is read into work.subset;
The larger ibm dataset is ibm.dat (N=55,000)
The output data set is ibm_subset(N=1429)
******/

/************=================================================*********/;

/* Library that holds the ID-list datasets; the fsubset macro reads them as ys.<outname>. */
libname ys "c:\SAS\PROJECT1";

/*----------------------------------------------------------------------
  fsubset: extract from the fixed-length IBM file the records whose ID
  appears in an ID-list dataset, and write them to a new fixed-length
  file with the same 1305-byte record layout.

  Parameter:
    outname = name of the ID-list dataset in library YS (must contain
              the key variable ind_id1); also used as the base name of
              the output file c:\SAS\PROJECT1\<outname>.dat

  Work datasets created: subset, ibm, ibm_subset.

  Notes:
    - Output records are in ind_id1 order (the match-merge requires
      sorted input), not in the original file order.
    - NOTE(review): assumes ind_id1 in the ID list is a character
      variable comparable to the 8-byte key at column 1156 -- confirm.
    - NOTE(review): the write-up above the code refers to "ibm.dat",
      the filename statement reads "ib.dat" -- confirm which is right.
----------------------------------------------------------------------*/
%macro fsubset(outname=);

/* ID list: keep only the key and drop duplicate IDs so that the
   match-merge below cannot emit the same IBM record more than once. */
proc sort data=ys.&outname(keep=ind_id1) out=subset nodupkey;
  by ind_id1;
run;

filename ibm "c:\SAS\PROJECT1\ib.dat" RECFM=F LRECL=1305;

/* Read every record twice over:
     _full   - the raw 1305-byte record image. $CHAR copies the bytes
               without translation, so packed/binary fields are kept
               exactly as they are in the input file.
     ind_id1 - the 8-byte key at column 1156, translated from EBCDIC so
               it can be compared with the IDs in the list. It carries
               the same name as the key in the ID list so both datasets
               can be merged BY ind_id1. */
data ibm;
  infile ibm dsd missover;
  input @1    _full   $CHAR1305.
        @1156 ind_id1 $EBCDIC8.;
run;

proc sort data=ibm;
  by ind_id1;
run;

/* Keep only IBM records (c) whose key is in the ID list (s). Requiring
   c prevents IDs with no IBM record from producing blank output records. */
data ibm_subset;
  merge ibm(in=c) subset(in=s);
  by ind_id1;
  if c and s;
run;

filename ibm_s "c:\SAS\PROJECT1\&outname..dat" RECFM=F LRECL=1305;

/* Write the untouched record images back out, fixed-length, same LRECL. */
data _null_;
  file ibm_s RECFM=F LRECL=1305;
  set ibm_subset;
  put @1 _full $CHAR1305.;
run;

%mend fsubset;

/* Turn on the MLOGIC and MPRINT system options before running the macro. */
options mlogic mprint;
/* Run the subsetting macro once per ID-list dataset in library YS.
   NOTE(review): the write-up above the code names YS.active_high, but the
   first call passes A_HIGH -- confirm the dataset name. */
%fsubset(outname=A_HIGH);
%fsubset(outname=active_med);
%fsubset(outname=active_mhi);
%fsubset(outname=active_low);

No comments: