Thursday, January 3, 2008

Reading all your Netflix Challenge data into SAS

/* The filenames MID1, MID2, ..., MID17770 are assumed to have been created
   already as macro variables.
   Hint: build them with PROC SQL using the INTO clause. */

/* Create netdown.superdat as an EMPTY shell (zero observations) that only
   defines the column layout used as the PROC APPEND base:
     mid     - $7   movie id
     cid     - 8    customer ID
     rank    - 8    score
     rank_dt - $10  date of score
   Without STOP the step would execute once and write a single all-missing
   observation, which would then remain in the final table. */
data netdown.superdat;
  length mid $7 cid 8 rank 8 rank_dt $10;
  /* Touch every variable so the log has no "uninitialized" notes. */
  call missing(mid, cid, rank, rank_dt);
  /* End the step before the implicit OUTPUT: structure only, no rows. */
  stop;
run;


/*
 * NETDATA: read the first &tfiles training files and append their rows to
 * netdown.superdat.
 *
 * Parameter
 *   tfiles = number of files to load, written as a non-negative integer.
 *            File names come from the macro variables MID1 ... MID&tfiles,
 *            each holding a file name without the .txt extension.
 *            The historical default (j) is not a number, so a call without
 *            tfiles= now stops with a clear message instead of an %EVAL error.
 *
 * For every file the macro
 *   1. reads cid, rank and rank_dt from the comma-delimited text file,
 *      skipping its first line (firstobs=2);
 *   2. stamps each row with mid = characters 4-10 of the MID&i value;
 *   3. appends the rows to netdown.superdat.
 *
 * netdown.add is the staging table: it is overwritten on every iteration and
 * holds the rows of the last file read when the macro ends. The former
 * scratch tables netdown.mm and netdown.add1 are no longer created.
 */
%macro netdata(tfiles=j);
  /* Keep the loop index private so a caller's own I is never overwritten. */
  %local i;

  %if %length(&tfiles) = 0 %then
    %put ERROR: NETDATA - tfiles is blank. Pass the number of files to read.;
  %else %if %sysfunc(notdigit(&tfiles)) %then
    %put ERROR: NETDATA - tfiles=&tfiles is not a non-negative integer.;
  %else %do;
    %do i = 1 %to &tfiles;
      %if %symexist(mid&i) = 0 %then
        %put WARNING: NETDATA - macro variable MID&i does not exist, file &i skipped.;
      %else %do;
        data netdown.add(keep=mid cid rank rank_dt);
          /* Same layout and variable order as netdown.superdat. */
          length mid $7 cid 8 rank 8 rank_dt $10;
          /* "&&MID&i...txt" resolves to <value of MID&i>.txt: the first dot
             ends &i, the second ends &MIDn, the third is the literal dot.
             TRUNCOVER alone handles short lines (MISSOVER was redundant). */
          infile "c:\Documents and Settings\All Users\Documents\NetFlixChallenge\training_set\&&MID&i...txt"
                 dsd dlm="," truncover firstobs=2;
          /* The colon modifier keeps rank_dt delimiter-driven like the other
             fields instead of a fixed 10-column read. */
          input cid rank rank_dt :$10.;
          /* Movie id = 7 characters of the file name starting at position 4. */
          mid = substr("&&mid&i", 4, 7);
        run;

        proc append base=netdown.superdat data=netdown.add;
        run;
      %end;
    %end;
  %end;
%mend netdata;

/* Run the loader for files 1 through 17770 (macro variables MID1 ... MID17770);
   each file's rows are appended to netdown.superdat. */
%netdata(tfiles=17770);

No comments: