#!/bin/perl
# cpsmw_sas.prl -- write a SAS program (./cpsmw.sas) that reads the
# Mare-Winship March CPS uniform file, built from the ICPSR codebook text.
# by Jean Roth March 27, 2001
#
# Run with:  perl cpsmw_sas.prl
# The script opens ./cpsmw.sas itself; nothing is written to standard output,
# so redirecting stdout is not required.
#
# Output sections, in order:
#   Step 0: heading material and SAS options
#   Step 1: PROC FORMAT with one VALUE statement per labelled variable
#   Step 2: filename/data/infile and the column INPUT statement
#   Step 3: LABEL statement
#   Step 4: FORMAT statement tying variables to the Step 1 formats
#   Step 5: footing material (proc print / proc contents)

use strict;
use warnings;

# File locations.  The value-label scan and the column-layout scan read the
# codebook from two different paths, exactly as the original script did.
# NOTE(review): these are presumably the same file -- confirm before unifying.
my $OUTPUT_SAS      = './cpsmw.sas';
my $VARLIST_FILE    = './mw_varlist';
my $CODEBOOK_VALUES = '/homes/nber/jroth/text/mw_icpsr.txt';
my $CODEBOOK_LAYOUT = './mw_icpsr.txt';

# Codebook lines that open a variable entry: optional space, 1-3 digits, dot.
my $VAR_HEADER_RE = qr/^ ?(\d{1,3})\./;

# Variables whose value labels are deliberately not turned into SAS formats.
my $NO_FORMAT_RE = qr/^smsa$|^state$|^typefam$|^mainrea$|^presenc$/;

# Ordered rewrite rules used by process_varname().  Order matters: longer
# phrases must be abbreviated before the shorter phrases they contain.
#   [ whole => PATTERN, NAME ]  if PATTERN matches, the entire name becomes NAME
#   [ sub   => PATTERN, TEXT ]  every match of PATTERN is replaced by TEXT
my @VARNAME_RULES = (
    [ whole => qr/family members under 18/,              'famu18'  ],
    [ whole => qr/family members over 18/,               'famo18'  ],
    [ sub   => qr/number of persons/,                    'nump'    ],
    [ sub   => qr/public assistance/,                    'pa'      ],
    [ sub   => qr/-iii/,                                 '3'       ],
    [ sub   => qr/-ii/,                                  '2'       ],
    [ sub   => qr/-i\b/,                                 '1'       ],
    [ sub   => qr/-/,                                    ' '       ],
    [ sub   => qr/person's total/,                       'pt'      ],
    [ sub   => qr/weeks looking or layed off work last/, 'wll'     ],
    [ sub   => qr/weeks looking for work last year/,     'wlly'    ],
    [ sub   => qr/weeks worked last year/,               'wwly'    ],
    [ sub   => qr/household serial or segment number/,   'hhsnum'  ],
    [ sub   => qr/serial number/,                        'snum'    ],
    [ sub   => qr/adc recipiency/,                       'adc'     ],
    [ sub   => qr/look for/,                             'lf'      ],
    [ sub   => qr/looking for/,                          'lf'      ],
    [ sub   => qr/looking or/,                           'lo'      ],
    [ sub   => qr/last work/,                            'lw'      ],
    [ sub   => qr/class of worker/,                      'cow'     ],
    # Strip punctuation and the filler words "to", "in", "of", "or", "for".
    [ sub   => qr/[.,\/'()]|\sto\s|\sin\s| of | f?or /,  ''        ],
    [ sub   => qr/subfamily membership key/,             'subfam'  ],
    [ sub   => qr/person supplemental weight/,           'suppwgt' ],
    [ sub   => qr/basic cps weight/,                     'baswgt'  ],
    [ sub   => qr/level/,                                'lev'     ],
    [ sub   => qr/normal/,                               'n'       ],
    [ sub   => qr/weeks/,                                'wks'     ],
    [ sub   => qr/recode/,                               'r'       ],
    [ sub   => qr/reason not at work last/,              'rnawl'   ],
    [ sub   => qr/relationship/,                         'reln'    ],
    [ sub   => qr/household/,                            'hh'      ],
    [ sub   => qr/family type/,                          'famtyp'  ],
    [ sub   => qr/family/,                               'fam'     ],
    [ sub   => qr/version number/,                       'vn'      ],
    [ sub   => qr/number/,                               'num'     ],
    [ sub   => qr/current/,                              'cur'     ],
    [ sub   => qr/person/,                               'per'     ],
    [ sub   => qr/total/,                                'tot'     ],
    [ sub   => qr/\+/,                                   'p'       ],
    [ sub   => qr/full time/,                            'ft'      ],
    [ sub   => qr/full/,                                 'f'       ],
    [ sub   => qr/part time/,                            'pt'      ],
    [ sub   => qr/week/,                                 'wk'      ],
    [ sub   => qr/year/,                                 'yr'      ],
    [ sub   => qr/poverty/,                              'pov'     ],
    [ sub   => qr/weight/,                               'wgt'     ],
    [ sub   => qr/major/,                                'maj'     ],
    [ sub   => qr/minor/,                                'min'     ],
    [ whole => qr/cpi/,                                  'cpi'     ],
    [ sub   => qr/noninterview cluster/,                 'nonint'  ],
    [ whole => qr/own children under 6/,                 'ownch6'  ],
    [ sub   => qr/own children under 18/,                'ownch18' ],
    [ sub   => qr/member/,                               'mem'     ],
    [ sub   => qr/mmem/,                                 'mem'     ],
    [ whole => qr/^yr/,                                  'year'    ],
);

# process_varname($label)
# Turn a codebook variable label into a short SAS variable name: lowercase
# it, apply @VARNAME_RULES in order, drop all spaces, and keep at most the
# first seven characters.  Returns the resulting name ('' for undef input).
sub process_varname {
    my ($name) = @_;
    $name = '' unless defined $name;

    $name =~ tr/A-Z/a-z/;    # lowercase ASCII letters only
    for my $rule (@VARNAME_RULES) {
        my ($kind, $pattern, $replacement) = @$rule;
        if ($kind eq 'whole') {
            $name = $replacement if $name =~ $pattern;
        }
        else {
            $name =~ s/$pattern/$replacement/g;
        }
    }
    $name =~ s/ //g;
    return substr($name, 0, 7);
}

# _field($line, $offset, $width)
# Fixed-width column extraction.  Returns '' instead of undef when the line
# is too short to reach $offset, so callers can trim/print without checks.
sub _field {
    my ($line, $offset, $width) = @_;
    return '' if length($line) <= $offset;
    return substr($line, $offset, $width);
}

# _read_varlist($path)
# Read the variable list file.  Each line is "<anything> <varname>"; the
# name stored for a variable is the second whitespace-separated field, and
# the variable number is the 1-based LINE number (not the first field).
# Returns an array ref indexed by variable number (index 0 is unused).
# Dies if the file cannot be opened.
sub _read_varlist {
    my ($path) = @_;
    open my $fh, '<', $path or die "Can't read $path: $!";

    my @varname_of = ('');
    while (my $line = <$fh>) {
        chomp $line;
        my (undef, $name) = split /\s+/, $line, 2;
        push @varname_of, (defined $name ? $name : '');
    }
    close $fh;
    return \@varname_of;
}

# _read_value_labels($path, \@varname_of)
# Scan the codebook for "<value> = <label>" lines and return an array ref of
# hashes, one per value label that should appear in the SAS program:
#   varname  => name of the variable the label belongs to
#   is_first => 1 when this is the first value seen for that variable
#   value    => the code value, with short zero-padded codes normalised
#   label    => the label text, already wrapped in double quotes
# Scanning stops at the first line containing APPENDIX.  Lines mentioning
# "variable 41", and all values of variables 118 and 122, are skipped.
# Dies if the file cannot be opened.
sub _read_value_labels {
    my ($path, $varname_of) = @_;
    open my $fh, '<', $path or die "Can't read $path: $!";

    my @entries;
    my $var_num      = 0;    # number of the variable entry we are inside
    my $prev_var_num = 0;    # variable of the previous value line
    while (my $line = <$fh>) {
        $var_num = $1 if $line =~ $VAR_HEADER_RE;

        # Nothing after the appendix heading is a value label.
        last if $line =~ /APPENDIX/;

        next unless $line =~ / \d{1,12} =/;
        next if $line =~ /variable 41/;
        next if $var_num eq '118' || $var_num eq '122';

        # Track "first value of a new variable" before any suppression, so
        # a suppressed variable still separates its neighbours.
        my $is_first = ($prev_var_num != $var_num) ? 1 : 0;
        $prev_var_num = $var_num;

        my $varname = $varname_of->[$var_num];
        $varname = '' unless defined $varname;
        next if $varname =~ $NO_FORMAT_RE;

        # The value/label pair lives in columns 30-70 of the codebook line.
        my $snippet = _field($line, 30, 41);
        $snippet =~ s/\s+$//;
        my ($value, $label) = split /=/, $snippet, 2;
        $value = '' unless defined $value;
        $label = '' unless defined $label;
        for ($value, $label) {
            s/^\s+//;
            s/\s+$//;
        }

        # Normalise zero-padded codes: 00-09 -> 0-9, 000 and 000000 -> 0.
        $value =~ s/^0([0-9])$/$1/;
        $value = '0' if $value eq '000' || $value eq '000000';

        $label .= ' [1964-1975 only]' if $varname =~ /wwly2/;

        push @entries, {
            varname  => $varname,
            is_first => $is_first,
            value    => $value,
            label    => "\"$label\"",
        };
    }
    close $fh;
    return \@entries;
}

# _read_layout($path)
# Scan the codebook for variable entries and return an array ref of hashes:
#   varname => short SAS name derived from the label (see process_varname)
#   label   => full label text wrapped in double quotes
#   start   => first column of the field
#   length  => field width in columns
# An entry is a header line (label part in columns 5-29, column location in
# columns 72-79, either "N" or "N-M") followed by one continuation line
# whose columns 5-29 hold the rest of the label.  Header lines are only
# recognised once the "MARCH CPS UNIFORM FILE CODEBOOK" marker has been
# seen.  A header on the last line of the file has no continuation and is
# not returned.  Dies if the file cannot be opened.
sub _read_layout {
    my ($path) = @_;
    open my $fh, '<', $path or die "Can't read $path: $!";

    my @columns;
    my $in_codebook = 0;
    my $pending;    # header parsed on the previous line, awaiting its label tail
    while (my $line = <$fh>) {
        chomp $line;
        $in_codebook = 1 if $line =~ /MARCH CPS UNIFORM FILE CODEBOOK/;

        if ($pending) {
            # The name is derived from the untrimmed two-part label.
            my $label = $pending->{label} . ' ' . _field($line, 5, 25);
            my $varname = process_varname($label);
            $label =~ s/\s+$//;
            push @columns, {
                varname => $varname,
                label   => "\"$label\"",
                start   => $pending->{start},
                length  => $pending->{length},
            };
            undef $pending;
        }
        elsif ($in_codebook && $line =~ $VAR_HEADER_RE) {
            my $label = _field($line, 5, 25);
            $label =~ s/\s+$//;

            my $loc = _field($line, 72, 8);
            $loc =~ s/ //g;

            my ($start, $length);
            if ($loc =~ /-/) {
                my ($first, $last) = split /-/, $loc, 2;
                $start  = $first;
                $length = $last - $first + 1;
            }
            else {
                $start  = $loc;
                $length = 1;
            }
            $pending = { label => $label, start => $start, length => $length };
        }
    }
    close $fh;
    return \@columns;
}

# main()
# Read the inputs, then write the complete SAS program to $OUTPUT_SAS.
# All inputs are read before the output is opened so that a missing input
# does not leave a truncated SAS file behind.
sub main {
    my $date = `date`;
    $date = '' unless defined $date;
    chomp $date;

    my $varname_of   = _read_varlist($VARLIST_FILE);
    my $value_labels = _read_value_labels($CODEBOOK_VALUES, $varname_of);
    my $columns      = _read_layout($CODEBOOK_LAYOUT);

    open my $out, '>', $OUTPUT_SAS or die "Can't write $OUTPUT_SAS: $!";

    # Step 0: Print out heading material
    print {$out}
        "\n/*------------------------------------------------------------------------------------*/\n",
        "/*by Jean Roth\t$date\tPlease report errors to jroth\@nber.org\n",
        " Make two changes:\n",
        " (1) specify the output file location in the 'libname' statement\n",
        " (2) specify the input file name/location & year in the 'filename' statement\n",
        " Other changes may be user and platform specific. */\n",
        "/*------------------------------------------------------------------------------------*/\n\n ",
        "options obs=100;\n",
        "options compress=yes;\n",
        "*change compress=no if using a conversion package such as stat/transfer;\n",
        "options formdlim = \" \";\n",
        "\nlibname out '/tmp/';\n";

    # Step 1: Create SAS PROC FORMAT with the VALUE Statement
    print {$out}
        "/* Note: some value labels are longer than 32 characters */\n",
        "PROC FORMAT;\n";
    my $format_count = 0;
    for my $entry (@$value_labels) {
        if ($entry->{is_first}) {
            $format_count++;
            printf {$out} ";\nVALUE %-8s\t(default=32)\n", "P${format_count}L";
        }
        printf {$out} "\t%-10s= %-32s\n", $entry->{value}, $entry->{label};
    }
    print {$out} ";\n";

    # Step 2: Create SAS INPUT statement
    print {$out}
        "\nfilename raw pipe \"zcat /home/data/mare_winship/cpsmw64.Z \";\n",
        "data temp;\n",
        "infile raw lrecl = 386 missover ;\n",
        "\nINPUT\n\n";
    for my $col (@$columns) {
        printf {$out} "\@%-4s %-10s %4s. \n",
            $col->{start}, $col->{varname}, $col->{length};
    }
    print {$out} "\n\n;\n";

    # Step 3: Create SAS Labels
    print {$out} "\nLABEL\n";
    for my $col (@$columns) {
        printf {$out} "\t%-10s= %-40s\n", $col->{varname}, $col->{label};
    }
    print {$out} "\n\n;\n";

    # Step 4: Create SAS FORMAT statement.  The P<n>L numbering is derived
    # from the same entries as Step 1, so the two always agree.
    print {$out} "\nFORMAT\n";
    $format_count = 0;
    for my $entry (@$value_labels) {
        next unless $entry->{is_first};
        $format_count++;
        printf {$out} "\t%-10s %-10s\n", $entry->{varname}, "P${format_count}L.";
    }
    print {$out} ";\n";

    # Step 5: Print out Footing material
    print {$out}
        "proc print data=temp (obs=60);\n",
        "proc contents data=temp;\n";

    close $out or die "Can't finish writing $OUTPUT_SAS: $!";
    return;
}

# Run only when executed as a script, so the file can also be loaded with
# require (e.g. to exercise process_varname) without touching any files.
main() unless caller;

1;