#!/bin/perl
#
# Generate Stata input files for the Mare-Winship March CPS uniform file.
#
# Run the perl program with:  perl cpsmw_stata.prl
#
# Reads the ICPSR codebook text and writes, in the current directory:
#   cpsmw.dct - Stata dictionary (column position, storage type, name, label)
#   cpsmw.do  - Stata do-file that reads the data and defines value labels
#
# Check for these:
# (1) add $ to character variable names in the input statement
# (2) String variables are <= 200 chars wide

use strict;
use warnings;

# Input and output locations.
my $CODEBOOK = '/homes/nber/jroth/text/mw_icpsr.txt';
my $VARLIST  = './mw_varlist';
my $DO_FILE  = './cpsmw.do';
my $DCT_FILE = './cpsmw.dct';

# A codebook line that opens a variable entry: optional blank, a one- to
# three-digit variable number, then a period.
my $VARIABLE_HEADER = qr/^ ?\d{1,3}\./;

# Variable numbers whose value labels are never written.
my %NO_VALUE_LABELS = map { $_ => 1 } ( 118, 122 );

# Variable names whose value labels are never written.
my $UNLABELLED_NAME = qr/^smsa$|^state$|^typefam$|^mainrea$|^presenc$/;

# Abbreviations applied, in this order, before punctuation is stripped.
# Order matters: longer phrases must be tried before their sub-phrases.
my @PHRASE_RULES = (
    [ qr/number of persons/                    => 'nump' ],
    [ qr/public assistance/                    => 'pa' ],
    [ qr/-iii/                                 => '3' ],
    [ qr/-ii/                                  => '2' ],
    [ qr/-i\b/                                 => '1' ],
    [ qr/-/                                    => ' ' ],
    [ qr/person's total/                       => 'pt' ],
    [ qr/weeks looking or layed off work last/ => 'wll' ],
    [ qr/weeks looking for work last year/     => 'wlly' ],
    [ qr/weeks worked last year/               => 'wwly' ],
    [ qr/household serial or segment number/   => 'hhsnum' ],
    [ qr/serial number/                        => 'snum' ],
    [ qr/adc recipiency/                       => 'adc' ],
    [ qr/look for/                             => 'lf' ],
    [ qr/looking for/                          => 'lf' ],
    [ qr/looking or/                           => 'lo' ],
    [ qr/last work/                            => 'lw' ],
    [ qr/class of worker/                      => 'cow' ],
);

# Punctuation and filler words removed outright.
# NOTE(review): the original pattern ended in an empty alternative
# (" f?or |"), which is a no-op; it may be the remains of a lost token --
# confirm against the upstream script.
my $NOISE = qr{\.|,|/|'|\(|\)|\sto\s|\sin\s| of | f?or };

# Abbreviations applied, in this order, after punctuation is stripped.
my @WORD_RULES = (
    [ qr/subfamily membership key/   => 'subfam' ],
    [ qr/person supplemental weight/ => 'suppwgt' ],
    [ qr/basic cps weight/           => 'baswgt' ],
    [ qr/level/                      => 'lev' ],
    [ qr/normal/                     => 'n' ],
    [ qr/weeks/                      => 'wks' ],
    [ qr/recode/                     => 'r' ],
    [ qr/reason not at work last/    => 'rnawl' ],
    [ qr/relationship/               => 'reln' ],
    [ qr/household/                  => 'hh' ],
    [ qr/family type/                => 'famtyp' ],
    [ qr/family/                     => 'fam' ],
    [ qr/version number/             => 'vn' ],
    [ qr/number/                     => 'num' ],
    [ qr/current/                    => 'cur' ],
    [ qr/person/                     => 'per' ],
    [ qr/total/                      => 'tot' ],
    [ qr/\+/                         => 'p' ],
    [ qr/full time/                  => 'ft' ],
    [ qr/full/                       => 'f' ],
    [ qr/part time/                  => 'pt' ],
    [ qr/week/                       => 'wk' ],
    [ qr/year/                       => 'yr' ],
    [ qr/poverty/                    => 'pov' ],
    [ qr/weight/                     => 'wgt' ],
    [ qr/major/                      => 'maj' ],
    [ qr/minor/                      => 'min' ],
);

# Entry point: write the do-file preamble, the dictionary, then the labels.
sub main {
    my $date = `date`;
    $date = '' unless defined $date;
    chomp $date;

    open my $do_fh, '>', $DO_FILE or die "Can't write $DO_FILE: $!";
    write_do_header( $do_fh, $date );
    write_dictionary($date);
    write_value_labels( $do_fh, read_varlist() );
    close $do_fh or die "Can't finish writing $DO_FILE: $!";
    return;
}

# Step 0: Print beginning of Stata .do file.
#   $out  - handle open for writing on the do-file
#   $date - timestamp string embedded in the header comment
sub write_do_header {
    my ( $out, $date ) = @_;
    print {$out}
        "log using cpsmw, replace\n",
        "set mem 50m\n",
        "*by Jean Roth $date\n*Please report errors to jroth\@nber.org\n",
        "*Change output file name/location as desired\n\n",
        "*run by opening stata, set mem 50m, \n",
        "*use cpsmwYY, where YY is the year of interest, do cpsmw\n",
        "\nclear\n",
        "quietly infile using cpsmw\n";
    return;
}

# Steps 1 and 2: scan the codebook and write one _column() line per variable
# to the dictionary file.
#
# A variable entry starts on a header line; its label is columns 5-29 of
# that line joined (with one blank) to columns 5-29 of the following line,
# and its position is read from columns 72-79 as "start" or "start-end".
# The dictionary line is therefore emitted when the line AFTER the header is
# read, and that following line is never itself treated as a header.
#   $date - timestamp string embedded in the header comment
# Dies if the codebook cannot be read or the dictionary cannot be written.
sub write_dictionary {
    my ($date) = @_;

    open my $in,  '<', $CODEBOOK or die "Can't read $CODEBOOK: $!";
    open my $out, '>', $DCT_FILE or die "Can't write $DCT_FILE: $!";

    print {$out}
        "dictionary using /home/data/mare_winship/cpsmw64.raw {\n",
        "*Change the year and input file location as needed\n",
        "*by Jean Roth $date\n",
        "*Please report errors to jroth\@nber.org\n",
        "*save this unit as cpsmw.dct\n",
        "*to read in basic data, at Stata prompt type:\n",
        "* quietly infile using cpsmw \n";

    my $in_codebook = 0;    # set once the codebook title line has been seen
    my $pending;            # header fields waiting for their second label line

    while ( my $line = <$in> ) {
        chomp $line;
        $in_codebook = 1 if $line =~ /MARCH CPS UNIFORM FILE CODEBOOK/;

        if ($pending) {
            my $label   = $pending->{label} . ' ' . text_field( $line, 5, 25 );
            my $varname = process_varname($label);
            $label =~ s/\s+$//;    # remove trailing spaces

            # The format has a sixth, trailing %-4s that never had a value;
            # pass an explicit empty string so the output is unchanged and
            # no "Missing argument" warning is raised.
            printf {$out} "_column(%-4s) %5s %-8s %4s %-40s %-4s\n",
                $pending->{start}, $pending->{type}, $varname,
                '%' . $pending->{width} . 'f', qq{"$label"}, '';
            undef $pending;
        }
        elsif ( $in_codebook && $line =~ $VARIABLE_HEADER ) {
            my $label = text_field( $line, 5, 25 );
            $label =~ s/\s+$//;    # remove trailing spaces

            my $loc = text_field( $line, 72, 8 );
            $loc =~ s/ //g;

            my ( $start, $width ) = ( $loc, 1 );
            if ( $loc =~ /-/ ) {
                ( $start, my $last ) = split /-/, $loc, 2;
                $width = $last - $start + 1;
            }

            $pending = {
                label => $label,
                start => $start,
                width => $width,
                type  => ( $width > 3 ? 'float' : 'int' ),
            };
        }
    }

    print {$out} "\n\n}\n";
    close $out or die "Can't finish writing $DCT_FILE: $!";
    close $in;
    return;
}

# Read the variable-name list: one "<id> <name>" pair per line, split on the
# first run of whitespace. Names are stored by line position, counting from
# 1, so the returned array's slot 0 is always undef.
# NOTE(review): the label pass indexes this list by codebook variable number,
# so the file is presumably ordered by variable number -- confirm.
# Returns a reference to the array of names. Dies if the file is unreadable.
sub read_varlist {
    open my $in, '<', $VARLIST or die "Can't read $VARLIST: $!";
    my @varname_for = (undef);
    while ( my $line = <$in> ) {
        chomp $line;
        my ( undef, $name ) = split /\s+/, $line, 2;
        push @varname_for, $name;
    }
    close $in;
    return \@varname_for;
}

# Step 3: Label the values of the variables.
#
# Re-scans the codebook up to the first line containing "APPENDIX". Every
# line containing " <digits> =" contributes one value/label pair taken from
# columns 30-70; the first pair of each variable is preceded by a
# "label values" / "label define" header named P<n>L.
#   $out         - handle open for writing on the do-file
#   $varname_for - array ref mapping variable number to Stata variable name
# Dies if the codebook cannot be read.
# NOTE(review): the final "label define" is never followed by the ";" that
# "#delimit ;" expects; output is left as the original produced it.
sub write_value_labels {
    my ( $out, $varname_for ) = @_;

    print {$out} "\n\n*All the rest are labels\n\n", "#delimit ;\n";

    open my $in, '<', $CODEBOOK or die "Can't read $CODEBOOK: $!";

    my $var_number      = 0;    # number of the variable entry being read
    my $last_var_number = 0;    # variable that owned the previous value line
    my $label_count     = 0;    # how many label sets have been started

    while ( my $line = <$in> ) {
        if ( $line =~ $VARIABLE_HEADER ) {
            chomp $line;
            ($var_number) = split /\./, $line, 2;
            $var_number =~ s/ //g;
        }

        # Nothing from the appendix onwards is ever labelled.
        last if $line =~ /APPENDIX/;

        next unless $line =~ / \d{1,12} =/;
        next if $line =~ /variable 41/;
        next if $NO_VALUE_LABELS{$var_number};

        my $snip = text_field( $line, 30, 41 );
        $snip =~ s/\s+$//;
        my ( $value, $value_label ) = split /=/, $snip, 2;
        for my $part ( $value, $value_label ) {
            $part = '' unless defined $part;
            $part =~ s/^\s+//;    # remove leading spaces
            $part =~ s/\s+$//;    # remove trailing spaces
        }

        # Codes "00".."09" lose their leading zero; "000" and "000000"
        # collapse to "0". Any other code is left as written.
        $value =~ s/^0([0-9])$/$1/;
        $value =~ s/^(?:000|000000)$/0/;

        my $is_first_value = ( $last_var_number != $var_number );
        $last_var_number = $var_number;

        my $varname = $varname_for->[$var_number];
        $varname = '' unless defined $varname;

        $value_label .= ' [1964-1975 only]' if $varname =~ /wwly2/;
        next if $varname =~ $UNLABELLED_NAME;

        if ($is_first_value) {
            $label_count++;
            my $label_name = 'P' . $label_count . 'L';
            printf {$out} ";\nlabel values %-8s %-8s\nlabel define %-8s\n",
                $varname, $label_name . ';', $label_name;
        }
        printf {$out} "\t%-10s %-32s\n", $value, qq{"$value_label"};
    }

    close $in;
    return;
}

# Return up to $width characters of $line starting at $offset, or the empty
# string when the line is too short to reach $offset.
sub text_field {
    my ( $line, $offset, $width ) = @_;
    return '' if length($line) < $offset;
    return substr( $line, $offset, $width );
}

# Apply a list of [pattern, replacement] pairs to $text, in order, each
# globally, and return the result.
sub _abbreviate {
    my ( $text, $rules ) = @_;
    for my $rule ( @{$rules} ) {
        my ( $pattern, $replacement ) = @{$rule};
        $text =~ s/$pattern/$replacement/g;
    }
    return $text;
}

# Turn a codebook variable label into a short variable name.
#
# The label is lower-cased (A-Z only), run through the abbreviation rules
# above, stripped of blanks and cut to at most seven characters. A few
# labels are replaced wholesale: the "family members under/over 18" labels,
# anything containing "cpi", "own children under 6", and anything that
# begins with "yr" once "year" has been abbreviated.
#   $label - the label text; undef is treated as the empty string
# Returns the derived name. The argument itself is not modified.
sub process_varname {
    my ($label) = @_;
    my $name = defined $label ? $label : '';

    $name =~ tr/A-Z/a-z/;    # change to lowercase
    $name = 'famu18' if $name =~ /family members under 18/;
    $name = 'famo18' if $name =~ /family members over 18/;

    $name = _abbreviate( $name, \@PHRASE_RULES );
    $name =~ s/$NOISE//g;
    $name = _abbreviate( $name, \@WORD_RULES );

    $name = 'cpi' if $name =~ /cpi/;
    $name =~ s/noninterview cluster/nonint/g;
    $name = 'ownch6' if $name =~ /own children under 6/;
    $name =~ s/own children under 18/ownch18/g;
    $name =~ s/member/mem/g;
    $name =~ s/mmem/mem/g;
    $name = 'year' if $name =~ /^yr/;

    $name =~ s/ //g;
    return substr( $name, 0, 7 );
}    #end sub process_varname

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. with require) without touching any files.
main() unless caller;