#!/bin/perl
# Writes the SPSS syntax file ./cpsmw.sps from the codebook ./mw_icpsr.txt
# and the variable list ./mw_varlist.
#
# run the perl program with perl cpsmw_spss.prl
# Check for these:
# (0) Do not use tabs to indent in SPSS
# (1) add $ to character variable names in the input statement
# (2) String variables are <= 200 chars wide

use strict;
use warnings;

# Name currently being abbreviated.  process_varname() rewrites it in place,
# so it has to stay a package variable.
our $varname;

my $CODEBOOK_PATH = './mw_icpsr.txt';
my $VARLIST_PATH  = './mw_varlist';
my $SPSS_PATH     = './cpsmw.sps';

# A variable entry starts with its 1-3 digit number followed by a period.
my $ENTRY_RE = qr/^ ?\d{1,3}\./;

# Variables for which no value labels are printed.
my $UNLABELLED_RE = qr/^smsa$|^state$|^typefam$|^mainrea$|^presenc$/;

# Read every input before creating the output, so that a missing input file
# aborts the run instead of leaving a truncated syntax file behind.
my @codebook  = read_lines($CODEBOOK_PATH);
my @variables = collect_variables(\@codebook);
my @name_of   = read_varlist($VARLIST_PATH);

open my $out, '>', $SPSS_PATH or die "Can't write $SPSS_PATH: $!";

my $date = `date`;
$date = '' unless defined $date;
chomp($date);

print {$out} "*Change the input file location as needed.\n";
print {$out} "*by Jean Roth $date \n";
print {$out} "*Please report errors to jroth\@nber.org \n";
print {$out} "*Change output file name/location as desired.\n";
print {$out} "input program. \n";
print {$out} "data list file='c:\\cpsmw.raw' /\n";

# Step 1: name and column location of every variable.
for my $var (@variables) {
    printf {$out} (" %-10s %-12s\n", $var->{name}, $var->{loc});
}
print {$out} ".\n";

# Step 2: LABEL the variables.  The names come from the same list as in
# step 1, so both listings always agree.
print {$out} "variable labels\n";
for my $var (@variables) {
    printf {$out} (" %-10s %-40s\n", $var->{name}, $var->{label});
}
print {$out} ".\n";

# Step 3: LABEL the _values_ of the variables.
print {$out} "value labels\n";
print_value_labels($out, \@codebook, \@name_of);

# Step 6: Print out footer
print {$out} ".\n";
print {$out} "end input program.\n";
print {$out} "list.\n";
print {$out} "execute.\n";
print {$out} "save outfile = 'c:\\cpsmw.sav'.\n";

close $out or die "Can't finish writing $SPSS_PATH: $!";

# Subroutines

# Returns all lines of the file at $path with the line endings removed.
# Dies when the file cannot be opened.
sub read_lines {
    my ($path) = @_;
    open my $fh, '<', $path or die "Can't read $path: $!";
    chomp(my @lines = <$fh>);
    close $fh;
    return @lines;
}

# Returns up to $width characters of $text starting at the 0-based $offset,
# or the empty string when $text is too short to reach $offset.
sub column {
    my ($text, $offset, $width) = @_;
    return length($text) >= $offset ? substr($text, $offset, $width) : '';
}

# Scans the codebook lines (array ref) for variable entries.  An entry is a
# line matching $ENTRY_RE that appears at or after the line containing
# "MARCH CPS UNIFORM FILE CODEBOOK"; the line that follows it is always taken
# as the second half of its label.  Returns a list of hash refs with the keys
#   name  - abbreviated variable name (see process_varname)
#   label - both label halves joined by a space, double-quoted
#   loc   - columns 72-79 of the entry line with the spaces removed
sub collect_variables {
    my ($lines) = @_;
    my @found;
    my $in_codebook = 0;
    my $pending;    # entry still waiting for its continuation line

    for my $line (@$lines) {
        $in_codebook = 1 if $line =~ /MARCH CPS UNIFORM FILE CODEBOOK/;

        if ($pending) {
            my $label = $pending->{label} . " " . column($line, 5, 25);

            # The name is derived from the label before its trailing blanks
            # are stripped.
            $varname = $label;
            process_varname($varname);

            $label =~ s/\s+$//;    #remove trailing spaces
            push @found, {
                name  => $varname,
                label => "\"" . $label . "\"",
                loc   => $pending->{loc},
            };
            undef $pending;
        }
        elsif ($in_codebook && $line =~ $ENTRY_RE) {
            my $label = column($line, 5, 25);
            $label =~ s/\s+$//;    #remove trailing spaces
            my $loc = column($line, 72, 8);
            $loc =~ s/ //g;
            $pending = { label => $label, loc => $loc };
        }
    }
    return @found;
}

# Reads the variable list: one variable per line, the name being everything
# after the first run of whitespace.  Returns the names as a list in which
# the first line of the file sits at index 1 (index 0 is undef).
sub read_varlist {
    my ($path) = @_;
    my @names = (undef);
    for my $line (read_lines($path)) {
        my (undef, $name) = split(/\s+/, $line, 2);
        push @names, $name;
    }
    return @names;
}

# Prints the value-label section to $fh.  $lines is the codebook (array ref)
# and $names the list returned by read_varlist (array ref), indexed by the
# variable number that starts each codebook entry.  Nothing at or after the
# first line containing "APPENDIX" is printed.
sub print_value_labels {
    my ($fh, $lines, $names) = @_;
    my $tv         = 0;    # number of the entry the current line belongs to
    my $title_done = 0;    # variable name already printed for this entry?
    my $need_slash = 0;    # every printed name but the first gets a "/"

    for my $line (@$lines) {
        if ($line =~ $ENTRY_RE) {
            ($tv) = split(/\./, $line, 2);
            $tv =~ s/ //g;
            $title_done = 0;
        }

        last if $line =~ /APPENDIX/;

        next unless $line =~ / \d{1,12} =/;
        # NOTE(review): the source had a line break between "variable" and
        # "41"; a single space is assumed here - confirm against the codebook.
        next if $line =~ /variable 41/;
        next if $tv eq '118' || $tv eq '122';

        my $snip = column($line, 30, 41);
        $snip =~ s/\s+$//;
        my ($value, $value_label) = split(/=/, $snip, 2);
        for my $part ($value, $value_label) {
            $part = '' unless defined $part;
            $part =~ s/^\s+//;    #remove leading spaces
            $part =~ s/\s+$//;    #remove trailing spaces
        }

        # Drop zero padding: 00-09 become 0-9, 000 and 000000 become 0.
        $value =~ s/^0([0-9])$/$1/;
        $value =~ s/^(?:000|000000)$/0/;

        my $name = $names->[$tv];
        $name = '' unless defined $name;

        if ($name =~ /wwly2/) {
            $value_label = $value_label . " [1964-1975 only]";
        }
        $value_label = "\"" . $value_label . "\"";

        next if $name =~ $UNLABELLED_RE;

        if (!$title_done) {
            #add a slash to variables 2 ... n
            printf {$fh} (" %-8s\n", ($need_slash ? "/" : "") . $name);
            $title_done = 1;
            $need_slash = 1;
        }
        printf {$fh} (" %-10s %-32s\n", $value, $value_label);
    }
    return;
}

# Abbreviates the label held in the package variable $varname, in place, to
# a name of at most 7 characters: lowercases it, applies the substitutions
# below in order, removes the spaces and truncates.  The argument is not
# read (every caller passes $varname itself).  Returns the new $varname.
sub process_varname {
    for ($varname) {
        tr/A-Z/a-z/;    #change to lowercase
        $_ = "famu18" if /family members under 18/;
        $_ = "famo18" if /family members over 18/;
        s/number of persons/nump/g;
        s/public assistance/pa/g;
        s/-iii/3/g;
        s/-ii/2/g;
        s/-i\b/1/g;
        s/-/ /g;
        s/person's total/pt/g;
        s/weeks looking or layed off work last/wll/g;
        s/weeks looking for work last year/wlly/g;
        s/weeks worked last year/wwly/g;
        s/household serial or segment number/hhsnum/g;
        s/serial number/snum/g;
        s/adc recipiency/adc/g;
        s/look for/lf/g;
        s/looking for/lf/g;
        s/looking or/lo/g;
        s/last work/lw/g;
        s/class of worker/cow/g;
        # punctuation and small connecting words
        s{[.,/'()]|\sto\s|\sin\s| of | f?or }{}g;
        s/subfamily membership key/subfam/g;
        s/person supplemental weight/suppwgt/g;
        s/basic cps weight/baswgt/g;
        s/level/lev/g;
        s/normal/n/g;
        s/weeks/wks/g;
        s/recode/r/g;
        s/reason not at work last/rnawl/g;
        #s/reason not at work/rnaw/g;
        s/relationship/reln/g;
        s/household/hh/g;
        s/family type/famtyp/g;
        s/family/fam/g;
        s/version number/vn/g;
        s/number/num/g;
        s/current/cur/g;
        s/person/per/g;
        s/total/tot/g;
        s/\+/p/g;
        s/full time/ft/g;
        s/full/f/g;
        s/part time/pt/g;
        s/week/wk/g;
        s/year/yr/g;
        s/poverty/pov/g;
        s/weight/wgt/g;
        s/major/maj/g;
        s/minor/min/g;
        $_ = "cpi" if /cpi/;
        s/noninterview cluster/nonint/g;
        $_ = "ownch6" if /own children under 6/;
        s/own children under 18/ownch18/g;
        s/member/mem/g;
        s/mmem/mem/g;
        $_ = "year" if /^yr/;
        s/ //g;
    }
    $varname = substr($varname, 0, 7);
    return $varname;
} #end sub process_varname