clear
capture log close
* NOTE(review): the line below reads as a commented-out memory setting in the source; confirm it was not meant to be active
* set mem 3000m
log using clean_natality_1940_1968.log, replace
set more off

*************************************************************************
/* program to clean us vital statistics births data, 1940-1968 */
* original data: http://www.nber.org/vital-stats-books/
* fields: county births by residence, by race*urbanicity*attendant as available
* data documentation: see natality_documentation.xls
* data entry: digital divide data (http://www.digitaldividedata.com/)
* data development generously funded by nia grant number p30 ag012810, through the nber
* nb: do not re-sort natality`year'.dta datasets before running this code
*************************************************************************

**
*1940 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1940_2.cv.pdf
*table 7
*the excel file was hand-checked for ?'s (difficult-to-read characters)
*any resulting corrections appear below
clear
use natality1940.dta
summarize
describe

* lower-case the string identifiers (county, state, city/balance/total)
foreach v of varlist county state city_balance_total {
    replace `v'=lower(`v')
}

*missing observations in the original data are zeros
*set them to zero so they can be told apart from "true" missing values,
*which would be generated below due to difficult-to-read characters
* the count is shown first so the log records how many cells are filled
foreach v of varlist births__total births_of_residents_of_area__att births_of_residents_of_area__at0 births_of_residents_of_area__at1 births_of_residents_of_area__at2 {
    count if `v'==.
    replace `v'=0 if `v'==.
}

*check that all pdf pages appear to be in the data
* after sorting, consecutive page numbers may differ only by 0 or 1 (first row has no predecessor, so its difference is missing)
sort page__of_pdf_
generate temp = page__of_pdf_ - page__of_pdf_[_n-1]
assert inlist(temp, 0, 1) | temp==.
* temp holds the page-to-page differences from the pdf page check above; it is no longer needed
drop temp

*clean data entry errors
*none found as of yet

*fix county name misspellings (relative to county names in the 1970 census)
* first strip periods and turn "ste " into "st ", then apply the exact-name fixes
replace county=subinstr(county, ".", "",.)
replace county=subinstr(county, "ste ", "st ",.)
replace county="desoto" if county=="de soto" & state=="florida"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="davie" if county=="davis" & state=="north carolina"
replace county="san francisco" if county=="san francisco, coextensive with san francisco (city)" & state=="california"
replace county="orleans" if county=="orleans, coextensive with new orleans" & state=="louisiana"
replace county="denver" if county=="denver, coextensive with denver (city)" & state=="colorado"
replace county="philadelphia" if county=="philadelphia, coextensive with philadelphia (city)" & state=="pennsylvania"
replace county="baltimore city" if county=="baltimore (city)" & state=="maryland"
replace county="st louis city" if county=="st louis (city)" & state=="missouri"

*check that county names are consistent with 1970 census, except for known deviations
* the merge is done inside preserve/restore so the working data are left untouched
* _merge is written out in full so the check does not rely on variable-name abbreviation
* NOTE(review): the exemption below tests state=="dc" while the state-name check accepts only "district of columbia" - confirm which spelling the data use
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
assert _merge==3 ///
    | county=="total" ///
    | inlist(state,"alaska","hawaii","virginia","dc") ///
    | (state=="south dakota" & inlist(county,"armstrong","washington")) ///
    | (state=="idaho" & inlist(county,"yellowstone national park (part)","yellowstone national park")) ///
    | (state=="montana" & inlist(county,"yellowstone national park (part)","yellowstone national park","yellowstone national park, part","park (excl yell nat park)","park")) ///
    | (state=="wyoming" & inlist(county,"yellowstone national park (part)","yellowstone national park (total)","yellowstone national park, part","yellowstone national park, total")) ///
    | (state=="new york" & inlist(county,"new york city","bronx","kings","new york","queens","richmond")) ///
    | (state=="nevada" & inlist(county,"ormsby","carson city")) ///
    | (state=="new mexico" & county=="los alamos") ///
    | (state=="wisconsin" & county=="menominee")
restore

*check for mistakes/misspellings in state names
* string inlist() takes a limited number of arguments, hence the groups of nine
assert inlist(state,"alabama","alaska","arizona","arkansas","california","colorado","connecticut","delaware","district of columbia") ///
    | inlist(state,"florida","georgia","hawaii","idaho","illinois","indiana","iowa","kansas","kentucky") ///
    | inlist(state,"louisiana","maine","maryland","massachusetts","michigan","minnesota","mississippi","missouri","montana") ///
    | inlist(state,"nebraska","nevada","new hampshire","new jersey","new mexico","new york","north carolina","north dakota","ohio") ///
    | inlist(state,"oklahoma","oregon","pennsylvania","rhode island","south carolina","south dakota","tennessee","texas","utah") ///
    | inlist(state,"vermont","virginia","washington","west virginia","wisconsin","wyoming")

*data checks for columns summing to county total
* rows where the four attendant columns do not add up to the total are listed, not rejected
gen temp=births_of_residents_of_area__att+births_of_residents_of_area__at0+births_of_residents_of_area__at1+births_of_residents_of_area__at2
list if temp!=births__total
*checked .pdf, these are data errors not data entry errors
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf
label var page_of_pdf "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
* full variable name used here (the original wrote births_h and relied on abbreviation)
label var births_h_p "births by residence: physician (in hospital)"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician (not in hospital)"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"
rename births_of_residents_of_area__at2 births_o
label var births_o "births by residence: other and not specified"

*generate year variable
gen year=1940
label var year "year"

*check that observations are unique
* tag is 1 only for the first row of each state/county/sub_county group, so any duplicate fails the assert
egen tag=tag(state county sub_county)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1940.dta, replace
clear

**
*1941 data
**
*http://cdc.gov/nchs/data/vsus/vsus_1941_2.pdf
*table 7
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1941.dta
sum
desc
replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
foreach v of varlist births__total births_of_residents_of_area__att births_of_residents_of_area__at0 births_of_residents_of_area__at1 births_of_residents_of_area__at2 {
    count if `v'==.
    replace `v'=0 if `v'==.
}
*check that all pdf pages appear to be in the data
* after sorting, consecutive page numbers may differ only by 0 or 1 (missing for the first row)
sort page__of_pdf_
gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1]
assert temp==0|temp==1|temp==.
drop temp

*clean data entry errors
replace births_of_residents_of_area__att=185 if state=="georgia" & county=="chatham" & city_balance_total=="outside city"

*fix county name misspellings (relative to county names in the 1970 census)
* first strip periods and turn "ste " into "st ", then apply the exact-name fixes
replace county=subinstr(county, ".", "",.)
replace county=subinstr(county, "ste ", "st ",.)
replace county="desoto" if county=="de soto" & state=="florida"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="davie" if county=="davis" & state=="north carolina"
replace county="san francisco" if county=="san francisco, coextensive with san francisco (city)" & state=="california"
replace county="orleans" if county=="orleans, coextensive with new orleans" & state=="louisiana"
replace county="denver" if county=="denver, coextensive with denver (city)" & state=="colorado"
replace county="philadelphia" if county=="philadelphia, coextensive with philadelphia (city)" & state=="pennsylvania"
replace county="baltimore city" if county=="baltimore (city)" & state=="maryland"
replace county="st louis city" if county=="st louis (city)" & state=="missouri"

*check that county names are consistent with 1970 census, except for known deviations
* the merge is done inside preserve/restore so the working data are left untouched
* _merge is written out in full so the check does not rely on variable-name abbreviation
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
assert _merge==3 ///
    | county=="total" ///
    | inlist(state,"alaska","hawaii","virginia","dc") ///
    | (state=="south dakota" & inlist(county,"armstrong","washington")) ///
    | (state=="idaho" & inlist(county,"yellowstone national park (part)","yellowstone national park")) ///
    | (state=="montana" & inlist(county,"yellowstone national park (part)","yellowstone national park","yellowstone national park, part","park (excl yell nat park)","park")) ///
    | (state=="wyoming" & inlist(county,"yellowstone national park (part)","yellowstone national park (total)","yellowstone national park, part","yellowstone national park, total")) ///
    | (state=="new york" & inlist(county,"new york city","bronx","kings","new york","queens","richmond")) ///
    | (state=="nevada" & inlist(county,"ormsby","carson city")) ///
    | (state=="new mexico" & county=="los alamos") ///
    | (state=="wisconsin" & county=="menominee")
restore

*check for mistakes/misspellings in state names
* string inlist() takes a limited number of arguments, hence the groups of nine
assert inlist(state,"alabama","alaska","arizona","arkansas","california","colorado","connecticut","delaware","district of columbia") ///
    | inlist(state,"florida","georgia","hawaii","idaho","illinois","indiana","iowa","kansas","kentucky") ///
    | inlist(state,"louisiana","maine","maryland","massachusetts","michigan","minnesota","mississippi","missouri","montana") ///
    | inlist(state,"nebraska","nevada","new hampshire","new jersey","new mexico","new york","north carolina","north dakota","ohio") ///
    | inlist(state,"oklahoma","oregon","pennsylvania","rhode island","south carolina","south dakota","tennessee","texas","utah") ///
    | inlist(state,"vermont","virginia","washington","west virginia","wisconsin","wyoming")

*data checks for columns summing to county total
* rows where the four attendant columns do not add up to the total are listed, not rejected
gen temp=births_of_residents_of_area__att+births_of_residents_of_area__at0+births_of_residents_of_area__at1+births_of_residents_of_area__at2
list if temp!=births__total
*checked .pdf, these are data errors not data entry errors
* (this refers to the rows listed by the column-sum check just above)
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf
label var page_of_pdf "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
* full variable name used here (the original wrote births_h and relied on abbreviation)
label var births_h_p "births by residence: physician (in hospital)"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician (not in hospital)"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"
rename births_of_residents_of_area__at2 births_o
label var births_o "births by residence: other and not specified"

*generate year variable
gen year=1941
label var year "year"

*check that observations are unique
* tag is 1 only for the first row of each state/county/sub_county group, so any duplicate fails the assert
egen tag=tag(state county sub_county)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1941.dta, replace
clear

**
*1942 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1942_2.cv.pdf
*table 11 (section b, counties)
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1942.dta
sum
desc
replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
foreach v of varlist births__total births_of_residents_of_area__att births_of_residents_of_area__at0 {
    count if `v'==.
    replace `v'=0 if `v'==.
}
count if births_of_residents_of_area__at1==.
* finish the missing-to-zero fill for the last two attendant columns
replace births_of_residents_of_area__at1=0 if births_of_residents_of_area__at1==.
count if births_of_residents_of_area__at2==.
replace births_of_residents_of_area__at2=0 if births_of_residents_of_area__at2==.

*check that all pdf pages appear to be in the data
* after sorting, consecutive page numbers may differ only by 0 or 1 (missing for the first row)
sort page__of_pdf_
gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1]
assert temp==0|temp==1|temp==.
drop temp

*clean data entry errors
replace births__total=354 if state=="texas" & county=="nolan" & city_balance_total=="total"
replace births_of_residents_of_area__at0=86 if state=="texas" & county=="nolan" & city_balance_total=="total"

*fix county name misspellings (relative to county names in the 1970 census)
* first strip periods and turn "ste " into "st ", then apply the exact-name fixes
replace county=subinstr(county, ".", "",.)
replace county=subinstr(county, "ste ", "st ",.)
replace county="desoto" if county=="de soto" & state=="florida"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="davie" if county=="davis" & state=="north carolina"
replace county="san francisco" if county=="san francisco, coextensive with san francisco (city)" & state=="california"
replace county="orleans" if county=="orleans, coextensive with new orleans" & state=="louisiana"
replace county="denver" if county=="denver, coextensive with denver (city)" & state=="colorado"
replace county="philadelphia" if county=="philadelphia, coextensive with philadelphia (city)" & state=="pennsylvania"
replace county="baltimore city" if county=="baltimore (city)" & state=="maryland"
replace county="st louis city" if county=="st louis (city)" & state=="missouri"
replace county="cambria" if county=="cambri's" & state=="pennsylvania"
replace county="pawnee" if county=="pawnac" & state=="oklahoma"

*check that county names are consistent with 1970 census, except for known deviations
* the merge is done inside preserve/restore so the working data are left untouched
* _merge is written out in full so the check does not rely on variable-name abbreviation
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
assert _merge==3 ///
    | county=="total" ///
    | inlist(state,"alaska","hawaii","virginia","dc") ///
    | (state=="south dakota" & inlist(county,"armstrong","washington")) ///
    | (state=="idaho" & inlist(county,"yellowstone national park (part)","yellowstone national park")) ///
    | (state=="montana" & inlist(county,"yellowstone national park (part)","yellowstone national park","yellowstone national park, part","park (excl yell nat park)","park")) ///
    | (state=="wyoming" & inlist(county,"yellowstone national park (part)","yellowstone national park (total)","yellowstone national park, part","yellowstone national park, total")) ///
    | (state=="new york" & inlist(county,"new york city","bronx","kings","new york","queens","richmond")) ///
    | (state=="nevada" & inlist(county,"ormsby","carson city")) ///
    | (state=="new mexico" & county=="los alamos") ///
    | (state=="wisconsin" & county=="menominee")
restore

*check for mistakes/misspellings in state names
* string inlist() takes a limited number of arguments, hence the groups of nine
assert inlist(state,"alabama","alaska","arizona","arkansas","california","colorado","connecticut","delaware","district of columbia") ///
    | inlist(state,"florida","georgia","hawaii","idaho","illinois","indiana","iowa","kansas","kentucky") ///
    | inlist(state,"louisiana","maine","maryland","massachusetts","michigan","minnesota","mississippi","missouri","montana") ///
    | inlist(state,"nebraska","nevada","new hampshire","new jersey","new mexico","new york","north carolina","north dakota","ohio") ///
    | inlist(state,"oklahoma","oregon","pennsylvania","rhode island","south carolina","south dakota","tennessee","texas","utah") ///
    | inlist(state,"vermont","virginia","washington","west virginia","wisconsin","wyoming")

*data checks for columns summing to county total
gen temp=births_of_residents_of_area__att+births_of_residents_of_area__at0+births_of_residents_of_area__at1+births_of_residents_of_area__at2
list if temp!=births__total
*no errors
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf
label var page_of_pdf "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
* full variable name used here (the original wrote births_h and relied on abbreviation)
label var births_h_p "births by residence: physician (in hospital)"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician (not in hospital)"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"
rename births_of_residents_of_area__at2 births_o
label var births_o "births by residence: other and not specified"

*generate year variable
gen year=1942
label var year "year"

*check that observations are unique
* tag is 1 only for the first row of each state/county/sub_county group, so any duplicate fails the assert
egen tag=tag(state county sub_county)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1942.dta, replace
clear

**
*1943 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1943_2.cv.pdf
*table 11 (section b, counties)
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1943.dta
sum
desc
replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
count if births__total==.
* 1943: finish the missing-to-zero fill (the count for births__total was issued just above)
replace births__total=0 if births__total==.
foreach v of varlist births_of_residents_of_area__att births_of_residents_of_area__at0 births_of_residents_of_area__at1 births_of_residents_of_area__at2 {
    count if `v'==.
    replace `v'=0 if `v'==.
}

*check that all pdf pages appear to be in the data
* three page numbers are overwritten before the page sequence is tested
replace page__of_pdf_=168 if state=="south carolina" & inlist(county,"total","edgefield")
replace page__of_pdf_=169 if state=="texas" & county=="armstrong"
* after sorting, consecutive page numbers may differ only by 0 or 1 (missing for the first row)
sort page__of_pdf_
generate temp = page__of_pdf_ - page__of_pdf_[_n-1]
assert inlist(temp, 0, 1) | temp==.
drop temp

*clean data entry errors
replace state="louisiana" if state=="louislana"

*fix county name misspellings (relative to county names in the 1970 census)
* generic normalization: strip periods, then turn "ste " into "st "
replace county=subinstr(county, ".", "",.)
replace county=subinstr(county, "ste ", "st ",.)
* 1943: exact-name county fixes (relative to county names in the 1970 census)
replace county="desoto" if county=="de soto" & state=="florida"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="davie" if county=="davis" & state=="north carolina"
replace county="san francisco" if county=="san francisco, coextensive with san francisco (city)" & state=="california"
replace county="orleans" if county=="orleans, coextensive with new orleans" & state=="louisiana"
replace county="denver" if county=="denver, coextensive with denver (city)" & state=="colorado"
replace county="philadelphia" if county=="philadelphia, coextensive with philadelphia (city)" & state=="pennsylvania"
replace county="baltimore city" if county=="baltimore (city)" & state=="maryland"
replace county="st louis city" if county=="st louis (city)" & state=="missouri"

*check that county names are consistent with 1970 census, except for known deviations
* the merge is done inside preserve/restore so the working data are left untouched
* _merge is written out in full so the check does not rely on variable-name abbreviation
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
assert _merge==3 ///
    | county=="total" ///
    | inlist(state,"alaska","hawaii","virginia","dc") ///
    | (state=="south dakota" & inlist(county,"armstrong","washington")) ///
    | (state=="idaho" & inlist(county,"yellowstone national park (part)","yellowstone national park")) ///
    | (state=="montana" & inlist(county,"yellowstone national park (part)","yellowstone national park","yellowstone national park, part","park (excl yell nat park)","park")) ///
    | (state=="wyoming" & inlist(county,"yellowstone national park (part)","yellowstone national park (total)","yellowstone national park, part","yellowstone national park, total")) ///
    | (state=="new york" & inlist(county,"new york city","bronx","kings","new york","queens","richmond")) ///
    | (state=="nevada" & inlist(county,"ormsby","carson city")) ///
    | (state=="new mexico" & county=="los alamos") ///
    | (state=="wisconsin" & county=="menominee")
restore

*check for mistakes/misspellings in state names
* string inlist() takes a limited number of arguments, hence the groups of nine
assert inlist(state,"alabama","alaska","arizona","arkansas","california","colorado","connecticut","delaware","district of columbia") ///
    | inlist(state,"florida","georgia","hawaii","idaho","illinois","indiana","iowa","kansas","kentucky") ///
    | inlist(state,"louisiana","maine","maryland","massachusetts","michigan","minnesota","mississippi","missouri","montana") ///
    | inlist(state,"nebraska","nevada","new hampshire","new jersey","new mexico","new york","north carolina","north dakota","ohio") ///
    | inlist(state,"oklahoma","oregon","pennsylvania","rhode island","south carolina","south dakota","tennessee","texas","utah") ///
    | inlist(state,"vermont","virginia","washington","west virginia","wisconsin","wyoming")

*data checks for columns summing to county total
gen temp=births_of_residents_of_area__att+births_of_residents_of_area__at0+births_of_residents_of_area__at1+births_of_residents_of_area__at2
list if temp!=births__total
*no errors
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf
label var page_of_pdf "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
* full variable name used here (the original wrote births_h and relied on abbreviation)
label var births_h_p "births by residence: physician (in hospital)"
rename births_of_residents_of_area__at0 births_nh_p
* 1943: finish renaming and labelling the attendant columns
label variable births_nh_p "births by residence: physician (not in hospital)"
rename births_of_residents_of_area__at1 births_m
rename births_of_residents_of_area__at2 births_o
label variable births_m "births by residence: midwife"
label variable births_o "births by residence: other and not specified"

*generate year variable
generate year=1943
label variable year "year"

*check that observations are unique
* tag is 1 only for the first row of each state/county/sub_county group, so any duplicate fails the assert
egen tag=tag(state county sub_county)
assert tag==1
drop tag

list in 1
describe
summarize
saveold clean_natality1943.dta, replace
clear

**
*1944 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1944_2.cv.pdf
*table 11 (section b, counties)
*the excel file was hand-checked for ?'s (difficult-to-read characters)
*any resulting corrections appear below
clear
use natality1944.dta
summarize
describe

* lower-case the string identifiers (county, state, city/balance/total)
foreach v of varlist county state city_balance_total {
    replace `v'=lower(`v')
}

*missing observations in the original data are zeros
*set them to zero so they can be told apart from "true" missing values,
*which would be generated below due to difficult-to-read characters
foreach v of varlist births__total births_of_residents_of_area__att births_of_residents_of_area__at0 births_of_residents_of_area__at1 births_of_residents_of_area__at2 {
    count if `v'==.
    replace `v'=0 if `v'==.
}

*check that all pdf pages appear to be in the data
* after sorting, consecutive page numbers may differ only by 0 or 1 (missing for the first row)
sort page__of_pdf_
generate temp = page__of_pdf_ - page__of_pdf_[_n-1]
assert inlist(temp, 0, 1) | temp==.
drop temp *clean data entry errors replace state="indiana" if state=="illinois" & county=="total" & births__total==71354 replace state="indiana" if state=="illinois" & county=="adams" & births__total==443 replace state="indiana" if state=="illinois" & county=="allen" & (births__total==3503 | births__total==2716 | births__total==787) replace state="indiana" if state=="illinois" & county=="bartholomew" & (births__total==681 | births__total==356 | births__total==325) replace state="indiana" if state=="illinois" & county=="benton" & births__total==213 replace state="indiana" if state=="illinois" & county=="blackford" & births__total==230 replace state="indiana" if state=="illinois" & county=="boone" & births__total==450 replace state="indiana" if state=="illinois" & county=="brown" & births__total==91 replace state="indiana" if state=="illinois" & county=="carroll" & births__total==293 replace state="indiana" if state=="illinois" & county=="cass" & (births__total==683 | births__total==405 | births__total==278) replace state="indiana" if state=="illinois" & county=="clark" & (births__total==894 | births__total==367 | births__total==527) replace state="indiana" if state=="illinois" & county=="clay" & births__total==403 replace state="indiana" if state=="illinois" & county=="clinton" & (births__total==550 | births__total==276 | births__total==274) replace state="indiana" if state=="illinois" & county=="crawford" & births__total==164 replace state="indiana" if state=="illinois" & county=="daviess" & births__total==582 replace state="indiana" if state=="illinois" & county=="dearborn" & births__total==414 replace state="indiana" if state=="illinois" & county=="decatur" & births__total==358 replace state="indiana" if state=="illinois" & county=="de kalb" & births__total==501 replace state="iowa" if state=="indiana" & county=="total" & births__total==46564 replace state="iowa" if state=="indiana" & county=="adair" & births__total==235 replace state="iowa" if state=="indiana" & 
county=="adams" & births__total==172 replace state="iowa" if state=="indiana" & county=="allamakee" & births__total==309 replace state="iowa" if state=="indiana" & county=="appanoose" & births__total==402 replace state="iowa" if state=="indiana" & county=="audubon" & births__total==214 replace state="iowa" if state=="indiana" & county=="benton" & births__total==397 replace state="iowa" if state=="indiana" & county=="black hawk" & (births__total==1634 | births__total==1053 | births__total==581) replace state="iowa" if state=="indiana" & county=="boone" & (births__total==456 | births__total==198 | births__total==258) replace state="iowa" if state=="indiana" & county=="bremer" & births__total==310 replace state="iowa" if state=="indiana" & county=="buchanan" & births__total==376 replace state="iowa" if state=="indiana" & county=="buena vista" & births__total==324 replace state="iowa" if state=="indiana" & county=="butler" & births__total==285 replace state="iowa" if state=="indiana" & county=="calhoun" & births__total==342 replace state="iowa" if state=="indiana" & county=="carroll" & births__total==487 replace state="iowa" if state=="indiana" & county=="cass" & births__total==366 replace state="iowa" if state=="indiana" & county=="cedar" & births__total==278 replace state="iowa" if state=="indiana" & county=="cerro gordo" & (births__total==763 | births__total==461 | births__total==302) replace state="iowa" if state=="indiana" & county=="cherokee" & births__total==308 replace state="iowa" if state=="indiana" & county=="chickasaw" & births__total==274 replace state="iowa" if state=="indiana" & county=="clarke" & births__total==148 replace state="iowa" if state=="indiana" & county=="clay" & births__total==359 replace state="iowa" if state=="indiana" & county=="clayton" & births__total==398 replace state="iowa" if state=="indiana" & county=="clinton" & (births__total==861 | births__total==529 | births__total==332) replace state="iowa" if state=="indiana" & 
county=="crawford" & births__total==380 replace state="iowa" if state=="indiana" & county=="dallas" & births__total==412 replace state="iowa" if state=="indiana" & county=="davis" & births__total==164 replace state="iowa" if state=="indiana" & county=="decatur" & births__total==249 replace state="iowa" if state=="indiana" & county=="delaware" & births__total==350 replace state="iowa" if state=="indiana" & county=="des moines" & (births__total==786 | births__total==591 | births__total==195) replace state="iowa" if state=="indiana" & county=="dickinson" & births__total==191 replace state="iowa" if state=="indiana" & county=="dubuque" & (births__total==1245 | births__total==820 | births__total==425) replace state="iowa" if state=="indiana" & county=="emmet" & births__total==282 replace state="iowa" if state=="indiana" & county=="fayette" & births__total==494 replace state="iowa" if state=="indiana" & county=="floyd" & births__total==420 replace state="iowa" if state=="indiana" & county=="franklin" & births__total==302 replace state="iowa" if state=="indiana" & county=="fremont" & births__total==244 replace state="iowa" if state=="indiana" & county=="greene" & births__total==258 replace state="iowa" if state=="indiana" & county=="grundy" & births__total==226 replace state="south carolina" if state=="pennsylvania" & county=="total" & births__total==51467 replace state="south carolina" if state=="pennsylvania" & county=="abbeville" & births__total==425 replace state="south carolina" if state=="pennsylvania" & county=="aiken" & births__total==1293 replace state="south carolina" if state=="pennsylvania" & county=="allendale" & births__total==338 replace state="south carolina" if state=="pennsylvania" & county=="anderson" & (births__total==1935 | births__total==645 | births__total==1290) replace state="south carolina" if state=="pennsylvania" & county=="bamberg" & births__total==428 replace state="south carolina" if state=="pennsylvania" & county=="barnwell" & 
births__total==518 replace state="south carolina" if state=="pennsylvania" & county=="beaufort" & births__total==739 replace state="south carolina" if state=="pennsylvania" & county=="berkeley" & births__total==845 replace state="south carolina" if state=="pennsylvania" & county=="calhoun" & births__total==412 replace state="south carolina" if state=="pennsylvania" & county=="charleston" & (births__total==5392 | births__total==2621 | births__total==2771) replace state="south carolina" if state=="pennsylvania" & county=="cherokee" & births__total==786 replace state="south carolina" if state=="pennsylvania" & county=="chester" & births__total==735 replace state="south carolina" if state=="pennsylvania" & county=="chesterfield" & births__total==952 replace state="south carolina" if state=="pennsylvania" & county=="clarendon" & births__total==784 replace state="south carolina" if state=="pennsylvania" & county=="colleton" & births__total==791 replace state="south carolina" if state=="pennsylvania" & county=="darlington" & births__total==1258 replace state="south carolina" if state=="pennsylvania" & county=="dillon" & births__total==819 replace state="south carolina" if state=="pennsylvania" & county=="dorchester" & births__total==684 replace state="south carolina" if state=="pennsylvania" & county=="edgefield" & births__total==418 replace state="south carolina" if state=="pennsylvania" & county=="fairfield" & births__total==590 replace state="south carolina" if state=="pennsylvania" & county=="florence" & (births__total==2064 | births__total==652 | births__total==1412) replace state="south carolina" if state=="pennsylvania" & county=="georgetown" & births__total==840 replace state="south carolina" if state=="pennsylvania" & county=="greenville" & (births__total==3382 | births__total==1483 | births__total==1899) replace state="south carolina" if state=="pennsylvania" & county=="greenwood" & (births__total==884 | births__total==354 | births__total==530) replace 
state="south carolina" if state=="pennsylvania" & county=="hampton" & births__total==506 replace state="south carolina" if state=="pennsylvania" & county=="horry" & births__total==1513 replace state="south carolina" if state=="pennsylvania" & county=="jasper" & births__total==331 replace state="south carolina" if state=="pennsylvania" & county=="kershaw" & births__total==784 replace state="south carolina" if state=="pennsylvania" & county=="lancaster" & births__total==801 replace state="south carolina" if state=="pennsylvania" & county=="laurens" & births__total==775 replace state="south carolina" if state=="pennsylvania" & county=="lee" & births__total==666 replace state="south carolina" if state=="pennsylvania" & county=="lexington" & births__total==957 replace state="south carolina" if state=="pennsylvania" & county=="mccormick" & births__total==269 replace state="south carolina" if state=="pennsylvania" & county=="marion" & births__total==849 replace state="south carolina" if state=="pennsylvania" & county=="marlboro" & births__total==891 replace state="south carolina" if state=="pennsylvania" & county=="newberry" & births__total==765 replace state="south carolina" if state=="pennsylvania" & county=="oconee" & births__total==831 replace state="south carolina" if state=="pennsylvania" & county=="orangeburg" & (births__total==1917 | births__total==373 | births__total==1544) replace state="south carolina" if state=="pennsylvania" & county=="pickens" & births__total==864 replace state="south carolina" if state=="pennsylvania" & county=="richland" & (births__total==3162 | births__total==2175 | births__total==987) replace state="south carolina" if state=="pennsylvania" & county=="saluda" & births__total==309 replace state="south carolina" if state=="pennsylvania" & county=="spartanburg" & (births__total==3057 | births__total==929 | births__total==2128) replace state="south carolina" if state=="pennsylvania" & county=="sumter" & (births__total==1427 | 
births__total==466 | births__total==961) replace state="south carolina" if state=="pennsylvania" & county=="union" & births__total==690 replace state="south carolina" if state=="pennsylvania" & county=="williamsburg" & births__total==1265 replace state="south carolina" if state=="pennsylvania" & county=="york" & (births__total==1526 | births__total==474 | births__total==1052) replace state="tennessee" if state=="south dakota" & county=="total" & births__total==68272 replace state="tennessee" if state=="south dakota" & county=="anderson" & births__total==862 replace state="tennessee" if state=="south dakota" & county=="bedford" & births__total==515 replace state="tennessee" if state=="south dakota" & county=="benton" & births__total==230 replace state="tennessee" if state=="south dakota" & county=="bledsoe" & births__total==220 replace state="tennessee" if state=="south dakota" & county=="blount" & births__total==1349 replace state="tennessee" if state=="south dakota" & county=="bradley" & (births__total==737 | births__total==323 | births__total==414) replace state="tennessee" if state=="south dakota" & county=="campbell" & births__total==999 replace state="tennessee" if state=="south dakota" & county=="cannon" & births__total==236 replace state="tennessee" if state=="south dakota" & county=="carroll" & births__total==565 replace state="tennessee" if state=="south dakota" & county=="carter" & births__total==906 replace state="tennessee" if state=="south dakota" & county=="cheatham" & births__total==200 replace state="tennessee" if state=="south dakota" & county=="chester" & births__total==237 replace state="tennessee" if state=="south dakota" & county=="claiborne" & births__total==623 replace state="tennessee" if state=="south dakota" & county=="clay" & births__total==208 replace state="tennessee" if state=="south dakota" & county=="cocke" & births__total==609 replace state="tennessee" if state=="south dakota" & county=="coffee" & births__total==539 replace 
state="tennessee" if state=="south dakota" & county=="crockett" & births__total==385 replace state="tennessee" if state=="south dakota" & county=="cumberland" & births__total==466 replace state="tennessee" if state=="south dakota" & county=="davidson" & (births__total==5543 | births__total==4411 | births__total==1132) replace state="tennessee" if state=="south dakota" & county=="decatur" & births__total==239 replace state="tennessee" if state=="south dakota" & county=="de kalb" & births__total==240 replace state="tennessee" if state=="south dakota" & county=="dickson" & births__total==448 replace state="tennessee" if state=="south dakota" & county=="dyer" & (births__total==801 | births__total==215 | births__total==586) replace state="tennessee" if state=="south dakota" & county=="fayette" & births__total==793 replace state="tennessee" if state=="south dakota" & county=="fentress" & births__total==426 replace state="tennessee" if state=="south dakota" & county=="franklin" & births__total==595 replace state="tennessee" if state=="south dakota" & county=="gibson" & births__total==1074 replace state="tennessee" if state=="south dakota" & county=="giles" & births__total==609 replace state="tennessee" if state=="south dakota" & county=="grainger" & births__total==294 replace state="tennessee" if state=="south dakota" & county=="greene" & births__total==834 replace state="tennessee" if state=="south dakota" & county=="grundy" & births__total==349 replace state="tennessee" if state=="south dakota" & county=="hamblen" & births__total==466 replace state="tennessee" if state=="south dakota" & county=="hamilton" & (births__total==4247 | births__total==3219 | births__total==1028) replace state="tennessee" if state=="south dakota" & county=="hancock" & births__total==228 replace state="tennessee" if state=="south dakota" & county=="hardeman" & births__total==540 replace state="tennessee" if state=="south dakota" & county=="hardin" & births__total==380 replace state="tennessee" 
if state=="south dakota" & county=="hawkins" & births__total==726 replace state="tennessee" if state=="south dakota" & county=="haywood" & births__total==661 replace state="tennessee" if state=="south dakota" & county=="henderson" & births__total==389 replace state="tennessee" if state=="south dakota" & county=="henry" & births__total==485 replace state="tennessee" if state=="south dakota" & county=="hickman" & births__total==272 replace state="tennessee" if state=="south dakota" & county=="houston" & births__total==129 replace state="tennessee" if state=="south dakota" & county=="humphreys" & births__total==236 replace state="tennessee" if state=="south dakota" & county=="jackson" & births__total==346 replace state="tennessee" if state=="south dakota" & county=="jefferson" & births__total==426 replace state="tennessee" if state=="south dakota" & county=="johnson" & births__total==288 replace state="tennessee" if state=="south dakota" & county=="knox" & (births__total==4359 | births__total==2794 | births__total==1565) replace state="tennessee" if state=="south dakota" & county=="lake" & births__total==313 replace state="tennessee" if state=="south dakota" & county=="lauderdale" & births__total==609 replace state="tennessee" if state=="south dakota" & county=="lawrence" & births__total==765 replace state="tennessee" if state=="south dakota" & county=="lewis" & births__total==125 replace state="tennessee" if state=="south dakota" & county=="lincoln" & births__total==563 replace state="tennessee" if state=="south dakota" & county=="loudon" & births__total==548 replace state="tennessee" if state=="south dakota" & county=="mcminn" & births__total==745 replace state="tennessee" if state=="south dakota" & county=="mcnairy" & births__total==455 replace state="tennessee" if state=="south dakota" & county=="macon" & births__total==289 replace state="tennessee" if state=="south dakota" & county=="madison" & (births__total==1255 | births__total==587 | births__total==668) 
*----------------------------------------------------------------------
*state-label corrections (continued)
*each statement moves rows recorded under one state name to another;
*rows are pinned by county name and exact births__total so that only
*the intended records are touched. statement order is kept as-is:
*the pennsylvania->south dakota fixes must run after the
*south dakota->tennessee/texas fixes.
*----------------------------------------------------------------------

*rows recorded as south dakota that are relabelled tennessee
replace state = "tennessee" if state == "south dakota" & county == "marion" & births__total == 530
replace state = "tennessee" if state == "south dakota" & county == "marshall" & births__total == 357
replace state = "tennessee" if state == "south dakota" & county == "maury" & inlist(births__total, 831, 231, 600)
replace state = "tennessee" if state == "south dakota" & county == "meigs" & births__total == 148
replace state = "tennessee" if state == "south dakota" & county == "monroe" & births__total == 672
replace state = "tennessee" if state == "south dakota" & county == "montgomery" & inlist(births__total, 701, 259, 442)
replace state = "tennessee" if state == "south dakota" & county == "moore" & births__total == 71
replace state = "tennessee" if state == "south dakota" & county == "morgan" & births__total == 384
replace state = "tennessee" if state == "south dakota" & county == "obion" & births__total == 556
replace state = "tennessee" if state == "south dakota" & county == "overton" & births__total == 397
replace state = "tennessee" if state == "south dakota" & county == "perry" & births__total == 155
replace state = "tennessee" if state == "south dakota" & county == "pickett" & births__total == 104
replace state = "tennessee" if state == "south dakota" & county == "polk" & births__total == 380
replace state = "tennessee" if state == "south dakota" & county == "putnam" & births__total == 565
replace state = "tennessee" if state == "south dakota" & county == "rhea" & births__total == 421
replace state = "tennessee" if state == "south dakota" & county == "roane" & births__total == 859
replace state = "tennessee" if state == "south dakota" & county == "robertson" & births__total == 561
replace state = "tennessee" if state == "south dakota" & county == "rutherford" & births__total == 785
replace state = "tennessee" if state == "south dakota" & county == "scott" & births__total == 457
replace state = "tennessee" if state == "south dakota" & county == "sequatchie" & births__total == 151
replace state = "tennessee" if state == "south dakota" & county == "sevier" & births__total == 610
replace state = "tennessee" if state == "south dakota" & county == "shelby" & inlist(births__total, 8103, 6254, 1849)
replace state = "tennessee" if state == "south dakota" & county == "smith" & births__total == 251
replace state = "tennessee" if state == "south dakota" & county == "stewart" & births__total == 222
replace state = "tennessee" if state == "south dakota" & county == "sullivan" & inlist(births__total, 2058, 359, 689, 1010)
replace state = "tennessee" if state == "south dakota" & county == "sumner" & births__total == 595
replace state = "tennessee" if state == "south dakota" & county == "tipton" & births__total == 789
replace state = "tennessee" if state == "south dakota" & county == "trousdale" & births__total == 132
replace state = "tennessee" if state == "south dakota" & county == "unicoi" & births__total == 379
replace state = "tennessee" if state == "south dakota" & county == "union" & births__total == 178
replace state = "tennessee" if state == "south dakota" & county == "van buren" & births__total == 91
replace state = "tennessee" if state == "south dakota" & county == "warren" & births__total == 455
*874 is matched as entered; it is corrected to 674 further below
replace state = "tennessee" if state == "south dakota" & county == "washington" & inlist(births__total, 1178, 504, 874)
replace state = "tennessee" if state == "south dakota" & county == "wayne" & births__total == 316
replace state = "tennessee" if state == "south dakota" & county == "weakley" & births__total == 472
replace state = "tennessee" if state == "south dakota" & county == "white" & births__total == 411
replace state = "tennessee" if state == "south dakota" & county == "williamson" & births__total == 491
replace state = "tennessee" if state == "south dakota" & county == "wilson" & births__total == 480

*rows recorded as south dakota that are relabelled texas
replace state = "texas" if state == "south dakota" & county == "total" & births__total == 165900
replace state = "texas" if state == "south dakota" & county == "anderson" & inlist(births__total, 664, 272, 392)
replace state = "texas" if state == "south dakota" & county == "andrews" & births__total == 34
replace state = "texas" if state == "south dakota" & county == "angelina" & births__total == 739
replace state = "texas" if state == "south dakota" & county == "aransas" & births__total == 74
replace state = "texas" if state == "south dakota" & county == "archer" & births__total == 118
replace state = "texas" if state == "south dakota" & county == "armstrong" & births__total == 51
replace state = "texas" if state == "south dakota" & county == "atascosa" & births__total == 607
replace state = "texas" if state == "south dakota" & county == "austin" & births__total == 282
replace state = "texas" if state == "south dakota" & county == "bailey" & births__total == 188
replace state = "texas" if state == "south dakota" & county == "bandera" & births__total == 46
replace state = "texas" if state == "south dakota" & county == "bastrop" & births__total == 575
replace state = "texas" if state == "south dakota" & county == "baylor" & births__total == 147
replace state = "texas" if state == "south dakota" & county == "bee" & births__total == 457
replace state = "texas" if state == "south dakota" & county == "bell" & inlist(births__total, 1400, 551, 849)
replace state = "texas" if state == "south dakota" & county == "bexar" & inlist(births__total, 11515, 10064, 1451)
replace state = "texas" if state == "south dakota" & county == "blanco" & births__total == 74
replace state = "texas" if state == "south dakota" & county == "borden" & births__total == 12
replace state = "texas" if state == "south dakota" & county == "bosque" & births__total == 248

*rows recorded as pennsylvania that are relabelled rhode island
replace state = "rhode island" if state == "pennsylvania" & county == "total" & births__total == 13754
replace state = "rhode island" if state == "pennsylvania" & county == "bristol" & inlist(births__total, 481, 212, 269)
replace state = "rhode island" if state == "pennsylvania" & county == "kent" & inlist(births__total, 1290, 664, 359, 267)
replace state = "rhode island" if state == "pennsylvania" & county == "newport" & inlist(births__total, 1256, 792, 464)
replace state = "rhode island" if state == "pennsylvania" & county == "providence" & inlist(births__total, 9872, 461, 818, 178, 627, 211, 168, 260, 1319, 4510, 894, 426)
replace state = "rhode island" if state == "pennsylvania" & county == "washington" & inlist(births__total, 855, 302, 553)

*rows recorded as pennsylvania that are relabelled south dakota
replace state = "south dakota" if state == "pennsylvania" & county == "total" & births__total == 12769
replace state = "south dakota" if state == "pennsylvania" & county == "armstrong" & births__total == 0
replace state = "south dakota" if state == "pennsylvania" & county == "aurora" & births__total == 96
replace state = "south dakota" if state == "pennsylvania" & county == "beadle" & inlist(births__total, 370, 207, 163)
replace state = "south dakota" if state == "pennsylvania" & county == "bennett" & births__total == 60
replace state = "south dakota" if state == "pennsylvania" & county == "bon homme" & births__total == 148
replace state = "south dakota" if state == "pennsylvania" & county == "brookings" & births__total == 304
replace state = "south dakota" if state == "pennsylvania" & county == "brown" & inlist(births__total, 590, 370, 220)
replace state = "south dakota" if state == "pennsylvania" & county == "brule" & births__total == 120
replace state = "south dakota" if state == "pennsylvania" & county == "buffalo" & births__total == 64
replace state = "south dakota" if state == "pennsylvania" & county == "butte" & births__total == 168
replace state = "south dakota" if state == "pennsylvania" & county == "campbell" & births__total == 91
replace state = "south dakota" if state == "pennsylvania" & county == "charles mix" & births__total == 224
replace state = "south dakota" if state == "pennsylvania" & county == "clark" & births__total == 180
replace state = "south dakota" if state == "pennsylvania" & county == "clay" & births__total == 139
replace state = "south dakota" if state == "pennsylvania" & county == "codington" & inlist(births__total, 384, 237, 147)
replace state = "south dakota" if state == "pennsylvania" & county == "corson" & births__total == 156
replace state = "south dakota" if state == "pennsylvania" & county == "custer" & births__total == 84
replace state = "south dakota" if state == "pennsylvania" & county == "davison" & inlist(births__total, 280, 189, 91)
replace state = "south dakota" if state == "pennsylvania" & county == "day" & births__total == 299
replace state = "south dakota" if state == "pennsylvania" & county == "deuel" & births__total == 139
replace state = "south dakota" if state == "pennsylvania" & county == "dewey" & births__total == 117
replace state = "south dakota" if state == "pennsylvania" & county == "douglas" & births__total == 139
replace state = "south dakota" if state == "pennsylvania" & county == "edmunds" & births__total == 143
replace state = "south dakota" if state == "pennsylvania" & county == "fall river" & births__total == 222
replace state = "south dakota" if state == "pennsylvania" & county == "faulk" & births__total == 84
replace state = "south dakota" if state == "pennsylvania" & county == "grant" & births__total == 213
replace state = "south dakota" if state == "pennsylvania" & county == "gregory" & births__total == 199
replace state = "south dakota" if state == "pennsylvania" & county == "haakon" & births__total == 83
replace state = "south dakota" if state == "pennsylvania" & county == "hamlin" & births__total == 119
replace state = "south dakota" if state == "pennsylvania" & county == "hand" & births__total == 130
replace state = "south dakota" if state == "pennsylvania" & county == "hanson" & births__total == 110
replace state = "south dakota" if state == "pennsylvania" & county == "harding" & births__total == 69
replace state = "south dakota" if state == "pennsylvania" & county == "hughes" & births__total == 149
replace state = "south dakota" if state == "pennsylvania" & county == "hutchinson" & births__total == 226
replace state = "south dakota" if state == "pennsylvania" & county == "hyde" & births__total == 53
replace state = "south dakota" if state == "pennsylvania" & county == "jackson" & births__total == 27
replace state = "south dakota" if state == "pennsylvania" & county == "jerauld" & births__total == 85
replace state = "south dakota" if state == "pennsylvania" & county == "jones" & births__total == 50
replace state = "south dakota" if state == "pennsylvania" & county == "kingsbury" & births__total == 209
replace state = "south dakota" if state == "pennsylvania" & county == "lake" & births__total == 245
replace state = "south dakota" if state == "pennsylvania" & county == "lawrence" & births__total == 241
replace state = "south dakota" if state == "pennsylvania" & county == "lincoln" & births__total == 233
replace state = "south dakota" if state == "pennsylvania" & county == "lyman" & births__total == 93
replace state = "south dakota" if state == "pennsylvania" & county == "mccook" & births__total == 172
replace state = "south dakota" if state == "pennsylvania" & county == "mcpherson" & births__total == 126
replace state = "south dakota" if state == "pennsylvania" & county == "marshall" & births__total == 148
replace state = "south dakota" if state == "pennsylvania" & county == "meade" & births__total == 170

*value corrections for tennessee / washington / balance of county
*(guarded by the old value so each fix can only fire on the uncorrected record)
replace births_of_residents_of_area__att = 328 if state == "tennessee" & county == "washington" & city_balance_total == "balance of county" & births_of_residents_of_area__att == 528
replace births__total = 674 if state == "tennessee" & county == "washington" & city_balance_total == "balance of county" & births__total == 874

*fix county name misspellings (relative to county names in the 1970 census)
*generic normalisation first: strip periods, then turn "ste " into "st "
replace county = subinstr(county, ".", "", .)
replace county = subinstr(county, "ste ", "st ", .)
*state-specific county name fixes (spellings follow the 1970 census county file)
replace county = "desoto" if county == "de soto" & state == "florida"
replace county = "cochrane" if county == "cochran" & state == "texas"
replace county = "davie" if county == "davis" & state == "north carolina"
replace county = "san francisco" if county == "san francisco, coextensive with san francisco (city)" & state == "california"
replace county = "orleans" if county == "orleans, coextensive with new orleans" & state == "louisiana"
replace county = "denver" if county == "denver, coextensive with denver (city)" & state == "colorado"
replace county = "philadelphia" if county == "philadelphia, coextensive with philadelphia (city)" & state == "pennsylvania"
replace county = "baltimore city" if county == "baltimore (city)" & state == "maryland"
replace county = "st louis city" if county == "st louis (city)" & state == "missouri"

*correct data entry errors found while checking that county totals sum to state totals
replace births__total = 76265 if state == "new jersey" & county == "total" & city_balance_total == "total" & births__total == 76285

*check that county names are consistent with 1970 census, except for known deviations
*(runs inside preserve/restore so the merge does not alter the working data)
preserve
egen countyr_state = concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
*every observation must either match (_m==3) or be one of the listed exceptions
assert _m==3 ///
    | county=="armstrong" & state=="south dakota" ///
    | county=="washington" & state=="south dakota" ///
    | county=="yellowstone national park (part)" & state=="idaho" ///
    | county=="yellowstone national park" & state=="idaho" ///
    | county=="yellowstone national park (part)" & state=="montana" ///
    | county=="yellowstone national park (part)" & state=="wyoming" ///
    | county=="yellowstone national park (total)" & state=="wyoming" ///
    | county=="yellowstone national park" & state=="montana" ///
    | county=="park (excl yell nat park)" & state=="montana" ///
    | county=="yellowstone national park, part" & state=="montana" ///
    | county=="yellowstone national park, part" & state=="wyoming" ///
    | county=="yellowstone national park, total" & state=="wyoming" ///
    | county=="park" & state=="montana" ///
    | county=="new york city" & state=="new york" ///
    | county=="bronx" & state=="new york" ///
    | county=="kings" & state=="new york" ///
    | county=="new york" & state=="new york" ///
    | county=="queens" & state=="new york" ///
    | county=="richmond" & state=="new york" ///
    | county=="ormsby" & state=="nevada" ///
    | county=="carson city" & state=="nevada" ///
    | county=="los alamos" & state=="new mexico" ///
    | county=="menominee" & state=="wisconsin" ///
    | county=="total" ///
    | state=="alaska" | state=="hawaii" | state=="virginia" | state=="dc"
restore

*check for mistakes/misspellings in state names
*(string inlist() takes at most 9 candidate values per call, hence the groups)
assert inlist(state, "alabama", "alaska", "arizona", "arkansas", "california", "colorado", "connecticut", "delaware", "district of columbia") ///
    | inlist(state, "florida", "georgia", "hawaii", "idaho", "illinois", "indiana", "iowa", "kansas", "kentucky") ///
    | inlist(state, "louisiana", "maine", "maryland", "massachusetts", "michigan", "minnesota", "mississippi", "missouri", "montana") ///
    | inlist(state, "nebraska", "nevada", "new hampshire", "new jersey", "new mexico", "new york", "north carolina", "north dakota", "ohio") ///
    | inlist(state, "oklahoma", "oregon", "pennsylvania", "rhode island", "south carolina", "south dakota", "tennessee", "texas", "utah") ///
    | inlist(state, "vermont", "virginia", "washington", "west virginia", "wisconsin", "wyoming")

*data checks for columns summing to county total
gen temp = births_of_residents_of_area__att + births_of_residents_of_area__at0 + births_of_residents_of_area__at1 + births_of_residents_of_area__at2
list if temp != births__total
*no errors
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf
label var page_of_pdf "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
*label the variable by its full name so this does not depend on variable abbreviation
label var births_h_p "births by residence: physician (in hospital)"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician (not in hospital)"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"
rename births_of_residents_of_area__at2 births_o
label var births_o "births by residence: other and not specified"

*generate year variable
gen year = 1944
label var year "year"

*check that observations are unique
egen tag = tag(state county sub_county)
assert tag == 1
drop tag

list in 1/1
desc
sum
saveold clean_natality1944.dta, replace
clear

**
*1945 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1945_2.pdf
*table 28 (section b, counties)
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1945.dta
sum
desc
replace county = lower(county)
replace state = lower(state)
replace city_balance_total = lower(city_balance_total)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
foreach v of varlist births__total births_of_residents_of_area__att births_of_residents_of_area__at0 births_of_residents_of_area__at1 births_of_residents_of_area__at2 {
    *log how many cells were blank before zero-filling
    count if `v' == .
    replace `v' = 0 if `v' == .
}

*check that all pdf pages appear to be in the data
*(after sorting, consecutive page numbers may differ by 0 or 1 only; the first row gives missing)
sort page__of_pdf_
gen temp = page__of_pdf_[_n] - page__of_pdf_[_n-1]
assert temp==0 | temp==1 | temp==.
drop temp

*clean data entry errors
replace births_of_residents_of_area__at0 = 111 if state == "mississippi" & county == "lafayette" & city_balance_total == "total"
replace births_of_residents_of_area__at1 = 192 if state == "mississippi" & county == "lafayette" & city_balance_total == "total"

*fix county name misspellings (relative to county names in the 1970 census)
*generic normalisation first: strip periods, then turn "ste " into "st "
replace county = subinstr(county, ".", "", .)
replace county = subinstr(county, "ste ", "st ", .)
replace county = "desoto" if county == "de soto" & state == "florida"
replace county = "cochrane" if county == "cochran" & state == "texas"
replace county = "davie" if county == "davis" & state == "north carolina"
replace county = "san francisco" if county == "san francisco, coextensive with san francisco (city)" & state == "california"
replace county = "orleans" if county == "orleans, coextensive with new orleans" & state == "louisiana"
replace county = "denver" if county == "denver, coextensive with denver (city)" & state == "colorado"
replace county = "philadelphia" if county == "philadelphia, coextensive with philadelphia (city)" & state == "pennsylvania"
replace county = "baltimore city" if county == "baltimore (city)" & state == "maryland"
replace county = "st louis city" if county == "st louis (city)" & state == "missouri"

*correct data entry errors found while checking that county totals sum to state totals
replace births_of_residents_of_area__att = 203 if births_of_residents_of_area__att == 200 & state == "mississippi" & county == "alcorn" & city_balance_total == "total"

*check that county names are consistent with 1970 census, except for known deviations
*(runs inside preserve/restore so the merge does not alter the working data)
preserve
egen countyr_state = concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
*every observation must either match (_m==3) or be one of the listed exceptions
assert _m==3 ///
    | county=="armstrong" & state=="south dakota" ///
    | county=="washington" & state=="south dakota" ///
    | county=="yellowstone national park (part)" & state=="idaho" ///
    | county=="yellowstone national park" & state=="idaho" ///
    | county=="yellowstone national park (part)" & state=="montana" ///
    | county=="yellowstone national park (part)" & state=="wyoming" ///
    | county=="yellowstone national park (total)" & state=="wyoming" ///
    | county=="yellowstone national park" & state=="montana" ///
    | county=="park (excl yell nat park)" & state=="montana" ///
    | county=="yellowstone national park, part" & state=="montana" ///
    | county=="yellowstone national park, part" & state=="wyoming" ///
    | county=="yellowstone national park, total" & state=="wyoming" ///
    | county=="park" & state=="montana" ///
    | county=="new york city" & state=="new york" ///
    | county=="bronx" & state=="new york" ///
    | county=="kings" & state=="new york" ///
    | county=="new york" & state=="new york" ///
    | county=="queens" & state=="new york" ///
    | county=="richmond" & state=="new york" ///
    | county=="ormsby" & state=="nevada" ///
    | county=="carson city" & state=="nevada" ///
    | county=="los alamos" & state=="new mexico" ///
    | county=="menominee" & state=="wisconsin" ///
    | county=="total" ///
    | state=="alaska" | state=="hawaii" | state=="virginia" | state=="dc"
restore

*check for mistakes/misspellings in state names
*(string inlist() takes at most 9 candidate values per call, hence the groups)
assert inlist(state, "alabama", "alaska", "arizona", "arkansas", "california", "colorado", "connecticut", "delaware", "district of columbia") ///
    | inlist(state, "florida", "georgia", "hawaii", "idaho", "illinois", "indiana", "iowa", "kansas", "kentucky") ///
    | inlist(state, "louisiana", "maine", "maryland", "massachusetts", "michigan", "minnesota", "mississippi", "missouri", "montana") ///
    | inlist(state, "nebraska", "nevada", "new hampshire", "new jersey", "new mexico", "new york", "north carolina", "north dakota", "ohio") ///
    | inlist(state, "oklahoma", "oregon", "pennsylvania", "rhode island", "south carolina", "south dakota", "tennessee", "texas", "utah") ///
    | inlist(state, "vermont", "virginia", "washington", "west virginia", "wisconsin", "wyoming")

*data checks for columns summing to county total
gen temp = births_of_residents_of_area__att + births_of_residents_of_area__at0 + births_of_residents_of_area__at1 + births_of_residents_of_area__at2
list if temp != births__total
*checked .pdf, these are data errors not data entry errors
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf
label var page_of_pdf "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
*label the variable by its full name so this does not depend on variable abbreviation
label var births_h_p "births by residence: physician (in hospital)"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician (not in hospital)"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"
rename births_of_residents_of_area__at2 births_o
label var births_o "births by residence: other and not specified"

*generate year variable
gen year = 1945
label var year "year"

*check that observations are unique
egen tag = tag(state county sub_county)
assert tag == 1
drop tag

list in 1/1
desc
sum
saveold clean_natality1945.dta, replace
clear

**
*1946 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1946_2.cv.pdf
*table 2 (section b, counties)
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1946.dta
sum
desc
replace county = lower(county)
replace state = lower(state)
replace city_balance_total = lower(city_balance_total)
replace race = lower(race)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
count if births__total == .
replace births__total = 0 if births__total == .
count if births_of_residents_of_area__att == .
replace births_of_residents_of_area__att = 0 if births_of_residents_of_area__att == .
*recode blank cells to zero for two attendant-count columns
*(count writes the number of blank cells to the log before they are recoded)
foreach v of varlist births_of_residents_of_area__at0 births_of_residents_of_area__at1 {
    count if `v'==.
    replace `v'=0 if `v'==.
}

*check that all pdf pages appear to be in the data
*consecutive rows must be on the same page or on the next page
*stable: a plain sort orders rows with the same page number randomly; the file
*header says the raw natality data must not be re-sorted, so keep the original
*row order within each page and make the saved row order reproducible
sort page__of_pdf_, stable
gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1]
assert temp==0|temp==1|temp==.
drop temp

*clean data entry errors
*each rule identifies its target row by the current (wrong) label plus the
*births total, so the statement order below must be kept
replace county="oscoda" if county=="osceola" & state=="michigan" & births__total==63
replace city_balance_total="atlanta (part)" if city_balance_total=="atlanta" & county=="fulton" & state=="georgia" & race=="white" & births__total==6020
replace city_balance_total="atlanta (part)" if city_balance_total=="atlanta" & county=="fulton" & state=="georgia" & race=="nonwhite" & births__total==2582
replace city_balance_total="atlanta (total)" if city_balance_total=="atlanta" & county=="fulton" & state=="georgia" & race=="white" & births__total==6951
replace city_balance_total="atlanta (total)" if city_balance_total=="atlanta" & county=="fulton" & state=="georgia" & race=="nonwhite" & births__total==2648
replace city_balance_total="lafayette" if city_balance_total=="total" & county=="lafayette" & state=="louisiana" & race=="white" & births__total==474
replace city_balance_total="lafayette" if city_balance_total=="total" & county=="lafayette" & state=="louisiana" & race=="nonwhite" & births__total==214
replace city_balance_total="rocky mount (part)" if city_balance_total=="total" & county=="edgecombe" & state=="north carolina" & race=="white" & births__total==160
replace city_balance_total="rocky mount (part)" if city_balance_total=="total" & county=="edgecombe" & state=="north carolina" & race=="nonwhite" & births__total==132
replace city_balance_total="balance of county" if city_balance_total=="total" & county=="gaston" & state=="north carolina" & race=="white" & births__total==1686
replace city_balance_total="balance of county" if city_balance_total=="total" & county=="gaston" & state=="north carolina" & race=="nonwhite" & births__total==228
replace city_balance_total="florence" if city_balance_total=="total" & county=="florence" & state=="south carolina" & race=="white" & births__total==365
replace city_balance_total="florence" if city_balance_total=="total" & county=="florence" & state=="south carolina" & race=="nonwhite" & births__total==214
replace city_balance_total="navarro" if city_balance_total=="total" & county=="navarro" & state=="texas" & race=="white" & births__total==339
replace city_balance_total="navarro" if city_balance_total=="total" & county=="navarro" & state=="texas" & race=="nonwhite" & births__total==138

*virginia rows relabelled as independent cities, identified by births total
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="total" & births__total==4037
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="white" & births__total==2778
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="nonwhite" & births__total==1259
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births__total==5272
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births__total==3745
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births__total==1527
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births__total==1628
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="white" & births__total==1317
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="nonwhite" & births__total==311

*fix county name misspellings (relative to county names in the 1970 census)
*strip every period, then rewrite "ste " as "st "
replace county=subinstr(county, ".", "",.)
replace county=subinstr(county, "ste ", "st ",.)
*local helpers for the 1946 county-name and births-total corrections
*nat_respell: rename county `raw' to `fixed' within state `st'
capture program drop nat_respell
program define nat_respell
    args fixed raw st
    replace county="`fixed'" if county=="`raw'" & state=="`st'"
end
*nat_fixtotal: change births__total from `bad' to `good' on the
*total/total row of county `cty' in state `st'
capture program drop nat_fixtotal
program define nat_fixtotal
    args good bad st cty
    replace births__total=`good' if births__total==`bad' & state=="`st'" & county=="`cty'" & city_balance_total=="total" & race=="total"
end

*county spellings: corrected name, name as entered, state
nat_respell "desoto" "de soto" "florida"
nat_respell "cochrane" "cochran" "texas"
nat_respell "san francisco" "san francisco, coextensive with san francisco (city)" "california"
nat_respell "orleans" "orleans, coextensive with new orleans" "louisiana"
nat_respell "denver" "denver, coextensive with denver (city)" "colorado"
nat_respell "philadelphia" "philadelphia, coextensive with philadelphia (city)" "pennsylvania"
nat_respell "baltimore city" "baltimore (city)" "maryland"
nat_respell "st louis city" "st louis (city)" "missouri"
nat_respell "chisago" "chicago" "minnesota"
nat_respell "clark" "clarke" "ohio"
nat_respell "cook" "cooke" "illinois"
nat_respell "delaware" "delavare" "oklahoma"
nat_respell "hancock" "hencock" "mississippi"
nat_respell "love" "lowe" "oklahoma"
nat_respell "nobles" "nohles" "minnesota"
nat_respell "orleans" "orleane" "vermont"
nat_respell "otoe" "otos" "nebraska"
nat_respell "penobscot" "penchscot" "maine"
nat_respell "pittsburg" "pittaburg" "oklahoma"
nat_respell "platte" "platts" "nebraska"
nat_respell "sherburne" "shorburne" "minnesota"
nat_respell "texas" "teras" "oklahoma"
nat_respell "wagoner" "wegoner" "oklahoma"
nat_respell "washington" "weshington" "oklahoma"

*correct data entry errors found while checking that county totals sum to state totals
*arguments: corrected value, value as entered, state, county
nat_fixtotal 1878 1678 "alabama" "talladega"
nat_fixtotal 418 416 "arkansas" "drew"
nat_fixtotal 13787 13767 "idaho" "total"
nat_fixtotal 482 492 "illinois" "perry"
nat_fixtotal 478 476 "iowa" "dallas"
nat_fixtotal 265 285 "kentucky" "green"
nat_fixtotal 379 378 "kentucky" "rowan"
nat_fixtotal 862 962 "michigan" "gratiot"
nat_fixtotal 2228 2226 "missouri" "greene"
nat_fixtotal 734 754 "new york" "tioga"
nat_fixtotal 1182 1162 "oregon" "klamath"
nat_fixtotal 887 987 "texas" "williamson"
nat_fixtotal 563 1126 "utah" "box elder"
nat_fixtotal 1296 1295 "wisconsin" "eau claire"
nat_fixtotal 86 85 "wisconsin" "florence"
nat_fixtotal 551 651 "wyoming" "sweetwater"

*helpers are local to this step
program drop nat_respell nat_fixtotal
*1946 data entry corrections to births_of_residents_of_area__att,
*applied to the total/total row of the named county
*nat_fixatt: change the column from `bad' to `good' for county `cty' in state `st'
capture program drop nat_fixatt
program define nat_fixatt
    args good bad st cty
    replace births_of_residents_of_area__att=`good' if births_of_residents_of_area__att==`bad' & state=="`st'" & county=="`cty'" & city_balance_total=="total" & race=="total"
end

*arguments: corrected value, value as entered, state, county
nat_fixatt 138 139 "florida" "washington"
nat_fixatt 123 125 "georgia" "mcduffie"
nat_fixatt 288 298 "georgia" "wayne"
nat_fixatt 428 426 "iowa" "dallas"
nat_fixatt 258 259 "iowa" "humboldt"
nat_fixatt 365 366 "louisiana" "franklin"
nat_fixatt 1328 1329 "maryland" "washington"
nat_fixatt 7105 7106 "massachusetts" "norfolk"
nat_fixatt 318 319 "minnesota" "mille lacs"
nat_fixatt 628 629 "minnesota" "morrison"
nat_fixatt 66 86 "montana" "mccone"
nat_fixatt 126 128 "montana" "phillips"
nat_fixatt 175 176 "montana" "pondera"
nat_fixatt 438 439 "nebraska" "madison"
nat_fixatt 9448 9449 "new jersey" "bergen"
nat_fixatt 37 57 "new mexico" "catron"
nat_fixatt 119 118 "new mexico" "union"
nat_fixatt 4135 4136 "new york" "broome"
nat_fixatt 2423 2425 "new york" "chautauqua"
nat_fixatt 386 366 "north dakota" "barnes"
nat_fixatt 106 108 "oklahoma" "dewey"
nat_fixatt 483 493 "south carolina" "lexington"
nat_fixatt 580 590 "south carolina" "marion"

*helper is local to this step
program drop nat_fixatt
*1946 data entry corrections to births_of_residents_of_area__att (total/total rows)
replace births_of_residents_of_area__att=5738 if births_of_residents_of_area__att==5758 & state=="tennessee" & county=="davidson" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=38 if births_of_residents_of_area__att==39 & state=="texas" & county=="kimble" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=561 if births_of_residents_of_area__att==563 & state=="utah" & county=="box elder" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=289 if births_of_residents_of_area__att==299 & state=="west virginia" & county=="greenbrier" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=338 if births_of_residents_of_area__att==339 & state=="wyoming" & county=="big horn" & city_balance_total=="total" & race=="total"

*correct data entry errors found while checking that white+nonwhite=total
replace births__total=228 if births__total==226 & state=="mississippi" & county=="yalobusha" & race=="nonwhite" & city_balance_total=="total"
replace births__total=105 if births__total==103 & state=="montana" & county=="rosebud" & race=="white" & city_balance_total=="total"
replace births__total=368 if births__total==369 & state=="new jersey" & county=="atlantic" & race=="nonwhite" & city_balance_total=="total"
replace births_of_residents_of_area__att=56 if births_of_residents_of_area__att==66 & state=="louisiana" & county=="assumption" & race=="nonwhite" & city_balance_total=="total"
replace births_of_residents_of_area__att=268 if births_of_residents_of_area__att==269 & state=="south carolina" & county=="colleton" & race=="white" & city_balance_total=="total"

*check that county names are consistent with 1970 census, except for known deviations
*the merge runs inside preserve/restore, so the key variable and _merge are discarded
*every row must either match (_m==3) or be one of the listed exceptions
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
assert _m==3| ///
    county=="armstrong" & state=="south dakota"| ///
    county=="washington" & state=="south dakota"| ///
    county=="yellowstone national park (part)" & state=="idaho"| ///
    county=="yellowstone national park" & state=="idaho"| ///
    county=="yellowstone national park (part)" & state=="montana"| ///
    county=="yellowstone national park (part)" & state=="wyoming"| ///
    county=="yellowstone national park (total)" & state=="wyoming"| ///
    county=="yellowstone national park" & state=="montana"| ///
    county=="park (excl yell nat park)" & state=="montana"| ///
    county=="yellowstone national park, part" & state=="montana"| ///
    county=="yellowstone national park, part" & state=="wyoming"| ///
    county=="yellowstone national park, total" & state=="wyoming"| ///
    county=="park" & state=="montana"| ///
    county=="new york city" & state=="new york"| ///
    county=="bronx" & state=="new york"| ///
    county=="kings" & state=="new york"| ///
    county=="new york" & state=="new york"| ///
    county=="queens" & state=="new york"| ///
    county=="richmond" & state=="new york"| ///
    county=="ormsby" & state=="nevada"| ///
    county=="carson city" & state=="nevada"| ///
    county=="los alamos" & state=="new mexico"| ///
    county=="menominee" & state=="wisconsin"| ///
    county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc"
restore

*check for mistakes/misspellings in state names
*every observation must carry one of the 51 lower-case names listed below
assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california"| ///
    state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"| ///
    state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"| ///
    state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"| ///
    state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"| ///
    state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"| ///
    state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york"| ///
    state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon"| ///
    state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota"| ///
    state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia"| ///
    state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming"

*data checks for columns summing to county total
*not applicable; see footnote 1

*clean and label variables
rename page__of_pdf_ page_of_pdf
label var page_of_pdf "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
*label by the full new name (births_h_p), not the abbreviation births_h,
*so the command does not rely on variable-name abbreviation
label var births_h_p "births by residence: physician (in hospital)"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician (not in hospital)"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"

*generate year variable
gen year=1946
label var year "year"

*check that observations are unique
*(one row per state x county x sub_county x race)
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

*print the first row and the summaries to the log, then save
list in 1/1
desc
sum
saveold clean_natality1946.dta, replace
clear

**
*1947 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1947_2.cv.pdf
*table 1
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1947.dta
sum
desc

*lower-case all string identifiers so later comparisons use lower-case literals
replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
count if births__total==.
replace births__total=0 if births__total==.
count if births_attended_by_physician_in_==.
replace births_attended_by_physician_in_=0 if births_attended_by_physician_in_==.
*same blank-to-zero recoding for the other two attendant columns
*(count writes the number of blank cells to the log before they are recoded)
foreach v of varlist births_attended_by_physician_not births_attended_by_midwife {
    count if `v'==.
    replace `v'=0 if `v'==.
}

*check that all pdf pages appear to be in the data
*consecutive rows must be on the same page or on the next page
*stable: a plain sort orders rows with the same page number randomly; the file
*header says the raw natality data must not be re-sorted, so keep the original
*row order within each page and make the row order reproducible
sort page__of_pdf_, stable
gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1]
assert temp==0|temp==1|temp==.
drop temp

*clean data entry errors
append using natality1947_append.dta
*one county that was not originally entered
*lower-case again so the appended rows match the rest of the data
replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)

*county/state spellings and page-header text entered as state names
replace county="oscoda" if county=="ouceola" & state=="michigan" & births__total==84
replace state="virginia" if state=="vieginia"
replace state="virginia" if state=="virginia-cont."
replace state="virginia" if state=="independent cities"
replace state="virginia" if state=="independent cities-cont."
replace state="west virginia" if state=="west vitginia"
replace city_balance_total="district 1511, center hill" if state=="georgia" & county=="fulton" & city_balance_total=="total" & race=="white" & births__total==0
replace city_balance_total="district 1511, center hill" if state=="georgia" & county=="fulton" & city_balance_total=="total" & race=="nonwhite" & births__total==0
replace city_balance_total="centralia" if city_balance_total=="centrulia" & county=="clinton" & state=="illinois"

*cell values set directly (no check on the value as entered)
replace births__total=834 if state=="massachusetts" & county=="middlesex" & city_balance_total=="arlington (town)" & race=="total"
replace births__total=786 if state=="california" & county=="san mateo" & city_balance_total=="san mateo" & race=="total"
replace births_attended_by_physician_in_=439 if state=="michigan" & county=="macomb" & city_balance_total=="st. clair shores" & race=="total"
replace births__total=382 if state=="illinois" & county=="boone" & city_balance_total=="total" & race=="total"
replace births_attended_by_physician_in_=866 if state=="indiana" & county=="bartholomew" & city_balance_total=="total" & race=="total"
replace births_attended_by_physician_in_=332 if state=="iowa" & county=="chickasew" & city_balance_total=="total" & race=="total"
replace births_attended_by_physician_in_=1832 if state=="michigan" & county=="monroe" & city_balance_total=="total" & race=="total"
replace births_attended_by_midwife=72 if state=="arizona" & county=="santa cruz" & city_balance_total=="total" & race=="total"
replace births_attended_by_midwife=5 if state=="arizona" & county=="yavapai" & city_balance_total=="total" & race=="total"
replace births_attended_by_physician_not=38 if state=="michigan" & county=="kalkaska" & city_balance_total=="total" & race=="total"
*NOTE(review): this sets the kent/"grend rapids" cell to 4891 and a later rule
*changes 4891 to 4881 for the same cell, so the net value is 4881 - confirm intended
replace births_attended_by_physician_in_=4891 if state=="michigan" & county=="kent" & city_balance_total=="grend rapids" & race=="total"
replace births_attended_by_physician_in_=266 if state=="ohio" & county=="mahoning" & city_balance_total=="struthers" & race=="total"
replace births_attended_by_physician_not=40 if state=="oklahoma" & county=="kay" & city_balance_total=="total" & race=="total"

*births__total corrections that also check the value as entered
replace births__total=820 if state=="illinois" & county=="kane" & city_balance_total=="elgin" & race=="total" & births__total==620
replace births__total=1473 if state=="florida" & city_balance_total=="pensacola" & race=="total" & births__total==1472
replace births__total=738 if state=="louisiana" & county=="avoyelles" & city_balance_total=="total" & births__total==739
replace births__total=1387 if state=="louisiana" & county=="caddo" & city_balance_total=="shreveport" & births__total==1397
replace births__total=86 if state=="michigan" & county=="lake" & city_balance_total=="total" & race=="white" & births__total==96
replace births__total=10539 if state=="ohio" & county=="hamilton" & city_balance_total=="cincinnati" & race=="white" & births__total==10639
replace births__total=1288 if state=="south carolina" & county=="orangeburg" & city_balance_total=="balance" & race=="nonwhite" & births__total==1298

*more cell values set directly
replace births_attended_by_physician_in_=482 if county=="lawrence" & state=="alabama" & city_balance_total=="total" & race=="white"
replace births__total=2318 if state=="california" & county=="fresno" & city_balance_total=="fresno" & race=="total"
replace births__total=1080 if state=="california" & county=="los angeles" & city_balance_total=="south gate" & race=="total"
replace births__total=1528 if state=="california" & county=="monterey" & city_balance_total=="balance" & race=="total"
replace births__total=1143 if state=="colorado" & county=="weld" & city_balance_total=="balance" & race=="total"
replace births__total=418 if state=="connecticut" & county=="new haven" & city_balance_total=="ansonia" & race=="total"
replace births__total=582 if state=="idaho" & county=="ada" & city_balance_total=="balance" & race=="total"
replace births__total=865 if state=="illinois" & county=="champaign" & city_balance_total=="champaign" & race=="total"
replace births__total=914 if state=="illinois" & county=="champaign" & city_balance_total=="balance" & race=="total"
replace births__total=284 if state=="illinois" & county=="fulton" & city_balance_total=="canton" & race=="total"
replace births__total=816 if state=="michigan" & county=="monroe" & city_balance_total=="monroe" & race=="total"
replace births_attended_by_physician_in_=803 if state=="michigan" & county=="monroe" & city_balance_total=="monroe" & race=="total"
replace births__total=275 if state=="new jersey" & county=="passaic" & city_balance_total=="hawthorne" & race=="total"
replace births__total=1739 if state=="new york" & county=="saratoga" & city_balance_total=="total" & race=="total"

*local helpers for the two regular lists that follow
*nat_fixin: set births_attended_by_physician_in_ to `good' on the row identified by
*state, county, city/balance/total label and race, if the entered value is `bad'
capture program drop nat_fixin
program define nat_fixin
    args good st cty sub racecat bad
    replace births_attended_by_physician_in_=`good' if state=="`st'" & county=="`cty'" & city_balance_total=="`sub'" & race=="`racecat'" & births_attended_by_physician_in_==`bad'
end
*nat_setsub: set city_balance_total to `newsub' on rows matching race, county
*and births total
capture program drop nat_setsub
program define nat_setsub
    args newsub racecat cty nbirths
    replace city_balance_total="`newsub'" if race=="`racecat'" & county=="`cty'" & births__total==`nbirths'
end

*births_attended_by_physician_in_ corrections
*arguments: corrected value, state, county, city/balance/total, race, value as entered
nat_fixin 468 "california" "los angeles" "total" "nonwhite" 458
nat_fixin 38 "florida" "polk" "lakeland" "nonwhite" 39
nat_fixin 18 "georgia" "colquitt" "total" "nonwhite" 16
nat_fixin 486 "georgia" "colquitt" "balance" "total" 466
nat_fixin 628 "illinois" "st. clair" "total" "nonwhite" 629
nat_fixin 853 "louisiana" "st. landry" "total" "white" 653
nat_fixin 218 "mississippi" "coahoma" "clarksdele" "total" 219
nat_fixin 189 "texas" "anderson" "palestine" "white" 129
nat_fixin 1086 "california" "los angeles" "compton" "total" 1096
nat_fixin 502 "california" "los angeles" "san gabriel" "total" 602
nat_fixin 2895 "california" "san mateo" "balance" "total" 2695
nat_fixin 611 "illinois" "jefferson" "total" "total" 811
nat_fixin 1148 "iowa" "cerro gordo" "total" "total" 1149
nat_fixin 491 "massachusetts" "middlesex" "wakefield (town)" "total" 493
nat_fixin 889 "michigan" "kalamazoo" "balance" "total" 899
nat_fixin 4881 "michigan" "kent" "grend rapids" "total" 4891
nat_fixin 498 "michigan" "midlend" "balance" "total" 499
nat_fixin 1888 "michigan" "muskegon" "muskegon" "total" 1968
nat_fixin 1584 "minnesota" "st. loufa" "balance" "total" 1684
nat_fixin 158 "nebraska" "hall" "balance" "total" 159
nat_fixin 319 "south dakota" "davison" "mitohell" "total" 318
nat_fixin 1468 "washington" "snohomish" "balance" "total" 1469

*city/balance/total labels assigned to rows identified by race, county and births total
*arguments: new label, race, county, births total
*NOTE(review): these rules do not condition on state or on the current label, and
*county names can repeat across states - confirm no unintended rows match
*the rules are applied in this order; do not reorder them
nat_setsub "balance" "nonwhite" "pima" 147
nat_setsub "balance" "white" "pima" 403
nat_setsub "el centro" "nonwhite" "imperial" 49
nat_setsub "el centro" "white" "imperial" 337
nat_setsub "balance" "nonwhite" "imperial" 110
nat_setsub "balance" "white" "imperial" 1027
nat_setsub "balance" "nonwhite" "los angeles" 524
nat_setsub "balance" "white" "los angeles" 17919
nat_setsub "los angeles" "nonwhite" "los angeles" 4971
nat_setsub "los angeles" "white" "los angeles" 42064
nat_setsub "moultric" "nonwhite" "colquitt" 133
nat_setsub "moultric" "white" "colquitt" 261
nat_setsub "balance" "nonwhite" "colquitt" 120
nat_setsub "balance" "white" "colquitt" 563
nat_setsub "atlanta (part)" "white" "de kalb" 859
nat_setsub "atlanta (part)" "nonwhite" "de kalb" 82
nat_setsub "decatur" "white" "de kalb" 464
nat_setsub "decatur" "nonwhite" "de kalb" 106
nat_setsub "balance" "white" "de kalb" 1355
nat_setsub "balance" "nonwhite" "de kalb" 251
nat_setsub "albany" "white" "dougherty" 491
nat_setsub "albany" "nonwhite" "dougherty" 338
nat_setsub "balance" "white" "dougherty" 90
nat_setsub "balance" "nonwhite" "dougherty" 111
nat_setsub "rome" "white" "floyd" 801
nat_setsub "rome" "nonwhite" "floyd" 172
nat_setsub "balance" "white" "floyd" 692
nat_setsub "balance" "nonwhite" "floyd" 49
nat_setsub "atlanta (part)" "white" "fulton" 5204
nat_setsub "atlanta (part)" "nonwhite" "fulton" 3020
nat_setsub "atlanta (total)" "white" "fulton" 6063
nat_setsub "atlanta (total)" "nonwhite" "fulton" 3102
nat_setsub "east point" "white" "fulton" 434
nat_setsub "east point" "nonwhite" "fulton" 75
nat_setsub "balance" "white" "fulton" 3379
nat_setsub "balance" "nonwhite" "fulton" 639
nat_setsub "brunswick" "white" "glynn" 415
nat_setsub "brunswick" "nonwhite" "glynn" 217
nat_setsub "balance" "white" "glynn" 158
nat_setsub "balance" "nonwhite" "glynn" 50
nat_setsub "gainesville" "white" "hall" 302
nat_setsub "gainesville" "nonwhite" "hall" 95
nat_setsub "valdosta" "white" "lowndos" 369
nat_setsub "valdosta" "nonwhite" "lowndos" 217
nat_setsub "balance" "white" "lowndos" 254
nat_setsub "balance" "nonwhite" "lowndos" 202
nat_setsub "elgin (part)" "total" "kane" 820
nat_setsub "elgin (total)" "total" "kane" 832
nat_setsub "centralia (part)" "total" "marion" 323
nat_setsub "centralia (part)" "total" "clinton" 6
nat_setsub "centralia (total)" "total" "marion" 329
nat_setsub "monroe" "white" "ouachita" 589
nat_setsub "monroe" "nonwhite" "ouachita" 378
nat_setsub "balance" "white" "ouachita" 615
nat_setsub "balance" "nonwhite" "ouachita" 303
nat_setsub "alexandria" "white" "rapides" 663
nat_setsub "alexandria" "nonwhite" "rapides" 424
nat_setsub "balance" "white" "rapides" 1041
nat_setsub "balance" "nonwhite" "rapides" 405
nat_setsub "st. cloud (part)" "total" "sherburne" 37
nat_setsub "st. cloud (part)" "total" "stearns" 727
nat_setsub "st. cloud (part)" "total" "benton" 80
nat_setsub "st. cloud (total)" "total" "stearns" 844
nat_setsub "new rochelle" "white" "westchester" 1070
nat_setsub "new rochelle" "nonwhite" "westchester" 170
nat_setsub "ossining" "white" "westchester" 283
nat_setsub "ossining" "nonwhite" "westchester" 28
nat_setsub "galveston" "white" "galveston" 1524
nat_setsub "galveston" "nonwhite" "galveston" 503
nat_setsub "denison" "white" "grayson" 501
nat_setsub "denison" "nonwhite" "grayson" 73
nat_setsub "sherman" "white" "grayson" 450
nat_setsub "sherman" "nonwhite" "grayson" 48
nat_setsub "longview" "white" "gregg" 521
nat_setsub "longview" "nonwhite" "gregg" 126
nat_setsub "salinas" "white" "monterey" 1042
nat_setsub "salinas" "nonwhite" "monterey" 67
nat_setsub "balance" "white" "sacramento" 2838
nat_setsub "balance" "nonwhite" "sacramento" 207
nat_setsub "balance" "white" "san joaquin" 2713
nat_setsub "balance" "nonwhite" "san joaquin" 257
*these four rules additionally require the label as entered to be "do"
replace city_balance_total="pleasantville" if race=="white" & county=="atlantic" & births__total==253 & city_balance_total=="do"
replace city_balance_total="pleasantville" if race=="nonwhite" & county=="atlantic" & births__total==47 & city_balance_total=="do"
replace city_balance_total="hackensack" if race=="white" & county=="bergen" & births__total==535 & city_balance_total=="do"
replace city_balance_total="hackensack" if race=="nonwhite" & county=="bergen" & births__total==63 & city_balance_total=="do"
nat_setsub "greenville" "white" "pitt" 217
nat_setsub "greenville" "nonwhite" "pitt" 157

*helpers are local to this step
program drop nat_fixin nat_setsub

replace city_balance_total="balance" if city_balance_total=="balence"

*virginia rows relabelled as independent cities, identified by births total
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="total" & births__total==4442
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="white" & births__total==3000
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="nonwhite" & births__total==1442
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births__total==5771
replace county="richmond (ind.
city)" if county=="richmond" & state=="virginia" & race=="white" & births__total==3985 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births__total==1786 *fix county name misspellings (relative to county names in the 1970 census) replace county=subinstr(county,"st. ","st ",.) replace county=subinstr(county,"ste. ", "st ",.) replace county="monroe" if county=="monros" & state=="alabama" replace county="lassen" if county=="lasson" & state=="california" replace county="mendocino" if county=="mandocino" & state=="california" replace county="orange" if county=="orango" & state=="california" replace county="san francisco" if county=="san francisco, coextensive with san francisco" & state=="california" replace county="san luis obispo" if county=="san luis obiapo" & state=="california" replace county="siskiyou" if county=="siekiyou" & state=="california" replace county="stanislaus" if county=="stenislaus" & state=="california" replace county="baca" if county=="beca" & state=="colorado" replace county="denver" if county=="denver, coertensive with denver" & state=="colorado" replace county="hinsdale" if county=="hinodale" & state=="colorado" replace county="yuma" if county=="yume" & state=="colorado" replace county="sussex" if county=="susser" & state=="delaware" replace county="citrus" if county=="citrue" & state=="florida" replace county="desoto" if county=="de soto" & state=="florida" replace county="escambia" if county=="escembia" & state=="florida" replace county="gadsden" if county=="gadaden" & state=="florida" replace county="gilchrist" if county=="gilchriet" & state=="florida" replace county="santa rosa" if county=="santa roan" & state=="florida" replace county="sarasota" if county=="sarascta" & state=="florida" replace county="dawson" if county=="davson" & state=="georgia" replace county="jenkins" if county=="jenkine" & state=="georgia" replace county="lowndes" if county=="lowndos" & state=="georgia" replace 
county="lumpkin" if county=="lampkin" & state=="georgia" replace county="meriwether" if county=="merivether" & state=="georgia" replace county="putnam" if county=="putnem" & state=="georgia" replace county="franklin" if county=="frenklin" & state=="idaho" replace county="cass" if county=="case" & state=="illinois" replace county="de witt" if county=="de hitt" & state=="illinois" replace county="du page" if county=="du pags" & state=="illinois" replace county="gallatin" if county=="gallstin" & state=="illinois" replace county="lawrence" if county=="lewrence" & state=="illinois" replace county="mclean" if county=="mcleen" & state=="illinois" replace county="grant" if county=="grent" & state=="indiana" replace county="buchanan" if county=="buchanen" & state=="iowa" replace county="chickasaw" if county=="chickasew" & state=="iowa" replace county="hamilton" if county=="hemilton" & state=="iowa" replace county="jasper" if county=="jeaper" & state=="iowa" replace county="marion" if county=="marison" & state=="iowa" replace county="osceola" if county=="oscaola" & state=="iowa" replace county="wabaunsee" if county=="webaunsee" & state=="kansas" replace county="marshall" if county=="marshell" & state=="kentucky" replace county="oldham" if county=="oldhem" & state=="kentucky" replace county="owsley" if county=="owaley" & state=="kentucky" replace county="claiborne" if county=="claihorne" & state=="louisiana" replace county="orleans" if county=="orleans, coertensive with new orleans" & state=="louisiana" replace county="st helena" if county=="st helema" & state=="louisiana" replace county="webster" if county=="webater" & state=="louisiana" replace county="west feliciana" if county=="west feliciane" & state=="louisiana" replace county="baltimore city" if county=="baltimore (city)" & state=="maryland" replace county="ingham" if county=="inghem" & state=="michigan" replace county="manistee" if county=="menistee" & state=="michigan" replace county="marquette" if 
county=="merquette" & state=="michigan" replace county="midland" if county=="midlend" & state=="michigan" replace county="missaukee" if county=="missankee" & state=="michigan" replace county="newaygo" if county=="newaygb" & state=="michigan" replace county="ontonagon" if county=="ontanagon" & state=="michigan" replace county="osceola" if county=="ouceola" & state=="michigan" replace county="chisago" if county=="chicago" & state=="minnesota" replace county="isanti" if county=="isenti" & state=="minnesota" replace county="jackson" if county=="jeckson" & state=="minnesota" replace county="pipestone" if county=="pipeotone" & state=="minnesota" replace county="st louis" if county=="st loufa" & state=="minnesota" replace county="traverse" if county=="traveroe" & state=="minnesota" replace county="wabasha" if county=="weheoha" & state=="minnesota" replace county="wadena" if county=="wedena" & state=="minnesota" replace county="waseca" if county=="weseca" & state=="minnesota" replace county="noxubee" if county=="norubee" & state=="mississippi" replace county="st louis city" if county=="st louis (city)" & state=="missouri" replace county="deer lodge" if county=="deer lode" & state=="montana" replace county="jefferson" if county=="kefferson" & state=="montana" replace county="holt" if county=="halt" & state=="nebraska" replace county="knox" if county=="knor" & state=="nebraska" replace county="morrill" if county=="norrill" & state=="nebraska" replace county="pawnee" if county=="paunee" & state=="nebraska" replace county="webster" if county=="webater" & state=="nebraska" replace county="lea" if county=="loa" & state=="new mexico" replace county="chenango" if county=="chemango" & state=="new york" replace county="forsyth" if county=="foreyth" & state=="north carolina" replace county="gates" if county=="getes" & state=="north carolina" replace county="greene" if county=="greens" & state=="north carolina" replace county="halifax" if county=="halifar" & state=="north carolina" 
*county name misspelling fixes (relative to county names in the 1970 census), continued
*each fix matches one exact misspelling within one state

*north carolina
replace county="hyde" if county=="hydo" & state=="north carolina"
replace county="onslow" if county=="onslov" & state=="north carolina"
replace county="randolph" if county=="rendolph" & state=="north carolina"
replace county="swain" if county=="svain" & state=="north carolina"
replace county="transylvania" if county=="trannylvania" & state=="north carolina"

*north dakota
replace county="cavalier" if county=="cavaliver" & state=="north dakota"
replace county="bottineau" if county=="bottineeu" & state=="north dakota"

*ohio
replace county="adams" if county=="adame" & state=="ohio"
replace county="clark" if county=="clarke" & state=="ohio"
replace county="ross" if county=="roso" & state=="ohio"

*oklahoma
replace county="mcintosh" if county=="mcintoch" & state=="oklahoma"
replace county="pushmataha" if county=="puahmataha" & state=="oklahoma"

*oregon
replace county="linn" if county=="lim" & state=="oregon"
replace county="marion" if county=="narion" & state=="oregon"
replace county="multnomah" if county=="multnomeh" & state=="oregon"
replace county="sherman" if county=="shermen" & state=="oregon"
replace county="umatilla" if county=="umatille" & state=="oregon"
replace county="wheeler" if county=="whecler" & state=="oregon"

*pennsylvania
replace county="adams" if county=="adame" & state=="pennsylvania"
replace county="chester" if county=="chenter" & state=="pennsylvania"
replace county="greene" if county=="greens" & state=="pennsylvania"
replace county="indiana" if county=="indiena" & state=="pennsylvania"
replace county="philadelphia" if county=="philadelphia, coextensive with philadelphia (city)" & state=="pennsylvania"

*south carolina
replace county="lancaster" if county=="lanceater" & state=="south carolina"
replace county="lexington" if county=="lerington" & state=="south carolina"
replace county="mccormick" if county=="mccomaick" & state=="south carolina"
replace county="oconee" if county=="oconoe" & state=="south carolina"

*south dakota
replace county="haakon" if county=="heakon" & state=="south dakota"
replace county="lyman" if county=="lymen" & state=="south dakota"
replace county="mcpherson" if county=="mcphernon" & state=="south dakota"
replace county="mellette" if county=="melletts" & state=="south dakota"
replace county="minnehaha" if county=="minnehahe" & state=="south dakota"

*tennessee
replace county="decatur" if county=="decetur" & state=="tennessee"
replace county="fentress" if county=="fentreas" & state=="tennessee"
replace county="hamblen" if county=="hemblen" & state=="tennessee"
replace county="sumner" if county=="summer" & state=="tennessee"

*texas
replace county="cass" if county=="cano" & state=="texas"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="gillespie" if county=="gilleapie" & state=="texas"
replace county="grimes" if county=="crimes" & state=="texas"
replace county="hutchinson" if county=="butchinson" & state=="texas"
replace county="kendall" if county=="kendell" & state=="texas"
replace county="nacogdoches" if county=="nacogdochee" & state=="texas"
replace county="rains" if county=="raina" & state=="texas"
replace county="randall" if county=="randell" & state=="texas"
replace county="roberts" if county=="roberto" & state=="texas"

*west virginia
replace county="pleasants" if county=="pleasante" & state=="west virginia"
replace county="raleigh" if county=="releigh" & state=="west virginia"

*wisconsin
replace county="bayfield" if county=="beyfield" & state=="wisconsin"
replace county="eau claire" if county=="eau clairs" & state=="wisconsin"

*wyoming
replace county="albany" if county=="albeny" & state=="wyoming"
replace county="campbell" if county=="compbell" & state=="wyoming"
replace county="laramie" if county=="laremie" & state=="wyoming"

*correct data entry errors found while checking that county totals sum to state totals
*every correction is guarded by the wrong value, so re-running cannot overwrite an already-correct cell
*cty_all selects the all-races county-total row
local cty_all city_balance_total=="total" & race=="total"

*total births
replace births__total=1367 if births__total==1357 & state=="alabama" & county=="marshall" & `cty_all'
replace births__total=8468 if births__total==8469 & state=="california" & county=="contra costa" & `cty_all'
replace births__total=780 if births__total==790 & state=="colorado" & county=="adams" & `cty_all'
replace births__total=168 if births__total==166 & state=="georgia" & county=="charlton" & `cty_all'
replace births__total=261 if births__total==251 & state=="kentucky" & county=="larue" & `cty_all'

*births_attended_by_physician_in_
replace births_attended_by_physician_in_=458 if births_attended_by_physician_in_==459 & state=="illinois" & county=="shelby" & `cty_all'
replace births_attended_by_physician_in_=281 if births_attended_by_physician_in_==291 & state=="iowa" & county=="shelby" & `cty_all'
replace births_attended_by_physician_in_=187 if births_attended_by_physician_in_==197 & state=="iowa" & county=="van buren" & `cty_all'
replace births_attended_by_physician_in_=81 if births_attended_by_physician_in_==91 & state=="kentucky" & county=="carlisle" & `cty_all'
replace births_attended_by_physician_in_=138 if births_attended_by_physician_in_==139 & state=="kentucky" & county=="crittenden" & `cty_all'
replace births_attended_by_physician_in_=228 if births_attended_by_physician_in_==229 & state=="louisiana" & county=="bienville" & `cty_all'
replace births_attended_by_physician_in_=143 if births_attended_by_physician_in_==145 & state=="nebraska" & county=="johnson" & `cty_all'
replace births_attended_by_physician_in_=500 if births_attended_by_physician_in_==600 & state=="tennessee" & county=="hamblen" & `cty_all'

*correct data entry errors found while checking that white+nonwhite=total
replace births__total=108 if births__total==106 & state=="georgia" & county=="charlton" & race=="white" & city_balance_total=="total"
*last white+nonwhite=total correction for the data saved below as year 1947
replace births_attended_by_physician_in_=68 if births_attended_by_physician_in_==69 & state=="louisiana" & county=="bienville" & race=="nonwhite" & city_balance_total=="total"

*check that county names are consistent with 1970 census, except for known deviations
*the merge runs inside preserve/restore, so the working data are left untouched
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
*every row must either match (_m==3) or be one of the listed exceptions
assert _m==3 ///
    | (county=="armstrong" & state=="south dakota") ///
    | (county=="washington" & state=="south dakota") ///
    | (county=="yellowstone national park (part)" & state=="idaho") ///
    | (county=="yellowstone national park" & state=="idaho") ///
    | (county=="yellowstone national park (part)" & state=="montana") ///
    | (county=="yellowstone national park (part)" & state=="wyoming") ///
    | (county=="yellowstone national park (total)" & state=="wyoming") ///
    | (county=="yellowstone national park" & state=="montana") ///
    | (county=="park (excl yell nat park)" & state=="montana") ///
    | (county=="yellowstone national park, part" & state=="montana") ///
    | (county=="yellowstone national park, part" & state=="wyoming") ///
    | (county=="yellowstone national park, total" & state=="wyoming") ///
    | (county=="park" & state=="montana") ///
    | (county=="new york city" & state=="new york") ///
    | (county=="bronx" & state=="new york") ///
    | (county=="kings" & state=="new york") ///
    | (county=="new york" & state=="new york") ///
    | (county=="queens" & state=="new york") ///
    | (county=="richmond" & state=="new york") ///
    | (county=="ormsby" & state=="nevada") ///
    | (county=="carson city" & state=="nevada") ///
    | (county=="los alamos" & state=="new mexico") ///
    | (county=="menominee" & state=="wisconsin") ///
    | county=="total" ///
    | state=="alaska" | state=="hawaii" | state=="virginia" | state=="dc"
restore

*check for mistakes/misspellings in state names
assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california" ///
    |state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia" ///
    |state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois" ///
    |state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana" ///
    |state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota" ///
    |state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada" ///
    |state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york" ///
    |state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon" ///
    |state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota" ///
    |state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia" ///
    |state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming"

*data checks for columns summing to county total
*not applicable; see footnote 1

*clean and label variables
rename page__of_pdf_ page_of_pdf
label var page_of_pdf "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births__total births
label var births "births by residence"
rename births_attended_by_physician_in_ births_h_p
*full variable name used here, so the label no longer depends on variable abbreviation being enabled
label var births_h_p "births by residence: physician in hospital"
rename births_attended_by_physician_not births_nh_p
label var births_nh_p "births by residence: physician not in hospital"
rename births_attended_by_midwife births_m
label var births_m "births by residence: midwife"

*generate year variable
gen year=1947
label var year "year"

*check that observations are unique
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1947.dta, replace
clear

**
*1948 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1948_2.cv.pdf
*table 1
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1948.dta
sum
desc

replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
foreach v of varlist births__total births_of_residents_of_area__att births_of_residents_of_area__at0 births_of_residents_of_area__at1 {
    count if `v'==.
    replace `v'=0 if `v'==.
}

*check that all pdf pages appear to be in the data
*stable keeps the existing row order within each page; a plain sort orders ties arbitrarily,
*and the file header asks that the raw row order not be disturbed
sort page__of_pdf_, stable
gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1]
assert temp==0|temp==1|temp==.
drop temp

*clean data entry errors
replace county="oscoda" if state=="michigan" & county=="osceola" & births__total==64

*virginia: rename the rows that carry these birth counts to "(ind. city)"
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="total" & births__total==4234
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="white" & births__total==2645
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="nonwhite" & births__total==1589
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births__total==5415
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births__total==3425
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births__total==1990
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births__total==1816

*ohio: cell values overwritten unconditionally for the listed state/county/sub-county/race rows

*ohio, lawrence
replace births__total=1241 if state=="ohio" & county=="lawrence" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=1102 if state=="ohio" & county=="lawrence" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__at0=125 if state=="ohio" & county=="lawrence" & city_balance_total=="total" & race=="total"
replace births__total=364 if state=="ohio" & county=="lawrence" & city_balance_total=="ironton" & race=="total"
replace births_of_residents_of_area__att=352 if state=="ohio" & county=="lawrence" & city_balance_total=="ironton" & race=="total"
replace births_of_residents_of_area__at0=12 if state=="ohio" & county=="lawrence" & city_balance_total=="ironton" & race=="total"
replace births__total=877 if state=="ohio" & county=="lawrence" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__att=750 if state=="ohio" & county=="lawrence" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__at0=113 if state=="ohio" & county=="lawrence" & city_balance_total=="balance of county" & race=="total"

*ohio, licking
replace births__total=1508 if state=="ohio" & county=="licking" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=1442 if state=="ohio" & county=="licking" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__at0=66 if state=="ohio" & county=="licking" & city_balance_total=="total" & race=="total"
replace births__total=857 if state=="ohio" & county=="licking" & city_balance_total=="newark" & race=="total"
replace births_of_residents_of_area__att=840 if state=="ohio" & county=="licking" & city_balance_total=="newark" & race=="total"
replace births_of_residents_of_area__at0=17 if state=="ohio" & county=="licking" & city_balance_total=="newark" & race=="total"
replace births__total=651 if state=="ohio" & county=="licking" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__att=602 if state=="ohio" & county=="licking" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__at0=49 if state=="ohio" & county=="licking" & city_balance_total=="balance of county" & race=="total"

*ohio, logan
replace births__total=680 if state=="ohio" & county=="logan" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=640 if state=="ohio" & county=="logan" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__at0=40 if state=="ohio" & county=="logan" & city_balance_total=="total" & race=="total"

*ohio, lorain
replace births__total=3691 if state=="ohio" & county=="lorain" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=3653 if state=="ohio" & county=="lorain" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__at0=35 if state=="ohio" & county=="lorain" & city_balance_total=="total" & race=="total"
replace births__total=816 if state=="ohio" & county=="lorain" & city_balance_total=="elyria" & race=="total"
replace births_of_residents_of_area__att=807 if state=="ohio" & county=="lorain" & city_balance_total=="elyria" & race=="total"
replace births_of_residents_of_area__at0=7 if state=="ohio" & county=="lorain" & city_balance_total=="elyria" & race=="total"
replace births__total=1432 if state=="ohio" & county=="lorain" & city_balance_total=="lorain" & race=="total"
replace births_of_residents_of_area__att=1422 if state=="ohio" & county=="lorain" & city_balance_total=="lorain" & race=="total"
replace births_of_residents_of_area__at0=10 if state=="ohio" & county=="lorain" & city_balance_total=="lorain" & race=="total"
replace births__total=1443 if state=="ohio" & county=="lorain" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__att=1424 if state=="ohio" & county=="lorain" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__at0=18 if state=="ohio" & county=="lorain" & city_balance_total=="balance of county" & race=="total"

*ohio, lucas
replace births__total=9250 if state=="ohio" & county=="lucas" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=8998 if state=="ohio" & county=="lucas" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__at0=239 if state=="ohio" & county=="lucas" & city_balance_total=="total" & race=="total"
replace births__total=8027 if state=="ohio" & county=="lucas" & city_balance_total=="toledo" & race=="total"
replace births_of_residents_of_area__att=7865 if state=="ohio" & county=="lucas" & city_balance_total=="toledo" & race=="total"
replace births_of_residents_of_area__at0=154 if state=="ohio" & county=="lucas" & city_balance_total=="toledo" & race=="total"
replace births_of_residents_of_area__at1=2 if state=="ohio" & county=="lucas" & city_balance_total=="toledo" & race=="total"
replace births__total=7325 if state=="ohio" & county=="lucas" & city_balance_total=="toledo" & race=="white"
replace births_of_residents_of_area__att=7199 if state=="ohio" & county=="lucas" & city_balance_total=="toledo" & race=="white"
replace births_of_residents_of_area__at0=118 if state=="ohio" & county=="lucas" & city_balance_total=="toledo" & race=="white"
replace births_of_residents_of_area__at1=2 if state=="ohio" & county=="lucas" & city_balance_total=="toledo" & race=="white"
replace births__total=702 if state=="ohio" & county=="lucas" & city_balance_total=="toledo" & race=="nonwhite"
replace births_of_residents_of_area__att=666 if state=="ohio" & county=="lucas" & city_balance_total=="toledo" & race=="nonwhite"
replace births_of_residents_of_area__at0=36 if state=="ohio" & county=="lucas" & city_balance_total=="toledo" & race=="nonwhite"
replace births__total=1223 if state=="ohio" & county=="lucas" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__att=1133 if state=="ohio" & county=="lucas" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__at0=85 if state=="ohio" & county=="lucas" & city_balance_total=="balance of county" & race=="total"

*ohio, madison
replace births__total=549 if state=="ohio" & county=="madison" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=343 if state=="ohio" & county=="madison" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__at0=206 if state=="ohio" & county=="madison" & city_balance_total=="total" & race=="total"

*ohio, mahoning
replace births__total=5894 if state=="ohio" & county=="mahoning" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=5780 if state=="ohio" & county=="mahoning" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__at0=106 if state=="ohio" & county=="mahoning" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=4255 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (part)" & race=="total"
replace births_of_residents_of_area__at0=67 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (part)" & race=="total"
replace births__total=3772 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (part)" & race=="white"
replace births_of_residents_of_area__att=3756 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (part)" & race=="white"
replace births_of_residents_of_area__at1=2 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (part)" & race=="white"
replace births_of_residents_of_area__att=499 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (part)" & race=="nonwhite"
replace births_of_residents_of_area__at0=55 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (part)" & race=="nonwhite"
replace births_of_residents_of_area__att=4272 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (total)" & race=="total"
replace births_of_residents_of_area__at0=67 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (total)" & race=="total"
replace births__total=3789 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (total)" & race=="white"
replace births_of_residents_of_area__att=3773 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (total)" & race=="white"
replace births_of_residents_of_area__at0=12 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (total)" & race=="white"
replace births_of_residents_of_area__at1=2 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (total)" & race=="white"
replace births__total=555 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (total)" & race=="nonwhite"
replace births_of_residents_of_area__att=499 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (total)" & race=="nonwhite"
replace births_of_residents_of_area__at0=55 if state=="ohio" & county=="mahoning" & city_balance_total=="youngstown (total)" & race=="nonwhite"
replace births__total=271 if state=="ohio" & county=="mahoning" & city_balance_total=="campbell" & race=="total"
replace births_of_residents_of_area__att=261 if state=="ohio" & county=="mahoning" & city_balance_total=="campbell" & race=="total"
replace births_of_residents_of_area__at0=8 if state=="ohio" & county=="mahoning" & city_balance_total=="campbell" & race=="total"
replace births__total=239 if state=="ohio" & county=="mahoning" & city_balance_total=="campbell" & race=="white"
replace births_of_residents_of_area__att=235 if state=="ohio" & county=="mahoning" & city_balance_total=="campbell" & race=="white"
replace births__total=32 if state=="ohio" & county=="mahoning" & city_balance_total=="campbell" & race=="nonwhite"
replace births_of_residents_of_area__at0=5 if state=="ohio" & county=="mahoning" & city_balance_total=="campbell" & race=="nonwhite"
replace births__total=280 if state=="ohio" & county=="mahoning" & city_balance_total=="struthers" & race=="total"
replace births_of_residents_of_area__at0=5 if state=="ohio" & county=="mahoning" & city_balance_total=="struthers" & race=="total"
replace births__total=1016 if state=="ohio" & county=="mahoning" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__att=989 if state=="ohio" & county=="mahoning" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__at0=26 if state=="ohio" & county=="mahoning" & city_balance_total=="balance of county" & race=="total"

*ohio, marion / meigs / mercer / miami / monroe
replace births__total=1213 if state=="ohio" & county=="marion" & city_balance_total=="total" & race=="total"
replace births__total=879 if state=="ohio" & county=="marion" & city_balance_total=="marion" & race=="total"
replace births_of_residents_of_area__att=271 if state=="ohio" & county=="meigs" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__at0=186 if state=="ohio" & county=="meigs" & city_balance_total=="total" & race=="total"
replace births__total=720 if state=="ohio" & county=="mercer" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=710 if state=="ohio" & county=="mercer" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__at0=8 if state=="ohio" & county=="mercer" & city_balance_total=="total" & race=="total"
replace births__total=1530 if state=="ohio" & county=="miami" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__at0=149 if state=="ohio" & county=="monroe" & city_balance_total=="total" & race=="total"

*ohio, montgomery
replace births__total=10606 if state=="ohio" & county=="montgomery" & city_balance_total=="total" & race=="total"
replace births__total=9009 if state=="ohio" & county=="montgomery" & city_balance_total=="dayton" & race=="total"
replace births_of_residents_of_area__att=8901 if state=="ohio" & county=="montgomery" & city_balance_total=="dayton" & race=="total"
replace births__total=7954 if state=="ohio" & county=="montgomery" & city_balance_total=="dayton" & race=="white"
replace births_of_residents_of_area__att=7896 if state=="ohio" & county=="montgomery" & city_balance_total=="dayton" & race=="white"
replace births__total=1055 if state=="ohio" & county=="montgomery" & city_balance_total=="dayton" & race=="nonwhite"

*ohio, morgan / morrow / muskingum / pike
replace births__total=244 if state=="ohio" & county=="morgan" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=165 if state=="ohio" & county=="morgan" & city_balance_total=="total" & race=="total"
replace births__total=309 if state=="ohio" & county=="morrow" & city_balance_total=="total" & race=="total"
replace births__total=1053 if state=="ohio" & county=="muskingum" & city_balance_total=="zanesville" & race=="total"
replace births__total=693 if state=="ohio" & county=="muskingum" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__at0=229 if state=="ohio" & county=="pike" & city_balance_total=="total" & race=="total"

*ohio, richland
replace births_of_residents_of_area__at0=74 if state=="ohio" & county=="richland" & city_balance_total=="total" & race=="total"
*NOTE(review): the mansfield value written to __at0 (1210) exceeds the county-level __at0 set on the line above (74);
*possibly the intended target was another column -- confirm against the source table
replace births_of_residents_of_area__at0=1210 if state=="ohio" & county=="richland" & city_balance_total=="mansfield" & race=="total"
replace births_of_residents_of_area__at1=55 if state=="ohio" & county=="richland" & city_balance_total=="balance of county" & race=="total"

*ohio, ross / sandusky
replace births__total=531 if state=="ohio" & county=="ross" & city_balance_total=="chillicothe" & race=="total"
replace births_of_residents_of_area__att=416 if state=="ohio" & county=="ross" & city_balance_total=="balance of county" & race=="total"
*NOTE(review): 35 is far below the other county-level __att values set in this section; confirm the target column against the source table
replace births_of_residents_of_area__att=35 if state=="ohio" & county=="sandusky" & city_balance_total=="total" & race=="total"

*south carolina, berkeley / calhoun
replace births__total=910 if state=="south carolina" & county=="berkeley" & city_balance_total=="total" & race=="total"
replace births__total=675 if state=="south carolina" & county=="berkeley" & city_balance_total=="total" & race=="nonwhite"
replace births__total=469 if state=="south carolina" & county=="calhoun" & city_balance_total=="total" & race=="total"
replace births__total=80 if state=="south carolina" & county=="calhoun" & city_balance_total=="total" & race=="white"
replace births__total=389 if state=="south carolina" & county=="calhoun" & city_balance_total=="total" & race=="nonwhite"

*south carolina, charleston
replace births_of_residents_of_area__at0=735 if state=="south carolina" & county=="charleston" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__at0=60 if state=="south carolina" & county=="charleston" & city_balance_total=="total" & race=="white"
replace births_of_residents_of_area__at1=40 if state=="south carolina" & county=="charleston" & city_balance_total=="total" & race=="white"
replace births_of_residents_of_area__att=657 if state=="south carolina" & county=="charleston" & city_balance_total=="total" & race=="nonwhite"
replace births_of_residents_of_area__at0=675 if state=="south carolina" & county=="charleston" & city_balance_total=="total" & race=="nonwhite"
replace births_of_residents_of_area__att=1681 if state=="south carolina" & county=="charleston" & city_balance_total=="charleston" & race=="total"
replace births_of_residents_of_area__at0=662 if state=="south carolina" & county=="charleston" & city_balance_total=="charleston" & race=="total"
replace births_of_residents_of_area__at1=132 if state=="south carolina" & county=="charleston" & city_balance_total=="charleston" & race=="total"
replace births__total=1447 if state=="south carolina" & county=="charleston" & city_balance_total=="charleston" & race=="white"
replace births_of_residents_of_area__at0=28 if state=="south carolina" & county=="charleston" & city_balance_total=="charleston" & race=="white" replace births__total=1028 if state=="south carolina" & county=="charleston" & city_balance_total=="charleston" & race=="nonwhite" replace births_of_residents_of_area__at0=634 if state=="south carolina" & county=="charleston" & city_balance_total=="charleston" & race=="nonwhite" replace births_of_residents_of_area__at1=130 if state=="south carolina" & county=="charleston" & city_balance_total=="charleston" & race=="nonwhite" replace births__total=2380 if state=="south carolina" & county=="charleston" & city_balance_total=="balance of county" & race=="total" replace births_of_residents_of_area__att=1253 if state=="south carolina" & county=="charleston" & city_balance_total=="balance of county" & race=="total" replace births_of_residents_of_area__at0=73 if state=="south carolina" & county=="charleston" & city_balance_total=="balance of county" & race=="total" replace births_of_residents_of_area__at1=1053 if state=="south carolina" & county=="charleston" & city_balance_total=="balance of county" & race=="total" replace births__total=930 if state=="south carolina" & county=="charleston" & city_balance_total=="balance of county" & race=="white" replace births_of_residents_of_area__att=860 if state=="south carolina" & county=="charleston" & city_balance_total=="balance of county" & race=="white" replace births_of_residents_of_area__at0=32 if state=="south carolina" & county=="charleston" & city_balance_total=="balance of county" & race=="white" replace births_of_residents_of_area__at1=38 if state=="south carolina" & county=="charleston" & city_balance_total=="balance of county" & race=="white" replace births__total=1450 if state=="south carolina" & county=="charleston" & city_balance_total=="balance of county" & race=="nonwhite" replace births_of_residents_of_area__att=393 if state=="south carolina" & county=="charleston" & 
city_balance_total=="balance of county" & race=="nonwhite" replace births__total=851 if state=="south carolina" & county=="cherokee" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=610 if state=="south carolina" & county=="cherokee" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__at0=208 if state=="south carolina" & county=="cherokee" & city_balance_total=="total" & race=="total" replace births__total=627 if state=="south carolina" & county=="cherokee" & city_balance_total=="total" & race=="white" replace births_of_residents_of_area__att=492 if state=="south carolina" & county=="cherokee" & city_balance_total=="total" & race=="white" replace births_of_residents_of_area__at0=130 if state=="south carolina" & county=="cherokee" & city_balance_total=="total" & race=="white" replace births__total=224 if state=="south carolina" & county=="cherokee" & city_balance_total=="total" & race=="nonwhite" replace births_of_residents_of_area__att=118 if state=="south carolina" & county=="cherokee" & city_balance_total=="total" & race=="nonwhite" replace births_of_residents_of_area__at0=78 if state=="south carolina" & county=="cherokee" & city_balance_total=="total" & race=="nonwhite" replace births_of_residents_of_area__at1=28 if state=="south carolina" & county=="cherokee" & city_balance_total=="total" & race=="nonwhite" replace births__total=831 if state=="south carolina" & county=="chester" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=393 if state=="south carolina" & county=="chester" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__at0=115 if state=="south carolina" & county=="chester" & city_balance_total=="total" & race=="total" replace births__total=411 if state=="south carolina" & county=="chester" & city_balance_total=="total" & race=="white" replace births_of_residents_of_area__att=365 if state=="south carolina" & 
county=="chester" & city_balance_total=="total" & race=="white" replace births__total=420 if state=="south carolina" & county=="chester" & city_balance_total=="total" & race=="nonwhite" replace births_of_residents_of_area__att=28 if state=="south carolina" & county=="chester" & city_balance_total=="total" & race=="nonwhite" replace births__total=988 if state=="south carolina" & county=="chesterfield" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=287 if state=="south carolina" & county=="chesterfield" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__at0=568 if state=="south carolina" & county=="chesterfield" & city_balance_total=="total" & race=="total" replace births__total=545 if state=="south carolina" & county=="chesterfield" & city_balance_total=="total" & race=="white" replace births_of_residents_of_area__att=262 if state=="south carolina" & county=="chesterfield" & city_balance_total=="total" & race=="white" replace births_of_residents_of_area__at0=276 if state=="south carolina" & county=="chesterfield" & city_balance_total=="total" & race=="white" replace births__total=443 if state=="south carolina" & county=="chesterfield" & city_balance_total=="total" & race=="nonwhite" replace births_of_residents_of_area__att=25 if state=="south carolina" & county=="chesterfield" & city_balance_total=="total" & race=="nonwhite" replace births_of_residents_of_area__at0=292 if state=="south carolina" & county=="chesterfield" & city_balance_total=="total" & race=="nonwhite" replace births_of_residents_of_area__at1=126 if state=="south carolina" & county=="chesterfield" & city_balance_total=="total" & race=="nonwhite" replace births__total=929 if state=="south carolina" & county=="clarendon" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=160 if state=="south carolina" & county=="clarendon" & city_balance_total=="total" & race=="total" replace 
births_of_residents_of_area__at0=71 if state=="south carolina" & county=="clarendon" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__at1=697 if state=="south carolina" & county=="clarendon" & city_balance_total=="total" & race=="total" replace births__total=212 if state=="south carolina" & county=="clarendon" & city_balance_total=="total" & race=="white" replace births_of_residents_of_area__att=140 if state=="south carolina" & county=="clarendon" & city_balance_total=="total" & race=="white" replace births_of_residents_of_area__at0=55 if state=="south carolina" & county=="clarendon" & city_balance_total=="total" & race=="white" replace births_of_residents_of_area__at1=17 if state=="south carolina" & county=="clarendon" & city_balance_total=="total" & race=="white" replace births__total=717 if state=="south carolina" & county=="clarendon" & city_balance_total=="total" & race=="nonwhite" replace births_of_residents_of_area__att=20 if state=="south carolina" & county=="clarendon" & city_balance_total=="total" & race=="nonwhite" replace births_of_residents_of_area__at0=16 if state=="south carolina" & county=="clarendon" & city_balance_total=="total" & race=="nonwhite" replace births_of_residents_of_area__at1=680 if state=="south carolina" & county=="clarendon" & city_balance_total=="total" & race=="nonwhite" replace county="colleton" if _n==5825 replace city_balance_total="total" if _n==5825 replace race="total" if _n==5825 replace births__total=777 if _n==5825 replace births_of_residents_of_area__att=336 if _n==5825 replace births_of_residents_of_area__at0=74 if _n==5825 replace births_of_residents_of_area__at1=366 if _n==5825 replace county="colleton" if _n==5826 replace city_balance_total="total" if _n==5826 replace race="white" if _n==5826 replace births__total=335 if _n==5826 replace births_of_residents_of_area__att=298 if _n==5826 replace births_of_residents_of_area__at0=28 if _n==5826 replace births_of_residents_of_area__at1=9 
if _n==5826 replace county="colleton" if _n==5827 replace city_balance_total="total" if _n==5827 replace race="nonwhite" if _n==5827 replace births__total=442 if _n==5827 replace births_of_residents_of_area__att=38 if _n==5827 replace births_of_residents_of_area__at0=46 if _n==5827 replace births_of_residents_of_area__at1=357 if _n==5827 replace county="darlington" if _n==5828 replace city_balance_total="total" if _n==5828 replace race="total" if _n==5828 replace births__total=1512 if _n==5828 replace births_of_residents_of_area__att=610 if _n==5828 replace births_of_residents_of_area__at0=487 if _n==5828 replace births_of_residents_of_area__at1=415 if _n==5828 replace county="darlington" if _n==5829 replace city_balance_total="total" if _n==5829 replace race="white" if _n==5829 replace births__total=774 if _n==5829 replace births_of_residents_of_area__att=533 if _n==5829 replace births_of_residents_of_area__at0=237 if _n==5829 replace births_of_residents_of_area__at1=4 if _n==5829 replace county="darlington" if _n==5830 replace city_balance_total="total" if _n==5830 replace race="nonwhite" if _n==5830 replace births__total=738 if _n==5830 replace births_of_residents_of_area__att=77 if _n==5830 replace births_of_residents_of_area__at0=250 if _n==5830 replace births_of_residents_of_area__at1=411 if _n==5830 replace county="dillon" if _n==5831 replace city_balance_total="total" if _n==5831 replace race="total" if _n==5831 replace births__total=1048 if _n==5831 replace births_of_residents_of_area__att=616 if _n==5831 replace births_of_residents_of_area__at0=72 if _n==5831 replace births_of_residents_of_area__at1=359 if _n==5831 replace county="dillon" if _n==5832 replace city_balance_total="total" if _n==5832 replace race="white" if _n==5832 replace births__total=519 if _n==5832 replace births_of_residents_of_area__att=461 if _n==5832 replace births_of_residents_of_area__at0=36 if _n==5832 replace births_of_residents_of_area__at1=22 if _n==5832 replace county="dillon" 
if _n==5833 replace city_balance_total="total" if _n==5833 replace race="nonwhite" if _n==5833 replace births__total=529 if _n==5833 replace births_of_residents_of_area__att=155 if _n==5833 replace births_of_residents_of_area__at0=36 if _n==5833 replace births_of_residents_of_area__at1=337 if _n==5833 replace county="dorchester" if _n==5834 replace city_balance_total="total" if _n==5834 replace race="total" if _n==5834 replace births__total=670 if _n==5834 replace births_of_residents_of_area__att=199 if _n==5834 replace births_of_residents_of_area__at0=111 if _n==5834 replace births_of_residents_of_area__at1=360 if _n==5834 replace county="dorchester" if _n==5835 replace city_balance_total="total" if _n==5835 replace race="white" if _n==5835 replace births__total=245 if _n==5835 replace births_of_residents_of_area__att=170 if _n==5835 replace births_of_residents_of_area__at0=51 if _n==5835 replace births_of_residents_of_area__at1=24 if _n==5835 replace county="dorchester" if _n==5836 replace city_balance_total="total" if _n==5836 replace race="nonwhite" if _n==5836 replace births__total=425 if _n==5836 replace births_of_residents_of_area__att=29 if _n==5836 replace births_of_residents_of_area__at0=60 if _n==5836 replace births_of_residents_of_area__at1=336 if _n==5836 replace county="edgefield" if _n==5837 replace city_balance_total="total" if _n==5837 replace race="total" if _n==5837 replace births__total=485 if _n==5837 replace births_of_residents_of_area__att=90 if _n==5837 replace births_of_residents_of_area__at0=291 if _n==5837 replace births_of_residents_of_area__at1=104 if _n==5837 replace county="edgefield" if _n==5838 replace city_balance_total="total" if _n==5838 replace race="white" if _n==5838 replace births__total=123 if _n==5838 replace births_of_residents_of_area__att=76 if _n==5838 replace births_of_residents_of_area__at0=45 if _n==5838 replace births_of_residents_of_area__at1=2 if _n==5838 replace county="edgefield" if _n==5839 replace 
city_balance_total="total" if _n==5839 replace race="nonwhite" if _n==5839 replace births__total=362 if _n==5839 replace births_of_residents_of_area__att=14 if _n==5839 replace births_of_residents_of_area__at0=246 if _n==5839 replace births_of_residents_of_area__at1=102 if _n==5839 replace county="fairfield" if _n==5840 replace city_balance_total="total" if _n==5840 replace race="total" if _n==5840 replace births__total=603 if _n==5840 replace births_of_residents_of_area__att=132 if _n==5840 replace births_of_residents_of_area__at0=211 if _n==5840 replace births_of_residents_of_area__at1=260 if _n==5840 replace county="fairfield" if _n==5841 replace city_balance_total="total" if _n==5841 replace race="white" if _n==5841 replace births__total=219 if _n==5841 replace births_of_residents_of_area__att=107 if _n==5841 replace births_of_residents_of_area__at0=109 if _n==5841 replace births_of_residents_of_area__at1=3 if _n==5841 replace county="fairfield" if _n==5842 replace city_balance_total="total" if _n==5842 replace race="nonwhite" if _n==5842 replace births__total=384 if _n==5842 replace births_of_residents_of_area__att=25 if _n==5842 replace births_of_residents_of_area__at0=102 if _n==5842 replace births_of_residents_of_area__at1=257 if _n==5842 replace births_of_residents_of_area__att=917 if state=="south carolina" & county=="florence" & city_balance_total=="total" & race=="total" replace births__total=1209 if state=="south carolina" & county=="florence" & city_balance_total=="total" & race=="white" replace births_of_residents_of_area__att=831 if state=="south carolina" & county=="florence" & city_balance_total=="total" & race=="white" replace births__total=802 if state=="south carolina" & county=="florence" & city_balance_total=="balance of county" & race=="white" replace births_of_residents_of_area__at0=184 if state=="tennessee" & county=="macon" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__at1=27 if state=="texas" & 
county=="uvalde" & city_balance_total=="total" & race=="total" replace births__total=6513 if state=="west virginia" & county=="kanawha" & city_balance_total=="total" & race=="total" *fix county name misspellings (relative to county names in the 1970 census) replace county=subinstr(county,"st.","st",.) replace county=subinstr(county,"ste.","st",.) replace county="desoto" if county=="de soto" & state=="florida" replace county="cochrane" if county=="cochran" & state=="texas" replace county="san francisco" if county=="san francisco, coextensive with san francisco (city)" & state=="california" replace county="orleans" if county=="orleans, coartenaive with new orleans" & state=="louisiana" replace county="denver" if county=="denver, coextensive with denver (city)" & state=="colorado" replace county="philadelphia" if county=="philadelphia, coextensive with philadelphia (city)" & state=="pennsylvania" replace county="baltimore city" if county=="baltimore (city)" & state=="maryland" replace county="st louis city" if county=="st louis (city)" & state=="missouri" replace county="chisago" if county=="chicago" & state=="minnesota" *correct data entry errors found while checking that county totals sum to state totals replace births__total=1038 if births__total==1039 & state=="kentucky" & county=="mccracken" & city_balance_total=="total" & race=="total" replace births__total=187 if births__total==167 & state=="missouri" & county=="cedar" & city_balance_total=="total" & race=="total" replace births__total=833 if births__total==633 & state=="missouri" & county=="st francois" & city_balance_total=="total" & race=="total" replace births__total=2735 if births__total==2736 & state=="ohio" & county=="clark" & city_balance_total=="total" & race=="total" replace births__total=1869 if births__total==1669 & state=="tennessee" & county=="anderson" & city_balance_total=="total" & race=="total" replace births__total=398 if births__total==396 & state=="tennessee" & county=="grundy" & 
city_balance_total=="total" & race=="total" replace births__total=2528 if births__total==2529 & state=="washington" & county=="snohomish" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=6309 if births_of_residents_of_area__att==6308 & state=="california" & county=="san bernardino" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=408 if births_of_residents_of_area__att==409 & state=="kansas" & county=="dickinson" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=80 if births_of_residents_of_area__att==60 & state=="kentucky" & county=="lewis" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=859 if births_of_residents_of_area__att==659 & state=="louisiana" & county=="webster" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=2879 if births_of_residents_of_area__att==2679 & state=="massachusetts" & county=="berkshire" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=2137 if births_of_residents_of_area__att==2157 & state=="mississippi" & county=="hinds" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=686 if births_of_residents_of_area__att==688 & state=="north carolina" & county=="henderson" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=1035 if births_of_residents_of_area__att==35 & state=="ohio" & county=="sandusky" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=10560 if births_of_residents_of_area__att==11560 & state=="oregon" & county=="multnomah" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=968 if births_of_residents_of_area__att==868 & state=="texas" & county=="ector" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=849 if 
births_of_residents_of_area__att==649 & state=="west virginia" & county=="logan" & city_balance_total=="total" & race=="total" replace births_of_residents_of_area__att=468 if births_of_residents_of_area__att==469 & state=="wisconsin" & county=="door" & city_balance_total=="total" & race=="total" *correct data entry errors found while checking that white+nonwhite=total replace births_of_residents_of_area__att=432 if births_of_residents_of_area__att==452 & state=="mississippi" & county=="hinds" & race=="nonwhite" & city_balance_total=="total" replace births_of_residents_of_area__att=818 if births_of_residents_of_area__att==618 & state=="west virginia" & county=="logan" & race=="white" & city_balance_total=="total" *check that county names are consistent with 1970 census, except for known deviations preserve egen countyr_state=concat(county state), punct("_") sort countyr_state merge countyr_state using original_census1970_counties assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park" & state=="montana"|county=="park (excl yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. park (part)" & state=="wyoming"|county=="yellowstone nat. 
park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc" restore *check for mistakes/misspellings in state names assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california"|state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming" *data checks for columns summing to county total *not applicable; see footnote 1 *clean and label variables rename page__of_pdf_ page_of_pdf_ label var page_of_pdf_ "page of pdf" label var state "state" label var county "county" rename city_balance_total sub_county label var sub_county "city/balance/total" label var race "race" rename births__total births label var births "births by residence" rename births_of_residents_of_area__att births_h_p label var births_h_p "births by 
residence: physician in hospital" rename births_of_residents_of_area__at0 births_nh_p label var births_nh_p "births by residence: physician not in hospital" rename births_of_residents_of_area__at1 births_m label var births_m "births by residence: midwife" *generate year variable gen year=1948 label var year "year" *check that observations are unique egen tag=tag(state county sub_county race) assert tag==1 drop tag list in 1/1 desc sum saveold clean_natality1948.dta,replace clear ** *1949 data ** *http://nber15.nber.org/vital-stats-books/vsus_1949_2.cv.pdf *table 1 *hand-checked excel file for ?'s (difficult-to-read characters) *corrected below if any clear use natality1949.dta sum desc replace county=lower(county) replace state=lower(state) replace city_balance_total=lower(city_balance_total) replace race=lower(race) *missing observations in the original data are zeros *replace these so we can distinguish these from "true" missing values *which would be generated below due to difficult-to-read characters count if births__total==. replace births__total=0 if births__total ==. count if births_of_residents_of_area__att==. replace births_of_residents_of_area__att =0 if births_of_residents_of_area__att ==. count if births_of_residents_of_area__at0==. replace births_of_residents_of_area__at0 =0 if births_of_residents_of_area__at0 ==. count if births_of_residents_of_area__at1==. replace births_of_residents_of_area__at1 =0 if births_of_residents_of_area__at1 ==. count if births_of_residents_of_area__at2==. replace births_of_residents_of_area__at2 =0 if births_of_residents_of_area__at2 ==. *check that all pdf pages appear to be in the data sort page__of_pdf_ gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1] assert temp==0|temp==1|temp==. 
drop temp

*clean data entry errors
*each rule is keyed on the mis-keyed value itself, so it only fires on the row it was written for

*michigan: the row keyed as osceola with 69 births is relabelled oscoda
replace county="oscoda" if state=="michigan" & county=="osceola" & births__total==69

*connecticut, tolland: 869 -> 889
replace births_of_residents_of_area__att=889 if state=="connecticut" & county=="tolland" & births_of_residents_of_area__att==869

*florida, bay county, panama city: total and nonwhite rows holding 0 in at2 are set to 1
replace births_of_residents_of_area__at2=1 if state=="florida" & county=="bay" & city_balance_total=="panama city" & ///
    inlist(race,"total","nonwhite") & births_of_residents_of_area__at2==0

*indiana, sullivan: 440 -> 448
replace births__total=448 if state=="indiana" & county=="sullivan" & births__total==440

*virginia: rows keyed as norfolk/richmond/roanoke that carry the race-specific totals below
*are relabelled with an "(ind. city)" suffix
*NOTE(review): presumably these are the independent-city rows sharing a name with a county - confirm against the source table
replace county="norfolk (ind. city)" if state=="virginia" & county=="norfolk" & ( ///
    (race=="total" & births__total==5051) | ///
    (race=="white" & births__total==3337) | ///
    (race=="nonwhite" & births__total==1714) )
replace county="richmond (ind. city)" if state=="virginia" & county=="richmond" & ( ///
    (race=="total" & births__total==5280) | ///
    (race=="white" & births__total==3282) | ///
    (race=="nonwhite" & births__total==1998) )
replace county="roanoke (ind. city)" if state=="virginia" & county=="roanoke" & ( ///
    (race=="total" & births__total==1939) | ///
    (race=="white" & births__total==1517) | ///
    (race=="nonwhite" & births__total==422) )

*fix county name misspellings (relative to county names in the 1970 census)
*inner call rewrites every "st." to "st"; outer call then rewrites every "ste." to "st"
replace county=subinstr(subinstr(county,"st.","st",.),"ste.","st",.)
*county name normalisation: bring county labels in line with the 1970 census spellings
replace county="desoto" if county=="de soto" & state=="florida"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="san francisco" if county=="san francisco, coextensive with san francisco (city)" & state=="california"
*"coartenaive" is apparently a garbled key-in of "coextensive"; accept both spellings so that
*a correctly spelled row is normalised too instead of failing the census merge check below
replace county="orleans" if inlist(county,"orleans, coartenaive with new orleans","orleans, coextensive with new orleans") & state=="louisiana"
replace county="denver" if county=="denver, coextensive with denver (city)" & state=="colorado"
replace county="philadelphia" if county=="philadelphia, coextensive with philadelphia (city)" & state=="pennsylvania"
replace county="baltimore city" if county=="baltimore (city)" & state=="maryland"
replace county="st louis city" if county=="st louis (city)" & state=="missouri"
replace county="chisago" if county=="chicago" & state=="minnesota"

*correct data entry errors found while checking that white+nonwhite=total
*hot spring, arkansas (county total rows): each rule fires only on the mis-keyed value
*after correction white = 282+61+1+1 = 345 and nonwhite = 3+16+42+2 = 63
replace births__total=345 if births__total==63 & state=="arkansas" & county=="hot spring" & race=="white" & city_balance_total=="total"
replace births__total=63 if births__total==334 & state=="arkansas" & county=="hot spring" & race=="nonwhite" & city_balance_total=="total"
replace births_of_residents_of_area__att=282 if births_of_residents_of_area__att==3 & state=="arkansas" & county=="hot spring" & race=="white" & city_balance_total=="total"
replace births_of_residents_of_area__att=3 if births_of_residents_of_area__att==221 & state=="arkansas" & county=="hot spring" & race=="nonwhite" & city_balance_total=="total"
replace births_of_residents_of_area__at0=61 if births_of_residents_of_area__at0==16 & state=="arkansas" & county=="hot spring" & race=="white" & city_balance_total=="total"
replace births_of_residents_of_area__at0=16 if births_of_residents_of_area__at0==71 & state=="arkansas" & county=="hot spring" & race=="nonwhite" & city_balance_total=="total"
replace births_of_residents_of_area__at1=1 if births_of_residents_of_area__at1==42 & state=="arkansas" & county=="hot spring" & race=="white" & city_balance_total=="total"
replace births_of_residents_of_area__at1=42 if births_of_residents_of_area__at1==35 & state=="arkansas" & county=="hot spring" & race=="nonwhite" & city_balance_total=="total"
replace births_of_residents_of_area__at2=1 if births_of_residents_of_area__at2==2 & state=="arkansas" & county=="hot spring" & race=="white" & city_balance_total=="total"
replace births_of_residents_of_area__at2=2 if births_of_residents_of_area__at2==7 & state=="arkansas" & county=="hot spring" & race=="nonwhite" & city_balance_total=="total"

*check that county names are consistent with 1970 census, except for known deviations
*the merge runs inside preserve/restore so the working data are left untouched
*every row must either match the census county list (_m==3) or be one of the listed exceptions
*(& binds tighter than |; the parentheses only make that grouping explicit)
*NOTE(review): the exception list tests state=="dc", while the state-name check below only
*admits "district of columbia" - confirm which spelling the data actually carry
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
assert _m==3 | ///
    (county=="armstrong" & state=="south dakota") | ///
    (county=="washington" & state=="south dakota") | ///
    (county=="yellowstone national park (part)" & state=="idaho") | ///
    (county=="yellowstone national park" & state=="idaho") | ///
    (county=="yellowstone national park (part)" & state=="montana") | ///
    (county=="yellowstone national park (part)" & state=="wyoming") | ///
    (county=="yellowstone national park (total)" & state=="wyoming") | ///
    (county=="yellowstone national park" & state=="montana") | ///
    (county=="park (excl yell nat park)" & state=="montana") | ///
    (county=="yellowstone national park, part" & state=="montana") | ///
    (county=="yellowstone national park, part" & state=="wyoming") | ///
    (county=="yellowstone national park, total" & state=="wyoming") | ///
    (county=="park" & state=="montana") | ///
    (county=="yellowstone nat. park (part)" & state=="idaho") | ///
    (county=="yellowstone nat. park (part)" & state=="wyoming") | ///
    (county=="yellowstone nat. park (total)" & state=="wyoming") | ///
    (county=="new york city" & state=="new york") | ///
    (county=="bronx" & state=="new york") | ///
    (county=="kings" & state=="new york") | ///
    (county=="new york" & state=="new york") | ///
    (county=="queens" & state=="new york") | ///
    (county=="richmond" & state=="new york") | ///
    (county=="ormsby" & state=="nevada") | ///
    (county=="carson city" & state=="nevada") | ///
    (county=="los alamos" & state=="new mexico") | ///
    (county=="menominee" & state=="wisconsin") | ///
    county=="total" | ///
    state=="alaska" | state=="hawaii" | state=="virginia" | state=="dc"
restore

*check for mistakes/misspellings in state names
assert state=="alabama" | state=="alaska" | state=="arizona" | state=="arkansas" | ///
    state=="california" | state=="colorado" | state=="connecticut" | state=="delaware" | ///
    state=="district of columbia" | state=="florida" | state=="georgia" | state=="hawaii" | ///
    state=="idaho" | state=="illinois" | state=="indiana" | state=="iowa" | ///
    state=="kansas" | state=="kentucky" | state=="louisiana" | state=="maine" | ///
    state=="maryland" | state=="massachusetts" | state=="michigan" | state=="minnesota" | ///
    state=="mississippi" | state=="missouri" | state=="montana" | state=="nebraska" | ///
    state=="nevada" | state=="new hampshire" | state=="new jersey" | state=="new mexico" | ///
    state=="new york" | state=="north carolina" | state=="north dakota" | state=="ohio" | ///
    state=="oklahoma" | state=="oregon" | state=="pennsylvania" | state=="rhode island" | ///
    state=="south carolina" | state=="south dakota" | state=="tennessee" | state=="texas" | ///
    state=="utah" | state=="vermont" | state=="virginia" | state=="washington" | ///
    state=="west virginia" | state=="wisconsin" | state=="wyoming"

*data checks for columns summing to county total
*the four attendant columns (att, at0, at1, at2) must add up to births__total on every row
gen temp=births_of_residents_of_area__att+ births_of_residents_of_area__at0 + births_of_residents_of_area__at1 + births_of_residents_of_area__at2
assert temp==births__total
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
*----------------------------------------------------------------------
*finish the 1949 section: give the count columns short names and labels,
*stamp the year, verify one row per key, and save
*----------------------------------------------------------------------
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
label var births_h_p "births by residence: physician in hospital"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician not in hospital"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"
rename births_of_residents_of_area__at2 births_o
label var births_o "births by residence: other and not specified"

*generate year variable
gen year=1949
label var year "year"

*check that observations are unique
*tag is 1 for the first row of each (state, county, sub_county, race) group and
*0 for later rows, so the assert fails on any duplicate key
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

*show the first row and summaries in the log, then save and empty memory
list in 1/1
desc
sum
saveold clean_natality1949.dta,replace
clear

**
*1950 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1950_2.cv.pdf
*table 13
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1950.dta
sum
desc

*lower-case all string keys so the literal comparisons below match
replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
*(each count logs how many cells are about to be zero-filled)
count if births__total==.
replace births__total=0 if births__total ==.
count if births_of_residents_of_area__att==.
replace births_of_residents_of_area__att =0 if births_of_residents_of_area__att ==.
count if births_of_residents_of_area__at0==.
replace births_of_residents_of_area__at0 =0 if births_of_residents_of_area__at0 ==.
count if births_of_residents_of_area__at1==.
replace births_of_residents_of_area__at1 =0 if births_of_residents_of_area__at1 ==.

*check that all pdf pages appear to be in the data
*after sorting, consecutive rows may differ by 0 or 1 page; the first row has
*no predecessor, so its difference is missing
sort page__of_pdf_
gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1]
assert temp==0|temp==1|temp==.
drop temp

*clean data entry errors
*relabel specific virginia rows, identified by race and birth total, with an
*"(ind. city)" suffix
*NOTE(review): presumably separates independent cities from same-named
*counties - confirm against the source table
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="total" & births__total==5041
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="white" & births__total==3349
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="nonwhite" & births__total==1692
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births__total==4937
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births__total==2919
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births__total==2018
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births__total==2019

*fix county name misspellings (relative to county names in the 1970 census)
*every "st." becomes "st" and every "ste." becomes "st"
replace county=subinstr(county,"st.","st",.)
replace county=subinstr(county,"ste.","st",.)
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="desoto" if county=="de soto" & state=="florida"
replace county="de kalb" if county=="dekalb" & state=="tennessee"
replace county="denver" if county=="denver, coextensive with denver (city)" & state=="colorado"
replace county="orleans" if county=="orleans, coextensive with new orleans" & state=="louisiana"
replace county="philadelphia" if county=="philadelphia, coextensive with philadelphia (city)" & state=="pennsylvania"
replace county="san francisco" if county=="san francisco, coextensive with san francisco (city)" & state=="california"
replace county="st louis city" if county=="st louis (city)" & state=="missouri"
replace county="baltimore city" if county=="baltimore (city)" & state=="maryland"

*correct data entry errors found while checking that county totals sum to state totals
*each fix is keyed on the currently stored (wrong) value plus state/county/sub-county/race
replace births__total=288 if births__total==289 & state=="arizona" & county=="santa cruz" & city_balance_total=="total" & race=="total"
replace births__total=289 if births__total==299 & state=="idaho" & county=="payette" & city_balance_total=="total" & race=="total"
replace births__total=64 if births__total==84 & state=="kentucky" & county=="robertson" & city_balance_total=="total" & race=="total"
replace births__total=568 if births__total==566 & state=="michigan" & county=="gogebic" & city_balance_total=="total" & race=="total"
replace births__total=2935 if births__total==2936 & state=="new york" & county=="orange" & city_balance_total=="total" & race=="total"
replace births__total=402 if births__total==102 & state=="ohio" & county=="harrison" & city_balance_total=="total" & race=="total"
replace births__total=388 if births__total==386 & state=="ohio" & county=="hocking" & city_balance_total=="total" & race=="total"
replace births__total=688 if births__total==686 & state=="oklahoma" & county=="mccurtain" & city_balance_total=="total" & race=="total"
replace births__total=162 if births__total==134 & state=="oregon" & county=="wallowa" & city_balance_total=="total" & race=="total"
replace births__total=305 if births__total==306 & state=="texas" & county=="erath" & city_balance_total=="total" & race=="total"
replace births__total=373 if births__total==375 & state=="texas" & county=="frio" & city_balance_total=="total" & race=="total"
replace births__total=686 if births__total==696 & state=="texas" & county=="nacogdoches" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=293 if births_of_residents_of_area__att==295 & state=="illinois" & county=="wayne" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=1736 if births_of_residents_of_area__att==1738 & state=="new york" & county=="oswego" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=161 if births_of_residents_of_area__att==133 & state=="oregon" & county=="wallowa" & city_balance_total=="total" & race=="total"

*correct data entry errors found while checking that white+nonwhite=total
replace births__total=235 if births__total==236 & state=="alabama" & county=="lauderdale" & race=="nonwhite" & city_balance_total=="total"
replace births__total=388 if births__total==386 & state=="kentucky" & county=="boyle" & race=="white" & city_balance_total=="total"
replace births__total=88296 if births__total==86296 & state=="new jersey" & county=="total" & race=="white" & city_balance_total=="total"
replace births__total=613 if births__total==513 & state=="north carolina" & county=="columbus" & race=="nonwhite" & city_balance_total=="total"
replace births__total=264 if births__total==284 & state=="texas" & county=="robertson" & race=="nonwhite" & city_balance_total=="total"
replace births_of_residents_of_area__att=87 if births_of_residents_of_area__att==67 & state=="georgia" & county=="lamar" & race=="white" & city_balance_total=="total"
replace births_of_residents_of_area__att=246 if births_of_residents_of_area__att==245 & state=="north carolina" & county=="bladen" & race=="white" & city_balance_total=="total"

*check that county names are consistent with 1970 census, except for known deviations
*preserve/restore brackets the merge so nothing it adds survives the check
preserve
*merge key is "<county>_<state>"
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
*every row must match on both sides (_m==3) or be one of the listed exceptions
assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park" & state=="montana"|county=="park (excl yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. park (part)" & state=="wyoming"|county=="yellowstone nat. park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc"
restore

*check for mistakes/misspellings in state names
assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california"|state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming"

*data checks for columns summing to county total
*not applicable; see footnote 1

*clean and label variables
*(this year has three attendant columns; there is no __at2 column to rename)
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
label var births_h_p "births by residence: physician in hospital"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician not in hospital"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"

*generate year variable
gen year=1950
label var year "year"

*check that observations are unique
*one row per (state, county, sub_county, race)
egen tag=tag(state county sub_county race)
assert tag==1
drop tag
list in 1/1
desc
sum
saveold clean_natality1950.dta,replace
clear

**
*1951 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1951_1.cv.pdf
*table 17
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1951.dta
sum
desc

*lower-case the string keys (no city_balance_total variable is touched in this section)
replace county=lower(county)
replace state=lower(state)
replace race=lower(race)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
count if births__total==.
replace births__total=0 if births__total ==.
count if births_of_residents_of_area__att==.
replace births_of_residents_of_area__att =0 if births_of_residents_of_area__att ==.
count if births_of_residents_of_area__at0==.
replace births_of_residents_of_area__at0 =0 if births_of_residents_of_area__at0 ==.
count if births_of_residents_of_area__at1==.
replace births_of_residents_of_area__at1 =0 if births_of_residents_of_area__at1 ==.
count if births_of_residents_of_area__at2==.
replace births_of_residents_of_area__at2 =0 if births_of_residents_of_area__at2 ==.

*check that all pdf pages appear to be in the data
sort page__of_pdf_
gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1]
assert temp==0|temp==1|temp==.
drop temp

*clean data entry errors
*relabel specific virginia rows, identified by race and birth total, with an
*"(ind. city)" suffix
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="total" & births__total==5684
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="white" & births__total==3754
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="nonwhite" & births__total==1930
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births__total==5084
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births__total==2938
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births__total==2146
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births__total==2210

*fix county name misspellings (relative to county names in the 1970 census)
replace county=subinstr(county,"st.","st",.)
replace county=subinstr(county,"ste.","st",.)
replace county="baltimore city" if county=="baltimore (city)" & state=="maryland"
replace county="chisago" if county=="chicago" & state=="minnesota"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="desoto" if county=="de soto" & state=="florida"
replace county="st louis city" if county=="st louis (city)" & state=="missouri"

*check that county names are consistent with 1970 census, except for known deviations
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park" & state=="montana"|county=="park (excl yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. park (part)" & state=="wyoming"|county=="yellowstone nat. park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc"
restore

*check for mistakes/misspellings in state names
assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california"|state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming"

*data checks for columns summing to county total
*the four attendant columns must add up to births__total in every row
gen temp=births_of_residents_of_area__att+ births_of_residents_of_area__at0 + births_of_residents_of_area__at1 + births_of_residents_of_area__at2
assert temp==births__total
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
label var race "race"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
label var births_h_p "births by residence: physician in hospital"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician not in hospital"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"
rename births_of_residents_of_area__at2 births_o
label var births_o "births by residence: other and not specified"

*generate year variable
gen year=1951
label var year "year"

*check that observations are unique
*one row per (state, county, race); no sub-county key in this section
egen tag=tag(state county race)
assert tag==1
drop tag
list in 1/1
desc
sum
saveold clean_natality1951.dta,replace
clear

**
*1952 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1952_1.cv.pdf
*table 18
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1952.dta
sum
desc
replace county=lower(county)
replace state=lower(state)
replace race=lower(race)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
count if births__total==.
replace births__total=0 if births__total ==.
count if births_of_residents_of_area__att==.
replace births_of_residents_of_area__att =0 if births_of_residents_of_area__att ==.
count if births_of_residents_of_area__at0==.
replace births_of_residents_of_area__at0 =0 if births_of_residents_of_area__at0 ==.
*----------------------------------------------------------------------
*1952 section (continued): zero-fill the last attendant column, verify
*pdf page continuity, relabel specific virginia rows, and normalize
*"st."/"ste." in county names. acts on the dataset currently in memory.
*----------------------------------------------------------------------

*log how many cells are missing, then zero-fill them
count if births_of_residents_of_area__at1==.
replace births_of_residents_of_area__at1 =0 if births_of_residents_of_area__at1 ==.

*check that all pdf pages appear to be in the data
*after sorting, consecutive rows may differ by 0 or 1 page; the first row has
*no predecessor, so its difference is missing
sort page__of_pdf_
gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1]
assert temp==0|temp==1|temp==.
drop temp

*clean data entry errors
*relabel specific virginia rows, identified by race and birth total, with an
*"(ind. city)" suffix
*the first condition previously used the abbreviated name births__, which only
*resolves while variable abbreviation is enabled and births__total is the sole
*variable with that prefix; the full name is spelled out like every sibling line
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="total" & births__total==6434
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="white" & births__total==4514
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="nonwhite" & births__total==1920
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births__total==5050
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births__total==2872
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births__total==2178
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births__total==2312

*fix county name misspellings (relative to county names in the 1970 census)
*every "st." becomes "st" and every "ste." becomes "st"
replace county=subinstr(county,"st.","st",.)
replace county=subinstr(county,"ste.","st",.)
replace county="baltimore city" if county=="baltimore (independent city)" & state=="maryland" replace county="chisago" if county=="chicago" & state=="minnesota" replace county="cochrane" if county=="cochran" & state=="texas" replace county="desoto" if county=="de soto" & state=="florida" replace county="schuyler" if county=="shuyler" & state=="missouri" replace county="st louis city" if county=="st louis (independent city)" & state=="missouri" *correct data entry errors found while checking that county totals sum to state totals replace births__total=368 if births__total==366 & state=="colorado" & county=="montrose" & race=="total" replace births__total=1448 if births__total==1446 & state=="oregon" & county=="coos" & race=="total" replace births_of_residents_of_area__att=834 if births_of_residents_of_area__att==934 & state=="louisiana" & county=="vernon" & race=="total" *check that county names are consistent with 1970 census, except for known deviations preserve egen countyr_state=concat(county state), punct("_") sort countyr_state merge countyr_state using original_census1970_counties assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park" & state=="montana"|county=="park (excl yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. 
park (part)" & state=="wyoming"|county=="yellowstone nat. park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc" restore *check for mistakes/misspellings in state names assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california"|state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming" *data checks for columns summing to county total *not applicable; see footnote 1 *clean and label variables rename page__of_pdf_ page_of_pdf_ label var page_of_pdf_ "page of pdf" label var state "state" label var county "county" label var race "race" rename births__total births label var births "births by residence" rename births_of_residents_of_area__att births_h_p label var births_h_p "births by residence: physician in 
hospital" rename births_of_residents_of_area__at0 births_nh_p label var births_nh_p "births by residence: physician not in hospital" rename births_of_residents_of_area__at1 births_m label var births_m "births by residence: midwife" *generate year variable gen year=1952 label var year "year" *check that observations are unique egen tag=tag(state county race) assert tag==1 drop tag list in 1/1 desc sum saveold clean_natality1952.dta,replace clear ** *1953 data ** *http://nber15.nber.org/vital-stats-books/vsus_1953_1.pdf *table 18 *hand-checked excel file for ?'s (difficult-to-read characters) *corrected below if any clear use natality1953.dta sum desc replace county=lower(county) replace state=lower(state) replace city_balance_total=lower(city_balance_total) replace race=lower(race) *missing observations in the original data are zeros *replace these so we can distinguish these from "true" missing values *which would be generated below due to difficult-to-read characters count if births__total==. replace births__total=0 if births__total ==. count if births_of_residents_of_area__att==. replace births_of_residents_of_area__att =0 if births_of_residents_of_area__att ==. count if births_of_residents_of_area__at0==. replace births_of_residents_of_area__at0 =0 if births_of_residents_of_area__at0 ==. count if births_of_residents_of_area__at1==. replace births_of_residents_of_area__at1 =0 if births_of_residents_of_area__at1 ==. *check that all pdf pages appear to be in the data sort page__of_pdf_ gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1] assert temp==0|temp==1|temp==. drop temp *clean data entry errors replace city_balance_total="jersey city" if state=="new jersey" & county=="hudson" & race=="white" & births__total==5532 replace city_balance_total="jersey city" if state=="new jersey" & county=="hudson" & race=="nonwhite" & births__total==802 replace county="norfolk (ind. 
city)" if county=="norfolk" & state=="virginia" & race=="total" & births__total==6668 replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="white" & births__total==4722 replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="nonwhite" & births__total==1946 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births__total==5040 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births__total==2828 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births__total==2212 replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births__total==2288 *fix county name misspellings (relative to county names in the 1970 census) replace county=subinstr(county,"st.","st",.) replace county=subinstr(county,"ste.","st",.) replace county="baltimore city" if county=="baltimore (city)" & state=="maryland" replace county="chisago" if county=="chicago" & state=="minnesota" replace county="cochrane" if county=="cochran" & state=="texas" replace county="desoto" if county=="de soto" & state=="florida" replace county="denver" if county=="denver, coextensive with denver (city)" & state=="colorado" replace county="orleans" if county=="orleans, coextensive with new orleans" & state=="louisiana" replace county="philadelphia" if county=="philadelphia, coextensive with philadelphia (city)" & state=="pennsylvania" replace county="san francisco" if county=="san francisco, coextensive with san francisco (city)" & state=="california" replace county="st louis city" if county=="st louis (city)" & state=="missouri" *correct data entry errors found while checking that county totals sum to state totals replace births__total=1086 if births__total==1096 & state=="missouri" & county=="dunklin" & city_balance_total=="total" & race=="total" replace 
births_of_residents_of_area__att=360 if births_of_residents_of_area__att==380 & state=="west virginia" & county=="putnam" & city_balance_total=="total" & race=="total" *correct data entry errors found while checking that white+nonwhite=total replace births__total=402 if births__total==420 & state=="kentucky" & county=="logan" & race=="white" & city_balance_total=="total" replace births_of_residents_of_area__att=50084 if births_of_residents_of_area__att==5084 & state=="louisiana" & county=="total" & race=="white" & city_balance_total=="total" *check that county names are consistent with 1970 census, except for known deviations *the additional contradictions are counties in massachusetts, since data for massachusetts is only shown for the state as a whole in the year 1953, as specified in footnote 8 on page 179 preserve egen countyr_state=concat(county state), punct("_") sort countyr_state merge countyr_state using original_census1970_counties assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park" & state=="montana"|county=="park (excl yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. park (part)" & state=="wyoming"|county=="yellowstone nat. 
park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc"|state=="massachusetts" restore *check for mistakes/misspellings in state names assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california"|state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming" *data checks for columns summing to county total *not applicable; see footnote 1 *clean and label variables rename page__of_pdf_ page_of_pdf_ label var page_of_pdf_ "page of pdf" label var state "state" label var county "county" rename city_balance_total sub_county label var sub_county "city/balance/total" label var race "race" rename births__total births label var births "births by residence" rename births_of_residents_of_area__att births_h_p label var 
births_h_p "births by residence: physician in hospital" rename births_of_residents_of_area__at0 births_nh_p label var births_nh_p "births by residence: physician not in hospital" rename births_of_residents_of_area__at1 births_m label var births_m "births by residence: midwife" *generate year variable gen year=1953 label var year "year" *check that observations are unique egen tag=tag(state county sub_county race) assert tag==1 drop tag list in 1/1 desc sum saveold clean_natality1953.dta,replace clear ** *1954 data ** *http://nber15.nber.org/vital-stats-books/vsus_1954_1.cv.pdf *table 18 *hand-checked excel file for ?'s (difficult-to-read characters) *corrected below if any clear use natality1954.dta sum desc replace county=lower(county) replace state=lower(state) replace city_balance_total=lower(city_balance_total) replace race=lower(race) *missing observations in the original data are zeros *replace these so we can distinguish these from "true" missing values *which would be generated below due to difficult-to-read characters count if births__total==. replace births__total=0 if births__total ==. count if births_of_residents_of_area__att==. replace births_of_residents_of_area__att =0 if births_of_residents_of_area__att ==. count if births_of_residents_of_area__at0==. replace births_of_residents_of_area__at0 =0 if births_of_residents_of_area__at0 ==. count if births_of_residents_of_area__at1==. replace births_of_residents_of_area__at1 =0 if births_of_residents_of_area__at1 ==. *check that all pdf pages appear to be in the data sort page__of_pdf_ gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1] assert temp==0|temp==1|temp==. drop temp *clean data entry errors replace city_balance_total="jersey city" if state=="new jersey" & county=="hudson" & race=="white" & births__total==5562 replace city_balance_total="jersey city" if state=="new jersey" & county=="hudson" & race=="nonwhite" & births__total==914 replace county="norfolk (ind. 
city)" if county=="norfolk" & state=="virginia" & race=="total" & births__total==6688 replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="white" & births__total==4614 replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="nonwhite" & births__total==2074 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births__total==5178 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births__total==2622 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births__total==2556 replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births__total==2226 *fix county name misspellings (relative to county names in the 1970 census) replace county=subinstr(county,"st.","st",.) replace county=subinstr(county,"ste.","st",.) replace county="baltimore city" if county=="baltimore (city)" & state=="maryland" replace county="chisago" if county=="chicago" & state=="minnesota" replace county="cochrane" if county=="cochran" & state=="texas" replace county="desoto" if county=="de soto" & state=="florida" replace county="denver" if county=="denver, coextensive with denver (city)" & state=="colorado" replace county="orleans" if county=="orleans, coextensive with new orleans" & state=="louisiana" replace county="philadelphia" if county=="philadelphia, coextensive with philadelphia (city)" & state=="pennsylvania" replace county="san francisco" if county=="san francisco, coextensive with san francisco (city)" & state=="california" replace county="st louis city" if county=="st louis (city)" & state=="missouri" *check that county names are consistent with 1970 census, except for known deviations *the additional contradictions are counties in massachusetts, since data for massachusetts is only shown for the state as a whole in the year 1954, as specified in 
footnote 8 on page 179 preserve egen countyr_state=concat(county state), punct("_") sort countyr_state merge countyr_state using original_census1970_counties assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park" & state=="montana"|county=="park (excl yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. park (part)" & state=="wyoming"|county=="yellowstone nat. 
park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc"|state=="massachusetts" restore *check for mistakes/misspellings in state names assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california"|state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming" *data checks for columns summing to county total *not applicable; see footnote 1 *clean and label variables rename page__of_pdf_ page_of_pdf_ label var page_of_pdf_ "page of pdf" label var state "state" label var county "county" rename city_balance_total sub_county label var sub_county "city/balance/total" label var race "race" rename births__total births label var births "births by residence" rename births_of_residents_of_area__att births_h_p label var 
births_h_p "births by residence: physician in hospital" rename births_of_residents_of_area__at0 births_nh_p label var births_nh_p "births by residence: physician not in hospital" rename births_of_residents_of_area__at1 births_m label var births_m "births by residence: midwife" *generate year variable gen year=1954 label var year "year" *check that observations are unique egen tag=tag(state county sub_county race) assert tag==1 drop tag list in 1/1 desc sum saveold clean_natality1954.dta,replace clear ** *1955 data ** *http://nber15.nber.org/vital-stats-books/vsus_1955_1.cv.pdf *table 19 *hand-checked excel file for ?'s (difficult-to-read characters) *corrected below if any clear use natality1955.dta sum desc replace county=lower(county) replace state=lower(state) replace city_balance_total=lower(city_balance_total) replace race=lower(race) *missing observations in the original data are zeros *replace these so we can distinguish these from "true" missing values *which would be generated below due to difficult-to-read characters count if births__total==. replace births__total=0 if births__total ==. count if births_of_residents_of_area__att==. replace births_of_residents_of_area__att =0 if births_of_residents_of_area__att ==. count if births_of_residents_of_area__at0==. replace births_of_residents_of_area__at0 =0 if births_of_residents_of_area__at0 ==. count if births_of_residents_of_area__at1==. replace births_of_residents_of_area__at1 =0 if births_of_residents_of_area__at1 ==. *check that all pdf pages appear to be in the data sort page__of_pdf_ gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1] assert temp==0|temp==1|temp==. 
*the page-gap helper from the pdf continuity check is no longer needed
drop temp

*---------------------------------------------------------------
*clean data entry errors (1955)
*---------------------------------------------------------------
*row selectors shared by several corrections; each is pasted textually
*into the if-condition of the replace that uses it
local nat55_ark   state=="arkansas"
local nat55_va_t  state=="virginia" & race=="total"
local nat55_va_w  state=="virginia" & race=="white"
local nat55_va_nw state=="virginia" & race=="nonwhite"

*illinois, monroe: physician-in-hospital count on the county-total, all-races row
replace births_of_residents_of_area__att=299 if race=="total" & city_balance_total=="total" & county=="monroe" & state=="illinois"

*arkansas, union: the births count identifies which race a row belongs to
replace race="white"    if `nat55_ark' & county=="union" & births__total==333
replace race="nonwhite" if `nat55_ark' & county=="union" & births__total==270

*arkansas, van buren: every row is set to sub-county "total" and race "total"
replace city_balance_total="total" if `nat55_ark' & county=="van buren"
replace race="total"               if `nat55_ark' & county=="van buren"

*arkansas, washington: order matters here. the row with 1046 births is first
*marked as the county total; then every county-total row gets race "total"
replace city_balance_total="total" if `nat55_ark' & county=="washington" & births__total==1046
replace race="total"               if `nat55_ark' & county=="washington" & city_balance_total=="total"

*virginia: selected rows entered under the bare name are renamed "(ind. city)";
*race together with the births count picks out the exact row
replace county="norfolk (ind. city)"  if `nat55_va_t'  & county=="norfolk"  & births__total==8325
replace county="norfolk (ind. city)"  if `nat55_va_w'  & county=="norfolk"  & births__total==5985
replace county="norfolk (ind. city)"  if `nat55_va_nw' & county=="norfolk"  & births__total==2340
replace county="richmond (ind. city)" if `nat55_va_t'  & county=="richmond" & births__total==5133
replace county="richmond (ind. city)" if `nat55_va_w'  & county=="richmond" & births__total==2579
replace county="richmond (ind. city)" if `nat55_va_nw' & county=="richmond" & births__total==2554
replace county="roanoke (ind. city)"  if `nat55_va_t'  & county=="roanoke"  & births__total==2321

*---------------------------------------------------------------
*fix county name misspellings (relative to county names in the 1970 census)
*---------------------------------------------------------------
*every "st." and then every "ste." inside a county name becomes "st"
foreach abbrev in "st." "ste." {
    replace county=subinstr(county,"`abbrev'","st",.)
}
*===============================================================
*this stretch of the script does three things, in order:
*  1. finishes cleaning the 1955 file (county names, numeric corrections,
*     consistency asserts, renames/labels) and saves clean_natality1955.dta
*  2. loads, cleans, checks and saves the 1956 file (clean_natality1956.dta)
*  3. loads the 1957 file and runs its zero-fill and pdf-page continuity check
*statement order matters throughout: many replaces test values that were
*written by a replace just above them
*===============================================================

*1955: county names rewritten to the spelling the 1970 census merge below expects
replace county="baltimore city" if county=="baltimore (city)" & state=="maryland"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="desoto" if county=="de soto" & state=="florida"
replace county="de kalb" if county=="dekalb" & state=="tennessee"
replace county="denver" if county=="denver, coextensive with denver (city)" & state=="colorado"
replace county="orleans" if county=="orleans, coextensive with new orleans" & state=="louisiana"
replace county="philadelphia" if county=="philadelphia, coextensive with philadelphia (city)" & state=="pennsylvania"
replace county="san francisco" if county=="san francisco, coextensive with san francisco (city)" & state=="california"
replace county="st louis city" if county=="st louis (city)" & state=="missouri"
*correct data entry errors found while checking that county totals sum to state totals
*each correction also tests the old (wrong) value, so it is a no-op once applied
replace births__total=2196 if births__total==2195 & state=="new york" & county=="ulster" & city_balance_total=="total" & race=="total"
replace births__total=275 if births__total==273 & state=="north dakota" & county=="emmons" & city_balance_total=="total" & race=="total"
replace births__total=316 if births__total==318 & state=="utah" & county=="sanpete" & city_balance_total=="total" & race=="total"
replace births__total=348 if births__total==349 & state=="west virginia" & county=="barbour" & city_balance_total=="total" & race=="total"
replace births__total=881 if births__total==681 & state=="wisconsin" & county=="columbia" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=221 if births_of_residents_of_area__att==220 & state=="florida" & county=="holmes" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=279 if births_of_residents_of_area__att==379 & state=="indiana" & county=="orange" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=482 if births_of_residents_of_area__att==282 & state=="iowa" & county=="buchanan" & city_balance_total=="total" & race=="total"
*check that county names are consistent with 1970 census, except for known deviations
*the merge is done inside preserve/restore, so the key variable and the merge
*result are discarded and the working data are left as they were
*the assert passes for a row if it matched (_m==3) or is one of the listed exceptions;
*& binds tighter than |, so each county & state pair is one alternative
*NOTE(review): unlike the 1956 check further down, this list has no state=="massachusetts" exemption
*NOTE(review): the pair "yellowstone national park, part" & montana appears twice in the list (harmless)
*NOTE(review): the exemption tests state=="dc" while the state-name assert below only admits "district of columbia" - confirm which spelling the census file uses
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park" & state=="montana"|county=="park (excl yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. park (part)" & state=="wyoming"|county=="yellowstone nat. park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc"
restore
*check for mistakes/misspellings in state names
*every row must carry one of the lower-case state names listed here
assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california"|state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming"
*data checks for columns summing to county total
*not applicable; see footnote 1
*clean and label variables
*the raw column names are replaced by the short names used in the saved file:
*births, births_h_p, births_nh_p, births_m, sub_county
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
label var births_h_p "births by residence: physician in hospital"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician not in hospital"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"
*generate year variable
gen year=1955
label var year "year"
*check that observations are unique
*one row per state x county x sub_county x race; the assert fails if any combination repeats
egen tag=tag(state county sub_county race)
assert tag==1
drop tag
list in 1/1
desc
sum
saveold clean_natality1955.dta,replace
clear

**
*1956 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1956_1.cv.pdf
*table 19
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1956.dta
sum
desc
*lower-case all string identifiers so the literal comparisons below match
replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)
*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
*(each count only reports how many cells the following replace will fill)
count if births__total==.
replace births__total=0 if births__total ==.
count if births_of_residents_of_area__att==.
replace births_of_residents_of_area__att =0 if births_of_residents_of_area__att ==.
count if births_of_residents_of_area__at0==.
replace births_of_residents_of_area__at0 =0 if births_of_residents_of_area__at0 ==.
count if births_of_residents_of_area__at1==.
replace births_of_residents_of_area__at1 =0 if births_of_residents_of_area__at1 ==.
*check that all pdf pages appear to be in the data
*after sorting by page, consecutive rows may differ by 0 or 1 page only;
*the difference is missing for the first row, which has no predecessor
sort page__of_pdf_
gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1]
assert temp==0|temp==1|temp==.
drop temp
*clean data entry errors
*most corrections below pick out a single row by its births count(s);
*they are tied to the values in this particular raw file
*virginia: selected rows entered under the bare name are renamed "(ind. city)"
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="total" & births__total==8320
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="white" & births__total==5844
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="nonwhite" & births__total==2476
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births__total==5128
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births__total==2470
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births__total==2658
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births__total==2368
*cairo was entered in the county column: move it to the sub-county column,
*then file those rows (and three illinois rows) under county "alexander"
*NOTE(review): the cairo replaces carry no state condition, and the white/nonwhite
*alexander replaces select on state, race and births only - verify against the raw
*file that no other row satisfies these conditions
replace city_balance_total="cairo" if county=="cairo" & births__total==230
replace city_balance_total="cairo" if county=="cairo" & births__total==140 & births_of_residents_of_area__att==130
replace city_balance_total="cairo" if county=="cairo" & births__total==90
replace county="alexander" if city_balance_total=="cairo"
replace county="alexander" if state=="illinois" & race=="total" & births__total==140 & births_of_residents_of_area__att==88
replace county="alexander" if state=="illinois" & race=="white" & births__total==80
replace county="alexander" if state=="illinois" & race=="nonwhite" & births__total==60
*florida: three county-total rows are assigned to county "liberty"
replace county="liberty" if state=="florida" & city_balance_total =="total" & race =="white" & births__total ==64 & births_of_residents_of_area__att ==56
replace county="liberty" if state=="florida" & city_balance_total =="total" & race =="nonwhite" & births__total ==20 & births_of_residents_of_area__att ==6
replace county="liberty" if state=="florida" & city_balance_total =="total" & race =="total" & births__total ==84 & births_of_residents_of_area__att ==62
*rows entered with state "missouri" are reassigned to "montana" (more follow below)
replace state="montana" if state=="missouri" & county=="liberty"
replace state="montana" if state=="missouri" & county=="park"
*georgia: "thomasville" in the county column becomes county "thomas"; the three
*rows identified by births count then get sub-county "thomasville".
*the second step tests county=="thomas", so it must run after the first
replace county="thomas" if state=="georgia" & county=="thomasville" & city_balance_total=="balance of county"
replace county="thomas" if state=="georgia" & county=="thomasville" & city_balance_total=="total"
replace city_balance_total="thomasville" if state=="georgia" & county=="thomas" & city_balance_total=="total" & race=="total" & births__total==536
replace city_balance_total="thomasville" if state=="georgia" & county=="thomas" & city_balance_total=="total" & race=="white" & births__total==280
replace city_balance_total="thomasville" if state=="georgia" & county=="thomas" & city_balance_total=="total" & race=="nonwhite" & births__total==256
*rows marked "total" in the sub-county column that are reassigned to a city or to "balance of county"
replace city_balance_total="indianapolis" if state=="indiana" & county=="marion" & city_balance_total=="total" & births__total==10084
replace city_balance_total="indianapolis" if state=="indiana" & county=="marion" & city_balance_total=="total" & births__total==3072
replace city_balance_total="independence" if state=="kansas" & county=="montgomery" & city_balance_total=="total" & race=="white" & births__total==210
replace city_balance_total="independence" if state=="kansas" & county=="montgomery" & city_balance_total=="total" & race=="nonwhite" & births__total==48
replace city_balance_total="balance of county" if state=="maryland" & county=="montgomery" & city_balance_total=="total" & race=="white" & births__total==6366
replace city_balance_total="balance of county" if state=="maryland" & county=="montgomery" & city_balance_total=="total" & race=="nonwhite" & births__total==396
replace city_balance_total="balance of county" if state=="mississippi" & county=="adams" & city_balance_total=="total" & race=="white" & births__total==172
replace city_balance_total="balance of county" if state=="mississippi" & county=="adams" & city_balance_total=="total" & race=="nonwhite" & births__total==208
replace city_balance_total="balance of county" if state=="mississippi" & county=="coahoma" & city_balance_total=="total" & race=="white" & births__total==124
replace city_balance_total="balance of county" if state=="mississippi" & county=="coahoma" & city_balance_total=="total" & race=="nonwhite" & births__total==912
replace city_balance_total="balance of county" if state=="mississippi" & county=="forrest" & city_balance_total=="total" & race=="white" & births__total==338
replace city_balance_total="balance of county" if state=="mississippi" & county=="forrest" & city_balance_total=="total" & race=="nonwhite" & births__total==138
replace city_balance_total="balance of county" if state=="mississippi" & county=="hinds" & city_balance_total=="total" & race=="white" & births__total==962
replace city_balance_total="balance of county" if state=="mississippi" & county=="hinds" & city_balance_total=="total" & race=="nonwhite" & births__total==892
*missouri -> montana for the state-total rows and for selected county rows;
*these are singled out by births count or pdf page rather than by county name alone
replace state="montana" if state=="missouri" & county=="total" & city_balance_total=="total" & race=="total" & births__total==17732
replace state="montana" if state=="missouri" & county=="total" & city_balance_total=="total" & race=="white" & births__total==16702
replace state="montana" if state=="missouri" & county=="total" & city_balance_total=="total" & race=="nonwhite" & births__total==1030
replace state="montana" if state=="missouri" & county=="carter" & page__of_pdf_==135
replace state="montana" if state=="missouri" & county=="jefferson" & city_balance_total=="total" & race=="total" & births__total==66
replace state="montana" if state=="missouri" & county=="lincoln" & city_balance_total=="total" & race=="total" & births__total==360
replace state="montana" if state=="missouri" & county=="madison" & city_balance_total=="total" & race=="total" & births__total==96
replace city_balance_total="roselle" if state=="new jersey" & county=="union" & city_balance_total=="total" & race=="white" & births__total==360
replace city_balance_total="roselle" if state=="new jersey" & county=="union" & city_balance_total=="total" & race=="nonwhite" & births__total==74
*north carolina: rows filed under the wrong county name are reassigned by births count
replace county="alexander" if state=="north carolina" & county=="alamance" & city_balance_total=="total" & race=="total" & births__total==340
replace county="onslow" if state=="north carolina" & county=="northampton" & city_balance_total=="total" & race=="total" & births__total==3264
replace county="onslow" if state=="north carolina" & county=="northampton" & city_balance_total=="total" & race=="white" & births__total==2868
replace county="onslow" if state=="north carolina" & county=="northampton" & city_balance_total=="total" & race=="nonwhite" & births__total==396
replace county="orange" if state=="north carolina" & county=="northampton" & city_balance_total=="total" & race=="total" & births__total==974
replace county="orange" if state=="north carolina" & county=="northampton" & city_balance_total=="total" & race=="white" & births__total==638
replace county="orange" if state=="north carolina" & county=="northampton" & city_balance_total=="total" & race=="nonwhite" & births__total==336
replace county="pamlico" if state=="north carolina" & county=="northampton" & city_balance_total=="total" & race=="total" & births__total==232
replace county="pamlico" if state=="north carolina" & county=="northampton" & city_balance_total=="total" & race=="white" & births__total==98
replace county="pamlico" if state=="north carolina" & county=="northampton" & city_balance_total=="total" & race=="nonwhite" & births__total==134
replace county="union" if state=="north carolina" & county=="tyrrell" & city_balance_total=="total" & race=="total" & births__total==1142
replace county="union" if state=="north carolina" & county=="tyrrell" & city_balance_total=="total" & race=="white" & births__total==800
replace county="union" if state=="north carolina" & county=="tyrrell" & city_balance_total=="total" & race=="nonwhite" & births__total==342
replace city_balance_total="coffeyville" if state=="kansas" & county=="montgomery" & city_balance_total=="total" & race=="white" & births__total==368
replace city_balance_total="coffeyville" if state=="kansas" & county=="montgomery" & city_balance_total=="total" & race=="nonwhite" & births__total==58
*fix county name misspellings (relative to county names in the 1970 census)
*every "st." and then every "ste." inside a county name becomes "st"
replace county=subinstr(county,"st.","st",.)
replace county=subinstr(county,"ste.","st",.)
replace county="baltimore city" if county=="baltimore (city)" & state=="maryland"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="desoto" if county=="de soto" & state=="florida"
replace county="denver" if county=="denver, coextensive with denver (city)" & state=="colorado"
replace county="orleans" if county=="orleans, coextensive with new orleans" & state=="louisiana"
replace county="philadelphia" if county=="philadelphia, coextensive with philadelphia (city)" & state=="pennsylvania"
replace county="san francisco" if county=="san francisco, coextensive with san francisco (city)" & state=="california"
replace county="st louis city" if county=="st louis (city)" & state=="missouri"
*single-character misspellings in county names (presumably transcription slips - not confirmed here)
replace county="acadia" if county=="acadie" & state=="louisiana"
replace county="adams" if county=="adems" & state=="illinois"
replace county="allegheny" if county=="allegbeny" & state=="pennsylvania"
replace county="emanuel" if county=="eeanuel" & state=="georgia"
replace county="manatee" if county=="manstee" & state=="florida"
replace county="maricopa" if county=="mari copa" & state=="arizona"
replace county="mississippi" if county=="missicsippi" & state=="missouri"
replace county="o'brien" if county=="obrien" & state=="iowa"
replace county="pulaski" if county=="pulacki" & state=="indiana"
replace county="sangamon" if county=="sangemon" & state=="illinois"
replace county="santa rosa" if county=="senta rosa" & state=="florida"
replace county="suwannee" if county=="suwennee" & state=="florida"
replace county="tallahatchie" if county=="tallahatchfe" & state=="mississippi"
replace county="umatilla" if county=="umstilla" & state=="oregon"
replace county="wabash" if county=="wabach" & state=="illinois"
replace county="yamhill" if county=="yambill" & state=="oregon"
replace county="vernon" if county=="vernoh" & state=="louisiana"
*remaining missouri -> montana reassignments, keyed on county name alone
*(beaverhead through musselshell; the other reassigned counties are handled above)
replace state="montana" if county=="beaverhead" & state=="missouri"
replace state="montana" if county=="big horn" & state=="missouri"
replace state="montana" if county=="blaine" & state=="missouri"
replace state="montana" if county=="broadwater" & state=="missouri"
replace state="montana" if county=="carbon" & state=="missouri"
replace state="montana" if county=="cascade" & state=="missouri"
replace state="montana" if county=="chouteau" & state=="missouri"
replace state="montana" if county=="custer" & state=="missouri"
replace state="montana" if county=="daniels" & state=="missouri"
replace state="montana" if county=="dawson" & state=="missouri"
replace state="montana" if county=="deer lodge" & state=="missouri"
replace state="montana" if county=="fallon" & state=="missouri"
replace state="montana" if county=="fergus" & state=="missouri"
replace state="montana" if county=="flathead" & state=="missouri"
replace state="montana" if county=="gallatin" & state=="missouri"
replace state="montana" if county=="garfield" & state=="missouri"
replace state="montana" if county=="glacier" & state=="missouri"
replace state="montana" if county=="golden valley" & state=="missouri"
replace state="montana" if county=="granite" & state=="missouri"
replace state="montana" if county=="hill" & state=="missouri"
replace state="montana" if county=="judith basin" & state=="missouri"
replace state="montana" if county=="lake" & state=="missouri"
replace state="montana" if county=="lewis and clark" & state=="missouri"
replace state="montana" if county=="mccone" & state=="missouri"
replace state="montana" if county=="meagher" & state=="missouri"
replace state="montana" if county=="mineral" & state=="missouri"
replace state="montana" if county=="missoula" & state=="missouri"
replace state="montana" if county=="musselshell" & state=="missouri"
*correct data entry errors found while checking that county totals sum to state totals
replace births_of_residents_of_area__att=408 if births_of_residents_of_area__att==409 & state=="georgia" & county=="gordon" & city_balance_total=="total" & race=="total"
*check that county names are consistent with 1970 census, except for known deviations
*the additional contradictions are counties in massachusetts, since data for massachusetts is only shown for the state as a whole in the year 1956, as specified in footnote 8 on page 185
*same structure as the 1955 check: merge inside preserve/restore, then require
*a match (_m==3) or membership in the exception list
*NOTE(review): the pair "yellowstone national park, part" & montana appears twice in the list (harmless)
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park" & state=="montana"|county=="park (excl yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. park (part)" & state=="wyoming"|county=="yellowstone nat. park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc"|state=="massachusetts"
restore
*check for mistakes/misspellings in state names
*this also confirms that the missouri/montana reassignments above left only valid state names
assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california"|state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming"
*data checks for columns summing to county total
*not applicable; see footnote 1
*clean and label variables
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
label var births_h_p "births by residence: physician in hospital"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician not in hospital"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"
*generate year variable
gen year=1956
label var year "year"
*check that observations are unique
*one row per state x county x sub_county x race; the assert fails if any combination repeats
egen tag=tag(state county sub_county race)
assert tag==1
drop tag
list in 1/1
desc
sum
saveold clean_natality1956.dta,replace
clear

**
*1957 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1957_1.cv.pdf
*table 24
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1957.dta
sum
desc
*lower-case all string identifiers so the literal comparisons below match
replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)
*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
count if births__total==.
replace births__total=0 if births__total ==.
count if births_of_residents_of_area__att==.
replace births_of_residents_of_area__att =0 if births_of_residents_of_area__att ==.
count if births_of_residents_of_area__at0==.
replace births_of_residents_of_area__at0 =0 if births_of_residents_of_area__at0 ==.
count if births_of_residents_of_area__at1==.
replace births_of_residents_of_area__at1 =0 if births_of_residents_of_area__at1 ==.
*check that all pdf pages appear to be in the data
*temp holds the page difference between consecutive sorted rows; it is dropped right after this check
sort page__of_pdf_
gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1]
assert temp==0|temp==1|temp==.
*1957: the page-gap helper variable from the pdf page check is no longer needed
drop temp

*clean data entry errors
*overwrite births__total in individual state/county/sub-county/race cells
replace births__total=196 if state=="georgia" & county=="lee" & city_balance_total=="total" & race=="total"
replace births__total=44 if state=="georgia" & county=="lee" & city_balance_total=="total" & race=="white"
replace births__total=3466 if state=="indiana" & county=="lake" & city_balance_total=="balance of county" & race=="total"
replace births__total=2164 if state=="indiana" & county=="la porte" & city_balance_total=="total" & race=="total"
replace births__total=796 if state=="indiana" & county=="la porte" & city_balance_total=="michigan city" & race=="total"
replace births__total=550 if state=="indiana" & county=="la porte" & city_balance_total=="la porte" & race=="total"
replace births__total=332 if state=="indiana" & county=="lawrence" & city_balance_total=="bedford" & race=="total"
replace births__total=554 if state=="indiana" & county=="lawrence" & city_balance_total=="balance of county" & race=="total"

*drops one empty observation
drop if state==""

*county name keyed with a "?" for a difficult-to-read character
replace county="ellis" if county=="ell?"

*drops one observation which appears to have been entered twice
*(keep this after the corrections above; do not reorder)
duplicates report
duplicates list
duplicates drop

*alameda county, california: the row keyed as "albany" with 2546 births is relabeled
*the city name is spelled "hayward" (this line previously wrote "heyward")
replace city_balance_total="hayward" if state=="california" & county=="alameda" & city_balance_total=="albany" & births__total==2546

*claiborne county, mississippi: recode race on two rows that were keyed as race "total"
replace race="nonwhite" if state=="mississippi" & county=="claiborne" & city_balance_total=="total" & race=="total" & births__total==252
replace race="white" if state=="mississippi" & county=="claiborne" & city_balance_total=="total" & race=="total" & births__total==48

*virginia: relabel selected rows as the independent city rather than the county
*rows are identified by race and births__total
replace county="norfolk (ind. city)" if state=="virginia" & county=="norfolk" & ///
    (race=="total" & births__total==8198 | race=="white" & births__total==5550 | race=="nonwhite" & births__total==2648)
replace county="richmond (ind. city)" if state=="virginia" & county=="richmond" & ///
    (race=="total" & births__total==5106 | race=="white" & births__total==2374 | race=="nonwhite" & births__total==2732)
replace county="roanoke (ind. city)" if state=="virginia" & county=="roanoke" & race=="total" & births__total==2520

*milwaukee county, wisconsin: rows keyed as sub-county "total" that are relabeled as the city
replace city_balance_total="milwaukee" if state=="wisconsin" & county=="milwaukee" & city_balance_total=="total" & ///
    (race=="total" & births__total==19850 | race=="white" & births__total==17318 | race=="nonwhite" & births__total==2532)

*fix county name misspellings (relative to county names in the 1970 census)
*"st." becomes "st" and "ste." becomes "st"
replace county=subinstr(county,"st.","st",.)
replace county=subinstr(county,"ste.","st",.)
*1957 (continued): county name corrections, relative to county names in the 1970 census
replace county="baltimore city" if state=="maryland" & county=="baltimore (city)"
replace county="cochrane" if state=="texas" & county=="cochran"
replace county="calhoun" if state=="illinois" & county=="calboun"
replace county="cross" if state=="arkansas" & county=="crosa"
replace county="desoto" if state=="florida" & county=="de soto"
replace county="denver" if state=="colorado" & county=="denver, coextensive with denver (city)"
replace county="duval" if state=="texas" & county=="duvel"
replace county="hubbard" if state=="minnesota" & county=="houbbard"
replace county="st john the baptist" if state=="louisiana" & county=="john the baptist"
replace county="merrick" if state=="nebraska" & county=="marrick"
replace county="mcintosh" if state=="north dakota" & county=="mcintoch"
replace county="nemaha" if state=="kansas" & county=="nemaba"
replace county="orleans" if state=="louisiana" & county=="orleans, coextensive with new orleans"
replace county="philadelphia" if state=="pennsylvania" & county=="philadelphia, coextensive with philadelphia (city)"
replace county="rooks" if state=="kansas" & county=="roocks"
replace county="san francisco" if state=="california" & county=="san francisco coextensive with san francisco (city)"
replace county="st louis city" if state=="missouri" & county=="st louis (city)"
replace county="upshur" if state=="texas" & county=="upahur"

*correct data entry errors found while checking that county totals sum to state totals
*(the san francisco line relies on the county rename above)
replace city_balance_total="total" if state=="california" & county=="san francisco" & city_balance_total=="totol"
replace births__total=162 if births__total==138 & state=="georgia" & county=="lincoln" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=146 if births_of_residents_of_area__att==148 & state=="colorado" & county=="eagle" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=80 if births_of_residents_of_area__att==90 & state=="idaho" & county=="valley" & city_balance_total=="total" & race=="total"

*correct data entry errors found while checking that white+nonwhite=total
replace births__total=272 if births__total==172 & state=="georgia" & county=="liberty" & race=="white" & city_balance_total=="total"
replace births__total=214 if births__total==819 & state=="georgia" & county=="liberty" & race=="nonwhite" & city_balance_total=="total"
replace births__total=6748 if births__total==6743 & state=="oklahoma" & county=="total" & race=="nonwhite" & city_balance_total=="total"
replace births_of_residents_of_area__att=266 if births_of_residents_of_area__att==286 & state=="north carolina" & county=="hertford" & race=="nonwhite" & city_balance_total=="total"

*check that county names are consistent with 1970 census, except for known deviations
*the additional contradictions are counties in massachusetts, since data for massachusetts is only shown for the state as a whole in the year 1957, as specified in footnote 7 on page 201
*the merge is done on a preserved copy so the working data are untouched
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
*every row must match the census list or be one of the listed exceptions
assert _m==3 ///
    | state=="south dakota" & inlist(county,"armstrong","washington") ///
    | state=="idaho" & inlist(county,"yellowstone national park (part)","yellowstone national park","yellowstone nat. park (part)") ///
    | state=="montana" & inlist(county,"yellowstone national park (part)","yellowstone national park","park (excl yell nat park)","yellowstone national park, part","park") ///
    | state=="wyoming" & inlist(county,"yellowstone national park (part)","yellowstone national park (total)","yellowstone national park, part","yellowstone national park, total","yellowstone nat. park (part)","yellowstone nat. park (total)") ///
    | state=="new york" & inlist(county,"new york city","bronx","kings","new york","queens","richmond") ///
    | state=="nevada" & inlist(county,"ormsby","carson city") ///
    | state=="new mexico" & county=="los alamos" ///
    | state=="wisconsin" & county=="menominee" ///
    | county=="total" ///
    | inlist(state,"alaska","hawaii","virginia","dc","massachusetts")
restore

*check for mistakes/misspellings in state names
*(inlist() takes at most 9 string values per call, hence the chunks)
assert inlist(state,"alabama","alaska","arizona","arkansas","california","colorado","connecticut","delaware","district of columbia") ///
    | inlist(state,"florida","georgia","hawaii","idaho","illinois","indiana","iowa","kansas","kentucky") ///
    | inlist(state,"louisiana","maine","maryland","massachusetts","michigan","minnesota","mississippi","missouri","montana") ///
    | inlist(state,"nebraska","nevada","new hampshire","new jersey","new mexico","new york","north carolina","north dakota","ohio") ///
    | inlist(state,"oklahoma","oregon","pennsylvania","rhode island","south carolina","south dakota","tennessee","texas","utah") ///
    | inlist(state,"vermont","virginia","washington","west virginia","wisconsin","wyoming")

*data checks for columns summing to county total
*not applicable; see footnote 1

*clean and label variables
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
label var births_h_p "births by residence: physician in hospital"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician not in hospital"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"

*generate year variable
gen year=1957
label var year "year"

*check that observations are unique
*egen tag() marks one row per group with 1, so any 0 means a repeated state/county/sub_county/race cell
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1957.dta,replace
clear

**
*1958 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1958_1.cv.pdf
*table 25
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1958.dta
sum
desc

*lower-case all string identifiers
foreach v of varlist county state city_balance_total race {
    replace `v'=lower(`v')
}

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
foreach v of varlist births__total births_of_residents_of_area__att births_of_residents_of_area__at0 births_of_residents_of_area__at1 {
    count if `v'==.
    replace `v'=0 if `v'==.
}

*check that all pdf pages appear to be in the data
*consecutive rows (sorted by page) may differ by 0 or 1 page; the first row has no predecessor
sort page__of_pdf_
gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1]
assert temp==0|temp==1|temp==.
drop temp

*clean data entry errors
replace births_of_residents_of_area__att=3016 if state=="kansas" & county=="johnson" & city_balance_total=="total" & race=="total"

*virginia: relabel selected rows as the independent city rather than the county
*rows are identified by race and births__total
replace county="norfolk (ind. city)" if state=="virginia" & county=="norfolk" & ///
    (race=="total" & births__total==7760 | race=="white" & births__total==5314 | race=="nonwhite" & births__total==2446)
replace county="richmond (ind. city)" if state=="virginia" & county=="richmond" & ///
    (race=="total" & births__total==5062 | race=="white" & births__total==2306 | race=="nonwhite" & births__total==2756)
replace county="roanoke (ind. city)" if state=="virginia" & county=="roanoke" & race=="total" & births__total==2372

replace city_balance_total="total" if state=="california" & county=="santa clara" & births__total==14320

*rockingham county, north carolina: relabel two rows as the balance of the county
*this previously wrote "balance of city"; every other balance row in this file is "balance of county"
*NOTE(review): confirm against table 25 that these two rows are the balance-of-county rows
replace city_balance_total="balance of county" if state=="north carolina" & county=="rockingham" & inlist(births__total,992,300)

*add in observation for nonwhite, Woodford, KY, which was completely left out
local onemore = _N+1
set obs `onemore'
*fill the newly created last row
replace page__of_pdf_ = 150 in l
replace state = "kentucky" in l
replace county = "woodford" in l
replace city_balance_total = "total" in l
replace race = "nonwhite" in l
replace births__total = 50 in l
replace births_of_residents_of_area__att = 48 in l
replace births_of_residents_of_area__at0 = 2 in l
replace births_of_residents_of_area__at1 = 0 in l
*group rows by state while keeping the current order within each state
gen order_var = _n
sort state order_var, stable
drop order_var

*fix county name misspellings (relative to county names in the 1970 census)
*"st." becomes "st" and "ste." becomes "st"
replace county=subinstr(county,"st.","st",.)
replace county=subinstr(county,"ste.","st",.)
*1958 (continued): county name corrections, relative to county names in the 1970 census
replace county="appanoose" if state=="iowa" & county=="appanocse"
replace county="baltimore city" if state=="maryland" & county=="baltimore (city)"
replace county="cherokee" if state=="iowa" & county=="cherckee"
replace county="gloucester" if state=="new jersey" & county=="cloucester"
replace county="cochrane" if state=="texas" & county=="cochran"
replace county="desoto" if state=="florida" & county=="de soto"
replace county="denver" if state=="colorado" & county=="denver, coextensive with denver (city)"
replace county="franklin" if state=="idaho" & county=="frenklin"
replace county="lincoln" if state=="north carolina" & county=="idncoln"
replace county="oconee" if state=="georgia" & county=="oconec"
replace county="orleans" if state=="louisiana" & county=="orleans, coextensive with new orleans"
replace county="philadelphia" if state=="pennsylvania" & county=="philadelphia, coextensive with philadelphia (city)"
replace county="richardson" if state=="nebraska" & county=="richerdson"
replace county="rooks" if state=="kansas" & county=="rocks"
replace county="san francisco" if state=="california" & county=="san francisco, coextensive with san francisco (city)"
replace county="st louis city" if state=="missouri" & county=="st louis (city)"
replace county="unicoi" if state=="tennessee" & county=="unici"
replace county="washita" if state=="oklahoma" & county=="washite"
replace county="yakima" if state=="washington" & county=="yakina"

*new jersey rows whose county was keyed as the state name: assign to union, identified by births__total
replace county="union" if state=="new jersey" & county=="new jersey" & inlist(births__total,192,406,2018,418,480,74,524)

*mississippi rows keyed under harrison that are reassigned to hinds, identified by births__total
replace county="hinds" if state=="mississippi" & county=="harrison" & inlist(births__total,5242,2664,2578,3242,1506,1736,2000,1158,842)

*louisiana: a row keyed as livingston / "lison" is reassigned to madison,
*then every louisiana madison row gets sub-county "total" (order matters)
replace county="madison" if state=="louisiana" & county=="livingston" & city_balance_total=="lison"
replace city_balance_total="total" if state=="louisiana" & county=="madison"

*correct data entry errors found while checking that county totals sum to state totals
replace births_of_residents_of_area__att=186 if births_of_residents_of_area__att==185 & state=="georgia" & county=="wilkinson" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=126 if births_of_residents_of_area__att==128 & state=="kansas" & county=="gove" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=886 if births_of_residents_of_area__att==986 & state=="north dakota" & county=="burleigh" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=66 if births_of_residents_of_area__att==86 & state=="north dakota" & county=="slope" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=1268 if births_of_residents_of_area__att==1269 & state=="washington" & county=="cowlitz" & city_balance_total=="total" & race=="total"

*check that county names are consistent with 1970 census, except for known deviations
*the additional contradictions are counties in massachusetts, since data for massachusetts is only shown for the state as a whole in the year 1958, as specified in footnote 7 on page 217
*the merge is done on a preserved copy so the working data are untouched
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
*every row must match the census list or be one of the listed exceptions
assert _m==3 ///
    | state=="south dakota" & inlist(county,"armstrong","washington") ///
    | state=="idaho" & inlist(county,"yellowstone national park (part)","yellowstone national park","yellowstone nat. park (part)") ///
    | state=="montana" & inlist(county,"yellowstone national park (part)","yellowstone national park","park (excl yell nat park)","yellowstone national park, part","park") ///
    | state=="wyoming" & inlist(county,"yellowstone national park (part)","yellowstone national park (total)","yellowstone national park, part","yellowstone national park, total","yellowstone nat. park (part)","yellowstone nat. park (total)") ///
    | state=="new york" & inlist(county,"new york city","bronx","kings","new york","queens","richmond") ///
    | state=="nevada" & inlist(county,"ormsby","carson city") ///
    | state=="new mexico" & county=="los alamos" ///
    | state=="wisconsin" & county=="menominee" ///
    | county=="total" ///
    | inlist(state,"alaska","hawaii","virginia","dc","massachusetts")
restore

*check for mistakes/misspellings in state names
*(inlist() takes at most 9 string values per call, hence the chunks)
assert inlist(state,"alabama","alaska","arizona","arkansas","california","colorado","connecticut","delaware","district of columbia") ///
    | inlist(state,"florida","georgia","hawaii","idaho","illinois","indiana","iowa","kansas","kentucky") ///
    | inlist(state,"louisiana","maine","maryland","massachusetts","michigan","minnesota","mississippi","missouri","montana") ///
    | inlist(state,"nebraska","nevada","new hampshire","new jersey","new mexico","new york","north carolina","north dakota","ohio") ///
    | inlist(state,"oklahoma","oregon","pennsylvania","rhode island","south carolina","south dakota","tennessee","texas","utah") ///
    | inlist(state,"vermont","virginia","washington","west virginia","wisconsin","wyoming")

*data checks for columns summing to county total
*not applicable; see footnote 1

*clean and label variables
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
label var births_h_p "births by residence: physician in hospital"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician not in hospital"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"

*generate year variable
gen year=1958
label var year "year"

*check that observations are unique
*egen tag() marks one row per group with 1, so any 0 means a repeated state/county/sub_county/race cell
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1958.dta,replace
clear

**
*1959 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1959_1.cv.pdf
*table 25
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1959.dta
sum
desc

*lower-case all string identifiers
foreach v of varlist county state city_balance_total race {
    replace `v'=lower(`v')
}

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
foreach v of varlist births__total births_of_residents_of_area__att births_of_residents_of_area__at0 births_of_residents_of_area__at1 {
    count if `v'==.
    replace `v'=0 if `v'==.
}

*check that all pdf pages appear to be in the data
*consecutive rows (sorted by page) may differ by 0 or 1 page; the first row has no predecessor
sort page__of_pdf_
gen temp=page__of_pdf_[_n]-page__of_pdf_[_n-1]
assert temp==0|temp==1|temp==.
*1959: the page-gap helper variable from the pdf page check is no longer needed
drop temp

*clean data entry errors
*one county that was not originally entered
append using natality1959_append.dta

*lower-case the string identifiers again so the appended rows are covered too
foreach v of varlist county state city_balance_total race {
    replace `v'=lower(`v')
}

replace births_of_residents_of_area__at0=6 if state=="texas" & county=="gregg" & city_balance_total=="total" & race=="nonwhite"

*drops three observations which appear to have been entered twice
duplicates report
duplicates list
duplicates drop

*rows keyed with state "indiana" that are reassigned to iowa
*first group: selected on county name alone
foreach c in "adair" "allamakee" "appanoose" "audubon" "black hawk" "bremer" ///
        "buchanan" "buena vista" "butler" "calhoun" "cedar" "cerro gordo" ///
        "cherokee" "chickasaw" "clarke" "clayton" "dallas" "davis" ///
        "des moines" "dickinson" "dubuque" "emmet" "fremont" {
    replace state="iowa" if state=="indiana" & county=="`c'"
}
*second group: selected on county name plus births__total
replace state="iowa" if state=="indiana" & county=="adams" & births__total==142
replace state="iowa" if state=="indiana" & county=="benton" & births__total==512
replace state="iowa" if state=="indiana" & county=="boone" & inlist(births__total,494,252,242)
replace state="iowa" if state=="indiana" & county=="cass" & births__total==386
replace state="iowa" if state=="indiana" & county=="clay" & births__total==430
replace state="iowa" if state=="indiana" & county=="clinton" & inlist(births__total,1264,744,520)
replace state="iowa" if state=="indiana" & county=="crawford" & births__total==436
replace state="iowa" if state=="indiana" & county=="decatur" & births__total==192
replace state="iowa" if state=="indiana" & county=="delaware" & births__total==532
replace state="iowa" if state=="indiana" & county=="fayette" & births__total==628
replace state="iowa" if state=="indiana" & county=="floyd" & inlist(births__total,492,210,282)
replace state="iowa" if state=="indiana" & county=="franklin" & births__total==316
replace state="iowa" if state=="indiana" & county=="greene" & births__total==312
replace state="iowa" if state=="indiana" & county=="carroll" & births__total==684

*rows keyed with state "west virginia" that are reassigned to wisconsin
*first group: selected on county name alone
foreach c in "adams" "ashland" "barron" "bayfield" "brown" "buffalo" ///
        "burnett" "calumet" "chippewa" "clark" "columbia" "crawford" ///
        "dane" "dodge" "door" "douglas" "dunn" "eau claire" ///
        "florence" "fond du lac" "forest" "green lake" "green" "iowa" ///
        "iron" "juneau" "kenosha" "kewaunee" "la crosse" {
    replace state="wisconsin" if state=="west virginia" & county=="`c'"
}
*second group: selected on county name plus births__total
replace state="wisconsin" if state=="west virginia" & county=="grant" & births__total==1194
replace state="wisconsin" if state=="west virginia" & county=="jackson" & births__total==320
replace state="wisconsin" if state=="west virginia" & county=="jefferson" & inlist(births__total,1134,234)
*1959 (continued): more rows keyed under the wrong state, identified by county and births__total
replace state="wisconsin" if state=="west virginia" & county=="jefferson" & inlist(births__total,288,900)

*rows keyed with state "kentucky" that are reassigned to louisiana
foreach c in "acadia" "ascension" {
    replace state="louisiana" if state=="kentucky" & county=="`c'"
}
replace state="louisiana" if state=="kentucky" & county=="allen" & inlist(births__total,580,436,144)

*texas: "waco" was keyed as a county; move it under mclennan and label the city rows
*(the county rename must run before the sub-county relabel)
replace county="mclennan" if state=="texas" & county=="waco"
replace city_balance_total="waco" if state=="texas" & county=="mclennan" & inlist(births__total,544,1970,2514)

*san diego county, california: rows keyed as sub-county "total" that are relabeled as the city
replace city_balance_total="san diego" if state=="california" & county=="san diego" & city_balance_total=="total" & inlist(births__total,13198,1460)

*state reassignments selected on births__total alone
*NOTE(review): these presumably target the state-total rows; any other row in the
*same state with one of these exact counts would be reassigned too - confirm
replace state="iowa" if state=="indiana" & inlist(births__total,64616,63632,984)
replace state="louisiana" if state=="kentucky" & inlist(births__total,90968,55296,35672)

*essex county, new jersey: rows keyed as "balance of county" that are relabeled as "total"
replace city_balance_total="total" if state=="new jersey" & county=="essex" & city_balance_total=="balance of county" & inlist(births__total,20028,14102,5926)

*hunt county, texas: assign sub-county labels by births__total
replace city_balance_total="greenville" if state=="texas" & county=="hunt" & inlist(births__total,334,64)
replace city_balance_total="balance of county" if state=="texas" & county=="hunt" & inlist(births__total,300,52)

*jefferson county, texas: assign sub-county labels by births__total
replace city_balance_total="beaumont" if state=="texas" & county=="jefferson" & inlist(births__total,1952,1022)
replace city_balance_total="port arthur" if state=="texas" & county=="jefferson" & inlist(births__total,1006,522)

*virginia: relabel selected rows as the independent city rather than the county
*rows are identified by race and births__total
replace county="norfolk (ind. city)" if state=="virginia" & county=="norfolk" & ///
    (race=="total" & births__total==9300 | race=="white" & births__total==6610 | race=="nonwhite" & births__total==2690)
replace county="richmond (ind. city)" if state=="virginia" & county=="richmond" & ///
    (race=="total" & births__total==4994 | race=="white" & births__total==2220 | race=="nonwhite" & births__total==2774)
replace county="roanoke (ind. city)" if state=="virginia" & county=="roanoke" & race=="total" & births__total==2020

*NOTE(review): as above, selected on births__total alone within the state - confirm
replace state="wisconsin" if state=="west virginia" & inlist(births__total,94934,3698)

*fix county name misspellings (relative to county names in the 1970 census)
*"st." becomes "st"
replace county=subinstr(county,"st.","st",.)
*1959 county-name repairs (relative to county names in the 1970 census)
*each rule rewrites one exact county string within one state; nothing else is touched
replace county="baltimore city" if county=="baltimore (city)" & state=="maryland"
replace county="brooke" if state=="west virginia" & county=="brocke"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="desoto" if county=="de soto" & state=="florida"
replace county="dickinson" if state=="kansas" & county=="dickincon"
replace county="du page" if state=="illinois" & county=="dul page"
replace county="morgan" if state=="west virginia" & county=="morgun"
replace county="mclean" if state=="north dakota" & county=="mciean"
replace county="neosho" if state=="kansas" & county=="necsho"
replace county="orleans" if county=="orleans, coextensive with new orleans" & state=="louisiana"
replace county="san francisco" if county=="san francisco, coextensive with san francisco (city)" & state=="california"
replace county="somerset" if state=="maryland" & county=="scmerset"
replace county="st louis city" if county=="st louis (city)" & state=="missouri"
replace county="st genevieve" if state=="missouri" & county=="ste genevieve"
*NOTE(review): unlike the orleans and san francisco rules, this pattern has no comma after the county name - confirm it matches the keyed string
replace county="philadelphia" if county=="philadelphia coextensive with philadelphia (city)" & state=="pennsylvania"

*correct data entry errors found while checking that white+nonwhite=total
*guarded on the old value (1899), so the rule has no effect once the cell already holds 1898
replace births__total=1898 if births__total==1899 & state=="florida" & county=="hillsborough" & race=="nonwhite" & city_balance_total=="total"

*check that county names are consistent with 1970 census, except for known deviations
*preserve/restore brackets the merge so the working data are left exactly as they were
preserve
*merge key is "county_state"
egen countyr_state=concat(county state), punct("_")
sort countyr_state
*old-style merge syntax; NOTE(review): presumably original_census1970_counties is already sorted on countyr_state - confirm
merge countyr_state using original_census1970_counties
*every observation must either be matched (_m==3) or fall in the whitelist of county/state pairs below
*& binds tighter than | in stata, so each county==... & state==... pair is evaluated as a unit
*NOTE(review): the state=="dc" clause cannot be met by data that also pass the state-name assert further down, which does not list "dc"
assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park" & state=="montana"|county=="park (excl yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. park (part)" & state=="wyoming"|county=="yellowstone nat. park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc"
restore

*check for mistakes/misspellings in state names
*state must be one of the 50 state names or "district of columbia", all lower case
assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california"|state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming"

*data checks for columns summing to county total
*not applicable; see footnote 1

*clean and label variables
*raw column names are replaced by short analysis names, each labelled right after its rename
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
label var births_h_p "births by residence: physician in hospital"
rename births_of_residents_of_area__at0 births_nh_p
label var births_nh_p "births by residence: physician not in hospital"
rename births_of_residents_of_area__at1 births_m
label var births_m "births by residence: midwife"

*generate year variable
gen year=1959
label var year "year"

*check that observations are unique
*tag() marks one observation per state/county/sub_county/race group with 1 and the rest with 0,
*so the assert fails as soon as any group holds more than one row
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

*log the first row and the variable/summary listings, then write the cleaned 1959 file
list in 1/1
desc
sum
saveold clean_natality1959.dta,replace
clear

**
*1960 data
**
*http://nber15.nber.org/vital-stats-books/nat60_1.cv.pdf
*table 3-1
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1960.dta
sum
desc
*lower-case every string identifier so the exact-match corrections below are case-insensitive
replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
*each count logs how many cells are about to be zero-filled
count if births__total==.
replace births__total=0 if births__total ==.
count if births_of_residents_of_area__att==.
replace births_of_residents_of_area__att =0 if births_of_residents_of_area__att ==.
count if births_of_residents_of_area__at0==.
replace births_of_residents_of_area__at0 =0 if births_of_residents_of_area__at0 ==.
*check that all pdf pages appear to be in the data
*page__of_pdf_ is a string; the page number is whatever follows the first "-"
*(strpos returns 0 when there is no "-", in which case the whole string is used)
gen temp=strpos(page__of_pdf_, "-")
gen newpagenumber=substr(page__of_pdf_,temp+1,.)
destring newpagenumber, replace
drop temp
sort newpagenumber
*after sorting, consecutive rows must be on the same page or the next one; temp is missing only on the first row
gen temp=newpagenumber[_n]-newpagenumber[_n-1]
assert temp==0|temp==1|temp==.
drop temp newpagenumber

*clean data entry errors
*rules that test births__total identify a row by the value it currently carries; keep the statement order
*(an earlier rule that set terrell county, georgia, nonwhite births__total to 314 was removed here:
* the rule further down sets the same cell to 312 under the identical condition, so the 314 never survived)
replace county="parmer" if state=="texas" & county=="parker" & births__total==242
replace county="white" if state=="illinois" & county=="wayne" & births__total==370
replace county="white" if state=="indiana" & county=="wells" & births__total==464
replace births_of_residents_of_area__at0=196 if state=="florida" & county=="escambia" & city_balance_total=="total" & race=="nonwhite"
replace births_of_residents_of_area__att=134 if state=="florida" & county=="putnam" & city_balance_total=="palatka" & race=="nonwhite"
replace births_of_residents_of_area__att=188 if state=="georgia" & county=="grady" & city_balance_total=="total" & race=="white"
replace births__total=312 if state=="georgia" & county=="terrell" & city_balance_total=="total" & race=="nonwhite"
replace births_of_residents_of_area__at0=2 if state=="illinois" & county=="cook" & city_balance_total=="evergreen park" & race=="total"
replace births_of_residents_of_area__at0=66 if state=="louisiana" & county=="natchitoches" & city_balance_total=="natchitoches" & race=="total"
replace births__total=320 if state=="louisiana" & county=="tangipahoa" & city_balance_total=="hammond" & race=="total"
replace births_of_residents_of_area__at0=62 if state=="maryland" & county=="anne arundel" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__att=382 if state=="nevada" & county=="washoe" & city_balance_total=="balance of county" & race=="total"
replace births__total=6286 if state=="new york" & county=="monroe" & city_balance_total=="rochester" & race=="white"
replace births__total=108 if state=="south dakota" & county=="faulk" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=188 if state=="texas" & county=="angelina" & city_balance_total=="total" & race=="nonwhite"
replace births_of_residents_of_area__att=468 if state=="texas" & county=="brown" & city_balance_total=="total" & race=="total"
replace births__total=168 if state=="texas" & county=="panola" & city_balance_total=="total" & race=="white"
replace births__total=9398 if state=="vermont" & county=="total" & city_balance_total=="total" & race=="white"
replace births__total=1146 if state=="vermont" & county=="chittenden" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__att=1138 if state=="vermont" & county=="chittenden" & city_balance_total=="balance of county" & race=="total"
replace births_of_residents_of_area__at0=8 if state=="vermont" & county=="chittenden" & city_balance_total=="balance of county" & race=="total"
replace births__total=380 if state=="virginia" & county=="fauquier" & city_balance_total=="total" & race=="white"
replace births_of_residents_of_area__att=6416 if state=="south carolina" & county=="charleston" & city_balance_total=="total" & race=="total"

*essex county, vermont: repair the name first so the three cell fixes below can find the row
replace county="essex" if state=="vermont" & county=="essx"
replace births__total=160 if state=="vermont" & county=="essex" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__att=156 if state=="vermont" & county=="essex" & city_balance_total=="total" & race=="total"
replace births_of_residents_of_area__at0=4 if state=="vermont" & county=="essex" & city_balance_total=="total" & race=="total"

*fairfax county, virginia: same pattern - name first, then the cell
replace county="fairfax" if state=="virginia" & county=="fsirfis"
replace births__total=318 if state=="virginia" & county=="fairfax" & city_balance_total=="balance of county" & race=="nonwhite"

*collin county, texas: rows labelled "total" that are relabelled "balance of county"
replace city_balance_total="balance of county" if state=="texas" & county=="collin" & city_balance_total=="total" & race=="white" & births__total==518
replace city_balance_total="balance of county" if state=="texas" & county=="collin" & city_balance_total=="total" & race=="nonwhite" & births__total==72

*virginia: rename the rows matched by race and birth count to "(ind. city)"
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="total" & births__total==8554
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="white" & births__total==6030
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="nonwhite" & births__total==2524
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births__total==4830
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births__total==2234
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births__total==2596
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births__total==2028

*erie county, ohio: rows labelled "total" that are relabelled as the city of sandusky
replace city_balance_total="sandusky" if state=="ohio" & county=="erie" & city_balance_total=="total" & race=="white" & births__total==690
replace city_balance_total="sandusky" if state=="ohio" & county=="erie" & city_balance_total=="total" & race=="nonwhite" & births__total==136

*fix county name misspellings (relative to county names in the 1970 census)
*"st." and "ste." both become "st"; the first rule cannot touch "ste." because "ste." does not contain "st."
replace county=subinstr(county,"st.","st",.)
replace county=subinstr(county,"ste.","st",.)
*1960 county-name repairs (relative to county names in the 1970 census)

*two oklahoma rows are identified by their counts rather than by the keyed name
replace county="okfuskee" if state=="oklahoma" & (births__total==76 & births_of_residents_of_area__att==52 | births__total==140 & births_of_residents_of_area__att==136)

*one-off misreadings and long-form names: exact keyed string -> census spelling, within one state
replace county="adams" if county=="adahs" & state=="colorado"
replace county="baldwin" if county=="balowin" & state=="georgia"
replace county="baylor" if county=="bayldr" & state=="texas"
replace county="boone" if county=="bcone" & state=="west virginia"
replace county="beauregard" if county=="bealregard" & state=="louisiana"
replace county="bladen" if county=="blanden" & state=="north carolina"
replace county="bosque" if county=="bosgue" & state=="texas"
replace county="bowie" if county=="bowte" & state=="texas"
replace county="catoosa" if county=="catodsa" & state=="georgia"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="colquitt" if county=="colouitt" & state=="georgia"
replace county="dane" if county=="dame" & state=="wisconsin"
replace county="de witt" if county=="de sitt" & state=="texas"
replace county="desoto" if county=="de soto" & state=="florida"
replace county="denver" if county=="denver, coex. with denver city" & state=="colorado"
replace county="floyd" if county=="floyo" & state=="texas"
replace county="fort bend" if county=="fort bead" & state=="texas"
replace county="edwards" if county=="edxaeds" & state=="texas"
replace county="franklin" if county=="franslin" & state=="vermont"
replace county="hardeman" if county=="haroeman" & state=="texas"
replace county="hillsdale" if county=="hillsoale" & state=="michigan"
replace county="hopkins" if county=="hopins" & state=="kentucky"
replace county="iroquois" if county=="iroguois" & state=="illinois"

*"mc xxx" -> "mcxxx", only for the listed state/county pairs
replace county="mcclain" if county=="mc clain" & state=="oklahoma"
replace county="mccone" if county=="mc cone" & state=="montana"
replace county="mccook" if county=="mc cook" & state=="south dakota"
replace county="mccormick" if county=="mc cormick" & state=="south carolina"
replace county="mccracken" if county=="mc cracken" & state=="kentucky"
replace county="mccreary" if county=="mc creary" & state=="kentucky"
replace county="mcculloch" if county=="mc culloch" & state=="texas"
replace county="mccurtain" if county=="mc curtain" & state=="oklahoma"
replace county="mcdonald" if county=="mc donald" & state=="missouri"
replace county="mcdonough" if county=="mc donough" & state=="illinois"
replace county="mcdowell" if county=="mc dowell" & inlist(state,"north carolina","west virginia")
replace county="mcduffie" if county=="mc duffie" & state=="georgia"
replace county="mchenry" if county=="mc henry" & inlist(state,"illinois","north dakota")
replace county="mcintosh" if county=="mc intosh" & inlist(state,"georgia","north dakota","oklahoma")
replace county="mckean" if county=="mc kean" & state=="pennsylvania"
replace county="mckenzie" if county=="mc kenzie" & state=="north dakota"
replace county="mckinley" if county=="mc kinley" & state=="new mexico"
replace county="mclean" if county=="mc lean" & inlist(state,"illinois","kentucky","north dakota")
replace county="mclennan" if county=="mc lennan" & state=="texas"
replace county="mcleod" if county=="mc leod" & state=="minnesota"
replace county="mcminn" if county=="mc minn" & state=="tennessee"
replace county="mcmullen" if county=="mc mullen" & state=="texas"
replace county="mcnairy" if county=="mc nairy" & state=="tennessee"
replace county="mcpherson" if county=="mc pherson" & inlist(state,"kansas","nebraska","south dakota")

replace county="milam" if county=="milan" & state=="texas"
replace county="newton" if county=="nexton" & state=="texas"
replace county="o'brien" if county=="o brien" & state=="iowa"
replace county="orleans" if county=="orleans, coex. with new orleans city" & state=="louisiana"
replace county="parker" if county=="parkcr" & state=="texas"
replace county="philadelphia" if county=="philadelphia, coex. with philadelphia city" & state=="pennsylvania"
replace county="san francisco" if county=="san francisco, coex. with san francisco city" & state=="california"
replace county="schoolcraft" if county=="school craft" & state=="michigan"
replace county="seminole" if county=="semincle" & state=="oklahoma"
replace county="somerset" if county=="someret" & state=="maine"
replace county="valencia" if county=="valengia" & state=="new mexico"
replace county="wyoming" if county=="wydming" & state=="west virginia"
replace county="erath" if county=="esath" & state=="texas"
replace county="fisher" if county=="fishee" & state=="texas"

*spell out the abbreviated state name
replace state="district of columbia" if state=="dist. of columbia"
replace county="franklin" if county=="frankl in" & state=="massachusetts"

*correct data entry errors found while checking that county totals sum to state totals
*white county (illinois and indiana): the county-total rows get race "total"
replace race="total" if county=="white" & city_balance_total=="total" & inlist(state,"illinois","indiana")
*essex county, vermont, is handled with the other data entry corrections earlier in this year's section

*check that county names are consistent with 1970 census, except for known deviations
*the merge runs inside preserve/restore, so it only feeds the assert
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
*every row must be matched (_m==3) or be one of the whitelisted county/state pairs
assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park, part" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park" & state=="montana"|county=="park (excl yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. park (part)" & state=="wyoming"|county=="yellowstone nat. park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc"
restore

*check for mistakes/misspellings in state names
*inlist() takes at most ten string arguments, hence six calls of up to nine names each
assert inlist(state,"alabama","alaska","arizona","arkansas","california","colorado","connecticut","delaware","district of columbia") | inlist(state,"florida","georgia","hawaii","idaho","illinois","indiana","iowa","kansas","kentucky") | inlist(state,"louisiana","maine","maryland","massachusetts","michigan","minnesota","mississippi","missouri","montana") | inlist(state,"nebraska","nevada","new hampshire","new jersey","new mexico","new york","north carolina","north dakota","ohio") | inlist(state,"oklahoma","oregon","pennsylvania","rhode island","south carolina","south dakota","tennessee","texas","utah") | inlist(state,"vermont","virginia","washington","west virginia","wisconsin","wyoming")

*data checks for columns summing to county total
*the two attendant columns must add up to total births in every row
tempvar attsum
gen `attsum'=births_of_residents_of_area__att+births_of_residents_of_area__at0
assert `attsum'==births__total
drop `attsum'

*clean and label variables
*renames first, then labels
rename page__of_pdf_ page_of_pdf_
rename city_balance_total sub_county
rename births__total births
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
label var sub_county "city/balance/total"
label var race "race"
label var births "births by residence"
*1960: rename and label the two attendant columns
rename births_of_residents_of_area__att births_h_p
rename births_of_residents_of_area__at0 births_nh_ns
label var births_h_p "births by residence: physician in hospital"
label var births_nh_ns "births by residence: attendant not in hospital and not specified"

*generate year variable
gen year=1960
label var year "year"

*check that observations are unique
*one row per state/county/sub_county/race; any second row in a group gets tag 0 and trips the assert
tempvar firstobs
egen `firstobs'=tag(state county sub_county race)
assert `firstobs'==1
drop `firstobs'

list in 1/1
desc
sum
saveold clean_natality1960.dta,replace
clear

**
*1961 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1961_1.pdf
*table 3-1
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
use natality1961.dta, clear
sum
desc
*lower-case the string identifiers before any exact-match rule runs
foreach v of varlist county state city_balance_total race {
    replace `v'=lower(`v')
}

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
foreach v of varlist births__total births_of_residents_of_area__att births_of_residents_of_area__at0 {
    count if `v'==.
    replace `v'=0 if `v'==.
}

*check that all pdf pages appear to be in the data
*page number = the part of page__of_pdf_ after the first "-"; sorted pages may only repeat or step by one
tempvar dashpos pagenum pagegap
gen `dashpos'=strpos(page__of_pdf_,"-")
gen `pagenum'=substr(page__of_pdf_,`dashpos'+1,.)
destring `pagenum', replace
drop `dashpos'
sort `pagenum'
gen `pagegap'=`pagenum'[_n]-`pagenum'[_n-1]
assert `pagegap'==0|`pagegap'==1|`pagegap'==.
drop `pagegap' `pagenum'

*clean data entry errors
*NOTE(review): the label written here is "balance of city"; sibling corrections in this file use "balance of county" - confirm which is intended
replace city_balance_total="balance of city" if county=="gregg" & state=="texas" & inlist(births__total,272,172)

*virginia: rename the rows matched by race and birth count to "(ind. city)"
replace county="norfolk (ind. city)" if state=="virginia" & county=="norfolk" & (race=="total" & births__total==8770 | race=="white" & births__total==6234 | race=="nonwhite" & births__total==2536)
replace county="richmond (ind. city)" if state=="virginia" & county=="richmond" & (race=="total" & births__total==4836 | race=="white" & births__total==2156 | race=="nonwhite" & births__total==2680)
replace county="roanoke (ind. city)" if state=="virginia" & county=="roanoke" & race=="total" & births__total==1932

*single-cell corrections, each addressed by state/county/sub-county/race
replace births_of_residents_of_area__at0=730 if race=="total" & city_balance_total=="total" & county=="dallas" & state=="alabama"
replace births__total=464 if race=="nonwhite" & city_balance_total=="pine bluff" & county=="jefferson" & state=="arkansas"
replace births_of_residents_of_area__at0=18 if race=="total" & city_balance_total=="balance of county" & county=="jefferson" & state=="colorado"
replace births_of_residents_of_area__at0=132 if race=="total" & city_balance_total=="total" & county=="leon" & state=="florida"
replace births_of_residents_of_area__att=58 if race=="nonwhite" & city_balance_total=="balance of county" & county=="spalding" & state=="georgia"
replace births_of_residents_of_area__at0=266 if race=="nonwhite" & city_balance_total=="total" & county=="sumter" & state=="georgia"

*honolulu city, nonwhite: all three counts are overwritten
replace births__total=6246 if race=="nonwhite" & city_balance_total=="honolulu" & county=="honolulu" & state=="hawaii"
replace births_of_residents_of_area__att=6234 if race=="nonwhite" & city_balance_total=="honolulu" & county=="honolulu" & state=="hawaii"
replace births_of_residents_of_area__at0=12 if race=="nonwhite" & city_balance_total=="honolulu" & county=="honolulu" & state=="hawaii"
*1961 single-cell corrections (continued), grouped by the column being overwritten
*every rule addresses one state/county/sub-county/race cell and none of them reads a value
*that another rule in this group writes, so the grouping does not change the result

*births not in hospital / attendant not specified
replace births_of_residents_of_area__at0=18 if race=="total" & city_balance_total=="total" & county=="madison" & state=="indiana"
replace births_of_residents_of_area__at0=218 if race=="white" & city_balance_total=="total" & county=="total" & state=="louisiana"
replace births_of_residents_of_area__at0=8 if race=="total" & city_balance_total=="westbrook" & county=="cumberland" & state=="maine"
replace births_of_residents_of_area__at0=6 if race=="total" & city_balance_total=="total" & county=="texas" & state=="missouri"

*births in hospital
replace births_of_residents_of_area__att=3138 if race=="total" & city_balance_total=="total" & county=="jackson" & state=="michigan"
replace births_of_residents_of_area__att=558 if race=="nonwhite" & city_balance_total=="balance of county" & county=="greenville" & state=="south carolina"

*total births
replace births__total=248 if race=="nonwhite" & city_balance_total=="total" & county=="calvert" & state=="maryland"

*fix county name misspellings (relative to county names in the 1970 census)
*inner call turns "st." into "st", outer call turns "ste." into "st"
replace county=subinstr(subinstr(county,"st.","st",.),"ste.","st",.)
*1961 county-name repairs (relative to county names in the 1970 census)

*"bellefontaine neighbors" was split across the county and sub-county columns:
*put the row under st louis county and restore the full city name
replace county="st louis" if county=="bellefontaine" & state=="missouri"
replace city_balance_total="bellefontaine neighbors" if city_balance_total=="neighbors" & state=="missouri"

*one-off misreadings: exact keyed string -> census spelling, within one state
replace county="cherokee" if county=="cheroxee" & state=="iowa"
replace county="catoosa" if county=="catodsa" & state=="georgia"
replace county="clearfield" if county=="clearfielo" & state=="pennsylvania"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="colquitt" if county=="colouitt" & state=="georgia"
replace county="crawford" if county=="crawforo" & state=="iowa"
replace county="desoto" if county=="de soto" & state=="florida"
replace county="dickinson" if county=="dicxinson" & state=="iowa"
replace county="flathead" if county=="fuathead" & state=="montana"
replace county="george" if county=="gcorge" & state=="mississippi"
replace county="guilford" if county=="guilforo" & state=="north carolina"
replace county="izard" if county=="izaro" & state=="arkansas"
replace county="jackson" if county=="jacxson" & state=="iowa"
replace county="kalkaska" if county=="kalxaska" & state=="michigan"
replace county="kidder" if county=="kioder" & state=="north dakota"
replace county="koochiching" if county=="kodchiching" & state=="minnesota"

*"mc xxx" -> "mcxxx", only for the listed state/county pairs
replace county="mcclain" if county=="mc clain" & state=="oklahoma"
replace county="mccone" if county=="mc cone" & state=="montana"
replace county="mccook" if county=="mc cook" & state=="south dakota"
replace county="mccormick" if county=="mc cormick" & state=="south carolina"
replace county="mccracken" if county=="mc cracken" & state=="kentucky"
replace county="mccreary" if county=="mc creary" & state=="kentucky"
replace county="mcculloch" if county=="mc culloch" & state=="texas"
replace county="mccurtain" if county=="mc curtain" & state=="oklahoma"
replace county="mcdonald" if county=="mc donald" & state=="missouri"
replace county="mcdonough" if county=="mc donough" & state=="illinois"
replace county="mcdowell" if county=="mc dowell" & inlist(state,"north carolina","west virginia")
replace county="mcduffie" if county=="mc duffie" & state=="georgia"
replace county="mchenry" if county=="mc henry" & inlist(state,"illinois","north dakota")
replace county="mcintosh" if county=="mc intosh" & inlist(state,"georgia","north dakota","oklahoma")
replace county="mckean" if county=="mc kean" & state=="pennsylvania"
replace county="mckenzie" if county=="mc kenzie" & state=="north dakota"
replace county="mckinley" if county=="mc kinley" & state=="new mexico"
replace county="mclean" if county=="mc lean" & inlist(state,"illinois","kentucky","north dakota")
replace county="mclennan" if county=="mc lennan" & state=="texas"
replace county="mcleod" if county=="mc leod" & state=="minnesota"
replace county="mcminn" if county=="mc minn" & state=="tennessee"
replace county="mcmullen" if county=="mc mullen" & state=="texas"
replace county="mcnairy" if county=="mc nairy" & state=="tennessee"
replace county="mcpherson" if county=="mc pherson" & inlist(state,"kansas","nebraska","south dakota")

replace county="o'brien" if county=="o brien" & state=="iowa"
replace county="orleans" if county=="orleans, coex. with new orleans city" & state=="louisiana"
replace county="philadelphia" if county=="philadelphia, coex. with philadelphia city" & state=="pennsylvania"
replace county="polk" if county=="polx" & state=="iowa"
replace county="pontotoc" if county=="pontotdc" & state=="mississippi"
replace county="presque isle" if county=="presoue isle" & state=="michigan"
replace county="poweshiek" if county=="powesriek" & state=="iowa"
replace county="stoddard" if county=="stoddaro" & state=="missouri"
*a tennessee row carries the state name in the county column; it is filed under montgomery
replace county="montgomery" if county=="tennessee" & state=="tennessee"
replace county="winona" if county=="windna" & state=="minnesota"
*the long-form strings below are punctuated "coex," and "coex" here, and must match exactly
replace county="denver" if county=="denver, coex, with denver city" & state=="colorado"
replace county="floyd" if county=="floyo" & state=="iowa"
replace county="jersey" if county=="jersev" & state=="illinois"
replace county="san francisco" if county=="san francisco, coex with san francisco city" & state=="california"
replace county="bayfield" if county=="bayfielo" & state=="wisconsin"
replace county="parke" if county=="parxe" & state=="indiana"

*georgia: the wheeler row carrying 166 births is renamed to white county
*(must run before the race fix below, which looks for county "white")
replace county="white" if county=="wheeler" & state=="georgia" & births__total==166

*spell out the abbreviated state name
replace state="district of columbia" if state=="dist. of columbia"

*correct data entry errors found while checking that county totals sum to state totals
replace race="total" if county=="white" & state=="georgia" & city_balance_total=="total"

*check that county names are consistent with 1970 census, except for known deviations
*the merge runs inside preserve/restore, so it only feeds the assert
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
*every row must be matched (_m==3) or be one of the whitelisted county/state pairs
assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park, part" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park," & state=="wyoming" |county=="yellowstone national park" & state=="montana"|county=="park (excl yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. park (part)" & state=="wyoming"|county=="yellowstone nat. park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc"
restore

*check for mistakes/misspellings in state names
*inlist() takes at most ten string arguments, hence six calls of up to nine names each
assert inlist(state,"alabama","alaska","arizona","arkansas","california","colorado","connecticut","delaware","district of columbia") | inlist(state,"florida","georgia","hawaii","idaho","illinois","indiana","iowa","kansas","kentucky") | inlist(state,"louisiana","maine","maryland","massachusetts","michigan","minnesota","mississippi","missouri","montana") | inlist(state,"nebraska","nevada","new hampshire","new jersey","new mexico","new york","north carolina","north dakota","ohio") | inlist(state,"oklahoma","oregon","pennsylvania","rhode island","south carolina","south dakota","tennessee","texas","utah") | inlist(state,"vermont","virginia","washington","west virginia","wisconsin","wyoming")

*data checks for columns summing to county total
*the two attendant columns must add up to total births in every row
tempvar attsum
gen `attsum'=births_of_residents_of_area__att+births_of_residents_of_area__at0
assert `attsum'==births__total
drop `attsum'

*clean and label variables
*renames first, then labels
rename page__of_pdf_ page_of_pdf_
rename city_balance_total sub_county
rename births__total births
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
label var sub_county "city/balance/total"
label var race "race"
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
label var births_h_p "births by residence: physician in hospital"
rename births_of_residents_of_area__at0 births_nh_ns
label var births_nh_ns "births by residence: attendant not in hospital and not specified"

*generate year variable
gen year=1961
label var year "year"

*check that observations are unique
*(egen tag() marks one observation per group, so tag==1 everywhere means no repeated key)
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1961.dta,replace
clear

**
*1962 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1962_1.pdf
*table 2-1
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
*steps: load and lower-case, recode missing counts to zero, check page coverage,
*apply hand corrections, normalize county names, validate, rename/label, save
clear
use natality1962.dta
sum
desc

replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
count if births__total==.
replace births__total=0 if births__total ==.
count if births_of_residents_of_area__att==.
replace births_of_residents_of_area__att =0 if births_of_residents_of_area__att ==.
count if births_of_residents_of_area__at0==.
replace births_of_residents_of_area__at0 =0 if births_of_residents_of_area__at0 ==.

*check that all pdf pages appear to be in the data
*the text after the "-" in page__of_pdf_ is used as the page number;
*once sorted, neighbouring rows may differ by at most one page
gen temp=strpos(page__of_pdf_, "-")
gen newpagenumber=substr(page__of_pdf_,temp+1,.)
destring newpagenumber, replace
drop temp
sort newpagenumber
gen temp=newpagenumber[_n]-newpagenumber[_n-1]
assert temp==0|temp==1|temp==.
drop temp newpagenumber

*clean data entry errors
drop if state==""
*drops two empty observations
duplicates report
duplicates list
duplicates drop
*drops one observation which appears to have been entered twice
replace state="north carolina" if state=="n. carolina"
replace state="north carolina" if state=="n.carolina"
replace births_of_residents_of_area__att=134 if state=="georgia" & county=="henry" & city_balance_total =="total" & race=="nonwhite"
replace births__total=3198 if state=="missouri" & county=="jackson" & city_balance_total=="kansas city, total" & race=="nonwhite"
replace births_of_residents_of_area__att=52 if state=="virginia" & county=="craig" & city_balance_total=="total" & race=="total"
*rows keyed as "total" that are re-keyed to a sub-county area; the birth count pins down the exact row
replace city_balance_total="balance of county" if state=="alabama" & county=="madison" & city_balance_total=="total" & race=="white" & births__total==946
replace city_balance_total="balance of county" if state=="alabama" & county=="lee" & city_balance_total=="total" & race=="nonwhite" & births__total==280
replace city_balance_total="balance of county" if state=="alabama" & county=="madison" & city_balance_total=="total" & race=="nonwhite" & births__total==362
replace city_balance_total="balance of county" if state=="alabama" & county=="lee" & city_balance_total=="total" & race=="white" & births__total==142
replace city_balance_total="balance of county" if state=="arizona" & county=="coconino" & city_balance_total=="total" & race=="white" & births__total==400
replace city_balance_total="balance of county" if state=="arizona" & county=="coconino" & city_balance_total=="total" & race=="nonwhite" & births__total==626
replace city_balance_total="balance of county" if state=="arkansas" & county=="mississippi" & city_balance_total=="total" & race=="nonwhite" & births__total==480
replace city_balance_total="balance of county" if state=="arkansas" & county=="mississippi" & city_balance_total=="total" & race=="white" & births__total==982
replace city_balance_total="balance of county" if state=="georgia" & county=="clayton" & city_balance_total=="total" & race=="white" & births__total==692
replace city_balance_total="balance of county" if state=="georgia" & county=="clayton" & city_balance_total=="total" & race=="nonwhite" & births__total==80
*NOTE(review): every other row in this list uses "balance of county"; confirm "balance of city" is intended here
replace city_balance_total="balance of city" if state=="new jersey" & county=="ocean" & city_balance_total=="total" & race=="total" & births__total==2718
replace city_balance_total="galena park" if state=="texas" & county=="harris" & city_balance_total=="total" & race=="white" & births__total==200
replace city_balance_total="galena park" if state=="texas" & county=="harris" & city_balance_total=="total" & race=="nonwhite" & births__total==46
replace city_balance_total="houston" if state=="texas" & county=="harris" & city_balance_total=="total" & race=="white" & births__total==18640
replace city_balance_total="houston" if state=="texas" & county=="harris" & city_balance_total=="total" & race=="nonwhite" & births__total==7826
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="total" & births__total==8880
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="white" & births__total==6116
replace county="norfolk (ind. city)" if county=="norfolk" & state=="virginia" & race=="nonwhite" & births__total==2764
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births__total==4762
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births__total==2182
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births__total==2580
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births__total==1976
replace city_balance_total="seattle" if state=="washington" & county=="king" & city_balance_total=="total" & race=="white" & births__total==10046
replace city_balance_total="seattle" if state=="washington" & county=="king" & city_balance_total=="total" & race=="nonwhite" & births__total==1458
replace county="erie" if state=="new york" & county=="new york" & (city_balance_total=="tonawanda" | city_balance_total=="balance of county")

*fix county name misspellings (relative to county names in the 1970 census)
replace county=subinstr(county,"st.","st",.)
replace county=subinstr(county,"ste.","st",.)
replace county="antrim" if state=="michigan" & county=="antkim"
replace county="adair" if state=="iowa" & county=="aoair"
replace county="caldwell" if state=="north carolina" & county=="calowell"
replace county="chautauqua" if state=="new york" & county=="chautaudua"
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="de kalb" if state=="illinois" & county=="de kals"
replace county="desoto" if county=="de soto" & state=="florida"
replace county="denver" if county=="denver, coex. with denver city" & state=="colorado"
replace county="mcclain" if state=="oklahoma" & county=="mc clain"
replace county="mccone" if state=="montana" & county=="mc cone"
replace county="mccook" if state=="south dakota" & county=="mc cook"
replace county="mccormick" if state=="south carolina" & county=="mc cormick"
replace county="mccracken" if state=="kentucky" & county=="mc cracken"
replace county="mccreary" if state=="kentucky" & county=="mc creary"
replace county="mcculloch" if state=="texas" & county=="mc culloch"
replace county="mccurtain" if state=="oklahoma" & county=="mc curtain"
replace county="mcdonald" if state=="missouri" & county=="mc donald"
replace county="mcdonough" if state=="illinois" & county=="mc donough"
replace county="mcdowell" if state=="north carolina" & county=="mc dowell"
replace county="mcdowell" if state=="west virginia" & county=="mc dowell"
replace county="mcduffie" if state=="georgia" & county=="mc duffie"
replace county="mchenry" if state=="illinois" & county=="mc henry"
replace county="mchenry" if state=="north dakota" & county=="mc henry"
replace county="mcintosh" if state=="georgia" & county=="mc intosh"
replace county="mcintosh" if state=="north dakota" & county=="mc intosh"
replace county="mcintosh" if state=="oklahoma" & county=="mc intosh"
replace county="mckean" if state=="pennsylvania" & county=="mc kean"
replace county="mckenzie" if state=="north dakota" & county=="mc kenzie"
replace county="mckinley" if state=="new mexico" & county=="mc kinley"
replace county="mclean" if state=="illinois" & county=="mc lean"
replace county="mclean" if state=="kentucky" & county=="mc lean"
replace county="mclean" if state=="north dakota" & county=="mc lean"
replace county="mclennan" if state=="texas" & county=="mc lennan"
replace county="mcleod" if state=="minnesota" & county=="mc leod"
replace county="mcminn" if state=="tennessee" & county=="mc minn"
replace county="mcmullen" if state=="texas" & county=="mc mullen"
replace county="mcnairy" if state=="tennessee" & county=="mc nairy"
replace county="mcpherson" if state=="kansas" & county=="mc pherson"
replace county="mcpherson" if state=="nebraska" & county=="mc pherson"
replace county="mcpherson" if state=="south dakota" & county=="mc pherson"
replace county="union" if state=="new jersey" & county=="new jersey"
replace county="o'brien" if state=="iowa" & county=="o brien"
replace county="orleans" if county=="orleans, coex. with new orleans city" & state=="louisiana"
replace county="philadelphia" if county=="philadelphia, coex, with philadelphia city" & state=="pennsylvania"
replace county="san francisco" if county=="san francisco, coex. with san francisco city" & state=="california"
replace county="santa clara" if state=="california" & county=="santa clar"
replace county="wilkinson" if state=="georgia" & county=="wilkenson"
replace county="white" if state=="georgia" & county=="wheeler" & births__total==190

*correct data entry errors found while checking that county totals sum to state totals
replace race = "total" if state=="georgia" & county=="white" & city_balance_total=="total"

*correct data entry errors found while checking that white+nonwhite=total
*new jersey did not report by race in 1962 or 1963
drop if state=="new jersey" & race!="total"

*check that county names are consistent with 1970 census, except for known deviations
*runs inside preserve/restore, so the merge leaves the working data untouched
*NOTE(review): the state-name assert further down only admits "district of columbia",
*so the state=="dc" exemption at the end of this list looks unreachable - confirm
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
assert _m==3 ///
    |county=="armstrong" & state=="south dakota" ///
    |county=="washington" & state=="south dakota" ///
    |county=="yellowstone national park (part)" & state=="idaho" ///
    |county=="yellowstone national park, part" & state=="idaho" ///
    |county=="yellowstone national park" & state=="idaho" ///
    |county=="yellowstone national park (part)" & state=="montana" ///
    |county=="yellowstone national park (part)" & state=="wyoming" ///
    |county=="yellowstone national park (total)" & state=="wyoming" ///
    |county=="yellowstone national park," & state=="wyoming" ///
    |county=="yellowstone national park" & state=="montana" ///
    |county=="park (excl yell nat park)" & state=="montana" ///
    |county=="yellowstone national park, part" & state=="montana" ///
    |county=="yellowstone national park, part" & state=="wyoming" ///
    |county=="yellowstone national park, total" & state=="wyoming" ///
    |county=="park" & state=="montana" ///
    |county=="yellowstone nat. park (part)" & state=="idaho" ///
    |county=="yellowstone nat. park (part)" & state=="wyoming" ///
    |county=="yellowstone nat. park (total)" & state=="wyoming" ///
    |county=="new york city" & state=="new york" ///
    |county=="bronx" & state=="new york" ///
    |county=="kings" & state=="new york" ///
    |county=="new york" & state=="new york" ///
    |county=="queens" & state=="new york" ///
    |county=="richmond" & state=="new york" ///
    |county=="ormsby" & state=="nevada" ///
    |county=="carson city" & state=="nevada" ///
    |county=="los alamos" & state=="new mexico" ///
    |county=="menominee" & state=="wisconsin" ///
    |county=="total" ///
    |state=="alaska" ///
    |state=="hawaii" ///
    |state=="virginia" ///
    |state=="dc"
restore

*check for mistakes/misspellings in state names
assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california" ///
    |state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida" ///
    |state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana" ///
    |state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine" ///
    |state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi" ///
    |state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire" ///
    |state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota" ///
    |state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island" ///
    |state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah" ///
    |state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin" ///
    |state=="wyoming"

*data checks for columns summing to county total
gen temp=births_of_residents_of_area__att+ births_of_residents_of_area__at0
assert temp==births__total
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births__total births
label var births "births by residence"
rename births_of_residents_of_area__att births_h_p
label var births_h_p "births by residence: physician in hospital"
rename births_of_residents_of_area__at0 births_nh_ns
label var births_nh_ns "births by residence: attendant not in hospital and not specified"

*generate year variable
gen year=1962
label var year "year"

*check that observations are unique
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1962.dta,replace
clear

**
*1963 data
**
*http://nber15.nber.org/vital-stats-books/nat63_1.cv.pdf
*table 2-1
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1963.dta
sum
desc

replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
count if births_by_place_of_residence__to==.
replace births_by_place_of_residence__to=0 if births_by_place_of_residence__to ==.
count if births_by_place_of_residence__at==.
replace births_by_place_of_residence__at =0 if births_by_place_of_residence__at ==.
count if births_by_place_of_residence__a0==.
replace births_by_place_of_residence__a0 =0 if births_by_place_of_residence__a0 ==.

*check that all pdf pages appear to be in the data
*the text after the "-" in page__of_pdf_ is used as the page number;
*once sorted, neighbouring rows may differ by at most one page
gen temp=strpos(page__of_pdf_,"-")
gen newpagenumber=substr(page__of_pdf_,temp+1,.)
destring newpagenumber, replace
drop temp
sort newpagenumber
gen temp=newpagenumber[_n]-newpagenumber[_n-1]
assert temp==0|temp==1|temp==.
drop temp newpagenumber

*clean data entry errors
*keep these ahead of the subinstr() calls below: they match the raw county text,
*and "dist. 19, fairbanks" contains "st.", which that normalization rewrites
replace births_by_place_of_residence__at=1148 if state=="alaska" & county=="dist. 19, fairbanks" & city_balance_total=="balance of district" & race=="total"
replace births_by_place_of_residence__a0=90 if state=="texas" & county=="lamar" & city_balance_total=="paris" & race=="total"
replace births_by_place_of_residence__a0=6 if state=="texas" & county=="lamar" & city_balance_total=="paris" & race=="white"
replace births_by_place_of_residence__a0=84 if state=="texas" & county=="lamar" & city_balance_total=="paris" & race=="nonwhite"

*fix county name misspellings (relative to county names in the 1970 census)
replace county=subinstr(county,"st.","st",.)
replace county=subinstr(county,"ste.","st",.)
replace county="cochrane" if county=="cochran" & state=="texas"
replace county="davidson" if state=="tennessee" & county=="davidson, coex. with nashville city"
replace county="desoto" if county=="de soto" & state=="florida"
replace county="denver" if county=="denver, coex. with denver city" & state=="colorado"
replace county="mcclain" if state=="oklahoma" & county=="mc clain"
replace county="mccone" if state=="montana" & county=="mc cone"
replace county="mccook" if state=="south dakota" & county=="mc cook"
replace county="mccormick" if state=="south carolina" & county=="mc cormick"
replace county="mccracken" if state=="kentucky" & county=="mc cracken"
replace county="mccreary" if state=="kentucky" & county=="mc creary"
replace county="mcculloch" if state=="texas" & county=="mc culloch"
replace county="mccurtain" if state=="oklahoma" & county=="mc curtain"
replace county="mcdonald" if state=="missouri" & county=="mc donald"
replace county="mcdonough" if state=="illinois" & county=="mc donough"
replace county="mcdowell" if state=="north carolina" & county=="mc dowell"
replace county="mcdowell" if state=="west virginia" & county=="mc dowell"
replace county="mcduffie" if state=="georgia" & county=="mc duffie"
replace county="mchenry" if state=="illinois" & county=="mc henry"
replace county="mchenry" if state=="north dakota" & county=="mc henry"
replace county="mcintosh" if state=="georgia" & county=="mc intosh"
replace county="mcintosh" if state=="north dakota" & county=="mc intosh"
replace county="mcintosh" if state=="oklahoma" & county=="mc intosh"
replace county="mckean" if state=="pennsylvania" & county=="mc kean"
replace county="mckenzie" if state=="north dakota" & county=="mc kenzie"
replace county="mckinley" if state=="new mexico" & county=="mc kinley"
replace county="mclean" if state=="illinois" & county=="mc lean"
replace county="mclean" if state=="kentucky" & county=="mc lean"
replace county="mclean" if state=="north dakota" & county=="mc lean"
replace county="mclennan" if state=="texas" & county=="mc lennan"
replace county="mcleod" if state=="minnesota" & county=="mc leod"
replace county="mcminn" if state=="tennessee" & county=="mc minn"
replace county="mcmullen" if state=="texas" & county=="mc mullen"
replace county="mcnairy" if state=="tennessee" & county=="mc nairy"
replace county="mcpherson" if state=="kansas" & county=="mc pherson"
replace county="mcpherson" if state=="nebraska" & county=="mc pherson"
replace county="mcpherson" if state=="south dakota" & county=="mc pherson"
replace county="o'brien" if state=="iowa" & county=="o brien"
replace county="orleans" if county=="orleans, coex. with new orleans city" & state=="louisiana"
replace county="philadelphia" if county=="philadelphia, coex. with philaoelphia city" & state=="pennsylvania"
replace county="san francisco" if county=="san francisco, coex. with san francisco city." & state=="california"
*virginia rows re-keyed as independent cities; the birth count pins down the exact row
replace county="fairfax (ind. city)" if county=="fairfax" & state=="virginia" & race=="total" & births_by_place_of_residence__to==476
replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & race=="total" & births_by_place_of_residence__to==178
replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & race=="white" & births_by_place_of_residence__to==38
replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & race=="nonwhite" & births_by_place_of_residence__to==140
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births_by_place_of_residence__to==4626
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births_by_place_of_residence__to==2090
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births_by_place_of_residence__to==2536
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births_by_place_of_residence__to==1934

*correct data entry errors found while checking that white+nonwhite=total
*new jersey did not report by race in 1962 or 1963
drop if state=="new jersey" & race!="total"

*check that county names are consistent with 1970 census, except for known deviations
*runs inside preserve/restore, so the merge leaves the working data untouched
preserve
egen countyr_state=concat(county state), punct("_")
sort countyr_state
merge countyr_state using original_census1970_counties
assert _m==3 ///
    |county=="armstrong" & state=="south dakota" ///
    |county=="washington" & state=="south dakota" ///
    |county=="yellowstone national park (part)" & state=="idaho" ///
    |county=="yellowstone national park, part" & state=="idaho" ///
    |county=="yellowstone national park" & state=="idaho" ///
    |county=="yellowstone national park (part)" & state=="montana" ///
    |county=="yellowstone national park (part)" & state=="wyoming" ///
    |county=="yellowstone national park (total)" & state=="wyoming" ///
    |county=="yellowstone national park," & state=="wyoming" ///
    |county=="yellowstone national park" & state=="montana" ///
    |county=="park (excl yell nat park)" & state=="montana" ///
    |county=="yellowstone national park, part" & state=="montana" ///
    |county=="yellowstone national park, part" & state=="wyoming" ///
    |county=="yellowstone national park, total" & state=="wyoming" ///
    |county=="park" & state=="montana" ///
    |county=="yellowstone nat. park (part)" & state=="idaho" ///
    |county=="yellowstone nat. park (part)" & state=="wyoming" ///
    |county=="yellowstone nat. park (total)" & state=="wyoming" ///
    |county=="new york city" & state=="new york" ///
    |county=="bronx" & state=="new york" ///
    |county=="kings" & state=="new york" ///
    |county=="new york" & state=="new york" ///
    |county=="queens" & state=="new york" ///
    |county=="richmond" & state=="new york" ///
    |county=="ormsby" & state=="nevada" ///
    |county=="carson city" & state=="nevada" ///
    |county=="los alamos" & state=="new mexico" ///
    |county=="menominee" & state=="wisconsin" ///
    |county=="total" ///
    |state=="alaska" ///
    |state=="hawaii" ///
    |state=="virginia" ///
    |state=="dc"
restore

*check for mistakes/misspellings in state names
assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california" ///
    |state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida" ///
    |state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana" ///
    |state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine" ///
    |state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi" ///
    |state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire" ///
    |state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota" ///
    |state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island" ///
    |state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah" ///
    |state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin" ///
    |state=="wyoming"

*data checks for columns summing to county total
gen temp=births_by_place_of_residence__at+ births_by_place_of_residence__a0
assert temp==births_by_place_of_residence__to
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births_by_place_of_residence__to births
label var births "births by residence"
rename births_by_place_of_residence__at births_h_p
label var births_h_p "births by residence: physician in hospital"
rename births_by_place_of_residence__a0 births_nh_ns
label var births_nh_ns "births by residence: attendant not in hospital and not specified"

*generate year variable
gen year=1963
label var year "year"

*check that observations are unique
*(egen tag() marks one observation per group, so tag==1 everywhere means no repeated key)
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1963.dta,replace
clear

**
*1964 data
**
*http://nber15.nber.org/vital-stats-books/nat64_1.cv.pdf
*table 2-1
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1964.dta
sum
desc

replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)

*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
count if births__total==.
replace births__total=0 if births__total ==.
count if births_of_residents_of_area__att==.
replace births_of_residents_of_area__att =0 if births_of_residents_of_area__att ==.
count if births_of_residents_of_area__at0==.
replace births_of_residents_of_area__at0 =0 if births_of_residents_of_area__at0 ==.

*check that all pdf pages appear to be in the data
gen temp=strpos(page__of_pdf_, "-")
gen newpagenumber=substr(page__of_pdf_,temp+1,.)
destring newpagenumber, replace
drop temp
sort newpagenumber
gen temp=newpagenumber[_n]-newpagenumber[_n-1]
assert temp==0|temp==1|temp==.
*remove the helper variables left over from the pdf page-coverage check
drop temp newpagenumber

*clean data entry errors
*each correction is keyed on state/county/area (plus race or the birth count) so it hits one specific row
replace births_of_residents_of_area__att=518 if state=="new hampshire" & county=="strafford" & city_balance_total=="balance" & race=="total"
replace births_of_residents_of_area__at0=2 if state=="new hampshire" & county=="strafford" & city_balance_total=="balance" & race=="total"
replace county="oakland" if state=="michigan" & county=="oceana" & city_balance_total=="balance" & births__total==6286
replace county="ottawa" if state=="michigan" & county=="presque isle" & city_balance_total=="balance" & births__total==1816
replace city_balance_total="coffeyville" if state=="kansas" & county=="montgomery" & city_balance_total=="total" & race=="white" & births__total==214
replace city_balance_total="coffeyville" if state=="kansas" & county=="montgomery" & city_balance_total=="total" & race=="nonwhite" & births__total==38
replace city_balance_total="balance" if state=="montana" & county=="hill" & city_balance_total=="total" & race=="white" & births__total==116
replace city_balance_total="balance" if state=="montana" & county=="hill" & city_balance_total=="total" & race=="nonwhite" & births__total==74

*fix county name misspellings (relative to county names in the 1970 census)
*strip the period from every "st." / "ste." occurrence in county names
replace county=subinstr(county,"st.","st",.)
replace county=subinstr(county,"ste.","st",.)
replace county="cochrane" if county=="cochran" & state=="texas" replace county="desoto" if county=="de soto" & state=="florida" replace county="mcclain" if state=="oklahoma" & county=="mc clain" replace county="mccormick" if state=="south carolina" & county=="mc cormick" replace county="mccracken" if state=="kentucky" & county=="mc cracken" replace county="mccreary" if state=="kentucky" & county=="mc creary" replace county="mcculloch" if state=="texas" & county=="mc culloch" replace county="mccurtain" if state=="oklahoma" & county=="mc curtain" replace county="mcdonough" if state=="illinois" & county=="mc donough" replace county="mcdowell" if state=="west virginia" & county=="mc dowell" replace county="mcduffie" if state=="georgia" & county=="mc duffie" replace county="mchenry" if state=="illinois" & county=="mc henry" replace county="mchenry" if state=="north dakota" & county=="mc henry" replace county="mcintosh" if state=="georgia" & county=="mc intosh" replace county="mcintosh" if state=="north dakota" & county=="mc intosh" replace county="mcintosh" if state=="oklahoma" & county=="mc intosh" replace county="mckenzie" if state=="north dakota" & county=="mc kenzie" replace county="mckinley" if state=="new mexico" & county=="mc kinley" replace county="mclean" if state=="illinois" & county=="mc lean" replace county="mclean" if state=="kentucky" & county=="mc lean" replace county="mclean" if state=="north dakota" & county=="mc lean" replace county="mclennan" if state=="texas" & county=="mc lennan" replace county="mcleod" if state=="minnesota" & county=="mc leod" replace county="mcminn" if state=="tennessee" & county=="mc minn" replace county="mcmullen" if state=="texas" & county=="mc mullen" replace county="mcnairy" if state=="tennessee" & county=="mc nairy" replace county="mcpherson" if state=="kansas" & county=="mc pherson" replace county="san francisco" if county=="san francisco, coex. with san francisco city." 
& state=="california" replace county="floyd" if state=="texas" & county=="floye" replace county="jackson" if state=="south dakota" & county=="jackson (+81 includes washabaugh)" replace state="virginia" if state=="independent cities" replace county="mcdonald" if state=="missouri" & county=="mc donal d" replace city_balance_total="bridgeport" if state=="connecticut" & county=="fairfield" & births__total==3078 replace city_balance_total="bridgeport" if state=="connecticut" & county=="fairfield" & births__total==752 replace city_balance_total="hartford" if state=="connecticut" & county=="hartford" & births__total==2812 replace city_balance_total="hartford" if state=="connecticut" & county=="hartford" & births__total==1252 replace city_balance_total="new haven" if state=="connecticut" & county=="new haven" & births__total==2186 replace city_balance_total="new haven" if state=="connecticut" & county=="new haven" & births__total==1076 replace city_balance_total="gainesville" if state=="florida" & county=="alachua" & births__total==1016 replace city_balance_total="gainesville" if state=="florida" & county=="alachua" & births__total==366 replace city_balance_total="balance" if state=="florida" & county=="alachua" & births__total==482 replace city_balance_total="balance" if state=="florida" & county=="alachua" & births__total==308 replace city_balance_total="panama city" if state=="florida" & county=="bay" & births__total==580 replace city_balance_total="panama city" if state=="florida" & county=="bay" & births__total==252 replace city_balance_total="cocoa" if state=="florida" & county=="brevard" & births__total==402 replace city_balance_total="cocoa" if state=="florida" & county=="brevard" & births__total==186 replace city_balance_total="melbourne" if state=="florida" & county=="brevard" & births__total==256 replace city_balance_total="melbourne" if state=="florida" & county=="brevard" & births__total==136 replace city_balance_total="fort lauderdale" if state=="florida" & 
county=="broward" & births__total==1392 replace city_balance_total="fort lauderdale" if state=="florida" & county=="broward" & births__total==956 replace city_balance_total="hallandale" if state=="florida" & county=="broward" & births__total==102 replace city_balance_total="hallandale" if state=="florida" & county=="broward" & births__total==182 replace city_balance_total="pompano beach" if state=="florida" & county=="broward" & births__total==342 replace city_balance_total="pompano beach" if state=="florida" & county=="broward" & births__total==460 replace city_balance_total="balance" if state=="florida" & county=="broward" & births__total==2342 replace city_balance_total="balance" if state=="florida" & county=="broward" & births__total==696 replace city_balance_total="miami" if state=="florida" & county=="dade" & births__total==3308 replace city_balance_total="miami" if state=="florida" & county=="dade" & births__total==2426 replace city_balance_total="balance" if state=="florida" & county=="dade" & births__total==7706 replace city_balance_total="balance" if state=="florida" & county=="dade" & births__total==2590 replace city_balance_total="jacksonville" if state=="florida" & county=="duval" & births__total==2284 replace city_balance_total="jacksonville" if state=="florida" & county=="duval" & births__total==2328 replace city_balance_total="balance" if state=="florida" & county=="marion" & births__total==452 replace city_balance_total="balance" if state=="florida" & county=="marion" & births__total==428 replace city_balance_total="wilmington" if state=="delaware" & county=="new castle" & births__total==1564 replace city_balance_total="wilmington" if state=="delaware" & county=="new castle" & births__total==970 replace city_balance_total="balance" if state=="delaware" & county=="new castle" & births__total==4342 replace city_balance_total="balance" if state=="delaware" & county=="new castle" & births__total==346 replace city_balance_total="balance" if 
state=="georgia" & county=="clarke" & births__total==228 replace city_balance_total="balance" if state=="georgia" & county=="clarke" & births__total==82 replace city_balance_total="balance" if state=="georgia" & county=="colquitt" & births__total==110 replace city_balance_total="balance" if state=="georgia" & county=="glynn" & births__total==574 replace city_balance_total="balance" if state=="georgia" & county=="glynn" & births__total==86 replace city_balance_total="junction city" if state=="kansas" & county=="geary" & births__total==612 replace city_balance_total="junction city" if state=="kansas" & county=="geary" & births__total==118 replace city_balance_total="balance" if state=="georgia" & county=="colquitt" & city_balance_total=="total" & births__total==232 replace county="montgomery" if state=="maryland" & county=="prince georges" & births__total==326 replace county="otsego" if state=="michigan" & county=="ottawa" & births__total==180 replace county="roscommon" if state=="michigan" & county=="saginaw" & births__total==120 replace city_balance_total="ecorse" if state=="michigan" & county=="wayne" & births__total==224 replace city_balance_total="highland park" if state=="michigan" & county=="wayne" & births__total==356 replace county="oscoda" if state=="michigan" & county=="otsego" & births__total==70 replace county="presque isle" if state=="michigan" & county=="roscommon" & births__total==258 replace county="osceola" if state=="michigan" & county=="oscoda" & births__total==306 replace city_balance_total="ecorse" if state=="michigan" & county=="wayne" & city_balance_total=="total" & race=="nonwhite" & births__total==172 replace city_balance_total="highland park" if state=="michigan" & county=="wayne" & births__total==292 replace county="ontonagon" if state=="michigan" & county=="osceola" & births__total==206 replace county="ogemaw" if state=="michigan" & county=="ontonagon" & births__total==176 replace county="oceana" if state=="michigan" & county=="ogemaw" & 
births__total==314 replace city_balance_total="balance" if state=="missouri" & county=="st louis" & births__total==9206 replace city_balance_total="balance" if state=="missouri" & county=="st louis" & births__total==424 replace county="yellowstone national park, part" if state=="montana" & county=="yellowstone" & births__total==0 replace county="park" if state=="montana" & county=="park (excl yell nat park)" replace county="mcpherson" if state=="nebraska" & county=="madison" & births__total==10 replace city_balance_total="atlantic city" if state=="new jersey" & county=="atlantic" & births__total==514 replace city_balance_total="atlantic city" if state=="new jersey" & county=="atlantic" & births__total==556 replace city_balance_total="pleasantville" if state=="new jersey" & county=="atlantic" & births__total==242 replace city_balance_total="pleasantville" if state=="new jersey" & county=="atlantic" & births__total==86 replace city_balance_total="balance" if state=="new jersey" & county=="cumberland" & births__total==452 replace city_balance_total="balance" if state=="new jersey" & county=="cumberland" & births__total==226 replace city_balance_total="paterson" if state=="new jersey" & county=="passaic" & births__total==2384 replace city_balance_total="paterson" if state=="new jersey" & county=="passaic" & births__total==1226 replace city_balance_total="balance" if state=="north carolina" & county=="craven" & births__total==1136 replace city_balance_total="balance" if state=="north carolina" & county=="craven" & births__total==328 replace county="mahoning" if state=="ohio" & county=="madison" & births__total==5096 replace city_balance_total="balance" if state=="ohio" & county=="mahoning" & births__total==2006 replace city_balance_total="balance" if state=="ohio" & county=="mahoning" & births__total==4 replace city_balance_total="warren" if state=="ohio" & county=="trumbull" & births__total==1102 replace city_balance_total="warren" if state=="ohio" & county=="trumbull" 
& births__total==202 replace city_balance_total="ardmore" if state=="oklahoma" & county=="carter" & births__total==322 replace city_balance_total="ardmore" if state=="oklahoma" & county=="carter" & births__total==102 replace city_balance_total="lawton" if state=="oklahoma" & county=="comanche" & births__total==2144 replace city_balance_total="lawton" if state=="oklahoma" & county=="comanche" & births__total==396 replace city_balance_total="balance" if state=="oklahoma" & county=="comanche" & births__total==270 replace city_balance_total="balance" if state=="oklahoma" & county=="comanche" & births__total==86 replace city_balance_total="sapulpa" if state=="oklahoma" & county=="creek" & births__total==220 replace city_balance_total="sapulpa" if state=="oklahoma" & county=="creek" & births__total==46 replace city_balance_total="balance" if state=="oklahoma" & county=="muskogee" & births__total==292 replace city_balance_total="balance" if state=="oklahoma" & county=="muskogee" & births__total==86 replace city_balance_total="oklahoma city, total" if state=="oklahoma" & county=="oklahoma" & births__total==7054 replace city_balance_total="balance" if state=="oklahoma" & county=="oklahoma" & births__total==642 replace city_balance_total="balance" if state=="oklahoma" & county=="oklahoma" & births__total==176 replace city_balance_total="balance" if state=="oklahoma" & county=="seminole" & births__total==194 replace city_balance_total="balance" if state=="oklahoma" & county=="seminole" & births__total==116 replace city_balance_total="tulsa, total" if state=="oklahoma" & county=="tulsa" & births__total==4720 replace city_balance_total="tulsa, total" if state=="oklahoma" & county=="tulsa" & births__total==910 replace city_balance_total="balance" if state=="oklahoma" & county=="tulsa" & births__total==1350 replace city_balance_total="balance" if state=="oklahoma" & county=="tulsa" & births__total==80 replace city_balance_total="braddock" if state=="pennsylvania" & 
county=="allegheny" & births__total==98 & city_balance_total=="total" replace city_balance_total="braddock" if state=="pennsylvania" & county=="allegheny" & births__total==88 replace city_balance_total="shelbyville" if state=="tennessee" & county=="bedford" & births__total==232 replace city_balance_total="shelbyville" if state=="tennessee" & county=="bedford" & births__total==56 replace city_balance_total="chattanooga" if state=="tennessee" & county=="hamilton" & births__total==1604 replace city_balance_total="chattanooga" if state=="tennessee" & county=="hamilton" & births__total==1116 replace city_balance_total="jackson" if state=="tennessee" & county=="madison" & births__total==734 replace city_balance_total="jackson" if state=="tennessee" & county=="madison" & births__total==414 replace city_balance_total="columbia" if state=="tennessee" & county=="maury" & births__total==348 replace city_balance_total="columbia" if state=="tennessee" & county=="maury" & births__total==130 replace city_balance_total="balance" if state=="tennessee" & county=="maury" & births__total==422 replace city_balance_total="oklahoma city, total" if state=="oklahoma" & county=="oklahoma" & births__total==1602 replace county="marion" if state=="ohio" & county=="mahoning" & births__total==1308 replace county="medina" if state=="ohio" & county=="marion" & births__total==1420 replace race="white" if state=="pennsylvania" & county=="allegheny" & city_balance_total=="braddock" & race=="nonwhite" & births__total==98 replace race="white" if state=="tennessee" & county=="madison" & city_balance_total=="jackson" & race=="total" & births__total==414 replace city_balance_total="balance" if state=="tennessee" & county=="madison" & births__total==298 replace city_balance_total="balance" if state=="tennessee" & county=="madison" & births__total==304 replace race="white" if state=="tennessee" & county=="maury" & births__total==684 & births_of_residents_of_area__att==670 replace race="nonwhite" if 
state=="tennessee" & county=="maury" & births__total==216 & births_of_residents_of_area__att==178 replace city_balance_total="columbia" if state=="tennessee" & county=="maury" & births__total==478 & births_of_residents_of_area__att==452 replace race="total" if state=="tennessee" & county=="maury" & births__total==478 & births_of_residents_of_area__att==452 replace city_balance_total="columbia" if state=="tennessee" & county=="maury" & births__total==348 & births_of_residents_of_area__att==346 replace race="white" if state=="tennessee" & county=="maury" & births__total==348 & births_of_residents_of_area__att==346 replace city_balance_total="columbia" if state=="tennessee" & county=="maury" & births__total==130 & births_of_residents_of_area__att==106 replace race="nonwhite" if state=="tennessee" & county=="maury" & births__total==130 & births_of_residents_of_area__att==106 replace city_balance_total="balance" if state=="tennessee" & county=="maury" & city_balance_total=="total" & race=="nonwhite" & births__total==422 & births_of_residents_of_area__att==396 replace race="total" if state=="tennessee" & county=="maury" & city_balance_total=="balance" & births__total==422 & births_of_residents_of_area__att==396 replace city_balance_total="balance" if state=="tennessee" & county=="maury" & city_balance_total=="columbia" & race=="total" & births__total==336 & births_of_residents_of_area__att==324 replace race="white" if state=="tennessee" & county=="maury" & city_balance_total=="balance" & births__total==336 & births_of_residents_of_area__att==324 replace city_balance_total="balance" if state=="tennessee" & county=="maury" & city_balance_total=="columbia" & race=="white" & births__total==86 & births_of_residents_of_area__att==72 replace race="nonwhite" if state=="tennessee" & county=="maury" & city_balance_total=="balance" & births__total==86 & births_of_residents_of_area__att==72 replace city_balance_total="balance" if state=="tennessee" & county=="rutherford" & 
births__total==804 replace city_balance_total="balance" if state=="tennessee" & county=="rutherford" & births__total==126 replace city_balance_total="waxahachie" if state=="texas" & county=="ellis" & births__total==174 replace city_balance_total="waxahachie" if state=="texas" & county=="ellis" & births__total==44 replace city_balance_total="balance" if state=="texas" & county=="ellis" & births__total==420 replace city_balance_total="balance" if state=="texas" & county=="ellis" & births__total==204 replace city_balance_total="balance" if state=="virginia" & county=="fairfax" & births__total==308 replace city_balance_total="seattle" if state=="washington" & county=="king" & births__total==8060 replace city_balance_total="seattle" if state=="washington" & county=="king" & births__total==1370 replace city_balance_total="bluefield" if state=="west virginia" & county=="mercer" & births__total==338 replace city_balance_total="bluefield" if state=="west virginia" & county=="mercer" & births__total==80 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births__total==4834 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births__total==2276 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births__total==2558 replace county="roanoke (ind. 
city)" if county=="roanoke" & state=="virginia" & race=="total" & births__total==1794 replace race="white" if state=="tennessee" & county=="madison" & city_balance_total=="balance" & race=="total" & births__total==298 replace births__total=684 if state=="tennessee" & county=="maury" & city_balance_total=="total" & race=="white" replace births_of_residents_of_area__att=670 if state=="tennessee" & county=="maury" & city_balance_total=="total" & race=="white" replace births_of_residents_of_area__at0=14 if state=="tennessee" & county=="maury" & city_balance_total=="total" & race=="white" replace city_balance_total="balance of county" if state=="virginia" & county=="fairfax" & births__total==6454 replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & race=="total" & births__total==164 replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & race=="white" & births__total==54 replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & race=="nonwhite" & births__total==110 replace county="meigs" if state=="ohio" & county=="medina" & births__total==344 replace county="mercer" if state=="ohio" & county=="meigs" & births__total==780 replace county="miami" if state=="ohio" & county=="mercer" & births__total==1560 replace county="monroe" if state=="ohio" & county=="miami" & births__total==324 replace county="montgomery" if state=="ohio" & county=="monroe" & births__total==11942 drop if state=="virginia" & county=="fairfax" & births__total==370 replace county="morgan" if state=="ohio" & county=="montgomery" & births__total==228 replace race="nonwhite" if state=="tennessee" & county=="madison" & births__total==304 replace city_balance_total="balance" if state=="virginia" & county=="fairfax" & births__total==6454 replace county="morrow" if state=="ohio" & county=="morgan" & births__total==390 replace county="muskingum" if state=="ohio" & county=="morrow" & births__total==1664 replace county="noble" if 
state=="ohio" & county=="muskingum" & births__total==174 replace county="ottawa" if state=="ohio" & county=="noble" & births__total==666 replace county="paulding" if state=="ohio" & county=="ottawa" & births__total==378 replace county="perry" if state=="ohio" & county=="paulding" & births__total==546 replace county="pickaway" if state=="ohio" & county=="perry" & births__total==798 replace county="pike" if state=="ohio" & county=="pickaway" & births__total==404 replace county="portage" if state=="ohio" & county=="pike" & births__total==2100 replace county="preble" if state=="ohio" & county=="portage" & births__total==660 replace county="putnam" if state=="ohio" & county=="preble" & births__total==766 replace county="richland" if state=="ohio" & county=="putnam" & births__total==2636 replace county="ross" if state=="ohio" & county=="richland" & births__total==1332 replace county="sandusky" if state=="ohio" & county=="ross" & births__total==1240 replace county="scioto" if state=="ohio" & county=="sandusky" & births__total==1618 replace county="seneca" if state=="ohio" & county=="scioto" & births__total==1362 replace county="shelby" if state=="ohio" & county=="seneca" & births__total==810 replace county="stark" if state=="ohio" & county=="shelby" & births__total==6844 replace county="summit" if state=="ohio" & county=="stark" & births__total==10890 *correct data entry errors found while checking that county totals sum to state totals replace race="total" if state=="tennessee" & county=="mcminn" replace city_balance_total="total" if state=="tennessee" & county=="mcminn" & births__total==712 replace city_balance_total="athens" if state=="tennessee" & county=="mcminn" & births__total==250 replace city_balance_total="balance" if state=="tennessee" & county=="mcminn" & births__total==462 replace race="total" if state=="tennessee" & county=="mcnairy" replace city_balance_total="total" if state=="tennessee" & county=="mcnairy" replace city_balance_total="total" if 
state=="tennessee" & county=="madison" & births__total==1336 replace race="white" if state=="tennessee" & county=="madison" & births__total==712 replace city_balance_total="total" if state=="tennessee" & county=="madison" & births__total==712 replace race="nonwhite" if state=="tennessee" & county=="madison" & births__total==624 replace city_balance_total="total" if state=="tennessee" & county=="madison" & births__total==624 replace race="nonwhite" if state=="tennessee" & county=="madison" & births__total==320 replace city_balance_total="jackson" if state=="tennessee" & county=="madison" & births__total==320 replace race="total" if state=="tennessee" & county=="madison" & births__total==602 replace city_balance_total="balance" if state=="tennessee" & county=="madison" & births__total==602 replace race="white" if state=="tennessee" & county=="madison" & births__total==298 replace race="nonwhite" if state=="tennessee" & county=="madison" & births__total==304 replace race="total" if state=="tennessee" & county=="marion" replace city_balance_total="total" if state=="tennessee" & county=="marion" replace city_balance_total="total" if state=="tennessee" & county=="marshall" *check that county names are consistent with 1970 census, except for known deviations preserve egen countyr_state=concat(county state), punct("_") sort countyr_state merge countyr_state using original_census1970_counties assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park, part" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park," & state=="wyoming" |county=="yellowstone national park" & state=="montana"|county=="park (excl 
yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. park (part)" & state=="wyoming"|county=="yellowstone nat. park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc" restore *check for mistakes/misspellings in state names assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california"|state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming" *data checks for columns 
summing to county total gen temp=births_of_residents_of_area__att+ births_of_residents_of_area__at0 assert temp==births__total drop temp *clean and label variables rename page__of_pdf_ page_of_pdf_ label var page_of_pdf_ "page of pdf" label var state "state" label var county "county" rename city_balance_total sub_county label var sub_county "city/balance/total" label var race "race" rename births__total births label var births "births by residence" rename births_of_residents_of_area__att births_h_p label var births_h_p "births by residence: physician in hospital" rename births_of_residents_of_area__at0 births_nh_ns label var births_nh_ns "births by residence: attendant not in hospital and not specified" *generate year variable gen year=1964 label var year "year" *check that observations are unique egen tag=tag(state county sub_county race) assert tag==1 drop tag list in 1/1 desc sum saveold clean_natality1964.dta,replace clear ** *1965 data ** *http://nber15.nber.org/vital-stats-books/nat65_1.cv.pdf *table 2-1 *hand-checked excel file for ?'s (difficult-to-read characters) *corrected below if any clear use natality1965.dta sum desc replace county=lower(county) replace state=lower(state) replace city_balance_total=lower(city_balance_total) replace race=lower(race) *missing observations in the original data are zeros *replace these so we can distinguish these from "true" missing values *which would be generated below due to difficult-to-read characters count if births_by_place_of_residence__to==. replace births_by_place_of_residence__to=0 if births_by_place_of_residence__to ==. count if births_by_place_of_residence__at==. replace births_by_place_of_residence__at =0 if births_by_place_of_residence__at ==. count if births_by_place_of_residence__a0==. replace births_by_place_of_residence__a0 =0 if births_by_place_of_residence__a0 ==. *check that all pdf pages appear to be in the data gen temp=strpos(page__of_pdf_,"-") gen newpagenumber=substr(page__of_pdf_,temp+1,.) 
destring newpagenumber, replace drop temp sort newpagenumber gen temp=newpagenumber[_n]-newpagenumber[_n-1] assert temp==0|temp==1|temp==. drop temp newpagenumber *clean data entry errors replace county="fairfax (ind. city)" if county=="fairfax" & state=="virginia" & race=="total" & births_by_place_of_residence__to==376 replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & race=="total" & births_by_place_of_residence__to==148 replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & births_by_place_of_residence__to==66 replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & births_by_place_of_residence__to==82 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births_by_place_of_residence__to==4218 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births_by_place_of_residence__to==1860 replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births_by_place_of_residence__to==2358 replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births_by_place_of_residence__to==1680 *fix county name misspellings (relative to county names in the 1970 census) replace county=subinstr(county,"st.","st",.) replace county=subinstr(county,"ste.","st",.) replace county="cochrane" if county=="cochran" & state=="texas" replace county="davidson" if county=="davidson, coex. with nashville city" & state=="tennessee" replace county="desoto" if county=="de soto" & state=="florida" replace county="denver" if county=="denver, coex. 
with denver city" & state=="colorado" replace county="mcclain" if state=="oklahoma" & county=="mc clain" replace county="mccone" if state=="montana" & county=="mc cone" replace county="mccook" if state=="south dakota" & county=="mc cook" replace county="mccormick" if state=="south carolina" & county=="mc cormick" replace county="mccracken" if state=="kentucky" & county=="mc cracken" replace county="mccreary" if state=="kentucky" & county=="mc creary" replace county="mcculloch" if state=="texas" & county=="mc culloch" replace county="mccurtain" if state=="oklahoma" & county=="mc curtain" replace county="mcdonald" if state=="missouri" & county=="mc donald" replace county="mcdonough" if state=="illinois" & county=="mc donough" replace county="mcdowell" if state=="north carolina" & county=="mc dowell" replace county="mcdowell" if state=="west virginia" & county=="mc dowell" replace county="mcduffie" if state=="georgia" & county=="mc duffie" replace county="mchenry" if state=="illinois" & county=="mc henry" replace county="mchenry" if state=="north dakota" & county=="mc henry" replace county="mcintosh" if state=="georgia" & county=="mc intosh" replace county="mcintosh" if state=="north dakota" & county=="mc intosh" replace county="mcintosh" if state=="oklahoma" & county=="mc intosh" replace county="mckean" if state=="pennsylvania" & county=="mc kean" replace county="mckenzie" if state=="north dakota" & county=="mc kenzie" replace county="mckinley" if state=="new mexico" & county=="mc kinley" replace county="mclean" if state=="illinois" & county=="mc lean" replace county="mclean" if state=="kentucky" & county=="mc lean" replace county="mclean" if state=="north dakota" & county=="mc lean" replace county="mclennan" if state=="texas" & county=="mc lennan" replace county="mcleod" if state=="minnesota" & county=="mc leod" replace county="mcminn" if state=="tennessee" & county=="mc minn" replace county="mcmullen" if state=="texas" & county=="mc mullen" replace county="mcnairy" 
if state=="tennessee" & county=="mc nairy" replace county="mcpherson" if state=="kansas" & county=="mc pherson" replace county="mcpherson" if state=="nebraska" & county=="mc pherson" replace county="mcpherson" if state=="south dakota" & county=="mc pherson" replace county="o'brien" if state=="iowa" & county=="o brien" replace county="orleans" if county=="orleans, coex. with new orleans city" & state=="louisiana" replace county="philadelphia" if county=="philadelphia, coex. with philaoelphia city" & state=="pennsylvania" replace county="san francisco" if county=="san francisco, coex. with san francisco city." & state=="california" *check that county names are consistent with 1970 census, except for known deviations preserve egen countyr_state=concat(county state), punct("_") sort countyr_state merge countyr_state using original_census1970_counties assert _m==3|county=="armstrong" & state=="south dakota"|county=="washington" & state=="south dakota"|county=="yellowstone national park (part)" & state=="idaho"|county=="yellowstone national park, part" & state=="idaho"|county=="yellowstone national park" & state=="idaho"|county=="yellowstone national park (part)" & state=="montana"|county=="yellowstone national park (part)" & state=="wyoming"|county=="yellowstone national park (total)" & state=="wyoming"|county=="yellowstone national park," & state=="wyoming" |county=="yellowstone national park" & state=="montana"|county=="park (excl yell nat park)" & state=="montana"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, part" & state=="wyoming"|county=="yellowstone national park, part" & state=="montana"|county=="yellowstone national park, total" & state=="wyoming"|county=="park" & state=="montana"|county=="yellowstone nat. park (part)" & state=="idaho"|county=="yellowstone nat. park (part)" & state=="wyoming"|county=="yellowstone nat. 
park (total)" & state=="wyoming"|county=="new york city" & state=="new york"|county=="bronx" & state=="new york"|county=="kings" & state=="new york"|county=="new york" & state=="new york"|county=="queens" & state=="new york"|county=="richmond" & state=="new york"|county=="ormsby" & state=="nevada"|county=="carson city" & state=="nevada"|county=="los alamos" & state=="new mexico"|county=="menominee" & state=="wisconsin"|county=="total"|state=="alaska"|state=="hawaii"|state=="virginia"|state=="dc" restore *check for mistakes/misspellings in state names assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas"|state=="california"|state=="colorado"|state=="connecticut"|state=="delaware"|state=="district of columbia"|state=="florida"|state=="georgia"|state=="hawaii"|state=="idaho"|state=="illinois"|state=="indiana"|state=="iowa"|state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine"|state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota"|state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska"|state=="nevada"|state=="new hampshire"|state=="new jersey"|state=="new mexico"|state=="new york"|state=="north carolina"|state=="north dakota"|state=="ohio"|state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island"|state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas"|state=="utah"|state=="vermont"|state=="virginia"|state=="washington"|state=="west virginia"|state=="wisconsin"|state=="wyoming" *data checks for columns summing to county total gen temp=births_by_place_of_residence__at+births_by_place_of_residence__a0 assert temp==births_by_place_of_residence__to drop temp *clean and label variables rename page__of_pdf_ page_of_pdf_ label var page_of_pdf_ "page of pdf" label var state "state" label var county "county" rename city_balance_total sub_county label var sub_county "city/balance/total" label var race "race" rename births_by_place_of_residence__to births label 
var births "births by residence"
rename births_by_place_of_residence__at births_h
label var births_h "births by residence: in hospital"
rename births_by_place_of_residence__a0 births_nh_ns
label var births_nh_ns "births by residence: not in hospital and not specified"

*generate year variable
gen year=1965
label var year "year"

*check that observations are unique
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1965.dta,replace
clear

*************************************************************************
*helper programs shared by the 1966, 1967 and 1968 sections below
*each helper holds a step that is identical in those three sections
*all helpers act on the dataset currently in memory and take no arguments
*nb: commands inside a program are not echoed in the log, only their output
*************************************************************************

*nat_zero_fill
*missing observations in the original data are zeros
*replace these so we can distinguish these from "true" missing values
*which would be generated below due to difficult-to-read characters
*the count before each replace records how many cells were blank
capture program drop nat_zero_fill
program define nat_zero_fill
    foreach v of varlist births_by_place_of_residence__to births_by_place_of_residence__at births_by_place_of_residence__a0 {
        count if `v'==.
        replace `v'=0 if `v'==.
    }
end

*nat_check_pages
*check that all pdf pages appear to be in the data
*page__of_pdf_ is a string; the text after its first "-" is converted to a number,
*the data are sorted on it, and consecutive rows must differ by 0 or 1
*(the difference is missing for the first row)
*side effect: the data are left sorted by that page number
capture program drop nat_check_pages
program define nat_check_pages
    gen temp=strpos(page__of_pdf_,"-")
    gen newpagenumber=substr(page__of_pdf_,temp+1,.)
    destring newpagenumber, replace
    drop temp
    sort newpagenumber
    gen temp=newpagenumber[_n]-newpagenumber[_n-1]
    assert temp==0|temp==1|temp==.
    drop temp newpagenumber
end

*nat_fix_county_names
*fix county name misspellings (relative to county names in the 1970 census)
*strips the period from "st."/"ste.", collapses "mc xxx" to "mcxxx" state by state,
*and shortens the "..., coex. with ... city" entries to the bare county name
capture program drop nat_fix_county_names
program define nat_fix_county_names
    replace county=subinstr(county,"st.","st",.)
    replace county=subinstr(county,"ste.","st",.)
    replace county="cochrane" if county=="cochran" & state=="texas"
    replace county="davidson" if county=="davidson, coex. with nashville city" & state=="tennessee"
    replace county="desoto" if county=="de soto" & state=="florida"
    replace county="denver" if county=="denver, coex. with denver city" & state=="colorado"
    replace county="mcclain" if state=="oklahoma" & county=="mc clain"
    replace county="mccone" if state=="montana" & county=="mc cone"
    replace county="mccook" if state=="south dakota" & county=="mc cook"
    replace county="mccormick" if state=="south carolina" & county=="mc cormick"
    replace county="mccracken" if state=="kentucky" & county=="mc cracken"
    replace county="mccreary" if state=="kentucky" & county=="mc creary"
    replace county="mcculloch" if state=="texas" & county=="mc culloch"
    replace county="mccurtain" if state=="oklahoma" & county=="mc curtain"
    replace county="mcdonald" if state=="missouri" & county=="mc donald"
    replace county="mcdonough" if state=="illinois" & county=="mc donough"
    replace county="mcdowell" if state=="north carolina" & county=="mc dowell"
    replace county="mcdowell" if state=="west virginia" & county=="mc dowell"
    replace county="mcduffie" if state=="georgia" & county=="mc duffie"
    replace county="mchenry" if state=="illinois" & county=="mc henry"
    replace county="mchenry" if state=="north dakota" & county=="mc henry"
    replace county="mcintosh" if state=="georgia" & county=="mc intosh"
    replace county="mcintosh" if state=="north dakota" & county=="mc intosh"
    replace county="mcintosh" if state=="oklahoma" & county=="mc intosh"
    replace county="mckean" if state=="pennsylvania" & county=="mc kean"
    replace county="mckenzie" if state=="north dakota" & county=="mc kenzie"
    replace county="mckinley" if state=="new mexico" & county=="mc kinley"
    replace county="mclean" if state=="illinois" & county=="mc lean"
    replace county="mclean" if state=="kentucky" & county=="mc lean"
    replace county="mclean" if state=="north dakota" & county=="mc lean"
    replace county="mclennan" if state=="texas" & county=="mc lennan"
    replace county="mcleod" if state=="minnesota" & county=="mc leod"
    replace county="mcminn" if state=="tennessee" & county=="mc minn"
    replace county="mcmullen" if state=="texas" & county=="mc mullen"
    replace county="mcnairy" if state=="tennessee" & county=="mc nairy"
    replace county="mcpherson" if state=="kansas" & county=="mc pherson"
    replace county="mcpherson" if state=="nebraska" & county=="mc pherson"
    replace county="mcpherson" if state=="south dakota" & county=="mc pherson"
    replace county="o'brien" if state=="iowa" & county=="o brien"
    replace county="orleans" if county=="orleans, coex. with new orleans city" & state=="louisiana"
    *note(review): "philaoelphia" presumably mirrors the spelling in the keyed data; check before correcting it
    replace county="philadelphia" if county=="philadelphia, coex. with philaoelphia city" & state=="pennsylvania"
    replace county="san francisco" if county=="san francisco, coex. with san francisco city." & state=="california"
end

*nat_check_counties
*check that county names are consistent with 1970 census, except for known deviations
*builds the key county_state, merges it against original_census1970_counties and
*requires every row to be matched (_m==3) or to be one of the listed exceptions
*runs between preserve and restore, so the data in memory are unchanged afterwards
*assumes original_census1970_counties is already sorted by countyr_state - confirm
capture program drop nat_check_counties
program define nat_check_counties
    preserve
    egen countyr_state=concat(county state), punct("_")
    sort countyr_state
    merge countyr_state using original_census1970_counties
    assert _m==3 ///
        |county=="armstrong" & state=="south dakota" ///
        |county=="washington" & state=="south dakota" ///
        |county=="yellowstone national park (part)" & state=="idaho" ///
        |county=="yellowstone national park, part" & state=="idaho" ///
        |county=="yellowstone national park" & state=="idaho" ///
        |county=="yellowstone national park (part)" & state=="montana" ///
        |county=="yellowstone national park (part)" & state=="wyoming" ///
        |county=="yellowstone national park (total)" & state=="wyoming" ///
        |county=="yellowstone national park," & state=="wyoming" ///
        |county=="yellowstone national park" & state=="montana" ///
        |county=="park (excl yell nat park)" & state=="montana" ///
        |county=="yellowstone national park, part" & state=="montana" ///
        |county=="yellowstone national park, part" & state=="wyoming" ///
        |county=="yellowstone national park, total" & state=="wyoming" ///
        |county=="park" & state=="montana" ///
        |county=="yellowstone nat. park (part)" & state=="idaho" ///
        |county=="yellowstone nat. park (part)" & state=="wyoming" ///
        |county=="yellowstone nat. park (total)" & state=="wyoming" ///
        |county=="new york city" & state=="new york" ///
        |county=="bronx" & state=="new york" ///
        |county=="kings" & state=="new york" ///
        |county=="new york" & state=="new york" ///
        |county=="queens" & state=="new york" ///
        |county=="richmond" & state=="new york" ///
        |county=="ormsby" & state=="nevada" ///
        |county=="carson city" & state=="nevada" ///
        |county=="los alamos" & state=="new mexico" ///
        |county=="menominee" & state=="wisconsin" ///
        |county=="total" ///
        |state=="alaska" ///
        |state=="hawaii" ///
        |state=="virginia" ///
        |state=="dc"
    restore
end

*nat_check_states
*check for mistakes/misspellings in state names
*every row must carry one of the 50 state names or "district of columbia", in lower case
capture program drop nat_check_states
program define nat_check_states
    assert state=="alabama"|state=="alaska"|state=="arizona"|state=="arkansas" ///
        |state=="california"|state=="colorado"|state=="connecticut"|state=="delaware" ///
        |state=="district of columbia"|state=="florida"|state=="georgia"|state=="hawaii" ///
        |state=="idaho"|state=="illinois"|state=="indiana"|state=="iowa" ///
        |state=="kansas"|state=="kentucky"|state=="louisiana"|state=="maine" ///
        |state=="maryland"|state=="massachusetts"|state=="michigan"|state=="minnesota" ///
        |state=="mississippi"|state=="missouri"|state=="montana"|state=="nebraska" ///
        |state=="nevada"|state=="new hampshire"|state=="new jersey"|state=="new mexico" ///
        |state=="new york"|state=="north carolina"|state=="north dakota"|state=="ohio" ///
        |state=="oklahoma"|state=="oregon"|state=="pennsylvania"|state=="rhode island" ///
        |state=="south carolina"|state=="south dakota"|state=="tennessee"|state=="texas" ///
        |state=="utah"|state=="vermont"|state=="virginia"|state=="washington" ///
        |state=="west virginia"|state=="wisconsin"|state=="wyoming"
end

**
*1966 data
**
*http://nber15.nber.org/vital-stats-books/nat66_1.cv.pdf
*table 2-1
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1966.dta
sum
desc

*lower-case all string identifiers so the comparisons below are case-insensitive
replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)

*blank cells are zeros
nat_zero_fill

*check that all pdf pages appear to be in the data
nat_check_pages

*clean data entry errors
replace births_by_place_of_residence__at=2 if state=="montana" & county=="yellowstone national park, part"
*some virginia rows share a name between a county and another entry;
*the rows relabelled "(ind. city)" are picked out by their birth totals
replace county="fairfax (ind. city)" if county=="fairfax" & state=="virginia" & race=="total" & births_by_place_of_residence__to==464
replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & race=="total" & births_by_place_of_residence__to==124
replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & births_by_place_of_residence__to==32
replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & births_by_place_of_residence__to==92
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births_by_place_of_residence__to==4070
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births_by_place_of_residence__to==1818
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births_by_place_of_residence__to==2252
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births_by_place_of_residence__to==1682

*fix county name misspellings (relative to county names in the 1970 census)
nat_fix_county_names
replace city_balance_total="balance of county" if state=="wisconsin" & county=="jefferson" & births_by_place_of_residence__to==780

*check that county names are consistent with 1970 census, except for known deviations
nat_check_counties

*check for mistakes/misspellings in state names
nat_check_states

*data checks for columns summing to county total
gen temp=births_by_place_of_residence__at+births_by_place_of_residence__a0
assert temp==births_by_place_of_residence__to
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births_by_place_of_residence__to births
label var births "births by residence"
*note(review): this year names the in-hospital count births_h_p, while the 1965, 1967 and 1968
*sections name it births_h with the same label - confirm which is intended before appending years
rename births_by_place_of_residence__at births_h_p
label var births_h_p "births by residence: in hospital"
rename births_by_place_of_residence__a0 births_nh_ns
label var births_nh_ns "births by residence: not in hospital and not specified"

*generate year variable
gen year=1966
label var year "year"

*check that observations are unique
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1966.dta,replace
clear

**
*1967 data
**
*http://nber15.nber.org/vital-stats-books/nat67_1.cv.pdf
*table 2-1
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1967.dta
sum
desc

*lower-case all string identifiers so the comparisons below are case-insensitive
replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)

*blank cells are zeros
nat_zero_fill

*check that all pdf pages appear to be in the data
nat_check_pages

*clean data entry errors
*virginia rows relabelled "(ind. city)" are picked out by their birth totals
replace county="fairfax (ind. city)" if county=="fairfax" & state=="virginia" & race=="total" & births_by_place_of_residence__to==462
replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & race=="total" & births_by_place_of_residence__to==139
replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & births_by_place_of_residence__to==71
replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & births_by_place_of_residence__to==68
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births_by_place_of_residence__to==3923
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births_by_place_of_residence__to==1761
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births_by_place_of_residence__to==2162
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births_by_place_of_residence__to==1630

*fix county name misspellings (relative to county names in the 1970 census)
nat_fix_county_names

*check that county names are consistent with 1970 census, except for known deviations
nat_check_counties

*check for mistakes/misspellings in state names
nat_check_states

*data checks for columns summing to county total
gen temp=births_by_place_of_residence__at+births_by_place_of_residence__a0
list if temp!=births_by_place_of_residence__to
*checked .pdf, this is a data error not a data entry error
*looks like it should be births_by_place_of_residence__a0=61 instead of births_by_place_of_residence__a0=661; changing this
replace births_by_place_of_residence__a0=61 if births_by_place_of_residence__a0==661 & state=="louisiana" & county=="tensas" & city_balance_total=="total" & race=="nonwhite"
*re-run the check on the corrected values; a failure is written to the log
*but does not stop the run, matching the list-only check above
replace temp=births_by_place_of_residence__at+births_by_place_of_residence__a0
capture noisily assert temp==births_by_place_of_residence__to
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births_by_place_of_residence__to births
label var births "births by residence"
rename births_by_place_of_residence__at births_h
label var births_h "births by residence: in hospital"
rename births_by_place_of_residence__a0 births_nh_ns
label var births_nh_ns "births by residence: not in hospital and not specified"

*generate year variable
gen year=1967
label var year "year"

*check that observations are unique
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1967.dta,replace
clear

**
*1968 data
**
*http://nber15.nber.org/vital-stats-books/vsus_1968_1.cv.pdf
*table 2-1
*hand-checked excel file for ?'s (difficult-to-read characters)
*corrected below if any
clear
use natality1968.dta
sum
desc

*lower-case all string identifiers so the comparisons below are case-insensitive
replace county=lower(county)
replace state=lower(state)
replace city_balance_total=lower(city_balance_total)
replace race=lower(race)
*this year's table says "all other" where the other years say "nonwhite"
replace race="nonwhite" if race=="all other"

*blank cells are zeros
nat_zero_fill

*check that all pdf pages appear to be in the data
nat_check_pages

*clean data entry errors
replace births_by_place_of_residence__at=286 if state=="kansas" & county=="leavenworth" & city_balance_total=="balance of county" & race=="total"
*virginia rows relabelled "(ind. city)" are picked out by their birth totals
replace county="fairfax (ind. city)" if county=="fairfax" & state=="virginia" & race=="total" & births_by_place_of_residence__to==438
replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & race=="total" & births_by_place_of_residence__to==144
replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & births_by_place_of_residence__to==68
replace county="franklin (ind. city)" if county=="franklin" & state=="virginia" & births_by_place_of_residence__to==76
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="total" & births_by_place_of_residence__to==3688
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="white" & births_by_place_of_residence__to==1472
replace county="richmond (ind. city)" if county=="richmond" & state=="virginia" & race=="nonwhite" & births_by_place_of_residence__to==2216
replace county="roanoke (ind. city)" if county=="roanoke" & state=="virginia" & race=="total" & births_by_place_of_residence__to==1668
*the franklin rule keyed on 68 births above has no race or city_balance_total condition;
*this puts the total/nonwhite row with 68 births back under "franklin"
replace county="franklin" if state=="virginia" & county=="franklin (ind. city)" & city_balance_total=="total" & race=="nonwhite" & births_by_place_of_residence__to==68

*fix county name misspellings (relative to county names in the 1970 census)
nat_fix_county_names

*check that county names are consistent with 1970 census, except for known deviations
nat_check_counties

*check for mistakes/misspellings in state names
nat_check_states

*data checks for columns summing to county total
gen temp=births_by_place_of_residence__at+births_by_place_of_residence__a0
assert temp==births_by_place_of_residence__to
drop temp

*clean and label variables
rename page__of_pdf_ page_of_pdf_
label var page_of_pdf_ "page of pdf"
label var state "state"
label var county "county"
rename city_balance_total sub_county
label var sub_county "city/balance/total"
label var race "race"
rename births_by_place_of_residence__to births
label var births "births by residence"
rename births_by_place_of_residence__at births_h
label var births_h "births by residence: in hospital"
rename births_by_place_of_residence__a0 births_nh_ns
label var births_nh_ns "births by residence: not in hospital and not specified"

*generate year variable
gen year=1968
label var year "year"

*check that observations are unique
egen tag=tag(state county sub_county race)
assert tag==1
drop tag

list in 1/1
desc
sum
saveold clean_natality1968.dta,replace
clear

log close