*Options clear all set more off capture log close log using create_bartik_datafile, text replace local path_folder /// "\Final NBER" cd "`path_folder'" tempfile temp_county2msa import excel /// "`path_folder'\rawData\CPI MSA to Census counties.xlsx", /// sheet("Codes") firstrow clear reshape long County, i(MSA) j(number) drop number drop if County=="" duplicates tag County, g(dup) bysort County (MSA): gen first_dup = sum(dup)*dup== 1 replace first_dup=first_dup+1 reshape wide MSA, i(County) j(first_dup) split County, p("_") drop dup rename County2 str_cty rename County1 str_state *Set DC to statewide code replace str_cty="999" if str_state=="11" & str_cty=="1" save `temp_county2msa', replace import delimited "`path_folder'\rawData\cbp87co.txt", delimiters(",") clear g str_cty=string(fipscty) g str_state=string(fipstate) merge m:1 str_state str_cty using `temp_county2msa', /// assert(master match) keep(match) g emp_mp=emp replace emp_mp=9.5 if empflag=="A" replace emp_mp=59.5 if empflag=="B" replace emp_mp=174.5 if empflag=="C" replace emp_mp=374.5 if empflag=="E" replace emp_mp=749.5 if empflag=="F" replace emp_mp=1749.5 if empflag=="G" replace emp_mp=3749.5 if empflag=="H" replace emp_mp=7499.5 if empflag=="I" replace emp_mp=17499.5 if empflag=="J" replace emp_mp=37499.5 if empflag=="K" replace emp_mp=74999.5 if empflag=="L" reshape long MSA, i(County sic) j(duplicate_county) drop if MSA=="" collapse (sum) emp emp_mp, by(MSA sic) egen double tot_emp= total(emp*(sic=="----")), by(MSA) g emp_sh=emp/tot_emp g emp_mp_sh=emp_mp/tot_emp rename sic str_sic4 save `temp_county2msa', replace tempfile temp_msa2msa import excel /// "`path_folder'\rawData\CPI MSA to Census CBP MSA.xlsx", /// sheet("Codes") firstrow clear reshape long cbpMSA, i(MSA) j(number) drop number drop if cbpMSA==. duplicates tag cbpMSA, g(dup) bysort cbpMSA (MSA): gen first_dup = sum(dup)*dup== 1 replace first_dup=first_dup+1 reshape wide MSA, i(cbpMSA) j(first_dup) drop dup rename cbpMSA msa save `temp_msa2msa', replace import delimited "`path_folder'\rawData\cbp93msa.txt", delimiters(",") clear merge m:1 msa using `temp_msa2msa', /// assert(master match) keep(match) g emp_mp=emp replace emp_mp=9.5 if empflag=="A" replace emp_mp=59.5 if empflag=="B" replace emp_mp=174.5 if empflag=="C" replace emp_mp=374.5 if empflag=="E" replace emp_mp=749.5 if empflag=="F" replace emp_mp=1749.5 if empflag=="G" replace emp_mp=3749.5 if empflag=="H" replace emp_mp=7499.5 if empflag=="I" replace emp_mp=17499.5 if empflag=="J" replace emp_mp=37499.5 if empflag=="K" replace emp_mp=74999.5 if empflag=="L" rename emp emp reshape long MSA, i(msa sic) j(duplicate_county) drop if MSA=="" collapse (sum) emp emp_mp, by(MSA sic) egen double tot_emp= total(emp*(sic=="----")), by(MSA) g emp_sh=emp/tot_emp g emp_mp_sh=emp_mp/tot_emp drop if ~strpos(sic, "00") g sic2=substr(sic,1,2) rename sic2 str_sic2 save `temp_msa2msa', replace use "`path_folder'\nekarda_ramey\govindustry-v2.3.dta", clear *drop if year!=1987 drop if year!=1992 g str_sic4=string(sic4) g str_sic2=string(sic2) *Uncomment if matching at SIC4 level *merge 1:m str_sic4 using `temp_county2msa', keep(match) *Uncomment if matching at SIC2 level collapse (sum) gtotal vshipio, by(str_sic2) merge 1:m str_sic2 using `temp_msa2msa', keep(match) g gshare = gtotal/vshipio g gempshare = gshare*emp_sh g gemp_mpshare = gshare*emp_mp_sh collapse (sum) gempshare gemp_mpshare, by(MSA) save `temp_county2msa', replace import excel /// "`path_folder'\concordance\CPI MSA to Unemployment (OMB) MSA.xlsx", /// sheet("CPI codes") firstrow clear rename CPIArea area_name rename Code MSA merge 1:1 MSA using `temp_county2msa', assert(master match) keep(match) drop _merge save "`path_folder'\gov_msa.dta", replace log close