# # 2000 data: NAICS Based Measure
#
# # NAICS is available in CBP data post 1997.
#
#
# # cbp employment raw data
# cbp <- fread(paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/CBP/county/cbp",substr(st_yr,3,4),"co.txt"))
# cbp[,naics:=as.numeric(naics)]
# cbp <- cbp[!is.na(naics)]
#
# cbp[,imp_emp:=n1_4*2.5+n5_9*7+n10_19*15+n20_49*35+n50_99*75+n100_249*175+n250_499*375+n500_999*750+n1000_1*1250+n1000_2*2000+n1000_3*3750+n1000_4*10000]
#
# cbp[,imp_emp:=ifelse(!is.na(emp) & emp>0,emp,imp_emp)]
#
# cbp[,fips:=paste0(str_pad(fipstate,2,"left","0"),str_pad(fipscty,3,"left","0"))]
#
# cbp <- cbp[,c("fips","naics","imp_emp")]
#
# naics_sic <- fread("David Dorn/NAICS97_6_to_SIC87_4.csv")
# cbp <- merge(cbp,naics_sic,by.x="naics",by.y="naics6",allow.cartesian = T)
# cbp[,imp_emp:=imp_emp*weight]
#
# cbp[,emp_sic_n:=sum(imp_emp),by=sic4]
# cbp[,emp_fips:=sum(imp_emp),by=fips]
#
#
#
#
# us_imports <- fread("David Dorn/Imports_from_China_by_sic87dd_1991_2014.csv")
# us_imports <- us_imports[,c("sic87dd","l_import_usch_2000","l_import_usch_2007")]
# us_imports[,chg_st_ed:=l_import_usch_2007-l_import_usch_2000]
#
#
#
# emp_naics <- merge(cbp,us_imports[,c("chg_st_ed","sic87dd")],by.x="sic4",by.y="sic87dd",all.x=T)
#
# emp_naics[,chg_per_employee:=chg_st_ed/emp_sic_n]
#
# emp_naics[,w_chg_per_employee:=chg_per_employee*imp_emp/emp_fips]
#
# # d_tradeusch_pw_dr is our approximation of d_tradeusch_pw in ADH
# emp_czone <- emp_naics[,.(import_sensitivity=sum(w_chg_per_employee,na.rm=T)),by=fips]
#
#
#
#
# # Other country IV
# imports_oth <- fread("David Dorn/Imports_from_China_by_sic87dd_1991_2014.csv")
# imports_oth <- imports_oth[,c("sic87dd","l_import_otch_2000","l_import_otch_2007")]
# imports_oth[,chg_st_ed:=l_import_otch_2007-l_import_otch_2000]
#
# cbp <- fread(paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/CBP/county/cbp",substr(st_yr-2,3,4),"co.txt"))
# cbp[,naics:=as.numeric(naics)]
# cbp <- cbp[!is.na(naics)]
#
# cbp[,imp_emp:=n1_4*2.5+n5_9*7+n10_19*15+n20_49*35+n50_99*75+n100_249*175+n250_499*375+n500_999*750+n1000_1*1250+n1000_2*2000+n1000_3*3750+n1000_4*10000]
#
# cbp[,imp_emp:=ifelse(!is.na(emp) & emp>0,emp,imp_emp)]
#
# cbp[,fips:=paste0(str_pad(fipstate,2,"left","0"),str_pad(fipscty,3,"left","0"))]
#
# cbp <- cbp[,c("fips","naics","imp_emp")]
#
# naics_sic <- fread("David Dorn/NAICS97_6_to_SIC87_4.csv")
# cbp <- merge(cbp,naics_sic,by.x="naics",by.y="naics6",allow.cartesian = T)
# cbp[,imp_emp:=imp_emp*weight]
#
# cbp[,emp_sic_n:=sum(imp_emp),by=sic4]
# cbp[,emp_fips:=sum(imp_emp),by=fips]
#
#
# emp_naics_oc <- merge(cbp,imports_oth[,c("chg_st_ed","sic87dd")],by.x="sic4",by.y="sic87dd",all.x=T)
#
# emp_naics_oc[,chg_per_employee:=chg_st_ed/emp_sic_n]
#
# emp_naics_oc[,w_chg_per_employee:=chg_per_employee*imp_emp/emp_fips]
#
# # tradeotch_pw_lag_dr is our approximation of tradeotch_pw_lag in ADH
# emp_naics_oc <- emp_naics_oc[,.(iv_import_sensitivity=sum(w_chg_per_employee,na.rm=T)),by=fips]
#
# measures <- merge(emp_czone,emp_naics_oc,by="fips")
#
# measures[,x_bin:=ntile(import_sensitivity,3)]
# measures[,iv_bin:=ntile(iv_import_sensitivity,100)]
#
# saveRDS(measures,"measures_county_levels_sic87dd.rds")
# pop_data <- list()
#
# pop_2000 <- get_decennial(geography = "county", variables = "P001001",year = 2000)
# pop_2000 <- pop_2000[,c("GEOID","value")]
# names(pop_2000) <- c("fips","population")
# pop_2000 <- data.table(pop_2000)
# pop_2000[,fips:=as.numeric(fips)]
# pop_2000[,year:=2000]
# pop_data[[1]] <- pop_2000
#
# #https://repository.duke.edu/catalog/f49b199b-1496-4636-91f3-36226c8e7f80
# pop_csv <- fread("county_population_2000_2010.csv")
# pop_csv <- pop_csv[year %in% 2001:2009, c("fips","tot_pop","year")]
# names(pop_csv) <- c("fips","population","year")
# pop_data[[2]] <- pop_csv
#
# i=3
# for(yr in 2010:2019){
# pop <- get_acs(geography = "county", variables = c("B01003_001"), year = yr)
# pop <- data.table(pop)
# pop <- dcast(pop,GEOID~variable,value.var = "estimate",fun.aggregate = sum)
# names(pop) <- c("fips","population")
# pop[,year:=yr]
# pop_data[[i]] <- pop
# i=i+1
# }
#
# pop_data <- do.call(rbind,pop_data)
#
# pop_data[,fips:=str_pad(as.character(fips),width = 5,side = "left",pad = "0")]
#
#
#
# #https://apps.bea.gov/regional/downloadzip.cfm
# county_data <- fread("bea_data_1969_2021.csv")
#
# county_data[,variable:=ifelse(Description=="Personal income (thousands of dollars)","total_income",
# ifelse(Description=="Population (persons) 1/","population",
# ifelse(Description=="Per capita personal income (dollars) 2/","income_per_capita","")))]
#
# county_data <- county_data[variable %in% c("income_per_capita")]
# county_data[,c("GeoName","Region","TableName","LineCode","IndustryClassification","Unit","Description"):=list(NULL)]
# setnames(county_data,"GeoFIPS","fips")
#
# county_data <- melt(county_data,id.vars = c("fips","variable"))
# county_data[,value:=as.numeric(value)]
# county_data <- dcast(county_data,fips+variable.1~variable,value.var = "value",fun.aggregate = sum)
# county_data[,variable.1:=as.numeric(as.character(variable.1))]
#
# setnames(county_data,"variable.1","year")
#
# unemp <- fread("la.data.64.County.txt")
# unemp <- unemp[substr(series_id,1,5)=="LAUCN"]
# unemp[,fips:=substr(series_id,6,10)]
# unemp <- unemp[substr(series_id,19,20)=="03"]
# unemp[,value:=as.numeric(value)]
# unemp <- unemp[,.(unemp_rate=mean(value,na.rm=T)),by=.(fips,year)]
# unemp[,c("series_id","period","footnote_codes"):=list(NULL)]
#
# county_data <- merge(county_data,unemp,by=c("fips","year"))
# county_data <- merge(county_data,pop_data,by=c("fips","year"))
#
# saveRDS(county_data,"county_data.rds")
# sod <- list()
# i=1
# for(fl in list.files(path="C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/SOD/data",full.names = T)){
# sod[[i]] <- fread(fl,select = c("YEAR","CERT","DEPSUMBR","DEPSUM","RSSDHCR","RSSDID","STCNTYBR","ASSET","UNINUMBR","SIMS_ESTABLISHED_DATE","SIMS_LATITUDE","SIMS_LONGITUDE","ADDRESBR"))
# i=i+1
# }
#
# sod <- rbindlist(sod,fill=T)
#
# sod[,DEPSUMBR:=str_remove_all(DEPSUMBR,",")]
# sod[,DEPSUM:=str_remove_all(DEPSUM,",")]
# sod[,ASSET:=str_remove_all(ASSET,",")]
#
# sod[,DEPSUMBR:= as.numeric(DEPSUMBR)]
# sod[,DEPSUM:= as.numeric(DEPSUM)]
# sod[,ASSET:= as.numeric(ASSET)]
#
# sod[,fips:=str_pad(STCNTYBR,5,"left","0")]
# sod[,SIMS_ESTABLISHED_DATE:=as.Date(SIMS_ESTABLISHED_DATE,"%m/%d/%Y")]
#
# sod[,br_id:=paste0(tolower(ADDRESBR),round(SIMS_LATITUDE,1),round(SIMS_LONGITUDE,1))]
#
#
#
#
# br <- list()
# i <- 1
# for(yr in 2001:2020){
# temp <- sod[YEAR==yr]
# temp_p <- sod[YEAR==(yr-1)]
# temp_a <- sod[YEAR==(yr+1)]
#
# # openings <- temp[year(SIMS_ESTABLISHED_DATE)==yr]
# openings <- temp[DEPSUMBR> 0 & !UNINUMBR %in% temp_p[DEPSUMBR> 0]$UNINUMBR]
# openings <- rbind(openings,temp[DEPSUMBR> 0 & !br_id %in% temp_p[DEPSUMBR> 0]$br_id])
# openings <- openings[!duplicated(openings)]
# openings <- openings[,.(openings=.N),by=fips]
#
#
# closings <- temp[DEPSUMBR> 0 & !UNINUMBR %in% temp_a[DEPSUMBR> 0]$UNINUMBR]
# closings <- rbind(closings,temp[DEPSUMBR> 0 & !br_id %in% temp_a[DEPSUMBR> 0]$br_id])
# closings <- closings[!duplicated(closings)]
# closings <- closings[,.(closings=.N),by=fips]
#
# temp<- data.table(fips=unique(sod$fips),YEAR=yr)
# temp <- merge(temp,openings,by="fips",all.x = T)
# temp <- merge(temp,closings,by="fips",all.x = T)
# temp[is.na(temp)] <- 0
#
# br[[i]] <- temp
# i=i+1
# }
#
# br <- do.call(rbind,br)
# setorder(br,fips,YEAR)
#
#
# saveRDS(sod,"sod_sum.rds")
# saveRDS(br,"br_open_close.rds")
measures <- readRDS("measures_county_levels_sic87dd.rds")
measures[,import_sensitivity:=(import_sensitivity-min(measures$import_sensitivity,na.rm=T)+1)]
measures[,iv_import_sensitivity:=(iv_import_sensitivity-min(measures$iv_import_sensitivity,na.rm=T)+1)]
measures[,high_x:=ntile(import_sensitivity,2)]
measures[,low_x:=2-high_x]
measures[,rev_x_bin:=3-x_bin]
measures[,is_q:=ntile(import_sensitivity,4)]
measures[,is_top_q:=ifelse(is_q==4,1,0)]
sod <- readRDS("sod_sum.rds")
asset_w <- sod[,c("RSSDID","YEAR","ASSET")]
asset_w <- asset_w[!duplicated(asset_w)]
asset_w[,tot_assets:=sum(ASSET,na.rm=T),by=YEAR]
asset_w[,w_asset:=ASSET/tot_assets]
asset_w[,q80:=quantile(ASSET,0.8,na.rm=T),by=YEAR]
asset_w[,q95:=quantile(ASSET,0.95,na.rm=T),by=YEAR]
asset_w[,size:=ifelse(ASSET <q80,1,ifelse(ASSET<q95,2,0))]
bank_import_exposure <- sod[YEAR %in% 1994:2000]
bank_import_exposure <- bank_import_exposure[,.(county_deposits=sum(DEPSUMBR,na.rm=T)),by=.(RSSDID,fips)]
bank_import_exposure <- merge( bank_import_exposure,measures,by="fips")
bank_import_exposure[,total_deposits:=sum(county_deposits,na.rm=T),by=RSSDID]
bank_import_exposure[,wa_exposure:=county_deposits*import_sensitivity/total_deposits]
bank_import_exposure <- bank_import_exposure[,.(wa_exposure=sum(wa_exposure,na.rm=T)),by=RSSDID]
bank_import_exposure[,exp_bin:=ntile(bank_import_exposure,3)]
bank_import_exposure[,exp_q:=ntile(wa_exposure,4)]
bank_import_exposure[,exp_top_q:=ifelse(exp_q==4,1,0)]
#https://www.dropbox.com/s/3y1qrylnnyn361a/hmdpanel17.zip?dl=0 Neil Bhutta website
hmda_link <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/HMDA lender/hmdapan2017.csv",select=c("code","hmprid",paste0("RSSD",10:17),paste0("RSSD0",8:9),paste0("ENTITY0",3:7)))
hmda_link[,hmda_id:=paste(code,hmprid)]
hmda_link[,RSSD:=ifelse(!is.na(RSSD17),RSSD17,
ifelse(!is.na(RSSD16),RSSD16,
ifelse(!is.na(RSSD15),RSSD15,
ifelse(!is.na(RSSD14),RSSD14,
ifelse(!is.na(RSSD13),RSSD13,
ifelse(!is.na(RSSD12),RSSD12,
ifelse(!is.na(RSSD11),RSSD11,
ifelse(!is.na(RSSD10),RSSD10,
ifelse(!is.na(RSSD09),RSSD09,
ifelse(!is.na(RSSD08),RSSD08,
ifelse(!is.na(ENTITY07),ENTITY07,
ifelse(!is.na(ENTITY06),ENTITY06,
ifelse(!is.na(ENTITY05),ENTITY05,
ifelse(!is.na(ENTITY04),ENTITY04,ENTITY03))))))))))))))]
hmda_link <- hmda_link[,c("hmda_id","RSSD")]
dbdir <- "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/hmda_2004_2017/hmda_2004_2017.db"
con_hmda <- dbConnect(RSQLite::SQLite(), dbdir)
years <- 2004:2017
hmda <- list()
i=1
for(i in 1:length(years)) {
print(years[i])
lar <- dbGetQuery(con_hmda,paste0("select
lar.as_of_year as activity_year,
lar.respondent_id,
lar.census_tract,
lar.loan_purpose,
lar.loan_type,
lar.action_taken,
lar.agency_code,
lar.owner_occupancy,
lar.loan_amount_000s loan_amount,
lar.applicant_income_000s applicant_income
from lar_",years[i]," as lar
where lar.action_taken in (1,3) and
lar.lien_status=1 and
lar.property_type=1
"))
hmda[[i]] <- lar
}
## [1] 2004
## [1] 2005
## [1] 2006
## [1] 2007
## [1] 2008
## [1] 2009
## [1] 2010
## [1] 2011
## [1] 2012
## [1] 2013
## [1] 2014
## [1] 2015
## [1] 2016
## [1] 2017
rm(lar)
hmda <- do.call("rbind", hmda)
hmda <- data.table(hmda)
hmda[,hmda_id:=paste(agency_code,respondent_id)]
hmda[,fips:=substr(census_tract,1,5)]
setkey(hmda,hmda_id)
dbDisconnect(con_hmda)
hmda <- merge(hmda,hmda_link,by="hmda_id")
hmda <- merge(hmda,bank_import_exposure,by.x="RSSD",by.y="RSSDID")
hmda <- merge(hmda,measures,by="fips")
hmda[,approved:=ifelse(action_taken==1,1,0)]
hmda[,bank_fips:=paste(hmda_id,fips)]
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3979945 212.6 7200314 384.6 4006682 214.0
## Vcells 2088783385 15936.2 5450551913 41584.5 5446192539 41551.2
hmda <- hmda[log(1+wa_exposure)>=2 & log(1+wa_exposure)<=4 & log(import_sensitivity)>=2 & log(import_sensitivity)<=4]
sod <- readRDS("sod_sum.rds")
sod <- sod[,c("ASSET","RSSDID","YEAR")]
sod <- sod[!duplicated(sod)]
sod[,q80:=quantile(ASSET,0.8,na.rm=T),by=YEAR]
sod[,q95:=quantile(ASSET,0.95,na.rm=T),by=YEAR]
sod[,size:=ifelse(ASSET <q80,1,ifelse(ASSET<q95,2,0))]
sod[,c("q80","q95"):=list(NULL)]
hmda <- merge(hmda,sod,by.x = c("RSSD","activity_year"),by.y=c("RSSDID","YEAR"),all.x = T,allow.cartesian = T)
hmda_summary <- hmda[loan_purpose==1 & owner_occupancy==1 & action_taken==1,
.(no_loans=.N,
total_loan_amount =sum(loan_amount,na.rm=T),
mean_income=mean(applicant_income,na.rm=T),
mean_loan_amount=mean(loan_amount,na.rm=T)),by=.(activity_year,RSSD,fips)]
hmda_summary[,loan_to_income:=mean_loan_amount/mean_income]
hmda_summary <- merge(hmda_summary,bank_import_exposure,by.x="RSSD",by.y="RSSDID")
hmda_summary <- merge(hmda_summary,measures,by="fips")
hmda_summary[,bank_fips:=paste(RSSD,fips)]
hmda_summary[,fips_year:=paste(fips,activity_year)]
hmda_summary[,tot_loans:=sum(no_loans,na.rm=T),by=.(activity_year,RSSD)]
hmda_summary[,loan_frac:=no_loans/tot_loans]
hmda_summary[,tot_loan_amount_bk_yr:=sum(total_loan_amount,na.rm=T),by=.(activity_year,RSSD)]
hmda_summary[,loan_amt_frac:=total_loan_amount/tot_loan_amount_bk_yr]
hmda_summary <- merge(hmda_summary,sod,by.x = c("RSSD","activity_year"),by.y=c("RSSDID","YEAR"),all.x = T,allow.cartesian = T)
Specification:
\[ log(no.loans.fips.frac) = is\_top\_q*exp\_top\_q \times Year.Dummies+Bank*County FE+YearFE \]
r <- list()
r[[1]] <- felm(log(0.00001+loan_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD,
data=hmda_summary)
r[[2]] <- felm(log(0.00001+loan_amt_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD,
data=hmda_summary)
coef_plot_1reg(r[[1]],"is_top_q:exp_top_q:factor(activity_year)",2004)+ggtitle("Dep. var: log(no_loan_frac")
coef_plot_1reg(r[[2]],"is_top_q:exp_top_q:factor(activity_year)",2004)+ggtitle("Dep. var: log(total_loan_amount_frac")
reg_formula <- as.formula("log(0.00001+loan_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD")
r <- list()
r[[1]] <- felm(reg_formula,data=hmda_summary[size==1])
r[[2]] <- felm(reg_formula,data=hmda_summary[size==2])
r[[3]] <- felm(reg_formula,data=hmda_summary[size==0])
# stargazer(r,type="text")
coef_plot_3reg(r[[1]],"Small banks",r[[2]],"Medium banks",r[[3]],"Large banks","is_top_q:exp_top_q:factor(activity_year)",2004)+
ggtitle("log(loan_fraction) by bank size")
reg_formula <- as.formula("log(0.00001+loan_amt_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD")
r <- list()
r[[1]] <- felm(reg_formula,data=hmda_summary[size==1])
r[[2]] <- felm(reg_formula,data=hmda_summary[size==2])
r[[3]] <- felm(reg_formula,data=hmda_summary[size==0])
# stargazer(r,type="text")
coef_plot_3reg(r[[1]],"Small banks",r[[2]],"Medium banks",r[[3]],"Large banks","is_top_q:exp_top_q:factor(activity_year)",2004)+
ggtitle("log(loan_amount_fraction) by bank size")
hmda_summary <- hmda[loan_purpose==3 & owner_occupancy==1 & action_taken==1,
.(no_loans=.N,
total_loan_amount =sum(loan_amount,na.rm=T),
mean_income=mean(applicant_income,na.rm=T),
mean_loan_amount=mean(loan_amount,na.rm=T)),by=.(activity_year,RSSD,fips)]
hmda_summary[,loan_to_income:=mean_loan_amount/mean_income]
hmda_summary <- merge(hmda_summary,bank_import_exposure,by.x="RSSD",by.y="RSSDID")
hmda_summary <- merge(hmda_summary,measures,by="fips")
hmda_summary[,bank_fips:=paste(RSSD,fips)]
hmda_summary[,tot_loans:=sum(no_loans,na.rm=T),by=.(activity_year,RSSD)]
hmda_summary[,loan_frac:=no_loans/tot_loans]
hmda_summary[,tot_loan_amount_bk_yr:=sum(total_loan_amount,na.rm=T),by=.(activity_year,RSSD)]
hmda_summary[,loan_amt_frac:=total_loan_amount/tot_loan_amount_bk_yr]
hmda_summary <- merge(hmda_summary,sod,by.x = c("RSSD","activity_year"),by.y=c("RSSDID","YEAR"),all.x = T,allow.cartesian = T)
Specification:
\[ log(no.loans.fips.frac) = log(import.sensitivity)\times log(wa.exposure) \times Year.Dummies+Bank*County FE+YearFE \]
r <- list()
r[[1]] <- felm(log(0.00001+loan_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD,
data=hmda_summary)
r[[2]] <- felm(log(0.00001+loan_amt_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD,
data=hmda_summary)
coef_plot_1reg(r[[1]],"is_top_q:exp_top_q:factor(activity_year)",2004)+ggtitle("Dep. var: log(no_loan_frac")
coef_plot_1reg(r[[2]],"is_top_q:exp_top_q:factor(activity_year)",2004)+ggtitle("Dep. var: log(total_loan_amount_frac")
reg_formula <- as.formula("log(0.00001+loan_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD")
r <- list()
r[[1]] <- felm(reg_formula,data=hmda_summary[size==1])
r[[2]] <- felm(reg_formula,data=hmda_summary[size==2])
r[[3]] <- felm(reg_formula,data=hmda_summary[size==0])
# stargazer(r,type="text")
coef_plot_3reg(r[[1]],"Small banks",r[[2]],"Medium banks",r[[3]],"Large banks","is_top_q:exp_top_q:factor(activity_year)",2004)+
ggtitle("log(loan_number_fraction) by bank size")
reg_formula <- as.formula("log(0.00001+loan_amt_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD")
r <- list()
r[[1]] <- felm(reg_formula,data=hmda_summary[size==1])
r[[2]] <- felm(reg_formula,data=hmda_summary[size==2])
r[[3]] <- felm(reg_formula,data=hmda_summary[size==0])
# stargazer(r,type="text")
coef_plot_3reg(r[[1]],"Small banks",r[[2]],"Medium banks",r[[3]],"Large banks","is_top_q:exp_top_q:factor(activity_year)",2004)+
ggtitle("log(loan_amount_fraction) by bank size")