# # 2000 data: NAICS Based Measure
#
# # NAICS is available in CBP data post 1997.
#
#
# # cbp employment raw data
# cbp <- fread(paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/CBP/county/cbp",substr(st_yr,3,4),"co.txt"))
# cbp[,naics:=as.numeric(naics)]
# cbp <- cbp[!is.na(naics)]
#
# cbp[,imp_emp:=n1_4*2.5+n5_9*7+n10_19*15+n20_49*35+n50_99*75+n100_249*175+n250_499*375+n500_999*750+n1000_1*1250+n1000_2*2000+n1000_3*3750+n1000_4*10000]
#
# cbp[,imp_emp:=ifelse(!is.na(emp) & emp>0,emp,imp_emp)]
#
# cbp[,fips:=paste0(str_pad(fipstate,2,"left","0"),str_pad(fipscty,3,"left","0"))]
#
# cbp <- cbp[,c("fips","naics","imp_emp")]
#
# naics_sic <- fread("David Dorn/NAICS97_6_to_SIC87_4.csv")
# cbp <- merge(cbp,naics_sic,by.x="naics",by.y="naics6",allow.cartesian = T)
# cbp[,imp_emp:=imp_emp*weight]
#
# cbp[,emp_sic_n:=sum(imp_emp),by=sic4]
# cbp[,emp_fips:=sum(imp_emp),by=fips]
#
#
#
#
# us_imports <- fread("David Dorn/Imports_from_China_by_sic87dd_1991_2014.csv")
# us_imports <- us_imports[,c("sic87dd","l_import_usch_2000","l_import_usch_2007")]
# us_imports[,chg_st_ed:=l_import_usch_2007-l_import_usch_2000]
#
#
#
# emp_naics <- merge(cbp,us_imports[,c("chg_st_ed","sic87dd")],by.x="sic4",by.y="sic87dd",all.x=T)
#
# emp_naics[,chg_per_employee:=chg_st_ed/emp_sic_n]
#
# emp_naics[,w_chg_per_employee:=chg_per_employee*imp_emp/emp_fips]
#
# # d_tradeusch_pw_dr is our approximation of d_tradeusch_pw in ADH
# emp_czone <- emp_naics[,.(import_sensitivity=sum(w_chg_per_employee,na.rm=T)),by=fips]
#
#
#
#
# # Other country IV
# imports_oth <- fread("David Dorn/Imports_from_China_by_sic87dd_1991_2014.csv")
# imports_oth <- imports_oth[,c("sic87dd","l_import_otch_2000","l_import_otch_2007")]
# imports_oth[,chg_st_ed:=l_import_otch_2007-l_import_otch_2000]
#
# cbp <- fread(paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/CBP/county/cbp",substr(st_yr-2,3,4),"co.txt"))
# cbp[,naics:=as.numeric(naics)]
# cbp <- cbp[!is.na(naics)]
#
# cbp[,imp_emp:=n1_4*2.5+n5_9*7+n10_19*15+n20_49*35+n50_99*75+n100_249*175+n250_499*375+n500_999*750+n1000_1*1250+n1000_2*2000+n1000_3*3750+n1000_4*10000]
#
# cbp[,imp_emp:=ifelse(!is.na(emp) & emp>0,emp,imp_emp)]
#
# cbp[,fips:=paste0(str_pad(fipstate,2,"left","0"),str_pad(fipscty,3,"left","0"))]
#
# cbp <- cbp[,c("fips","naics","imp_emp")]
#
# naics_sic <- fread("David Dorn/NAICS97_6_to_SIC87_4.csv")
# cbp <- merge(cbp,naics_sic,by.x="naics",by.y="naics6",allow.cartesian = T)
# cbp[,imp_emp:=imp_emp*weight]
#
# cbp[,emp_sic_n:=sum(imp_emp),by=sic4]
# cbp[,emp_fips:=sum(imp_emp),by=fips]
#
#
# emp_naics_oc <- merge(cbp,imports_oth[,c("chg_st_ed","sic87dd")],by.x="sic4",by.y="sic87dd",all.x=T)
#
# emp_naics_oc[,chg_per_employee:=chg_st_ed/emp_sic_n]
#
# emp_naics_oc[,w_chg_per_employee:=chg_per_employee*imp_emp/emp_fips]
#
# # tradeotch_pw_lag_dr is our approximation of tradeotch_pw_lag in ADH
# emp_naics_oc <- emp_naics_oc[,.(iv_import_sensitivity=sum(w_chg_per_employee,na.rm=T)),by=fips]
#
# measures <- merge(emp_czone,emp_naics_oc,by="fips")
#
# measures[,x_bin:=ntile(import_sensitivity,3)]
# measures[,iv_bin:=ntile(iv_import_sensitivity,100)]
#
# saveRDS(measures,"measures_county_levels_sic87dd.rds")
# pop_data <- list()
#
# pop_2000 <- get_decennial(geography = "county", variables = "P001001",year = 2000)
# pop_2000 <- pop_2000[,c("GEOID","value")]
# names(pop_2000) <- c("fips","population")
# pop_2000 <- data.table(pop_2000)
# pop_2000[,fips:=as.numeric(fips)]
# pop_2000[,year:=2000]
# pop_data[[1]] <- pop_2000
#
# #https://repository.duke.edu/catalog/f49b199b-1496-4636-91f3-36226c8e7f80
# pop_csv <- fread("county_population_2000_2010.csv")
# pop_csv <- pop_csv[year %in% 2001:2009, c("fips","tot_pop","year")]
# names(pop_csv) <- c("fips","population","year")
# pop_data[[2]] <- pop_csv
#
# i=3
# for(yr in 2010:2019){
# pop <- get_acs(geography = "county", variables = c("B01003_001"), year = yr)
# pop <- data.table(pop)
# pop <- dcast(pop,GEOID~variable,value.var = "estimate",fun.aggregate = sum)
# names(pop) <- c("fips","population")
# pop[,year:=yr]
# pop_data[[i]] <- pop
# i=i+1
# }
#
# pop_data <- do.call(rbind,pop_data)
#
# pop_data[,fips:=str_pad(as.character(fips),width = 5,side = "left",pad = "0")]
#
#
#
# #https://apps.bea.gov/regional/downloadzip.cfm
# county_data <- fread("bea_data_1969_2021.csv")
#
# county_data[,variable:=ifelse(Description=="Personal income (thousands of dollars)","total_income",
# ifelse(Description=="Population (persons) 1/","population",
# ifelse(Description=="Per capita personal income (dollars) 2/","income_per_capita","")))]
#
# county_data <- county_data[variable %in% c("income_per_capita")]
# county_data[,c("GeoName","Region","TableName","LineCode","IndustryClassification","Unit","Description"):=list(NULL)]
# setnames(county_data,"GeoFIPS","fips")
#
# county_data <- melt(county_data,id.vars = c("fips","variable"))
# county_data[,value:=as.numeric(value)]
# county_data <- dcast(county_data,fips+variable.1~variable,value.var = "value",fun.aggregate = sum)
# county_data[,variable.1:=as.numeric(as.character(variable.1))]
#
# setnames(county_data,"variable.1","year")
#
# unemp <- fread("la.data.64.County.txt")
# unemp <- unemp[substr(series_id,1,5)=="LAUCN"]
# unemp[,fips:=substr(series_id,6,10)]
# unemp <- unemp[substr(series_id,19,20)=="03"]
# unemp[,value:=as.numeric(value)]
# unemp <- unemp[,.(unemp_rate=mean(value,na.rm=T)),by=.(fips,year)]
# unemp[,c("series_id","period","footnote_codes"):=list(NULL)]
#
# county_data <- merge(county_data,unemp,by=c("fips","year"))
# county_data <- merge(county_data,pop_data,by=c("fips","year"))
#
# saveRDS(county_data,"county_data.rds")
# sod <- list()
# i=1
# for(fl in list.files(path="C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/SOD/data",full.names = T)){
# sod[[i]] <- fread(fl,select = c("YEAR","CERT","DEPSUMBR","DEPSUM","RSSDHCR","RSSDID","STCNTYBR","ASSET","UNINUMBR","SIMS_ESTABLISHED_DATE","SIMS_LATITUDE","SIMS_LONGITUDE","ADDRESBR"))
# i=i+1
# }
#
# sod <- rbindlist(sod,fill=T)
#
# sod[,DEPSUMBR:=str_remove_all(DEPSUMBR,",")]
# sod[,DEPSUM:=str_remove_all(DEPSUM,",")]
# sod[,ASSET:=str_remove_all(ASSET,",")]
#
# sod[,DEPSUMBR:= as.numeric(DEPSUMBR)]
# sod[,DEPSUM:= as.numeric(DEPSUM)]
# sod[,ASSET:= as.numeric(ASSET)]
#
# sod[,fips:=str_pad(STCNTYBR,5,"left","0")]
# sod[,SIMS_ESTABLISHED_DATE:=as.Date(SIMS_ESTABLISHED_DATE,"%m/%d/%Y")]
#
# sod[,br_id:=paste0(tolower(ADDRESBR),round(SIMS_LATITUDE,1),round(SIMS_LONGITUDE,1))]
#
#
#
#
# br <- list()
# i <- 1
# for(yr in 2001:2020){
# temp <- sod[YEAR==yr]
# temp_p <- sod[YEAR==(yr-1)]
# temp_a <- sod[YEAR==(yr+1)]
#
# # openings <- temp[year(SIMS_ESTABLISHED_DATE)==yr]
# openings <- temp[DEPSUMBR> 0 & !UNINUMBR %in% temp_p[DEPSUMBR> 0]$UNINUMBR]
# openings <- rbind(openings,temp[DEPSUMBR> 0 & !br_id %in% temp_p[DEPSUMBR> 0]$br_id])
# openings <- openings[!duplicated(openings)]
# openings <- openings[,.(openings=.N),by=fips]
#
#
# closings <- temp[DEPSUMBR> 0 & !UNINUMBR %in% temp_a[DEPSUMBR> 0]$UNINUMBR]
# closings <- rbind(closings,temp[DEPSUMBR> 0 & !br_id %in% temp_a[DEPSUMBR> 0]$br_id])
# closings <- closings[!duplicated(closings)]
# closings <- closings[,.(closings=.N),by=fips]
#
# temp<- data.table(fips=unique(sod$fips),YEAR=yr)
# temp <- merge(temp,openings,by="fips",all.x = T)
# temp <- merge(temp,closings,by="fips",all.x = T)
# temp[is.na(temp)] <- 0
#
# br[[i]] <- temp
# i=i+1
# }
#
# br <- do.call(rbind,br)
# setorder(br,fips,YEAR)
#
#
# saveRDS(sod,"sod_sum.rds")
# saveRDS(br,"br_open_close.rds")
measures <- readRDS("measures_county_levels_sic87dd.rds")
measures[,import_sensitivity:=(import_sensitivity-min(measures$import_sensitivity,na.rm=T)+1)]
measures[,iv_import_sensitivity:=(iv_import_sensitivity-min(measures$iv_import_sensitivity,na.rm=T)+1)]
measures[,high_x:=ntile(import_sensitivity,2)-1]
measures[,low_x:=2-high_x]
measures[,rev_x_bin:=3-x_bin]
measures[,is_q:=ntile(import_sensitivity,4)]
measures[,is_top_q:=ifelse(is_q==4,1,0)]
sod <- readRDS("sod_sum.rds")
asset_w <- sod[,c("CERT","YEAR","ASSET")]
asset_w <- asset_w[!duplicated(asset_w)]
asset_w[,tot_assets:=sum(ASSET,na.rm=T),by=YEAR]
asset_w[,w_asset:=ASSET/tot_assets]
asset_w[,q80:=quantile(ASSET,0.8,na.rm=T),by=YEAR]
asset_w[,q95:=quantile(ASSET,0.95,na.rm=T),by=YEAR]
asset_w[,size:=ifelse(ASSET <q80,1,ifelse(ASSET<q95,2,0))]
bank_import_exposure <- sod[YEAR %in% 1994:2000]
bank_import_exposure <- bank_import_exposure[,.(county_deposits=sum(DEPSUMBR,na.rm=T)),by=.(RSSDID,fips)]
bank_import_exposure <- merge( bank_import_exposure,measures,by="fips")
bank_import_exposure[,total_deposits:=sum(county_deposits,na.rm=T),by=RSSDID]
bank_import_exposure[,wa_exposure:=county_deposits*import_sensitivity/total_deposits]
bank_import_exposure <- bank_import_exposure[,.(wa_exposure=sum(wa_exposure,na.rm=T)),by=RSSDID]
bank_import_exposure[,exp_q:=ntile(wa_exposure,4)]
bank_import_exposure[,exp_top_q:=ifelse(exp_q==4,1,0)]
bank_import_exposure[,exp_bin:=ntile(bank_import_exposure,3)]
cra_ts <- read_fst("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/CRA/Transmittal/cra_transmittal_1997_2021")
cra_ts <- cra_ts[,c("respondentid","agencycode","activityyear","id_rssd","taxid")]
cra_disc <- read_fst("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/CRA/Disclosure/cra_disc_1997_2021.fst")
cra_disc <- data.table(cra_disc)
cra_disc <- cra_disc[reportlevel=="040"]
cra_disc[,fips:=paste0(state,county)]
cra_disc <- merge(cra_disc,cra_ts,by=c("respondentid","agencycode","activityyear"))
cra_disc[,RSSD:=as.numeric(id_rssd)]
cra <- merge(cra_disc,bank_import_exposure,by.x="RSSD",by.y="RSSDID")
cra <- merge(cra,measures,by="fips")
cra[,bank_fips:=paste(agencycode,respondentid,fips)]
cra[,hmda_id:=paste(agencycode,respondentid)]
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3372275 180.1 5595154 298.9 3926582 209.8
## Vcells 181684414 1386.2 1044704606 7970.5 1632303997 12453.5
cra <- cra[log(1+wa_exposure)>=2 & log(1+wa_exposure)<=4 & log(import_sensitivity)>=2 & log(import_sensitivity)<=4]
cra[,no_total:=no_sba_lt_100k+no_sba_100_250k+no_sba_250_1000k]
cra[,amount_total:=amount_sba_lt_100k+amount_sba_100_250k+amount_sba_250_1000k]
based on county-year panel
cra_summary <- cra[activityyear>=1999,
.(no_loans=sum(no_total,na.rm=T),
total_loan_amount =sum(amount_total,na.rm=T)
),by=.(activityyear,fips)]
cra_summary <- merge(cra_summary,measures,by="fips")
\[ log(depvar) = is\_top\_q \times times Year Dummies+CountyFE+YearFE \]
min_yr = 2000
reg_formula <- as.formula("log(1+no_loans)~is_top_q*factor(activityyear)|fips+activityyear|0|fips")
reg_formula2 <- as.formula("log(1+no_loans)~log(import_sensitivity)*factor(activityyear)|fips+activityyear|0|fips")
r <- list()
r[[1]] <- felm(reg_formula,data=cra_summary)
r[[2]] <- felm(reg_formula2,data=cra_summary)
reg_formula <- as.formula("log(1+total_loan_amount)~is_top_q*factor(activityyear)|fips+activityyear|0|fips")
reg_formula2 <- as.formula("log(1+total_loan_amount)~log(import_sensitivity)*factor(activityyear)|fips+activityyear|0|fips")
r[[3]] <- felm(reg_formula,data=cra_summary)
r[[4]] <- felm(reg_formula2,data=cra_summary)
# stargazer(r,type="text",column.labels = c("log(loans)","log(loan amount)"),column.separate = rep(2,2),omit.stat = "ser")
coef_plot_1reg(r[[1]],"is_top_q:factor(activityyear)",1999)+ggtitle("Dep var:log(no of loans)")
coef_plot_1reg(r[[3]],"is_top_q:factor(activityyear)",1999)+ggtitle("Dep var:log(loan amount)")
# coef_plot_1reg(r[[2]],"log(import_sensitivity):log(wa_exposure):factor(activityyear)",1997)+ggtitle("Dep var:log(no of loans)")
# coef_plot_1reg(r[[4]],"log(import_sensitivity):log(wa_exposure):factor(activityyear)",1997)+ggtitle("Dep var:log(total loan amount)")
cra_summary <- cra[activityyear>=1999,
.(no_loans=sum(no_total,na.rm=T),
total_loan_amount =sum(amount_total,na.rm=T)
),by=.(activityyear,RSSD,fips)]
cra_summary <- merge(cra_summary,bank_import_exposure,by.x="RSSD",by.y="RSSDID")
cra_summary <- merge(cra_summary,measures,by="fips")
cra_summary[,bank_fips:=paste(RSSD,fips)]
cra_summary[,fips_year:=paste(fips,activityyear)]
cra_summary[,tot_loans:=sum(no_loans,na.rm=T),by=.(activityyear,RSSD)]
cra_summary[,loan_frac:=no_loans/tot_loans]
cra_summary[,tot_loan_amount_bk_yr:=sum(total_loan_amount,na.rm=T),by=.(activityyear,RSSD)]
cra_summary[,loan_amt_frac:=total_loan_amount/tot_loan_amount_bk_yr]
cra_summary[,activityyear:=as.numeric(activityyear)]
sod <- readRDS("sod_sum.rds")
sod <- sod[,c("ASSET","RSSDID","YEAR")]
sod <- sod[!duplicated(sod)]
sod[,q80:=quantile(ASSET,0.8,na.rm=T),by=YEAR]
sod[,q95:=quantile(ASSET,0.95,na.rm=T),by=YEAR]
sod[,size:=ifelse(ASSET <q80,1,ifelse(ASSET<q95,2,0))]
sod[,c("q80","q95"):=list(NULL)]
cra_summary <- merge(cra_summary,sod,by.x=c("RSSD","activityyear"),by.y=c("RSSDID","YEAR"),all.x=T)
\[ log(no.loans.fips.frac) = is\_top\_q\times exp\_top\_q \times Year.Dummies+Bank*County FE+YearFE \]
r <- list()
r[[1]] <- felm(log(0.00001+loan_frac)~is_top_q*exp_top_q*factor(activityyear)|activityyear+bank_fips|0|RSSD,
data=cra_summary)
r[[2]] <- felm(log(0.00001+loan_amt_frac)~is_top_q*exp_top_q*factor(activityyear)|activityyear+bank_fips|0|RSSD,
data=cra_summary)
coef_plot_1reg(r[[1]],"is_top_q:exp_top_q:factor(activityyear)",1999)+ggtitle("Dep. var: log(no_loan_frac)")
coef_plot_1reg(r[[2]],"is_top_q:exp_top_q:factor(activityyear)",1999)+ggtitle("Dep. var: log(total_loan_amount_frac)")
reg_formula <- as.formula("log(0.00001+loan_frac)~is_top_q*exp_top_q*factor(activityyear)|activityyear+bank_fips|0|RSSD")
r <- list()
r[[1]] <- felm(reg_formula,data=cra_summary[size==1])
r[[2]] <- felm(reg_formula,data=cra_summary[size==2])
r[[3]] <- felm(reg_formula,data=cra_summary[size==0])
# stargazer(r,type="text")
coef_plot_3reg(r[[1]],"Small banks",r[[2]],"Medium banks",r[[3]],"Large banks","is_top_q:exp_top_q:factor(activityyear)",1999)+
ggtitle("log(loan_no_fraction) by bank size")
reg_formula <- as.formula("log(0.00001+loan_amt_frac)~is_top_q*exp_top_q*factor(activityyear)|activityyear+bank_fips|0|RSSD")
r <- list()
r[[1]] <- felm(reg_formula,data=cra_summary[size==1])
r[[2]] <- felm(reg_formula,data=cra_summary[size==2])
r[[3]] <- felm(reg_formula,data=cra_summary[size==0])
# stargazer(r,type="text")
coef_plot_3reg(r[[1]],"Small banks",r[[2]],"Medium banks",r[[3]],"Large banks","is_top_q:exp_top_q:factor(activityyear)",1999)+
ggtitle("log(loan_amount_fraction) by bank size")