# # 2000 data: NAICS Based Measure
# 
# # NAICS is available in CBP data post 1997.
# 
# 
# # cbp employment raw data
# cbp <- fread(paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/CBP/county/cbp",substr(st_yr,3,4),"co.txt"))
# cbp[,naics:=as.numeric(naics)]
# cbp <- cbp[!is.na(naics)]
# 
# cbp[,imp_emp:=n1_4*2.5+n5_9*7+n10_19*15+n20_49*35+n50_99*75+n100_249*175+n250_499*375+n500_999*750+n1000_1*1250+n1000_2*2000+n1000_3*3750+n1000_4*10000]
# 
# cbp[,imp_emp:=ifelse(!is.na(emp) & emp>0,emp,imp_emp)]
# 
# cbp[,fips:=paste0(str_pad(fipstate,2,"left","0"),str_pad(fipscty,3,"left","0"))]
# 
# cbp <- cbp[,c("fips","naics","imp_emp")]
# 
# naics_sic <- fread("David Dorn/NAICS97_6_to_SIC87_4.csv")
# cbp <- merge(cbp,naics_sic,by.x="naics",by.y="naics6",allow.cartesian = T)
# cbp[,imp_emp:=imp_emp*weight]
# 
# cbp[,emp_sic_n:=sum(imp_emp),by=sic4]
# cbp[,emp_fips:=sum(imp_emp),by=fips]
# 
# 
# 
# 
# us_imports <- fread("David Dorn/Imports_from_China_by_sic87dd_1991_2014.csv")
# us_imports <- us_imports[,c("sic87dd","l_import_usch_2000","l_import_usch_2007")]
# us_imports[,chg_st_ed:=l_import_usch_2007-l_import_usch_2000]
# 
# 
# 
# emp_naics <- merge(cbp,us_imports[,c("chg_st_ed","sic87dd")],by.x="sic4",by.y="sic87dd",all.x=T)
# 
# emp_naics[,chg_per_employee:=chg_st_ed/emp_sic_n]
# 
# emp_naics[,w_chg_per_employee:=chg_per_employee*imp_emp/emp_fips]
# 
# # d_tradeusch_pw_dr is our approximation of d_tradeusch_pw in ADH
# emp_czone <- emp_naics[,.(import_sensitivity=sum(w_chg_per_employee,na.rm=T)),by=fips]
# 
# 
# 
# 
# # Other country IV
# imports_oth <- fread("David Dorn/Imports_from_China_by_sic87dd_1991_2014.csv")
# imports_oth <- imports_oth[,c("sic87dd","l_import_otch_2000","l_import_otch_2007")]
# imports_oth[,chg_st_ed:=l_import_otch_2007-l_import_otch_2000]
# 
# cbp <- fread(paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/CBP/county/cbp",substr(st_yr-2,3,4),"co.txt"))
# cbp[,naics:=as.numeric(naics)]
# cbp <- cbp[!is.na(naics)]
# 
# cbp[,imp_emp:=n1_4*2.5+n5_9*7+n10_19*15+n20_49*35+n50_99*75+n100_249*175+n250_499*375+n500_999*750+n1000_1*1250+n1000_2*2000+n1000_3*3750+n1000_4*10000]
# 
# cbp[,imp_emp:=ifelse(!is.na(emp) & emp>0,emp,imp_emp)]
# 
# cbp[,fips:=paste0(str_pad(fipstate,2,"left","0"),str_pad(fipscty,3,"left","0"))]
# 
# cbp <- cbp[,c("fips","naics","imp_emp")]
# 
# naics_sic <- fread("David Dorn/NAICS97_6_to_SIC87_4.csv")
# cbp <- merge(cbp,naics_sic,by.x="naics",by.y="naics6",allow.cartesian = T)
# cbp[,imp_emp:=imp_emp*weight]
# 
# cbp[,emp_sic_n:=sum(imp_emp),by=sic4]
# cbp[,emp_fips:=sum(imp_emp),by=fips]
# 
# 
# emp_naics_oc <- merge(cbp,imports_oth[,c("chg_st_ed","sic87dd")],by.x="sic4",by.y="sic87dd",all.x=T)
# 
# emp_naics_oc[,chg_per_employee:=chg_st_ed/emp_sic_n]
# 
# emp_naics_oc[,w_chg_per_employee:=chg_per_employee*imp_emp/emp_fips]
# 
# # tradeotch_pw_lag_dr is our approximation of tradeotch_pw_lag in ADH
# emp_naics_oc <- emp_naics_oc[,.(iv_import_sensitivity=sum(w_chg_per_employee,na.rm=T)),by=fips]
# 
# measures <- merge(emp_czone,emp_naics_oc,by="fips")
# 
# measures[,x_bin:=ntile(import_sensitivity,3)]
# measures[,iv_bin:=ntile(iv_import_sensitivity,100)]
# 
# saveRDS(measures,"measures_county_levels_sic87dd.rds")
# pop_data <- list()
# 
# pop_2000 <- get_decennial(geography = "county", variables = "P001001",year = 2000)
# pop_2000 <- pop_2000[,c("GEOID","value")]
# names(pop_2000) <- c("fips","population")
# pop_2000 <- data.table(pop_2000)
# pop_2000[,fips:=as.numeric(fips)]
# pop_2000[,year:=2000]
# pop_data[[1]] <- pop_2000
# 
# #https://repository.duke.edu/catalog/f49b199b-1496-4636-91f3-36226c8e7f80
# pop_csv <- fread("county_population_2000_2010.csv")
# pop_csv <- pop_csv[year %in% 2001:2009, c("fips","tot_pop","year")]
# names(pop_csv) <- c("fips","population","year")
# pop_data[[2]] <- pop_csv
# 
# i=3
# for(yr in 2010:2019){
#   pop <- get_acs(geography = "county", variables = c("B01003_001"), year = yr)
#   pop <- data.table(pop)
#   pop <- dcast(pop,GEOID~variable,value.var = "estimate",fun.aggregate = sum)
#   names(pop) <- c("fips","population")
#   pop[,year:=yr]
#   pop_data[[i]] <- pop
#   i=i+1
# }
# 
# pop_data <- do.call(rbind,pop_data)
# 
# pop_data[,fips:=str_pad(as.character(fips),width = 5,side = "left",pad = "0")]
# 
# 
# 
# #https://apps.bea.gov/regional/downloadzip.cfm
# county_data <- fread("bea_data_1969_2021.csv")
# 
# county_data[,variable:=ifelse(Description=="Personal income (thousands of dollars)","total_income",
#                               ifelse(Description=="Population (persons) 1/","population",
#                                      ifelse(Description=="Per capita personal income (dollars) 2/","income_per_capita","")))]
# 
# county_data <- county_data[variable %in% c("income_per_capita")]
# county_data[,c("GeoName","Region","TableName","LineCode","IndustryClassification","Unit","Description"):=list(NULL)]
# setnames(county_data,"GeoFIPS","fips")
# 
# county_data <- melt(county_data,id.vars = c("fips","variable"))
# county_data[,value:=as.numeric(value)]
# county_data <- dcast(county_data,fips+variable.1~variable,value.var = "value",fun.aggregate = sum)
# county_data[,variable.1:=as.numeric(as.character(variable.1))]
# 
# setnames(county_data,"variable.1","year")
# 
# unemp <- fread("la.data.64.County.txt")
# unemp <- unemp[substr(series_id,1,5)=="LAUCN"]
# unemp[,fips:=substr(series_id,6,10)]
# unemp <- unemp[substr(series_id,19,20)=="03"]
# unemp[,value:=as.numeric(value)]
# unemp <- unemp[,.(unemp_rate=mean(value,na.rm=T)),by=.(fips,year)]
# unemp[,c("series_id","period","footnote_codes"):=list(NULL)]
# 
# county_data <- merge(county_data,unemp,by=c("fips","year"))
# county_data <- merge(county_data,pop_data,by=c("fips","year"))
# 
# saveRDS(county_data,"county_data.rds")
# sod <- list()
# i=1
# for(fl in list.files(path="C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/SOD/data",full.names = T)){
#   sod[[i]] <- fread(fl,select = c("YEAR","CERT","DEPSUMBR","DEPSUM","RSSDHCR","RSSDID","STCNTYBR","ASSET","UNINUMBR","SIMS_ESTABLISHED_DATE","SIMS_LATITUDE","SIMS_LONGITUDE","ADDRESBR"))
#   i=i+1
# }
# 
# sod <- rbindlist(sod,fill=T)
# 
# sod[,DEPSUMBR:=str_remove_all(DEPSUMBR,",")]
# sod[,DEPSUM:=str_remove_all(DEPSUM,",")]
# sod[,ASSET:=str_remove_all(ASSET,",")]
# 
# sod[,DEPSUMBR:= as.numeric(DEPSUMBR)]
# sod[,DEPSUM:= as.numeric(DEPSUM)]
# sod[,ASSET:= as.numeric(ASSET)]
# 
# sod[,fips:=str_pad(STCNTYBR,5,"left","0")]
# sod[,SIMS_ESTABLISHED_DATE:=as.Date(SIMS_ESTABLISHED_DATE,"%m/%d/%Y")]
# 
# sod[,br_id:=paste0(tolower(ADDRESBR),round(SIMS_LATITUDE,1),round(SIMS_LONGITUDE,1))]
# 
# 
# 
# 
# br <- list()
# i <- 1
# for(yr in 2001:2020){
#   temp <- sod[YEAR==yr]
#   temp_p <- sod[YEAR==(yr-1)]
#   temp_a <- sod[YEAR==(yr+1)]
#   
#   # openings <- temp[year(SIMS_ESTABLISHED_DATE)==yr]
#   openings <- temp[DEPSUMBR> 0 & !UNINUMBR %in% temp_p[DEPSUMBR> 0]$UNINUMBR]
#   openings <- rbind(openings,temp[DEPSUMBR> 0 & !br_id %in% temp_p[DEPSUMBR> 0]$br_id])
#   openings <- openings[!duplicated(openings)]
#   openings <- openings[,.(openings=.N),by=fips]
#   
#   
#   closings <- temp[DEPSUMBR> 0 & !UNINUMBR %in% temp_a[DEPSUMBR> 0]$UNINUMBR]
#   closings <- rbind(closings,temp[DEPSUMBR> 0 & !br_id %in% temp_a[DEPSUMBR> 0]$br_id])
#   closings <- closings[!duplicated(closings)]
#   closings <- closings[,.(closings=.N),by=fips]
#   
#   temp<- data.table(fips=unique(sod$fips),YEAR=yr)
#   temp <- merge(temp,openings,by="fips",all.x = T)
#   temp <- merge(temp,closings,by="fips",all.x = T)
#   temp[is.na(temp)] <- 0
#   
#   br[[i]] <- temp
#   i=i+1
# }
# 
# br <- do.call(rbind,br)
# setorder(br,fips,YEAR)
# 
# 
# saveRDS(sod,"sod_sum.rds")
# saveRDS(br,"br_open_close.rds")
measures <- readRDS("measures_county_levels_sic87dd.rds")
measures[,import_sensitivity:=(import_sensitivity-min(measures$import_sensitivity,na.rm=T)+1)]
measures[,iv_import_sensitivity:=(iv_import_sensitivity-min(measures$iv_import_sensitivity,na.rm=T)+1)]
measures[,high_x:=ntile(import_sensitivity,2)]
measures[,low_x:=2-high_x]
measures[,rev_x_bin:=3-x_bin]
measures[,is_q:=ntile(import_sensitivity,4)]
measures[,is_top_q:=ifelse(is_q==4,1,0)]
sod <- readRDS("sod_sum.rds")

asset_w <- sod[,c("RSSDID","YEAR","ASSET")]
asset_w <- asset_w[!duplicated(asset_w)]
asset_w[,tot_assets:=sum(ASSET,na.rm=T),by=YEAR]
asset_w[,w_asset:=ASSET/tot_assets]
asset_w[,q80:=quantile(ASSET,0.8,na.rm=T),by=YEAR]
asset_w[,q95:=quantile(ASSET,0.95,na.rm=T),by=YEAR]
asset_w[,size:=ifelse(ASSET <q80,1,ifelse(ASSET<q95,2,0))]

bank_import_exposure <- sod[YEAR %in% 1994:2000]

bank_import_exposure <- bank_import_exposure[,.(county_deposits=sum(DEPSUMBR,na.rm=T)),by=.(RSSDID,fips)]

bank_import_exposure <- merge( bank_import_exposure,measures,by="fips")

bank_import_exposure[,total_deposits:=sum(county_deposits,na.rm=T),by=RSSDID]

bank_import_exposure[,wa_exposure:=county_deposits*import_sensitivity/total_deposits]

bank_import_exposure <- bank_import_exposure[,.(wa_exposure=sum(wa_exposure,na.rm=T)),by=RSSDID]

bank_import_exposure[,exp_bin:=ntile(bank_import_exposure,3)]

bank_import_exposure[,exp_q:=ntile(wa_exposure,4)]
bank_import_exposure[,exp_top_q:=ifelse(exp_q==4,1,0)]
#https://www.dropbox.com/s/3y1qrylnnyn361a/hmdpanel17.zip?dl=0 Neil Bhutta website

hmda_link <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/HMDA lender/hmdapan2017.csv",select=c("code","hmprid",paste0("RSSD",10:17),paste0("RSSD0",8:9),paste0("ENTITY0",3:7)))

hmda_link[,hmda_id:=paste(code,hmprid)]

hmda_link[,RSSD:=ifelse(!is.na(RSSD17),RSSD17,
                           ifelse(!is.na(RSSD16),RSSD16,
                                  ifelse(!is.na(RSSD15),RSSD15,
                                         ifelse(!is.na(RSSD14),RSSD14,
                                                ifelse(!is.na(RSSD13),RSSD13,
                                                       ifelse(!is.na(RSSD12),RSSD12,
                                                              ifelse(!is.na(RSSD11),RSSD11,
                                                                     ifelse(!is.na(RSSD10),RSSD10,
                                                                            ifelse(!is.na(RSSD09),RSSD09,
                                                                                   ifelse(!is.na(RSSD08),RSSD08,
                                                                                          ifelse(!is.na(ENTITY07),ENTITY07,
                                                                                                 ifelse(!is.na(ENTITY06),ENTITY06,
                                                                                                        ifelse(!is.na(ENTITY05),ENTITY05,
                                                                                                               ifelse(!is.na(ENTITY04),ENTITY04,ENTITY03))))))))))))))]

hmda_link <- hmda_link[,c("hmda_id","RSSD")]
dbdir <- "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/hmda_2004_2017/hmda_2004_2017.db"
con_hmda <- dbConnect(RSQLite::SQLite(), dbdir)

years <- 2004:2017

hmda <- list()

i=1
for(i in 1:length(years)) {
  print(years[i])
  lar <- dbGetQuery(con_hmda,paste0("select
                                  lar.as_of_year as activity_year,
                                  lar.respondent_id,
                                  lar.census_tract,
                                  lar.loan_purpose,
                                  lar.loan_type,
                                  lar.action_taken,
                                  lar.agency_code,
                                  lar.owner_occupancy,
                                  lar.loan_amount_000s loan_amount,
                                  lar.applicant_income_000s applicant_income
                                from lar_",years[i]," as lar
                                where lar.action_taken in (1,3) and
                                      lar.lien_status=1 and
                                      lar.property_type=1
                                "))

  hmda[[i]] <- lar
}
## [1] 2004
## [1] 2005
## [1] 2006
## [1] 2007
## [1] 2008
## [1] 2009
## [1] 2010
## [1] 2011
## [1] 2012
## [1] 2013
## [1] 2014
## [1] 2015
## [1] 2016
## [1] 2017
rm(lar)
hmda <- do.call("rbind", hmda)
hmda <- data.table(hmda)
hmda[,hmda_id:=paste(agency_code,respondent_id)]
hmda[,fips:=substr(census_tract,1,5)]
setkey(hmda,hmda_id)

dbDisconnect(con_hmda)
hmda <- merge(hmda,hmda_link,by="hmda_id")
hmda <- merge(hmda,bank_import_exposure,by.x="RSSD",by.y="RSSDID")
hmda <- merge(hmda,measures,by="fips")
hmda[,approved:=ifelse(action_taken==1,1,0)]
hmda[,bank_fips:=paste(hmda_id,fips)]
gc()
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells    3979945   212.6    7200314   384.6    4006682   214.0
## Vcells 2088783385 15936.2 5450551913 41584.5 5446192539 41551.2
hmda <- hmda[log(1+wa_exposure)>=2 & log(1+wa_exposure)<=4 & log(import_sensitivity)>=2 & log(import_sensitivity)<=4]
sod <- readRDS("sod_sum.rds")
sod <- sod[,c("ASSET","RSSDID","YEAR")]
sod <- sod[!duplicated(sod)]
sod[,q80:=quantile(ASSET,0.8,na.rm=T),by=YEAR]
sod[,q95:=quantile(ASSET,0.95,na.rm=T),by=YEAR]
sod[,size:=ifelse(ASSET <q80,1,ifelse(ASSET<q95,2,0))]
sod[,c("q80","q95"):=list(NULL)]

hmda <- merge(hmda,sod,by.x = c("RSSD","activity_year"),by.y=c("RSSDID","YEAR"),all.x = T,allow.cartesian = T)
hmda_summary <- hmda[loan_purpose==1 & owner_occupancy==1 & action_taken==1,
                     .(no_loans=.N,
                       total_loan_amount =sum(loan_amount,na.rm=T),
                       mean_income=mean(applicant_income,na.rm=T),
                       mean_loan_amount=mean(loan_amount,na.rm=T)),by=.(activity_year,RSSD,fips)]
hmda_summary[,loan_to_income:=mean_loan_amount/mean_income]
hmda_summary <- merge(hmda_summary,bank_import_exposure,by.x="RSSD",by.y="RSSDID")
hmda_summary <- merge(hmda_summary,measures,by="fips")
hmda_summary[,bank_fips:=paste(RSSD,fips)]
hmda_summary[,fips_year:=paste(fips,activity_year)]
hmda_summary[,tot_loans:=sum(no_loans,na.rm=T),by=.(activity_year,RSSD)]
hmda_summary[,loan_frac:=no_loans/tot_loans]
hmda_summary[,tot_loan_amount_bk_yr:=sum(total_loan_amount,na.rm=T),by=.(activity_year,RSSD)]
hmda_summary[,loan_amt_frac:=total_loan_amount/tot_loan_amount_bk_yr]
hmda_summary <- merge(hmda_summary,sod,by.x = c("RSSD","activity_year"),by.y=c("RSSDID","YEAR"),all.x = T,allow.cartesian = T)

1 Loan Composition

1.1 New Purchase

Specification:

\[ log(no.loans.fips.frac) = is\_top\_q*exp\_top\_q \times Year.Dummies+Bank*County FE+YearFE \]

r <- list()
r[[1]] <- felm(log(0.00001+loan_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD,
               data=hmda_summary)
r[[2]] <- felm(log(0.00001+loan_amt_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD,
               data=hmda_summary)


coef_plot_1reg(r[[1]],"is_top_q:exp_top_q:factor(activity_year)",2004)+ggtitle("Dep. var: log(no_loan_frac")

coef_plot_1reg(r[[2]],"is_top_q:exp_top_q:factor(activity_year)",2004)+ggtitle("Dep. var: log(total_loan_amount_frac")

reg_formula <- as.formula("log(0.00001+loan_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD")

r <- list()

r[[1]] <- felm(reg_formula,data=hmda_summary[size==1])
r[[2]] <- felm(reg_formula,data=hmda_summary[size==2])
r[[3]] <- felm(reg_formula,data=hmda_summary[size==0])
# stargazer(r,type="text")


coef_plot_3reg(r[[1]],"Small banks",r[[2]],"Medium banks",r[[3]],"Large banks","is_top_q:exp_top_q:factor(activity_year)",2004)+
  ggtitle("log(loan_fraction) by bank size")

reg_formula <- as.formula("log(0.00001+loan_amt_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD")

r <- list()

r[[1]] <- felm(reg_formula,data=hmda_summary[size==1])
r[[2]] <- felm(reg_formula,data=hmda_summary[size==2])
r[[3]] <- felm(reg_formula,data=hmda_summary[size==0])
# stargazer(r,type="text")


coef_plot_3reg(r[[1]],"Small banks",r[[2]],"Medium banks",r[[3]],"Large banks","is_top_q:exp_top_q:factor(activity_year)",2004)+
  ggtitle("log(loan_amount_fraction) by bank size")

hmda_summary <- hmda[loan_purpose==3 & owner_occupancy==1 & action_taken==1,
                     .(no_loans=.N,
                       total_loan_amount =sum(loan_amount,na.rm=T),
                       mean_income=mean(applicant_income,na.rm=T),
                       mean_loan_amount=mean(loan_amount,na.rm=T)),by=.(activity_year,RSSD,fips)]
hmda_summary[,loan_to_income:=mean_loan_amount/mean_income]
hmda_summary <- merge(hmda_summary,bank_import_exposure,by.x="RSSD",by.y="RSSDID")
hmda_summary <- merge(hmda_summary,measures,by="fips")
hmda_summary[,bank_fips:=paste(RSSD,fips)]
hmda_summary[,tot_loans:=sum(no_loans,na.rm=T),by=.(activity_year,RSSD)]
hmda_summary[,loan_frac:=no_loans/tot_loans]
hmda_summary[,tot_loan_amount_bk_yr:=sum(total_loan_amount,na.rm=T),by=.(activity_year,RSSD)]
hmda_summary[,loan_amt_frac:=total_loan_amount/tot_loan_amount_bk_yr]
hmda_summary <- merge(hmda_summary,sod,by.x = c("RSSD","activity_year"),by.y=c("RSSDID","YEAR"),all.x = T,allow.cartesian = T)

1.2 Refinance

Specification:

\[ log(no.loans.fips.frac) = log(import.sensitivity)\times log(wa.exposure) \times Year.Dummies+Bank*County FE+YearFE \]

r <- list()
r[[1]] <- felm(log(0.00001+loan_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD,
               data=hmda_summary)
r[[2]] <- felm(log(0.00001+loan_amt_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD,
               data=hmda_summary)


coef_plot_1reg(r[[1]],"is_top_q:exp_top_q:factor(activity_year)",2004)+ggtitle("Dep. var: log(no_loan_frac")

coef_plot_1reg(r[[2]],"is_top_q:exp_top_q:factor(activity_year)",2004)+ggtitle("Dep. var: log(total_loan_amount_frac")

reg_formula <- as.formula("log(0.00001+loan_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD")

r <- list()

r[[1]] <- felm(reg_formula,data=hmda_summary[size==1])
r[[2]] <- felm(reg_formula,data=hmda_summary[size==2])
r[[3]] <- felm(reg_formula,data=hmda_summary[size==0])
# stargazer(r,type="text")


coef_plot_3reg(r[[1]],"Small banks",r[[2]],"Medium banks",r[[3]],"Large banks","is_top_q:exp_top_q:factor(activity_year)",2004)+
    ggtitle("log(loan_number_fraction) by bank size")

reg_formula <- as.formula("log(0.00001+loan_amt_frac)~is_top_q*exp_top_q*factor(activity_year)|bank_fips+activity_year|0|RSSD")

r <- list()

r[[1]] <- felm(reg_formula,data=hmda_summary[size==1])
r[[2]] <- felm(reg_formula,data=hmda_summary[size==2])
r[[3]] <- felm(reg_formula,data=hmda_summary[size==0])
# stargazer(r,type="text")


coef_plot_3reg(r[[1]],"Small banks",r[[2]],"Medium banks",r[[3]],"Large banks","is_top_q:exp_top_q:factor(activity_year)",2004)+
    ggtitle("log(loan_amount_fraction) by bank size")