rm(list=ls())
library(data.table)
library(fst)
library(RSQLite)
library(DBI)  
library(dplyr)
library(tidyr)
library(lfe)
library(stargazer)
library(stringr)
library(ggplot2)
library(readxl)
library(reshape2)
library(stringi)
library(zoo)
source('C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/functions.R')
fit_felm <- function(formula, data, sample_frac,drop_dv_outliers=TRUE) {
  dep_var_name <- as.character(formula[[2]])
  if(drop_dv_outliers==TRUE) {
    quantiles <- quantile(data[[dep_var_name]], c(0.01, 0.99), na.rm = TRUE)
    subset_data <- data[data[[dep_var_name]] > quantiles[1] & data[[dep_var_name]] < quantiles[2], ]
  } else {
    subset_data = data
  }
  subset_data <- subset_data[sample(nrow(subset_data), size = nrow(subset_data)*sample_frac), ]
  felm(formula, data = subset_data)
}
hmda_link_entity <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/HMDA lender/hmdapan2017.csv",select=c("code","hmprid",paste0("RSSD",12:17)))#,paste0("ENTITY",15:16)
hmda_link_entity[,hmda_id:=paste0(code,"-",hmprid)]
hmda_link_entity[,c("code","hmprid"):=list(NULL)]
hmda_link_entity <- melt(hmda_link_entity,id.vars = c("hmda_id"))
hmda_link_entity <- data.table(hmda_link_entity)
hmda_link_entity <- hmda_link_entity[!is.na(value)]
hmda_link_entity[,year:=as.character(variable)]
hmda_link_entity[,year:=substr(year,5,6)]
setnames(hmda_link_entity,"value","RSSD")
hmda_link_entity[,variable:=NULL]
hmda_link_entity[,year:=as.numeric(year)]
hmda_link_entity[,year:=ifelse(year>85,1900+year,2000+year)]


hmda_link_2021 <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/HMDA lender/hmdapan2021.csv",select=c("LEI","RSSD21","CODE21","HMPRID"))

hmda_link_2021[,hmda_id:=paste0(CODE21,"-",HMPRID)]
hmda_link_2021 <- hmda_link_2021[,c("LEI","RSSD21","hmda_id")]
gse_limit_files <- list.files("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/GSE Limits",full.names = T,pattern = ".xls")
gse_limits <- list()
i=1
for(fl in gse_limit_files) {
  yr <- substr(fl,117,120)
  if(yr<=2014) {
    temp <- read_xls(fl,sheet = 1,skip=2,col_names = F)  
  } else{
    temp <- read_xlsx(fl,sheet = 1,skip=2,col_names = F)  
  }
  
  names(temp) <- c("statefips","countyfips","countyname","cbsa","statecode","gse_limit","gse_limit_2","gse_limit_3","gse_limit_4")
  temp <- data.table(temp)
  temp[,year:=yr]
  gse_limits[[i]] <- temp
  i=i+1
}

gse_limits <- rbindlist(gse_limits)
gse_limits[,gse_limit:=floor(gse_limit/1000)]
gse_limits[,year:=as.numeric(year)]



gse_limits[,county:=paste0(statefips,countyfips)]

gse_limits <- gse_limits[,c("county","year","gse_limit")]

setorder(gse_limits,county,year)
gse_limits[,gse_limit_1:=lag(gse_limit),by=county]
gse_limits[,gse_limit_2:=lead(gse_limit),by=county]

gse_limits[,limit_change:=gse_limit-gse_limit_1]
zrd <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Zillow Research Data/County_zhvi_uc_sfrcondo_tier_0.67_1.0_sm_sa_month.csv")


zrd[,fips:=paste0(str_pad(StateCodeFIPS,2,pad="0"),str_pad(MunicipalCodeFIPS,3,pad="0"))]
zrd[,c("RegionID","RegionName","SizeRank","StateName","RegionType","State","Metro","StateCodeFIPS","MunicipalCodeFIPS")] <- NULL
zrd <- melt(zrd,id.vars = c("fips"))
zrd$month <- as.Date(as.character(zrd$variable),origin = "1970-01-01")
zrd$variable <- NULL
names(zrd) <- c("county","zhvi","month")
zrd <- data.table(zrd)
zrd[,asofdate:=year(month)]

zrd <- zrd[,.(zhvi=mean(zhvi,na.rm=T)),by=.(county,asofdate)]

zrd <- merge(zrd,gse_limits,by.x=c("county","asofdate"),by.y=c("county","year"))
zrd[,zhvi:=zhvi/1000]
zrd[,above_conforming_limit:=ifelse(zhvi>gse_limit,1,0)]

above_conforming_limit <- zrd[,.(above_conforming_limit=sum(above_conforming_limit,na.rm=T)),by=asofdate]
gse_limits_yr <- gse_limits[,.(
  gse_limit=median(gse_limit,na.rm=T),
  limit_change=median(limit_change,na.rm=T)),
  by=year]


gse_limits_yr <- merge(gse_limits_yr,above_conforming_limit,by.x="year",by.y="asofdate")

setnames(gse_limits_yr,"gse_limit","gse_limit_year")
data <- gse_limits[year %in% c(2016:2023),.(.N),by=.(gse_limit,year)]

years <- unique(data$year)

# Initialize an empty list to store the data.tables
data_tables_list <- list()

# Loop through each year, filter the data, and store in the list
for (yr in years) {
  # Filter data for the current year
  dt_year <- data[year == yr, .(gse_limit, N)]
  
  # Add an 'id' column that is simply the row number

  

  
  # Rearrange columns to have 'id' as the first column
  
  setorder(dt_year,gse_limit)
  
  other <- dt_year[N==1]
  dt_year <- dt_year[N>1]
  new_row <- setNames(as.list(rep(NA, ncol(dt_year))), names(dt_year))
  new_row[["gse_limit"]] <- -1
  new_row[["N"]] <- nrow(other)

# Convert the list to a data.table and append
  dt_year <- rbind(dt_year, as.data.table((new_row)))
  dt_year[, id := .I]
  
  setnames(dt_year, "gse_limit", paste0("gse_limit_", yr))
  setnames(dt_year, "N", paste0("N_", yr))
  
  
  # Add the data.table to the list, with the name being the year
  data_tables_list[[as.character(yr)]] <- dt_year
}

merged_data <- Reduce(function(x, y) merge(x, y, by = "id", all = TRUE), data_tables_list)

print(merged_data)
##     id gse_limit_2016 N_2016 gse_limit_2017 N_2017 gse_limit_2018 N_2018
##  1:  1            417   3000            424   2996            453   3014
##  2:  2            426      6            426      6            458     17
##  3:  3            437     20            433      3            483      2
##  4:  4            458     27            437      6            494     14
##  5:  5            474      5            458     17            517     11
##  6:  6            483      2            466     14            529     13
##  7:  7            517      7            483      2            535     20
##  8:  8            523      7            488      4            600      2
##  9:  9            529      4            493     10            603      7
## 10: 10            535     20            517      7            615      2
## 11: 11            540      3            529      4            625     11
## 12: 12            600      3            535     20            667      3
## 13: 13            625    112            592      3            679    103
## 14: 14            657      2            598      7             -1     15
## 15: 15             -1     16            600      2             NA     NA
## 16: 16             NA     NA            625     11             NA     NA
## 17: 17             NA     NA            636    103             NA     NA
## 18: 18             NA     NA            657      2             NA     NA
## 19: 19             NA     NA             -1     17             NA     NA
##     gse_limit_2019 N_2019 gse_limit_2020 N_2020 gse_limit_2021 N_2021
##  1:            484   3035            510   3030            548   3062
##  2:            517      7            520      7            586     13
##  3:            529      3            529      2            596     10
##  4:            534     14            535     20            598      4
##  5:            535     20            563     13            600      2
##  6:            552      4            569      4            625      7
##  7:            561     10            575     10            646      4
##  8:            600      2            600      2            724      7
##  9:            625     11            625     10            726      2
## 10:            688      7            646      4            739      2
## 11:            718      2            690      8            765      2
## 12:            726    106            726      2            776      3
## 13:             -1     13            741      3            817      2
## 14:             NA     NA            762      2            822    102
## 15:             NA     NA            765    102             -1     11
## 16:             NA     NA             -1     14             NA     NA
## 17:             NA     NA             NA     NA             NA     NA
## 18:             NA     NA             NA     NA             NA     NA
## 19:             NA     NA             NA     NA             NA     NA
##     gse_limit_2022 N_2022 gse_limit_2023 N_2023
##  1:            647   3074            726   3073
##  2:            648      2            740      2
##  3:            675      4            744      4
##  4:            684     10            763      4
##  5:            694     13            787     10
##  6:            726      2            828      7
##  7:            770      7            890     13
##  8:            856      2            948      3
##  9:            891      3            977      4
## 10:            970    102           1089    103
## 11:             -1     14             -1     11
## 12:             NA     NA             NA     NA
## 13:             NA     NA             NA     NA
## 14:             NA     NA             NA     NA
## 15:             NA     NA             NA     NA
## 16:             NA     NA             NA     NA
## 17:             NA     NA             NA     NA
## 18:             NA     NA             NA     NA
## 19:             NA     NA             NA     NA
stargazer(merged_data,summary = F,type="text")
## 
## =====================================================================================================================================================================================
##    id gse_limit_2016 N_2016 gse_limit_2017 N_2017 gse_limit_2018 N_2018 gse_limit_2019 N_2019 gse_limit_2020 N_2020 gse_limit_2021 N_2021 gse_limit_2022 N_2022 gse_limit_2023 N_2023
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## 1  1       417       3,000       424       2,996       453       3,014       484       3,035       510       3,030       548       3,062       647       3,074       726       3,073 
## 2  2       426         6         426         6         458         17        517         7         520         7         586         13        648         2         740         2   
## 3  3       437         20        433         3         483         2         529         3         529         2         596         10        675         4         744         4   
## 4  4       458         27        437         6         494         14        534         14        535         20        598         4         684         10        763         4   
## 5  5       474         5         458         17        517         11        535         20        563         13        600         2         694         13        787         10  
## 6  6       483         2         466         14        529         13        552         4         569         4         625         7         726         2         828         7   
## 7  7       517         7         483         2         535         20        561         10        575         10        646         4         770         7         890         13  
## 8  8       523         7         488         4         600         2         600         2         600         2         724         7         856         2         948         3   
## 9  9       529         4         493         10        603         7         625         11        625         10        726         2         891         3         977         4   
## 10 10      535         20        517         7         615         2         688         7         646         4         739         2         970        102       1,089       103  
## 11 11      540         3         529         4         625         11        718         2         690         8         765         2          -1         14         -1         11  
## 12 12      600         3         535         20        667         3         726        106        726         2         776         3                                               
## 13 13      625        112        592         3         679        103         -1         13        741         3         817         2                                               
## 14 14      657         2         598         7          -1         15                              762         2         822        102                                              
## 15 15       -1         16        600         2                                                     765        102         -1         11                                              
## 16 16                            625         11                                                     -1         14                                                                    
## 17 17                            636        103                                                                                                                                      
## 18 18                            657         2                                                                                                                                       
## 19 19                             -1         17                                                                                                                                      
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
ggplot(gse_limits_yr, aes(x=year)) +
  geom_col(aes(y=gse_limit_year),fill="dodgerblue",alpha=0.5) +
  scale_x_continuous(breaks = unique(gse_limits_yr$year))+
  labs(x="",y="GSE limit ($'000)")+
  theme_minimal()

ggplot(gse_limits_yr, aes(x=year)) +
  geom_col(aes(y=above_conforming_limit),fill="dodgerblue",alpha=0.5) +
  scale_x_continuous(breaks = unique(gse_limits_yr$year))+
  labs(x="",y="Number of counties")+
  theme_minimal()

ggplot(gse_limits_yr, aes(x=year, y=limit_change)) +
  geom_col(filll="dodgerblue",size=2) +
  # geom_point(size=3,color="dodgerblue4")+
  scale_x_continuous(breaks = unique(gse_limits_yr$year))+
  labs(x="",y="GSE limit Change ($'000)",title = "GSE Limit History")+
  theme_minimal()

hmda_con <- dbConnect(RSQLite::SQLite(), "C:/Users/dratnadiwakara2/Downloads/HMDA/hmda.db")

yrs <- as.character(2012:2017)
hmda <- list()
i=1
for(yr in yrs) {
  print(yr)
  hmda[[i]] <- data.table(dbGetQuery(hmda_con,
                                     paste0("select
                                                agencycode,
                                                respondentid,
                                                asofdate,
                                                purposeofloan,
                                                typeofpurchaser,
                                                amountofloan,
                                                censustract,
                                                applicantincome,
                                                typeofloan,
                                                actiontaken,
                                                denialreason1,
                                                denialreason2,
                                                denialreason3
                                            from lar_",yr," 
                                            where 
                                            actiontaken in (1,3,6) and
                                            propertytype==1")))
  i=i+1
}
## [1] "2012"
## [1] "2013"
## [1] "2014"
## [1] "2015"
## [1] "2016"
## [1] "2017"
hmda <- rbindlist(hmda)
hmda <- data.table(hmda)
hmda[,county:=substr(censustract,1,5)]


hmda <- merge(hmda,gse_limits,by.x=c("asofdate","county"),by.y=c("year","county"),all.x=T)
hmda <- merge(hmda,gse_limits_yr,by.x="asofdate",by.y="year")
hmda[,gse_limit:=ifelse(is.na(gse_limit),gse_limit_year,gse_limit)]

hmda[,amountofloan:=as.numeric(amountofloan)]
hmda[,dist_gse_limit:=amountofloan-gse_limit]

hmda[,jumbo_this_year:=ifelse(amountofloan>gse_limit,1,0)]
hmda[,jumbo_last_year:=ifelse(amountofloan>gse_limit_1,1,0)]
hmda[,jumbo_next_year:=ifelse(amountofloan>gse_limit_2,1,0)]

hmda[,hmda_id:=paste0(agencycode,"-",respondentid)]

hmda <- merge(hmda,hmda_link_entity,by.x=c("asofdate","hmda_id"),by.y=c("year","hmda_id"))
hmda <- hmda[RSSD>0 & !is.na(RSSD)]
gc()
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells    2724408   145.5    4748061   253.6    4748061   253.6
## Vcells 1929342739 14719.8 4822981824 36796.5 4799716535 36619.0
yrs <- 2018:2022
hmda_post <- list()
i=1
for(yr in yrs) {
  print(yr)
  hmda_post[[i]] <- data.table(dbGetQuery(hmda_con,
                                     paste0("select
                                                lei,
                                                asofdate,
                                                purposeofloan,
                                                typeofpurchaser,
                                                amountofloan,
                                                censustract,
                                                actiontaken,
                                                conforming_loan_limit,
                                                combined_loan_to_value_ratio,
                                                interest_rate,
                                                rate_spread,
                                                total_loan_costs,
                                                debt_to_income_ratio,
                                                typeofloan,
                                                applicant_age,
                                                applicantincome,
                                                denialreason1,
                                                denialreason2,
                                                denialreason3
                                            from lar_",yr," 
                                            where 
                                            actiontaken in (1,3,6) and
                                            derived_dwelling_category='Single Family (1-4 Units):Site-Built'")))
  i=i+1
}
## [1] 2018
## [1] 2019
## [1] 2020
## [1] 2021
## [1] 2022
hmda_post <- rbindlist(hmda_post)
hmda_post <- data.table(hmda_post)
hmda_post[,county:=substr(censustract,1,5)]


hmda_post <- merge(hmda_post,gse_limits,by.x=c("asofdate","county"),by.y=c("year","county"),all.x=T)
hmda_post <- merge(hmda_post,gse_limits_yr,by.x="asofdate",by.y="year")
hmda_post[,gse_limit:=ifelse(is.na(gse_limit),gse_limit_year,gse_limit)]

hmda_post[,amountofloan:=as.numeric(amountofloan)]
hmda_post[,dist_gse_limit:=amountofloan-gse_limit]

hmda_post[,jumbo_this_year:=ifelse(amountofloan>gse_limit,1,0)]
hmda_post[,jumbo_last_year:=ifelse(amountofloan>gse_limit_1,1,0)]
hmda_post[,jumbo_next_year:=ifelse(amountofloan>gse_limit_2,1,0)]

hmda_post <- merge(hmda_post,hmda_link_2021,by.x=c("lei"),by.y=c("LEI"))

hmda_post <- hmda_post[RSSD21>0 & !is.na(RSSD21)]

hmda_post[,purposeofloan:=ifelse(purposeofloan>10,3,purposeofloan)]
setnames(hmda_post,"RSSD21","RSSD")
gc()
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells    2967015   158.5    4748061   253.6    4748061   253.6
## Vcells 3961287347 30222.3 6945269825 52988.3 6938149716 52933.9
# hmda_1_summary <- hmda[actiontaken==1,.(no_loans=.N,total_loan_amt=sum(amountofloan)),
#                              by=.(RSSD,asofdate)]
# 
# hmda_2_summary <- hmda_post[actiontaken==1,.(no_loans=.N,total_loan_amt=sum(amountofloan)),
#                              by=.(RSSD,asofdate)]


jumbo_100k_by_lender <- hmda[  actiontaken==1 & typeofloan==1 & 
                               dist_gse_limit>10 & 
                               asofdate %in% 2012:2016,
                             .(no_of_jumbo_loans=.N,
                               total_jumbo_loan_amt=sum(amountofloan)),
                             by=RSSD]

all_by_lender <- hmda[asofdate %in% 2012:2016 & actiontaken==1 & typeofloan==1  ,
                             .(all_no_of_loans=.N,
                               all_total_loan_amt=sum(amountofloan)),
                             by=RSSD]

jumbo_100k_by_lender <- merge(jumbo_100k_by_lender,all_by_lender,by="RSSD")

jumbo_100k_by_lender[,no_of_jumbo_loans_pct:=no_of_jumbo_loans*100/all_no_of_loans]
jumbo_100k_by_lender[,total_jumbo_loan_amt_pct:=total_jumbo_loan_amt*100/all_total_loan_amt]



HHI <- hmda[  actiontaken==1 & typeofloan==1 & 
                               asofdate %in% 2012:2016,.(amountofloan=sum(amountofloan,na.rm=T)),
                    by=.(RSSD,county)]

HHI[, total_loan_by_rssd_asofdate := sum(amountofloan), by = .(RSSD)]
HHI[, market_share := amountofloan / total_loan_by_rssd_asofdate]
HHI[, squared_market_share := market_share^2, by = .(RSSD, county)]


HHI <- HHI[market_share>0, .(HHI = sum(squared_market_share),.N), by = .(RSSD)]

jumbo_100k_by_lender <- merge(jumbo_100k_by_lender,HHI,by="RSSD",all.x=T)
con_call <- dbConnect(RSQLite::SQLite(), "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Call Reports/call_reports.db")

con_ubpr <- dbConnect(RSQLite::SQLite(),  "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Call Reports/ubpr.db")


data_periods <- apply(expand.grid(c("0331","0630","0930","1231"), 2012:2023), 1, paste, collapse="")
# data_periods <- c(data_periods,"03312023")
# data_periods <- apply(expand.grid(c("1231"), 2022), 1, paste, collapse="")

upbr <- list()

i=1
for (dp in data_periods) {
  ubpr_dp <- as.Date(dp,"%m%d%Y")
  ubpr_yr <- year(ubpr_dp)
    
  ubpr_data <- dbGetQuery(con_ubpr,paste0("select 
                                        UBPR1754 htm_cost,
                                        UBPR1771 htm_value,
                                        UBPRE566 htm_assets_ubpr,
                                        UBPRE630 roe,
                                        UBPRE565 afs_assets_upbr,
                                        UBPRKX40 nii_assets,
                                        UBPRE023 nii_earnings_assets,
                                        UBPRE002 interest_expense_assets_1,
                                        UBPRD087 asset_growth,
                                        UBPRE162 deposit_growth_qt,
                                        UBPRE209 deposit_growth_yr,
                                        UBPR1410 real_estate_loans,
                                        UBPR1766 commercial_industrial_loans,
                                        UBPR2122 total_loans,
                                        UBPR2170 total_assets,
                                        UBPR2746 cre_construction_land_dev_loans,
                                        UBPRD214 re_loans_sfr,
                                        UBPRD220 sfr_first_lein_ma,
                                        UBPRE006 provisions_to_assets,
                                        UBPRE397 re_loans_net_loss,
                                        UBPRE401 sfr_net_loss,
                                        UBPRE408 commercial_industrial_net_loss,
                                        UBPRE410 loans_to_individuals_net_loss,
                                        UBPR3210 total_equity,
                                        UBPR0081+UBPR0071 cash,
                                        UBPRD588 total_securities,
                                        UBPRB559 mortgage_backed_securities,
                                        UBPRB558 govt_agency_securities,
                                        UBPR2200 total_deposits,
                                        UBPRE660 risk_weighted_assets,
                                        UBPRE678 interest_income_assets,
                                        UBPRE679 interest_expense_assets_2,
                                        UBPR7414 delinquent_to_loans,
                                        UBPR7400 personnel_expense_to_assets,
                                        UBPRE090 assets_per_employee,
                                        UBPRD667+UBPRD669 pastdue_nonaccrual_loans,
                                        UBPRK437+UBPRK426 time_deposits,
                                        UBPRA549+UBPRA555+UBPRA564+UBPRA570 assets_maturity_less_than_3_months,
                                        UBPRA550+UBPRA556+UBPRA248+UBPRA565+UBPRA571+UBPRA247 assets_maturity_3_mn_1yr,
                                        UBPRA551+UBPRA557+UBPRA561+UBPRA566+UBPRA572 assets_maturity_1_to_3_yr,
                                        UBPRA552+UBPRA558+UBPRA562+UBPRA567+UBPRA573 assets_maturity_3_to_5_yr,
                                        UBPRA553+UBPRA559+UBPRA568+UBPRA574 assets_maturity_5_to_15_yr,
                                        UBPRA554+UBPRA560+UBPRA569+UBPRA575 assets_maturity_more_than_15_yr,
                                        UBPRA549+UBPRA555 security_maturity_less_than_3_months,
                                        UBPRA550+UBPRA556 security_maturity_3_mn_1yr,
                                        UBPRA551+UBPRA557 security_maturity_1_to_3_yr,
                                        UBPRA552+UBPRA558 security_maturity_3_to_5_yr,
                                        UBPRA553+UBPRA559 security_maturity_5_to_15_yr,
                                        UBPRA554+UBPRA560 security_maturity_more_than_15_yr,
                                        UBPRA564 re_loans_maturity_less_than_3_months,
                                        UBPRA570 other_loans_maturity_less_than_3_months,
                                        UBPRA565 re_loans_maturity_3_mn_1yr,
                                        UBPRA571 other_loans_maturity_3_mn_1yr,
                                        UBPRA566 re_loans_maturity_1_to_3_yr,
                                        UBPRA572 other_loans_maturity_1_to_3_yr,
                                        UBPRA567 re_loans_maturity_3_to_5_yr,
                                        UBPRA573 other_loans_maturity_3_to_5_yr,
                                        UBPRA568 re_loans_maturity_5_to_15_yr,
                                        UBPRA574 other_loans_maturity_5_to_15_yr,
                                        UBPRA569 re_loans_maturity_more_than_15_yr,
                                        UBPRA575 other_loans_maturity_more_than_15_yr,
                                        UBPRD535 deposits_over_1_yr,
                                        UBPRD536 deposits_over_3_yr,
                                        UBPRE485+UBPRE486 mtg_delinq_pct,
                                        UBPRE487+UBPRE488 mtg_delinq_pct_2,
                                        UBPRD488 total_capital_ratio,
                                        UBPR3833 unused_commitments_total_gt1yr,
                                        UBPRE262 unused_commitments_assets_home_equity,
                                        UBPRE263 unused_commitments_assets_credit_card,
                                        UBPRE264+UBPRE265+UBPRE225 unused_commitments_assets_cre,
                                        UBPRE218 unused_commitments_assets_sfr_construction,
                                        UBPRE266 unused_commitments_assets_all_other,
                                        UBPRE262+UBPRE263+UBPRE264+UBPRE265+UBPRE225+UBPRE218+UBPRE266 unused_commitments_assets_total,
                                        IDRSSD
                                      from ubpr_",ubpr_yr," 
                                      where 
                                      data_period=",as.numeric(ubpr_dp),""))
  
  call_1 <- dbGetQuery(con_call,paste0("select 
                                    RCON2200 deposits_domestic,
                                    RCONF049 deposits_domestic_insured,
                                    RCONF051 deposits_domestic_uninsured,
                                    RCONF050 deposits_domestic_no_insured,
                                    RCONF052 deposits_domestic_no_uninsured,
                                    RCON6631 deposits_domestic_non_interest,
                                    RCON2210 demand_deposits,
                                    RCON1797+RCON5367+RCON5368+RCON1460+RCONF158 secured_by_residential_real_estate,
                                    RCONF160+RCONF161+RCONF159 secured_by_nonresidential_real_estate,
                                    RCONB538+RCONB539+RCONK137+RCONK207 consumer_loans,
                                    RCON0010 cash_due_from_depositories,
                                    RCON0090 balance_due_from_fed,
                                    RCON5565+RCON5567+RCON5569 smb_secured_by_re,
                                    RCON5571+RCON5573+RCON5575 smb_ci,
                                    IDRSSD 
                                 from call_1_",dp," "))


  call_2 <- dbGetQuery(con_call,paste0("select 
                                        FDIC_Certificate_Number,
                                        Financial_Institution_Name,
                                        Financial_Institution_State,
                                        data_period,
                                        RIADC017 data_processing_expenses,
                                        RIAD4135 salaries_employee_benefits,
                                        RIAD4150 no_of_full_time_employees,
                                        IDRSSD 
                                     from call_2_",dp," "))
  
  
  ubpr_data <- merge(ubpr_data,call_2,by="IDRSSD")
  ubpr_data <- merge(ubpr_data,call_1,by="IDRSSD")
  ubpr_data <- data.table(ubpr_data)

  upbr[[i]] <- ubpr_data

  i=i+1
}

upbr <- rbindlist(upbr,fill=T)

dbDisconnect(con_call)
dbDisconnect(con_ubpr)

upbr[,wa_maturiy_loans:=(re_loans_maturity_less_than_3_months*0.25+re_loans_maturity_3_mn_1yr*0.75+re_loans_maturity_1_to_3_yr*2+re_loans_maturity_3_to_5_yr*4+re_loans_maturity_5_to_15_yr*10+re_loans_maturity_more_than_15_yr*20 + other_loans_maturity_less_than_3_months*0.25+other_loans_maturity_3_mn_1yr*0.75+other_loans_maturity_1_to_3_yr*2+other_loans_maturity_3_to_5_yr*4+other_loans_maturity_5_to_15_yr*10+other_loans_maturity_more_than_15_yr*20)/(re_loans_maturity_less_than_3_months+re_loans_maturity_3_mn_1yr+re_loans_maturity_1_to_3_yr+re_loans_maturity_3_to_5_yr+re_loans_maturity_5_to_15_yr+re_loans_maturity_more_than_15_yr+other_loans_maturity_less_than_3_months+other_loans_maturity_3_mn_1yr+other_loans_maturity_1_to_3_yr+other_loans_maturity_3_to_5_yr+other_loans_maturity_5_to_15_yr+other_loans_maturity_more_than_15_yr)]

upbr[,wa_maturiy_assets:=(assets_maturity_less_than_3_months*0.25+assets_maturity_3_mn_1yr*0.75+assets_maturity_1_to_3_yr*2+assets_maturity_3_to_5_yr*4+assets_maturity_5_to_15_yr*10+assets_maturity_more_than_15_yr*20)/(assets_maturity_less_than_3_months+assets_maturity_3_mn_1yr+assets_maturity_1_to_3_yr+assets_maturity_3_to_5_yr+assets_maturity_5_to_15_yr+assets_maturity_more_than_15_yr)]

upbr[,wa_maturiy_security:=(security_maturity_less_than_3_months*0.25+security_maturity_3_mn_1yr*0.75+security_maturity_1_to_3_yr*2+security_maturity_3_to_5_yr*4+security_maturity_5_to_15_yr*10+security_maturity_more_than_15_yr*20)/(security_maturity_less_than_3_months+security_maturity_3_mn_1yr+security_maturity_1_to_3_yr+security_maturity_3_to_5_yr+security_maturity_5_to_15_yr+security_maturity_more_than_15_yr)]

# upbr[,interest_deposits:=total_assets*interest_expense_assets/total_deposits]
upbr[,time_deposit_frac:=time_deposits/total_deposits]
upbr[,deposits_over_1_yr_frac:=deposits_over_1_yr/total_deposits]
upbr[,deposits_over_3_yr_frac:=deposits_over_3_yr/total_deposits]
upbr[,pastdue_nonaccrual_loans_to_loans:=pastdue_nonaccrual_loans*100/total_loans]
upbr[,re_loans_sfr_assets:=re_loans_sfr/total_assets]
upbr[,data_processing_expenses_nii:=((data_processing_expenses*100)/total_assets)/nii_assets]
upbr[,ci_assets:=commercial_industrial_loans/total_assets]
upbr[,uninsured_deposits_assets:=deposits_domestic_uninsured/total_assets]

upbr[,mbs_assets:=mortgage_backed_securities/total_assets]
upbr[,govt_assets:=govt_agency_securities/total_assets]

upbr[,smb_re_assets:=smb_secured_by_re/total_assets]
upbr[,smb_ci_assets:=smb_ci/total_assets]


upbr[,creconst_assets:=cre_construction_land_dev_loans/total_assets]
upbr_2016 <- upbr[data_period=="12312016"]
upbr_2016 <- merge(upbr_2016,jumbo_100k_by_lender,by.x="IDRSSD",by.y="RSSD")
upbr_2016[,re_loan_frac_assets:=sfr_first_lein_ma*100/total_assets]
upbr_2016[,jumbo_frac_assets:=total_jumbo_loan_amt*100/(total_assets)]
upbr_2016[,re_loan_dependance:=ntile(re_loan_frac_assets,100)]
upbr_2016[,jumbo_dependance:=ntile(jumbo_frac_assets,4)]
upbr_2016[,jumbo_dependance:=ifelse(jumbo_frac_assets>15 & jumbo_frac_assets<25,5,
                                    ifelse(jumbo_frac_assets>=25,6,jumbo_dependance))]

upbr_2016[,jumbo_7590:=ntile(jumbo_frac_assets,100)]
upbr_2016[,jumbo_7590:=ifelse(jumbo_7590<75,1,ifelse(jumbo_7590<90,2,3))]

upbr_2016[,loans_assets:=total_loans/total_assets]
upbr_2016[,deposits_assets:=total_deposits/total_assets]
upbr_2016[,securities_assets:=total_securities/total_assets]

ggplot(upbr_2016[jumbo_frac_assets<50],aes(x=jumbo_frac_assets,fill=factor(jumbo_7590)))+geom_histogram(alpha=0.5,bins=60)

high_jumbo_cutoff <- 4:6
ggplot(upbr_2016[jumbo_frac_assets < 50], aes(x = jumbo_frac_assets, fill = factor(jumbo_7590))) +
  geom_histogram(alpha = 0.75, bins = 60) +
  scale_fill_manual(values = c("skyblue", "dodgerblue", "dodgerblue4"), 
                    labels = c("Less than 75th %tile", "75-90th %tile", "Greater than 90th %tile"),
                    name = "")+
  labs(x="Jumbo loans 2012-2016/Assets",y="Number of banks")+
   theme_minimal()+
  theme(legend.position = "bottom")

bank_list <- upbr_2016[,c("IDRSSD","Financial_Institution_Name","total_assets","N","HHI","jumbo_frac_assets")]
setorder(bank_list,-jumbo_frac_assets)
# write.csv(bank_list,"C:/Users/dratnadiwakara2/Downloads/bank_list.csv")
library(DescTools)

round_values <- function(x) {
  ifelse(x > 100, round(x, 0), ifelse(x > 10, round(x, 1), round(x, 2)))
}

upbr_2016[!is.na(jumbo_7590) ,.N,by=jumbo_7590]
##    jumbo_7590    N
## 1:          1 2684
## 2:          2  540
## 3:          3  396
columns_to_include <- c('total_assets','loans_assets','deposits_assets','re_loan_frac_assets','jumbo_frac_assets','securities_assets','nii_assets','interest_expense_assets_1',"HHI","N") #,'deposits_state_hhi','deposits_college_hhi','deposits_income_hhi'

# result_table <- bank_wa_mean_data[!is.na(bank_size), lapply(.SD, function(x) list(Mean = mean(x, na.rm = TRUE))), by = bank_size, .SDcols = columns_to_include]
result_table <- upbr_2016[!is.na(jumbo_7590) , lapply(.SD, function(x) {
  x_winsorized <- Winsorize(x, probs = c(0.05, 0.95), na.rm = TRUE)
  list(Winsorized_Mean = mean(x_winsorized, na.rm = TRUE))
}), by = jumbo_7590, .SDcols = columns_to_include]

setorder(result_table,jumbo_7590)

transposed_result_table <- t(result_table)

colnames(transposed_result_table) <- unlist(transposed_result_table[1,])

transposed_result_table <- transposed_result_table[-1,]

transposed_result_table <- cbind(rownames(transposed_result_table), transposed_result_table)

transposed_result_table <- as.data.table(transposed_result_table)

transposed_result_table <- data.frame(lapply(transposed_result_table, function(x) unlist(x, use.names = FALSE)))

transposed_result_table <- data.table(transposed_result_table)

numeric_cols <- names(transposed_result_table)[sapply(transposed_result_table, is.numeric)]

transposed_result_table[, (numeric_cols) := lapply(.SD, round_values), .SDcols = numeric_cols]

# print(transposed_result_table)

stargazer(transposed_result_table,summary=F,type="text",rownames = F)
## 
## ===================================================
## V1                          X1      X2       X3    
## ---------------------------------------------------
## total_assets              743,364 903,420 1,061,157
## loans_assets               0.660   0.740    0.770  
## deposits_assets            0.840   0.830    0.820  
## re_loan_frac_assets       15.400  21.400   28.600  
## jumbo_frac_assets          2.980   9.370   23.600  
## securities_assets          0.200   0.130    0.110  
## nii_assets                 3.380   3.450    3.340  
## interest_expense_assets_1  0.390   0.460    0.510  
## HHI                        0.350   0.340    0.380  
## N                         32.900  49.300   74.300  
## ---------------------------------------------------
hmda[,jumbo:=ifelse(dist_gse_limit>1,1,ifelse(dist_gse_limit< -1,0,NA))]
hmda_post[,jumbo:=ifelse(dist_gse_limit>1,1,ifelse(dist_gse_limit< -1,0,NA))]


hmda_1_summary <- hmda[actiontaken==1 & purposeofloan %in% c(1,3) & typeofloan==1 ,.(no_loans=.N,total_loan_amt=sum(amountofloan)),
                             by=.(RSSD,asofdate,jumbo)]

hmda_2_summary <- hmda_post[actiontaken==1 & purposeofloan %in% c(1,3) & typeofloan==1   ,.(no_loans=.N,total_loan_amt=sum(amountofloan)),
                             by=.(RSSD,asofdate,jumbo)]


hmda_summary <- rbind(hmda_1_summary,hmda_2_summary)

hmda_summary <- merge(hmda_summary,upbr_2016[,c("IDRSSD","jumbo_dependance","re_loan_frac_assets","jumbo_frac_assets","total_assets","jumbo_7590")],
                      by.x="RSSD",by.y="IDRSSD",all.x=T)

hmda_summary[,new_loans_assets:=total_loan_amt/total_assets]

hmda_summary[,log_total_loan_amt:=log(1+total_loan_amt)]

hmda_summary[,high_jumbo_assets:=ifelse(jumbo_dependance %in% high_jumbo_cutoff,1,0)]
hmda_summary[,high_jumbo_assets2:=ifelse(jumbo_dependance %in% 5:6,1,0)]
hmda_summary_plot <- hmda_summary[asofdate<2020 & asofdate>=2014]

# hmda_summary_plot[,high_jumbo_assets:=ifelse(jumbo_dependance %in% high_jumbo_cutoff,1,0)]

hmda_summary_plot <- hmda_summary_plot[!is.na(jumbo_7590) & !is.na(jumbo),.(normalized_amount=sum(total_loan_amt,na.rm=T)),by=.(asofdate,jumbo_7590,jumbo)]

hmda_summary_plot[, base_amount := normalized_amount[asofdate == 2014], by = .(jumbo_7590,jumbo)]

hmda_summary_plot[,normalized_amount:=normalized_amount/base_amount]

g1 <- ggplot(hmda_summary_plot[jumbo==1 & !is.na(jumbo_7590)],
       aes(x=asofdate,y=normalized_amount,color=factor(jumbo_7590)))+
  geom_line()+
  scale_color_manual(values = c("skyblue", "dodgerblue", "dodgerblue4"), 
                    labels = c("Less than 75th %tile", "75-90th %tile", "Greater than 90th %tile"),
                    name = "")+
  labs(x="",y="Loan amount (normalized by 2016)")+
  theme_minimal()+
  theme(legend.position="bottom")+ggtitle("Jumbo")

g2 <- ggplot(hmda_summary_plot[jumbo==0 & !is.na(jumbo_7590)],
       aes(x=asofdate,y=normalized_amount,color=factor(jumbo_7590)))+
  geom_line()+
  scale_color_manual(values = c("skyblue", "dodgerblue", "dodgerblue4"), 
                    labels = c("Less than 75th %tile", "75-90th %tile", "Greater than 90th %tile"),
                    name = "")+
  labs(x="",y="")+
  theme_minimal()+
  theme(legend.position="bottom")+ggtitle("Non-Jumbo")

library(gridExtra)
grid.arrange(g1,g2,nrow=1)

r <- list()
r[[1]] <- felm(log_total_loan_amt~log(jumbo_frac_assets)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=hmda_summary[asofdate %in% 2012:2021 & jumbo==1 ])
r[[2]] <- felm(log_total_loan_amt~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=hmda_summary[asofdate %in% 2012:2021 & jumbo==1 ])
r[[3]] <- felm(log_total_loan_amt~log(jumbo_frac_assets)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=hmda_summary[asofdate %in% 2012:2021 & jumbo==0 ])
r[[4]] <- felm(log_total_loan_amt~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=hmda_summary[asofdate %in% 2012:2021 & jumbo==0 ])

stargazer(r,type="text",no.space = T,column.labels = c("Jumbo","Conforming"),column.separate = c(2,2),omit.stat = "ser",
          add.lines = list(c("Bank FE",rep("Y",4)),c("Year FE",rep("Y",4))))
## 
## =================================================================================
##                                                     Dependent variable:          
##                                           ---------------------------------------
##                                                     log_total_loan_amt           
##                                                  Jumbo            Conforming     
##                                              (1)       (2)       (3)       (4)   
## ---------------------------------------------------------------------------------
## log(jumbo_frac_assets)                                                           
##                                            (0.000)             (0.000)           
## factor(jumbo_7590)2                                                              
##                                                      (0.000)             (0.000) 
## factor(jumbo_7590)3                                                              
##                                                      (0.000)             (0.000) 
## I(asofdate > 2016)                                                               
##                                            (0.000)   (0.000)   (0.000)   (0.000) 
## log(jumbo_frac_assets):I(asofdate > 2016) -0.158***           -0.041***          
##                                            (0.018)             (0.014)           
## factor(jumbo_7590)2:I(asofdate > 2016)              -0.222***            -0.045  
##                                                      (0.044)             (0.037) 
## factor(jumbo_7590)3:I(asofdate > 2016)              -0.291***           -0.185***
##                                                      (0.049)             (0.046) 
## ---------------------------------------------------------------------------------
## Bank FE                                       Y         Y         Y         Y    
## Year FE                                       Y         Y         Y         Y    
## Observations                               23,664    23,664    27,348    27,348  
## R2                                          0.851     0.851     0.912     0.912  
## Adjusted R2                                 0.825     0.824     0.898     0.898  
## =================================================================================
## Note:                                                 *p<0.1; **p<0.05; ***p<0.01
r <- list()

r[[1]] <- felm(log_total_loan_amt~high_jumbo_assets*factor(asofdate)|RSSD+asofdate|0|RSSD,data=hmda_summary[jumbo==1])
r[[2]] <- felm(log_total_loan_amt~high_jumbo_assets*factor(asofdate)|RSSD+asofdate|0|RSSD,data=hmda_summary[jumbo==0])


g1 <- coef_plot_1reg(r[[1]],"high_jumbo_assets:factor(asofdate)",2012)+ggtitle("log(Total loan volume), Jumbo")
g2 <- coef_plot_1reg(r[[2]],"high_jumbo_assets:factor(asofdate)",2012)+ggtitle("log(Total loan volume), Non-jumbo")
grid.arrange(g1,g2,nrow=1)

hmda_merged <- rbind(hmda[actiontaken %in% c(1,3) & asofdate %in% 2012:2022,
                          c("actiontaken","amountofloan","applicantincome","asofdate","purposeofloan","dist_gse_limit","RSSD","county","typeofloan")],
                     hmda_post[actiontaken %in% c(1,3) & asofdate %in% 2012:2022,
                               c("actiontaken","amountofloan","applicantincome","asofdate","purposeofloan","dist_gse_limit","RSSD","county","typeofloan")])

hmda_merged <- merge(hmda_merged,
                     upbr_2016[,c("IDRSSD","jumbo_dependance","re_loan_frac_assets","jumbo_frac_assets","total_assets","jumbo_7590")],
                     by.x="RSSD",by.y="IDRSSD",all.x=T)

hmda_merged[,county_year:=paste(county,asofdate)]

# hmda_merged[,bank_county:=paste(county,RSSD)]

hmda_merged <- hmda_merged[applicantincome>0 & amountofloan>0]

hmda_merged[,loan_to_income:=amountofloan/applicantincome]

hmda_merged[,high_jumbo_assets:=ifelse(jumbo_dependance %in% high_jumbo_cutoff,1,0)]
hmda_merged[,high_jumbo_assets2:=ifelse(jumbo_dependance %in% 5:6,1,0)]

hmda_merged[,approved:=ifelse(actiontaken==1,1,ifelse(actiontaken==3,0,NA))]

# hmda_merged[,jumbo_dependance_cat:=ifelse(jumbo_dependance<4,1,jumbo_dependance)]
hmda_merged_jumbo <- hmda_merged[dist_gse_limit>10 & dist_gse_limit<750]
hmda_merged_jumbo[,loan_amount_bin:=ntile(amountofloan,10),by=county_year]
hmda_merged_jumbo[,county_year_amt_bin:=paste(county_year,loan_amount_bin)]
hmda_merged_jumbo[,bank_county:=paste(county,RSSD)]
hmda_merged_conforming <- hmda_merged[dist_gse_limit< -10 & dist_gse_limit> -110]
hmda_merged_conforming[,loan_amount_bin:=ntile(amountofloan,10),by=county_year]
hmda_merged_conforming[,county_year_amt_bin:=paste(county_year,loan_amount_bin)]
r <- list()

r[[1]] <- felm(approved~log(jumbo_frac_assets)*I(asofdate>2016)+log(applicantincome)+log(amountofloan)+I(purposeofloan==1)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_jumbo)

r[[2]] <- felm(approved~factor(jumbo_7590)*I(asofdate>2016)+log(applicantincome)+log(amountofloan)+I(purposeofloan==1)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_jumbo)

r[[3]] <- felm(approved~log(jumbo_frac_assets)*I(asofdate>2016)+log(applicantincome)+log(amountofloan)+I(purposeofloan==1)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_conforming)

r[[4]] <- felm(approved~factor(jumbo_7590)*I(asofdate>2016)+log(applicantincome)+log(amountofloan)+I(purposeofloan==1)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_conforming)


stargazer(r,type="text",no.space = T,column.labels = c("Jumbo","Conforming"),column.separate = c(2,2),omit.stat = "ser",
          add.lines = list(c("Bank FE",rep("Y",4)),c("County-Yr-LoanAmt Decile FE",rep("Y",4))))
## 
## =================================================================================
##                                                     Dependent variable:          
##                                           ---------------------------------------
##                                                          approved                
##                                                  Jumbo            Conforming     
##                                              (1)       (2)       (3)       (4)   
## ---------------------------------------------------------------------------------
## log(jumbo_frac_assets)                                                           
##                                            (0.000)             (0.000)           
## factor(jumbo_7590)2                                                              
##                                                      (0.000)             (0.000) 
## factor(jumbo_7590)3                                                              
##                                                      (0.000)             (0.000) 
## I(asofdate > 2016)                                                               
##                                            (0.000)   (0.000)   (0.000)   (0.000) 
## log(applicantincome)                      0.121***  0.121***  0.106***  0.106*** 
##                                            (0.002)   (0.002)   (0.001)   (0.001) 
## log(amountofloan)                         -0.143*** -0.141*** -0.217*** -0.212***
##                                            (0.009)   (0.009)   (0.033)   (0.033) 
## I(purposeofloan == 1)                     0.108***  0.108***  0.107***  0.107*** 
##                                            (0.003)   (0.003)   (0.002)   (0.002) 
## log(jumbo_frac_assets):I(asofdate > 2016) -0.016***           -0.014***          
##                                            (0.001)             (0.001)           
## factor(jumbo_7590)2:I(asofdate > 2016)              -0.057***           -0.049***
##                                                      (0.002)             (0.003) 
## factor(jumbo_7590)3:I(asofdate > 2016)              -0.028***           -0.033***
##                                                      (0.002)             (0.003) 
## ---------------------------------------------------------------------------------
## Bank FE                                       Y         Y         Y         Y    
## County-Yr-LoanAmt Decile FE                   Y         Y         Y         Y    
## Observations                              2,656,807 2,656,807 2,470,802 2,470,802
## R2                                          0.153     0.154     0.160     0.161  
## Adjusted R2                                 0.108     0.109     0.097     0.098  
## =================================================================================
## Note:                                                 *p<0.1; **p<0.05; ***p<0.01
r <- list()

r[[1]] <- felm(approved~high_jumbo_assets*factor(asofdate)+log(applicantincome)+log(amountofloan)+I(purposeofloan == 1)|county_year_amt_bin+RSSD|0|county,             data=hmda_merged_jumbo[asofdate %in% 2014:2021])
r[[2]] <- felm(approved~high_jumbo_assets*factor(asofdate)+log(applicantincome)+log(amountofloan)+I(purposeofloan == 1)|county_year_amt_bin+RSSD|0|county,             data=hmda_merged_conforming[asofdate %in% 2014:2021])

g1 <- coef_plot_1reg(r[[1]],"high_jumbo_assets:factor(asofdate)",2014)+ggtitle("Jumbo")
g2 <- coef_plot_1reg(r[[2]],"high_jumbo_assets:factor(asofdate)",2014)+ggtitle("Conforming")

grid.arrange(g1,g2,nrow=1)

r <- list()

r[[1]] <- felm(loan_to_income~log(jumbo_frac_assets)*I(asofdate>2016)+log(applicantincome)+log(amountofloan)+I(purposeofloan==1)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_jumbo[typeofloan==1  & loan_to_income>1 & loan_to_income<5])

r[[2]] <- felm(loan_to_income~factor(jumbo_7590)*I(asofdate>2016)+log(applicantincome)+log(amountofloan)+I(purposeofloan==1)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_jumbo[typeofloan==1  & loan_to_income>1 & loan_to_income<5])

r[[3]] <- felm(loan_to_income~log(jumbo_frac_assets)*I(asofdate>2016)+log(applicantincome)+log(amountofloan)+I(purposeofloan==1)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_conforming[typeofloan==1  & loan_to_income>1 & loan_to_income<5])

r[[4]] <- felm(loan_to_income~factor(jumbo_7590)*I(asofdate>2016)+log(applicantincome)+log(amountofloan)+I(purposeofloan==1)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_conforming[typeofloan==1  & loan_to_income>1 & loan_to_income<5])


stargazer(r,type="text",no.space = T,column.labels = c("Jumbo","Conforming"),column.separate = c(2,2),omit.stat = "ser",
          add.lines = list(c("Bank FE",rep("Y",4)),c("County-Yr-LoanAmt Decile FE",rep("Y",4))))
## 
## =================================================================================
##                                                     Dependent variable:          
##                                           ---------------------------------------
##                                                       loan_to_income             
##                                                  Jumbo            Conforming     
##                                              (1)       (2)       (3)       (4)   
## ---------------------------------------------------------------------------------
## log(jumbo_frac_assets)                                                           
##                                            (0.000)             (0.000)           
## factor(jumbo_7590)2                                                              
##                                                      (0.000)             (0.000) 
## factor(jumbo_7590)3                                                              
##                                                      (0.000)             (0.000) 
## I(asofdate > 2016)                                                               
##                                            (0.000)   (0.000)   (0.000)   (0.000) 
## log(applicantincome)                      -2.559*** -2.559*** -2.503*** -2.503***
##                                            (0.021)   (0.021)   (0.012)   (0.012) 
## log(amountofloan)                         2.579***  2.579***  2.555***  2.555*** 
##                                            (0.023)   (0.023)   (0.025)   (0.025) 
## I(purposeofloan == 1)                     -0.009*** -0.009*** -0.009*** -0.009***
##                                            (0.002)   (0.002)   (0.001)   (0.001) 
## log(jumbo_frac_assets):I(asofdate > 2016) -0.002***            0.0004            
##                                            (0.001)            (0.0003)           
## factor(jumbo_7590)2:I(asofdate > 2016)                0.001               0.001  
##                                                      (0.001)             (0.001) 
## factor(jumbo_7590)3:I(asofdate > 2016)              -0.004***             0.001  
##                                                      (0.001)             (0.001) 
## ---------------------------------------------------------------------------------
## Bank FE                                       Y         Y         Y         Y    
## County-Yr-LoanAmt Decile FE                   Y         Y         Y         Y    
## Observations                              2,171,166 2,171,166 1,958,350 1,958,350
## R2                                          0.963     0.963     0.963     0.963  
## Adjusted R2                                 0.961     0.961     0.960     0.960  
## =================================================================================
## Note:                                                 *p<0.1; **p<0.05; ***p<0.01
upbr_merged <- merge(upbr,upbr_2016[,c("IDRSSD","jumbo_dependance","re_loan_frac_assets",
                                       "jumbo_frac_assets","jumbo_7590")],
                     by.x="IDRSSD",by.y="IDRSSD")

upbr_merged[,data_period:=as.Date(data_period,format="%m%d%Y")]

upbr_merged[,qtrs:=((year(data_period) - 2017) * 4) + quarter(data_period) -4 ]

upbr_merged[,re_loan_frac_assets:=sfr_first_lein_ma*100/total_assets]

upbr_merged[,unrealized_loss_equity:=(htm_value-htm_cost)/total_equity]

upbr_merged[,equity_assets:=total_equity/total_assets]
upbr_merged[,loans_assets:=total_loans/total_assets]
upbr_merged[,other_loans_assets:=(total_loans-sfr_first_lein_ma)/total_assets]
upbr_merged[,other_loans:=(total_loans-sfr_first_lein_ma)]

upbr_merged[,real_estate_loans_assets:=real_estate_loans/total_assets]
upbr_merged[,commercial_industrial_loans:=commercial_industrial_loans/total_assets]
upbr_merged[,cre_construction_land_dev_loans_assets:=cre_construction_land_dev_loans/total_assets]
upbr_merged[,mbs_assets:=mortgage_backed_securities/total_assets]
upbr_merged[,govt_assets:=govt_agency_securities/total_assets]
upbr_merged[,sfr_first_lein_ma_assets:=sfr_first_lein_ma/total_assets]
upbr_merged[,deposits_assets:=total_deposits/total_assets]
upbr_merged[,securities_assets:=total_securities/total_assets]
upbr_merged[,cash_assets:=cash/total_assets]

upbr_merged[,log_total_assets:=log(total_assets)]
upbr_merged[,log_maturity_security:=log(wa_maturiy_security)]
upbr_merged[,log_maturity_assets:=log(wa_maturiy_assets)]
upbr_merged[,log_salaries_employee_benefits:=log(1+salaries_employee_benefits)]
upbr_merged[,log_no_of_full_time_employees:=log(1+no_of_full_time_employees)]
upbr_merged[,rwa_assets:=risk_weighted_assets/total_assets]
upbr_merged[,high_jumbo_assets:=ifelse(jumbo_dependance %in% high_jumbo_cutoff,1,0)]
lower_bound <- quantile(upbr_merged$mtg_delinq_pct, probs = 0.01, na.rm = TRUE)
upper_bound <- quantile(upbr_merged$mtg_delinq_pct, probs = 0.99, na.rm = TRUE)


r <- list()

r[[1]] <- felm(mtg_delinq_pct~high_jumbo_assets *I(qtrs %in% 1:8)+high_jumbo_assets *I(qtrs %in% 9:16) + log(total_assets) + equity_assets | IDRSSD + data_period | 0 | IDRSSD,      data=upbr_merged[(mtg_delinq_pct > lower_bound & mtg_delinq_pct < upper_bound) & qtrs <= 16 & qtrs>= -12])

r[[2]] <- felm(mtg_delinq_pct~high_jumbo_assets *I(qtrs %in% 1:8)+high_jumbo_assets *I(qtrs %in% 9:16) + log(total_assets) + equity_assets | IDRSSD + data_period | 0 | IDRSSD,      data=upbr_merged[(mtg_delinq_pct > lower_bound & mtg_delinq_pct < upper_bound) & qtrs <= 16 & qtrs>= -12 & total_assets<1e6 ])

r[[3]] <- felm(mtg_delinq_pct~high_jumbo_assets *I(qtrs %in% 1:8)+high_jumbo_assets *I(qtrs %in% 9:16) + log(total_assets) + equity_assets | IDRSSD + data_period | 0 | IDRSSD,      data=upbr_merged[(mtg_delinq_pct > lower_bound & mtg_delinq_pct < upper_bound) & qtrs <= 16 & qtrs>= -12 & total_assets>1e6 & total_assets<1e7])

r[[4]] <- felm(mtg_delinq_pct~high_jumbo_assets *I(qtrs %in% 1:8)+high_jumbo_assets *I(qtrs %in% 9:16) + log(total_assets) + equity_assets | IDRSSD + data_period | 0 | IDRSSD,      data=upbr_merged[(mtg_delinq_pct > lower_bound & mtg_delinq_pct < upper_bound) & qtrs <= 16 & qtrs>= -12 & total_assets>1e7])


stargazer(r,type="text",no.space = T,column.labels = c("All","<1b","1-10b",">10b"),omit.stat = "ser",
          add.lines = list(c("Bank FE",rep("Y",4)),c("Quarter",rep("Y",4))))
## 
## ================================================================
##                                       Dependent variable:       
##                                ---------------------------------
##                                         mtg_delinq_pct          
##                                  All      <1b    1-10b    >10b  
##                                  (1)      (2)     (3)      (4)  
## ----------------------------------------------------------------
## high_jumbo_assets                                               
##                                (0.000)  (0.000) (0.000)  (0.000)
## I(qtrs 1:8)                                                     
##                                (0.000)  (0.000) (0.000)  (0.000)
## I(qtrs 9:16)                                                    
##                                (0.000)  (0.000) (0.000)  (0.000)
## log(total_assets)               0.116*   0.048   0.101    0.103 
##                                (0.068)  (0.117) (0.110)  (0.281)
## equity_assets                   1.140    0.570   0.827    5.180 
##                                (1.065)  (1.427) (1.554)  (4.581)
## high_jumbo_assets:I(qtrs 1:8)   0.039    0.007  0.144**  -0.046 
##                                (0.037)  (0.047) (0.064)  (0.197)
## high_jumbo_assets:I(qtrs 9:16) 0.146***  0.073  0.318***  0.002 
##                                (0.050)  (0.069) (0.075)  (0.249)
## ----------------------------------------------------------------
## Bank FE                           Y        Y       Y        Y   
## Quarter                           Y        Y       Y        Y   
## Observations                    76,666  56,867   16,642   3,157 
## R2                              0.633    0.605   0.751    0.794 
## Adjusted R2                     0.616    0.584   0.736    0.781 
## ================================================================
## Note:                                *p<0.1; **p<0.05; ***p<0.01
lower_bound <- quantile(upbr_merged$log_total_assets, probs = 0.01, na.rm = TRUE)
upper_bound <- quantile(upbr_merged$log_total_assets, probs = 0.99, na.rm = TRUE)


r <- list()

r[[1]] <- felm(log_total_assets~high_jumbo_assets *I(qtrs>0) + equity_assets | IDRSSD + data_period | 0 | IDRSSD,      data=upbr_merged[(log_total_assets > lower_bound & log_total_assets < upper_bound) & qtrs <= 8 & qtrs>= -8])

r[[2]] <- felm(log_total_assets~high_jumbo_assets *I(qtrs>0) + equity_assets | IDRSSD + data_period | 0 | IDRSSD,      data=upbr_merged[(log_total_assets > lower_bound & log_total_assets < upper_bound) & qtrs <= 8 & qtrs>= -8 & total_assets<1e6 ])

r[[3]] <- felm(log_total_assets~high_jumbo_assets *I(qtrs>0) + equity_assets | IDRSSD + data_period | 0 | IDRSSD,      data=upbr_merged[(log_total_assets > lower_bound & log_total_assets < upper_bound) & qtrs <= 8 & qtrs>= -8 & total_assets>1e6 & total_assets<1e7])

r[[4]] <- felm(log_total_assets~high_jumbo_assets *I(qtrs>0) + equity_assets | IDRSSD + data_period | 0 | IDRSSD,      data=upbr_merged[(log_total_assets > lower_bound & log_total_assets < upper_bound) & qtrs <= 8 & qtrs>= -8 & total_assets>1e7])


stargazer(r,type="text",no.space = T,column.labels = c("All","<1b","1-10b",">10b"),omit.stat = "ser",
          add.lines = list(c("Bank FE",rep("Y",4)),c("Quarter",rep("Y",4))))
## 
## ==================================================================
##                                       Dependent variable:         
##                               ------------------------------------
##                                         log_total_assets          
##                                  All       <1b     1-10b    >10b  
##                                  (1)       (2)      (3)      (4)  
## ------------------------------------------------------------------
## high_jumbo_assets                                                 
##                                (0.000)   (0.000)  (0.000)  (0.000)
## I(qtrs > 0)                                                       
##                                (0.000)   (0.000)  (0.000)  (0.000)
## equity_assets                 -1.739*** -2.448*** 1.844***  0.551 
##                                (0.505)   (0.312)  (0.405)  (1.315)
## high_jumbo_assets:I(qtrs > 0) 0.040***  0.040***   0.030*  -0.013 
##                                (0.007)   (0.006)  (0.015)  (0.026)
## ------------------------------------------------------------------
## Bank FE                           Y         Y        Y        Y   
## Quarter                           Y         Y        Y        Y   
## Observations                   57,024    45,692    9,991    1,341 
## R2                              0.993     0.988    0.978    0.962 
## Adjusted R2                     0.993     0.987    0.976    0.958 
## ==================================================================
## Note:                                  *p<0.1; **p<0.05; ***p<0.01
dep_var <- "log_total_assets"
formula <- as.formula(paste( dep_var,"~ high_jumbo_assets * factor(qtrs)  + equity_assets | IDRSSD + data_period | 0 | IDRSSD"))
lower_bound <- quantile(upbr_merged$log_total_assets, probs = 0.01, na.rm = TRUE)
upper_bound <- quantile(upbr_merged$log_total_assets, probs = 0.99, na.rm = TRUE)
filtered_data <- upbr_merged %>%
      filter((log_total_assets > lower_bound & log_total_assets < upper_bound) & qtrs <= 16 & qtrs>= -12)
coef_plot_1reg(felm(formula, data = filtered_data[qtrs <= 16 & qtrs>= -12]),"high_jumbo_assets:factor(qtrs)",-12)+ggtitle(dep_var)

dep_var <- "log_total_assets"
formula <- as.formula(paste( dep_var,"~ high_jumbo_assets * factor(qtrs)  + equity_assets | IDRSSD + data_period | 0 | IDRSSD"))
lower_bound <- quantile(upbr_merged$log_total_assets, probs = 0.01, na.rm = TRUE)
upper_bound <- quantile(upbr_merged$log_total_assets, probs = 0.99, na.rm = TRUE)
filtered_data <- upbr_merged %>%
      filter((log_total_assets > lower_bound & log_total_assets < upper_bound) & qtrs <= 16 & qtrs>= -12 & total_assets<1e7)
coef_plot_1reg(felm(formula, data = filtered_data[qtrs <= 16 & qtrs>= -12]),"high_jumbo_assets:factor(qtrs)",-12)+ggtitle(dep_var)

dep_var <- "mtg_delinq_pct"
formula <- as.formula(paste( dep_var,"~ high_jumbo_assets * factor(qtrs) + log(total_assets) + equity_assets | IDRSSD + data_period | 0 | IDRSSD"))
lower_bound <- quantile(upbr_merged$mtg_delinq_pct, probs = 0.01, na.rm = TRUE)
upper_bound <- quantile(upbr_merged$mtg_delinq_pct, probs = 0.99, na.rm = TRUE)
filtered_data <- upbr_merged %>%
      filter((mtg_delinq_pct > lower_bound & mtg_delinq_pct < upper_bound) & qtrs <= 16 & qtrs>= -12 & total_assets>1e6 & total_assets<1e7)
coef_plot_1reg(felm(formula, data = filtered_data[qtrs <= 16 & qtrs>= -12]),"high_jumbo_assets:factor(qtrs)",-12)+ggtitle(dep_var)

library(dplyr)
library(broom)
library(lfe)

run_bank_fe_regression_coef <- function(dep_var_names, min_qt, max_qt, sample_frac) {
  # Initialize an empty data frame to store results for each dependent variable
  results_df <- data.frame()

  for (dep_var_name in dep_var_names) {
    # print(dep_var_name)
    formula <- as.formula(paste(dep_var_name, "~ high_jumbo_assets * I(qtrs > 0 & qtrs <5) + high_jumbo_assets * I(qtrs >4 ) + log(total_assets) + equity_assets | IDRSSD + data_period | 0 | IDRSSD"))

    lower_bound <- quantile(upbr_merged[[dep_var_name]], probs = 0.01, na.rm = TRUE)
    upper_bound <- quantile(upbr_merged[[dep_var_name]], probs = 0.99, na.rm = TRUE)

    filtered_data <- upbr_merged %>%
      filter((.data[[dep_var_name]] > lower_bound & .data[[dep_var_name]] < upper_bound) &
             qtrs <= max_qt & qtrs >= min_qt)

    reg <- felm(formula, data = filtered_data)
    reg1 <- summary(felm(formula, data = filtered_data[total_assets<1e6]))
    reg2 <-  summary(felm(formula, data = filtered_data[total_assets<1e7 & total_assets>1e6]))
    reg3 <-  summary(felm(formula, data = filtered_data[total_assets>1e7]))
    

    summary_reg <- summary(reg)
    # print( summary_reg)
    fifth_coef <- summary_reg$coefficients[6, 1]
    fifth_se <- summary_reg$coefficients[6, 2]
    sixth_coef <- summary_reg$coefficients[7, 1]
    sixth_se <- summary_reg$coefficients[7, 2]
    
    sixth_coef1 <- reg1$coefficients[7, 1]
    sixth_coef2 <- reg2$coefficients[7, 1]
    sixth_coef3 <- reg3$coefficients[7, 1]
    
    r_squared <- summary_reg$r2adj
    n <- summary_reg$N
    mean_dep_var <- mean(filtered_data[[dep_var_name]], na.rm = TRUE)

    # Store results for the current dependent variable in a temporary data frame
    temp_results <- data.frame(
      dep_var_name = dep_var_name,
      fifth_coef = fifth_coef,
      fifth_se = fifth_se,
      sixth_coef = sixth_coef,
      sixth_se = sixth_se,
      sixth_coef1 = sixth_coef1,
      sixth_coef2 = sixth_coef2,
      sixth_coef3 = sixth_coef3,
      r_squared = r_squared,
      n = n,
      mean_dep_var = mean_dep_var
    )

    # Append the temporary results to the main results data frame
    results_df <- rbind(results_df, temp_results)
  }

  return(results_df)
}
dependent_vars <- c("log_total_assets","loans_assets", "re_loans_sfr_assets", "other_loans_assets",
                     "nii_assets", "securities_assets","interest_expense_assets_2", 
                    "personnel_expense_to_assets","assets_per_employee",
                    "wa_maturiy_assets","log_no_of_full_time_employees","total_capital_ratio",
                    "roe","unused_commitments_assets_total","uninsured_deposits_assets",
                    "unused_commitments_assets_cre","unused_commitments_assets_sfr_construction",
                    "ci_assets","deposits_assets")


results_df<- run_bank_fe_regression_coef(dependent_vars,-7,8,1)
run_bank_fe_regression  <- function(dep_var_name,min_qt,max_qt,sample_frac) {
formula <- as.formula(paste(dep_var_name, "~ high_jumbo_assets * factor(qtrs) + log(total_assets) + equity_assets | IDRSSD + data_period | 0 | IDRSSD"))
  
  lower_bound <- quantile(upbr_merged[[dep_var_name]], probs = 0.01, na.rm = TRUE)
  upper_bound <- quantile(upbr_merged[[dep_var_name]], probs = 0.99, na.rm = TRUE)
  
  filtered_data <- upbr_merged %>%
    filter((.data[[dep_var_name]] > lower_bound & .data[[dep_var_name]] < upper_bound) & qtrs <= max_qt & qtrs >= min_qt)
  
  rssd_list <- unique(filtered_data$IDRSSD)
  rssd_sample <- sample(rssd_list,size=length(rssd_list)*sample_frac,replace = F)
  
  plot_result <- coef_plot_1reg(felm(formula, data = filtered_data[IDRSSD %in% rssd_sample]), "high_jumbo_assets:factor(qtrs)", min_qt) + ggtitle(dep_var_name)
  
  return(plot_result)
}
dependent_vars <- c("loans_assets", "re_loans_sfr_assets", "other_loans_assets",
                     "nii_assets", "securities_assets","interest_expense_assets_2", 
                    "personnel_expense_to_assets","assets_per_employee",
                    "wa_maturiy_assets","log_no_of_full_time_employees","total_capital_ratio",
                    "roe","unused_commitments_assets_total","uninsured_deposits_assets",
                    "unused_commitments_assets_cre","unused_commitments_assets_sfr_construction",
                    "ci_assets","deposits_assets","mbs_assets")

for(dv in dependent_vars) {
  print(run_bank_fe_regression(dv,-7,8,1))
}

dependent_vars <- c( "securities_assets",
                    "wa_maturiy_assets",
                    "mbs_assets",
                    "govt_assets",
                    "smb_re_assets",
                    "smb_ci_assets",
                    "htm_assets_ubpr",
                    "creconst_assets")

for(dv in dependent_vars) {
  print(run_bank_fe_regression(dv,-12,8,1))
}

# creates a data.table with county-level house prices (zhvi) for years 2015 and 2022

zrd <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Zillow Research Data/County_zhvi_uc_sfr_tier_0.33_0.67_sm_sa_month.csv")


zrd[,fips:=paste0(str_pad(StateCodeFIPS,2,pad="0"),str_pad(MunicipalCodeFIPS,3,pad="0"))]
zrd[,c("RegionID","RegionName","SizeRank","StateName","RegionType","State","Metro","StateCodeFIPS","MunicipalCodeFIPS")] <- NULL
zrd <- melt(zrd,id.vars = c("fips"))
zrd$month <- as.Date(as.character(zrd$variable),origin = "1970-01-01")
zrd$variable <- NULL
names(zrd) <- c("county","zhvi","month")
zrd <- data.table(zrd)
zrd[,asofdate:=year(month)]

zrd <- zrd[,.(zhvi=mean(zhvi,na.rm=T)),by=.(county,asofdate)]
zrd[, rank := frank(-zhvi, ties.method = "min"), by = asofdate]



hmda_county <- hmda[actiontaken==1 & purposeofloan %in% c(1) & typeofloan==1 & dist_gse_limit<  -10,.(amountofloan=sum(amountofloan,na.rm=T)),
                    by=.(RSSD,asofdate,county)]
temp <- hmda_post[actiontaken==1 & purposeofloan %in% c(1) & typeofloan==1 & dist_gse_limit < -10,.(amountofloan=sum(amountofloan,na.rm=T)),
                    by=.(RSSD,asofdate,county)]

hmda_county <- rbind(hmda_county,temp)

hmda_county[, total_loan_by_rssd_asofdate := sum(amountofloan), by = .(RSSD, asofdate)]
hmda_county[, market_share := amountofloan / total_loan_by_rssd_asofdate]
hmda_county[, squared_market_share := market_share^2, by = .(RSSD, asofdate, county)]

hmda_county <- merge(hmda_county, zrd[,c("rank","county","asofdate","zhvi")],by=c("county","asofdate"))

# Step 3: Aggregate to calculate HHI for each RSSD-asofdate
HHI_conforming <- hmda_county[market_share>0, .(HHI = sum(squared_market_share),.N,zhvi=mean(zhvi),rank=mean(rank)), by = .(RSSD, asofdate)]

HHI_conforming <- merge(HHI_conforming,upbr_2016[,c("IDRSSD","jumbo_dependance","re_loan_frac_assets","jumbo_frac_assets","total_assets","jumbo_7590")],
                      by.x="RSSD",by.y="IDRSSD",all.x=T)

HHI_conforming[,high_jumbo_assets:=ifelse(jumbo_dependance %in% high_jumbo_cutoff,1,0)]
hmda_county <- hmda[actiontaken==1 & purposeofloan %in% c(1) & typeofloan==1 & dist_gse_limit>10,.(amountofloan=sum(amountofloan,na.rm=T)),
                    by=.(RSSD,asofdate,county)]
temp <- hmda_post[actiontaken==1 & purposeofloan %in% c(1) & typeofloan==1 & dist_gse_limit>10,.(amountofloan=sum(amountofloan,na.rm=T)),
                    by=.(RSSD,asofdate,county)]

hmda_county <- rbind(hmda_county,temp)

hmda_county[, total_loan_by_rssd_asofdate := sum(amountofloan), by = .(RSSD, asofdate)]
hmda_county[, market_share := amountofloan / total_loan_by_rssd_asofdate]
hmda_county[, squared_market_share := market_share^2, by = .(RSSD, asofdate, county)]

hmda_county <- merge(hmda_county, zrd[,c("rank","county","asofdate","zhvi")],by=c("county","asofdate"))

# Step 3: Aggregate to calculate HHI for each RSSD-asofdate
HHI <- hmda_county[market_share>0, .(HHI = sum(squared_market_share),.N,zhvi=mean(zhvi),rank=mean(rank)), by = .(RSSD, asofdate)]

HHI <- merge(HHI,upbr_2016[,c("IDRSSD","jumbo_dependance","re_loan_frac_assets","jumbo_frac_assets","total_assets","jumbo_7590")],
                      by.x="RSSD",by.y="IDRSSD",all.x=T)

HHI[,high_jumbo_assets:=ifelse(jumbo_dependance %in% high_jumbo_cutoff,1,0)]
r <- list()
r[[1]] <- felm(HHI~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI)
r[[2]] <- felm(HHI~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI[total_assets<1e6])
r[[3]] <- felm(HHI~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI[total_assets>1e6 & total_assets<1e7])
r[[4]] <- felm(HHI~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI[total_assets>1e7])
r[[5]] <- felm(HHI~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI_conforming)

stargazer(r,type="text",no.space = T,column.labels = c("All","<1b","1-10b",">10b","All"),omit.stat = "ser")
## 
## =================================================================================
##                                                   Dependent variable:            
##                                        ------------------------------------------
##                                                           HHI                    
##                                          All      <1b     1-10b    >10b     All  
##                                          (1)      (2)      (3)      (4)     (5)  
## ---------------------------------------------------------------------------------
## factor(jumbo_7590)2                                                              
##                                        (0.000)  (0.000)  (0.000)  (0.000) (0.000)
## factor(jumbo_7590)3                                                              
##                                        (0.000)  (0.000)  (0.000)  (0.000) (0.000)
## I(asofdate > 2016)                                                               
##                                        (0.000)  (0.000)  (0.000)  (0.000) (0.000)
## factor(jumbo_7590)2:I(asofdate > 2016) 0.043*** 0.033*** 0.074*** 0.035** -0.001 
##                                        (0.010)  (0.012)  (0.017)  (0.016) (0.006)
## factor(jumbo_7590)3:I(asofdate > 2016) 0.073*** 0.069*** 0.098***  0.020   0.006 
##                                        (0.010)  (0.013)  (0.016)  (0.016) (0.007)
## ---------------------------------------------------------------------------------
## Observations                            20,901   14,774   5,173     954   28,775 
## R2                                      0.647    0.518    0.626    0.694   0.791 
## Adjusted R2                             0.578    0.409    0.577    0.655   0.761 
## =================================================================================
## Note:                                                 *p<0.1; **p<0.05; ***p<0.01
r <- list()
r[[1]] <- felm(log(N)~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI)
r[[2]] <- felm(log(N)~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI[total_assets<1e6])
r[[3]] <- felm(log(N)~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI[total_assets>1e6 & total_assets<1e7])
r[[4]] <- felm(log(N)~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI[total_assets>1e7])
r[[5]] <- felm(log(N)~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI_conforming)

stargazer(r,type="text",no.space = T,column.labels = c("All","<1b","1-10b",">10b","All"),omit.stat = "ser")
## 
## ==================================================================================
##                                                    Dependent variable:            
##                                        -------------------------------------------
##                                                          log(N)                   
##                                           All      <1b     1-10b    >10b     All  
##                                           (1)      (2)      (3)      (4)     (5)  
## ----------------------------------------------------------------------------------
## factor(jumbo_7590)2                                                               
##                                         (0.000)  (0.000)  (0.000)  (0.000) (0.000)
## factor(jumbo_7590)3                                                               
##                                         (0.000)  (0.000)  (0.000)  (0.000) (0.000)
## I(asofdate > 2016)                                                                
##                                         (0.000)  (0.000)  (0.000)  (0.000) (0.000)
## factor(jumbo_7590)2:I(asofdate > 2016) -0.075**  -0.014  -0.162*** -0.229   0.018 
##                                         (0.030)  (0.034)  (0.062)  (0.145) (0.028)
## factor(jumbo_7590)3:I(asofdate > 2016) -0.135*** -0.020  -0.386*** -0.258* -0.059*
##                                         (0.034)  (0.041)  (0.065)  (0.144) (0.036)
## ----------------------------------------------------------------------------------
## Observations                            20,901   14,774    5,173     954   28,775 
## R2                                       0.842    0.635    0.778    0.863   0.895 
## Adjusted R2                              0.811    0.552    0.750    0.846   0.880 
## ==================================================================================
## Note:                                                  *p<0.1; **p<0.05; ***p<0.01
r <- list()
r[[1]] <-felm(HHI~high_jumbo_assets*factor(asofdate)|RSSD+asofdate|0|RSSD,data=HHI)
r[[2]] <-felm(HHI~high_jumbo_assets*factor(asofdate)|RSSD+asofdate|0|RSSD,data=HHI_conforming)

r[[3]] <-felm(log(N)~high_jumbo_assets*factor(asofdate)|RSSD+asofdate|0|RSSD,data=HHI)
r[[4]] <-felm(log(N)~high_jumbo_assets*factor(asofdate)|RSSD+asofdate|0|RSSD,data=HHI_conforming)

coef_plot_1reg(r[[1]],"high_jumbo_assets:factor(asofdate)",2012)+ggtitle("Jumbo; Dep. Var: HHI")

coef_plot_1reg(r[[2]],"high_jumbo_assets:factor(asofdate)",2012)+ggtitle("Conforming; Dep. Var: HHI")

coef_plot_1reg(r[[3]],"high_jumbo_assets:factor(asofdate)",2012)+ggtitle("Jumbo; Dep. Var: log(no counties)")

coef_plot_1reg(r[[4]],"high_jumbo_assets:factor(asofdate)",2012)+ggtitle("Conforming; Dep. Var:  log(no counties)")