rm(list=ls())
library(data.table)
library(fst)
library(RSQLite)
library(DBI)  
library(dplyr)
library(lfe)
library(stargazer)
library(stringr)
library(ggplot2)
library(readxl)
library(stringi)
library(zoo)
source('C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/functions.R')
fit_felm <- function(formula, data, sample_frac,drop_dv_outliers=TRUE) {
  dep_var_name <- as.character(formula[[2]])
  if(drop_dv_outliers==TRUE) {
    quantiles <- quantile(data[[dep_var_name]], c(0.01, 0.99), na.rm = TRUE)
    subset_data <- data[data[[dep_var_name]] > quantiles[1] & data[[dep_var_name]] < quantiles[2], ]
  } else {
    subset_data = data
  }
  subset_data <- subset_data[sample(nrow(subset_data), size = nrow(subset_data)*sample_frac), ]
  felm(formula, data = subset_data)
}
hmda_link_entity <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/HMDA lender/hmdapan2017.csv",select=c("code","hmprid",paste0("RSSD",12:17)))#,paste0("ENTITY",15:16)
hmda_link_entity[,hmda_id:=paste0(code,"-",hmprid)]
hmda_link_entity[,c("code","hmprid"):=list(NULL)]
hmda_link_entity <- melt(hmda_link_entity,id.vars = c("hmda_id"))
hmda_link_entity <- data.table(hmda_link_entity)
hmda_link_entity <- hmda_link_entity[!is.na(value)]
hmda_link_entity[,year:=as.character(variable)]
hmda_link_entity[,year:=substr(year,5,6)]
setnames(hmda_link_entity,"value","RSSD")
hmda_link_entity[,variable:=NULL]
hmda_link_entity[,year:=as.numeric(year)]
hmda_link_entity[,year:=ifelse(year>85,1900+year,2000+year)]


hmda_link_2021 <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/HMDA lender/hmdapan2021.csv",select=c("LEI","RSSD21","CODE21","HMPRID"))

hmda_link_2021[,hmda_id:=paste0(CODE21,"-",HMPRID)]
hmda_link_2021 <- hmda_link_2021[,c("LEI","RSSD21","hmda_id")]
gse_limit_files <- list.files("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/GSE Limits",full.names = T,pattern = ".xls")
gse_limits <- list()
i=1
for(fl in gse_limit_files) {
  yr <- substr(fl,117,120)
  if(yr<=2014) {
    temp <- read_xls(fl,sheet = 1,skip=2,col_names = F)  
  } else{
    temp <- read_xlsx(fl,sheet = 1,skip=2,col_names = F)  
  }
  
  names(temp) <- c("statefips","countyfips","countyname","cbsa","statecode","gse_limit","gse_limit_2","gse_limit_3","gse_limit_4")
  temp <- data.table(temp)
  temp[,year:=yr]
  gse_limits[[i]] <- temp
  i=i+1
}

gse_limits <- rbindlist(gse_limits)
gse_limits[,gse_limit:=floor(gse_limit/1000)]
gse_limits[,year:=as.numeric(year)]



gse_limits[,county:=paste0(statefips,countyfips)]

gse_limits <- gse_limits[,c("county","year","gse_limit")]

setorder(gse_limits,county,year)
gse_limits[,gse_limit_1:=lag(gse_limit),by=county]
gse_limits[,gse_limit_2:=lead(gse_limit),by=county]

gse_limits[,limit_change:=gse_limit-gse_limit_1]

gse_limits_yr <- gse_limits[,.(gse_limit=median(gse_limit,na.rm=T),limit_change=median(limit_change,na.rm=T)),by=year]
setnames(gse_limits_yr,"gse_limit","gse_limit_yr")
ggplot(gse_limits_yr, aes(x=year, y=gse_limit_yr)) +
  geom_line(color="dodgerblue",size=2) +
  geom_point(size=3,color="dodgerblue4")+
  scale_x_continuous(breaks = unique(gse_limits_yr$year))+
  labs(x="",y="GSE limit ($'000)",title = "GSE Limit History")+
  theme_minimal()

ggplot(gse_limits_yr, aes(x=year, y=limit_change)) +
  geom_col(filll="dodgerblue",size=2) +
  # geom_point(size=3,color="dodgerblue4")+
  scale_x_continuous(breaks = unique(gse_limits_yr$year))+
  labs(x="",y="GSE limit Change ($'000)",title = "GSE Limit History")+
  theme_minimal()

hmda_con <- dbConnect(RSQLite::SQLite(), "C:/Users/dratnadiwakara2/Downloads/HMDA/hmda.db")

yrs <- as.character(2012:2017)
hmda <- list()
i=1
for(yr in yrs) {
  print(yr)
  hmda[[i]] <- data.table(dbGetQuery(hmda_con,
                                     paste0("select
                                                agencycode,
                                                respondentid,
                                                asofdate,
                                                purposeofloan,
                                                typeofpurchaser,
                                                amountofloan,
                                                censustract,
                                                applicantincome,
                                                typeofloan,
                                                actiontaken,
                                                denialreason1,
                                                denialreason2,
                                                denialreason3
                                            from lar_",yr," 
                                            where 
                                            actiontaken in (1,3,6) and
                                            propertytype==1")))
  i=i+1
}
## [1] "2012"
## [1] "2013"
## [1] "2014"
## [1] "2015"
## [1] "2016"
## [1] "2017"
hmda <- rbindlist(hmda)
hmda <- data.table(hmda)
hmda[,county:=substr(censustract,1,5)]


hmda <- merge(hmda,gse_limits,by.x=c("asofdate","county"),by.y=c("year","county"),all.x=T)
hmda <- merge(hmda,gse_limits_yr,by.x="asofdate",by.y="year")
hmda[,gse_limit:=ifelse(is.na(gse_limit),gse_limit_yr,gse_limit)]

hmda[,amountofloan:=as.numeric(amountofloan)]
hmda[,dist_gse_limit:=amountofloan-gse_limit]

hmda[,jumbo_this_year:=ifelse(amountofloan>gse_limit,1,0)]
hmda[,jumbo_last_year:=ifelse(amountofloan>gse_limit_1,1,0)]
hmda[,jumbo_next_year:=ifelse(amountofloan>gse_limit_2,1,0)]

hmda[,hmda_id:=paste0(agencycode,"-",respondentid)]

hmda <- merge(hmda,hmda_link_entity,by.x=c("asofdate","hmda_id"),by.y=c("year","hmda_id"))
hmda <- hmda[RSSD>0 & !is.na(RSSD)]
gc()
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells    2594406   138.6    4806374   256.7    4806374   256.7
## Vcells 1891748216 14432.9 4728287392 36074.0 4725499797 36052.8
yrs <- 2018:2022
hmda_post <- list()
i=1
for(yr in yrs) {
  print(yr)
  hmda_post[[i]] <- data.table(dbGetQuery(hmda_con,
                                     paste0("select
                                                lei,
                                                asofdate,
                                                purposeofloan,
                                                typeofpurchaser,
                                                amountofloan,
                                                censustract,
                                                actiontaken,
                                                conforming_loan_limit,
                                                combined_loan_to_value_ratio,
                                                interest_rate,
                                                rate_spread,
                                                total_loan_costs,
                                                debt_to_income_ratio,
                                                typeofloan,
                                                applicant_age,
                                                applicantincome,
                                                denialreason1,
                                                denialreason2,
                                                denialreason3
                                            from lar_",yr," 
                                            where 
                                            actiontaken in (1,3,6) and
                                            derived_dwelling_category='Single Family (1-4 Units):Site-Built'")))
  i=i+1
}
## [1] 2018
## [1] 2019
## [1] 2020
## [1] 2021
## [1] 2022
hmda_post <- rbindlist(hmda_post)
hmda_post <- data.table(hmda_post)
hmda_post[,county:=substr(censustract,1,5)]


hmda_post <- merge(hmda_post,gse_limits,by.x=c("asofdate","county"),by.y=c("year","county"),all.x=T)
hmda_post <- merge(hmda_post,gse_limits_yr,by.x="asofdate",by.y="year")
hmda_post[,gse_limit:=ifelse(is.na(gse_limit),gse_limit_yr,gse_limit)]

hmda_post[,amountofloan:=as.numeric(amountofloan)]
hmda_post[,dist_gse_limit:=amountofloan-gse_limit]

hmda_post[,jumbo_this_year:=ifelse(amountofloan>gse_limit,1,0)]
hmda_post[,jumbo_last_year:=ifelse(amountofloan>gse_limit_1,1,0)]
hmda_post[,jumbo_next_year:=ifelse(amountofloan>gse_limit_2,1,0)]

hmda_post <- merge(hmda_post,hmda_link_2021,by.x=c("lei"),by.y=c("LEI"))

hmda_post <- hmda_post[RSSD21>0 & !is.na(RSSD21)]

hmda_post[,purposeofloan:=ifelse(purposeofloan>10,3,purposeofloan)]
setnames(hmda_post,"RSSD21","RSSD")
gc()
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells    2837372   151.6    4806374   256.7    4806374   256.7
## Vcells 3852413835 29391.6 6808909844 51947.9 6785332717 51768.0
# hmda_1_summary <- hmda[actiontaken==1,.(no_loans=.N,total_loan_amt=sum(amountofloan)),
#                              by=.(RSSD,asofdate)]
# 
# hmda_2_summary <- hmda_post[actiontaken==1,.(no_loans=.N,total_loan_amt=sum(amountofloan)),
#                              by=.(RSSD,asofdate)]


jumbo_100k_by_lender <- hmda[  actiontaken==1 & typeofloan==1 & 
                               dist_gse_limit>10 & 
                               asofdate %in% 2012:2016,
                             .(no_of_jumbo_loans=.N,
                               total_jumbo_loan_amt=sum(amountofloan)),
                             by=RSSD]

all_by_lender <- hmda[asofdate %in% 2012:2016 & actiontaken==1 & typeofloan==1  ,
                             .(all_no_of_loans=.N,
                               all_total_loan_amt=sum(amountofloan)),
                             by=RSSD]

jumbo_100k_by_lender <- merge(jumbo_100k_by_lender,all_by_lender,by="RSSD")

jumbo_100k_by_lender[,no_of_jumbo_loans_pct:=no_of_jumbo_loans*100/all_no_of_loans]
jumbo_100k_by_lender[,total_jumbo_loan_amt_pct:=total_jumbo_loan_amt*100/all_total_loan_amt]



HHI <- hmda[  actiontaken==1 & typeofloan==1 & 
                               asofdate %in% 2012:2016,.(amountofloan=sum(amountofloan,na.rm=T)),
                    by=.(RSSD,county)]

HHI[, total_loan_by_rssd_asofdate := sum(amountofloan), by = .(RSSD)]
HHI[, market_share := amountofloan / total_loan_by_rssd_asofdate]
HHI[, squared_market_share := market_share^2, by = .(RSSD, county)]


HHI <- HHI[market_share>0, .(HHI = sum(squared_market_share),.N), by = .(RSSD)]

jumbo_100k_by_lender <- merge(jumbo_100k_by_lender,HHI,by="RSSD",all.x=T)
con_call <- dbConnect(RSQLite::SQLite(), "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Call Reports/call_reports.db")

con_ubpr <- dbConnect(RSQLite::SQLite(),  "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Call Reports/ubpr.db")


data_periods <- apply(expand.grid(c("0331","0630","0930","1231"), 2012:2023), 1, paste, collapse="")
# data_periods <- c(data_periods,"03312023")
# data_periods <- apply(expand.grid(c("1231"), 2022), 1, paste, collapse="")

upbr <- list()

i=1
for (dp in data_periods) {
  ubpr_dp <- as.Date(dp,"%m%d%Y")
  ubpr_yr <- year(ubpr_dp)
    
  ubpr_data <- dbGetQuery(con_ubpr,paste0("select 
                                        UBPR1754 htm_cost,
                                        UBPR1771 htm_value,
                                        UBPRE566 htm_assets_ubpr,
                                        UBPRE630 roe,
                                        UBPRE565 afs_assets_upbr,
                                        UBPRKX40 nii_assets,
                                        UBPRE023 nii_earnings_assets,
                                        UBPRE002 interest_expense_assets_1,
                                        UBPRD087 asset_growth,
                                        UBPRE162 deposit_growth_qt,
                                        UBPRE209 deposit_growth_yr,
                                        UBPR1410 real_estate_loans,
                                        UBPR1766 commercial_industrial_loans,
                                        UBPR2122 total_loans,
                                        UBPR2170 total_assets,
                                        UBPR2746 cre_construction_land_dev_loans,
                                        UBPRD214 re_loans_sfr,
                                        UBPRD220 sfr_first_lein_ma,
                                        UBPRE006 provisions_to_assets,
                                        UBPRE397 re_loans_net_loss,
                                        UBPRE401 sfr_net_loss,
                                        UBPRE408 commercial_industrial_net_loss,
                                        UBPRE410 loans_to_individuals_net_loss,
                                        UBPR3210 total_equity,
                                        UBPR0081+UBPR0071 cash,
                                        UBPRD588 total_securities,
                                        UBPRB559 mortgage_backed_securities,
                                        UBPRB558 govt_agency_securities,
                                        UBPR2200 total_deposits,
                                        UBPRE660 risk_weighted_assets,
                                        UBPRE678 interest_income_assets,
                                        UBPRE679 interest_expense_assets_2,
                                        UBPR7414 delinquent_to_loans,
                                        UBPR7400 personnel_expense_to_assets,
                                        UBPRE090 assets_per_employee,
                                        UBPRD667+UBPRD669 pastdue_nonaccrual_loans,
                                        UBPRK437+UBPRK426 time_deposits,
                                        UBPRA549+UBPRA555+UBPRA564+UBPRA570 assets_maturity_less_than_3_months,
                                        UBPRA550+UBPRA556+UBPRA248+UBPRA565+UBPRA571+UBPRA247 assets_maturity_3_mn_1yr,
                                        UBPRA551+UBPRA557+UBPRA561+UBPRA566+UBPRA572 assets_maturity_1_to_3_yr,
                                        UBPRA552+UBPRA558+UBPRA562+UBPRA567+UBPRA573 assets_maturity_3_to_5_yr,
                                        UBPRA553+UBPRA559+UBPRA568+UBPRA574 assets_maturity_5_to_15_yr,
                                        UBPRA554+UBPRA560+UBPRA569+UBPRA575 assets_maturity_more_than_15_yr,
                                        UBPRA549+UBPRA555 security_maturity_less_than_3_months,
                                        UBPRA550+UBPRA556 security_maturity_3_mn_1yr,
                                        UBPRA551+UBPRA557 security_maturity_1_to_3_yr,
                                        UBPRA552+UBPRA558 security_maturity_3_to_5_yr,
                                        UBPRA553+UBPRA559 security_maturity_5_to_15_yr,
                                        UBPRA554+UBPRA560 security_maturity_more_than_15_yr,
                                        UBPRA564 re_loans_maturity_less_than_3_months,
                                        UBPRA570 other_loans_maturity_less_than_3_months,
                                        UBPRA565 re_loans_maturity_3_mn_1yr,
                                        UBPRA571 other_loans_maturity_3_mn_1yr,
                                        UBPRA566 re_loans_maturity_1_to_3_yr,
                                        UBPRA572 other_loans_maturity_1_to_3_yr,
                                        UBPRA567 re_loans_maturity_3_to_5_yr,
                                        UBPRA573 other_loans_maturity_3_to_5_yr,
                                        UBPRA568 re_loans_maturity_5_to_15_yr,
                                        UBPRA574 other_loans_maturity_5_to_15_yr,
                                        UBPRA569 re_loans_maturity_more_than_15_yr,
                                        UBPRA575 other_loans_maturity_more_than_15_yr,
                                        UBPRD535 deposits_over_1_yr,
                                        UBPRD536 deposits_over_3_yr,
                                        UBPRE485+UBPRE486 mtg_delinq_pct,
                                        UBPRE487+UBPRE488 mtg_delinq_pct_2,
                                        UBPRD488 total_capital_ratio,
                                        UBPR3833 unused_commitments_total_gt1yr,
                                        UBPRE262 unused_commitments_assets_home_equity,
                                        UBPRE263 unused_commitments_assets_credit_card,
                                        UBPRE264+UBPRE265+UBPRE225 unused_commitments_assets_cre,
                                        UBPRE218 unused_commitments_assets_sfr_construction,
                                        UBPRE266 unused_commitments_assets_all_other,
                                        UBPRE262+UBPRE263+UBPRE264+UBPRE265+UBPRE225+UBPRE218+UBPRE266 unused_commitments_assets_total,
                                        IDRSSD
                                      from ubpr_",ubpr_yr," 
                                      where 
                                      data_period=",as.numeric(ubpr_dp),""))
  
  call_1 <- dbGetQuery(con_call,paste0("select 
                                    RCON2200 deposits_domestic,
                                    RCONF049 deposits_domestic_insured,
                                    RCONF051 deposits_domestic_uninsured,
                                    RCONF050 deposits_domestic_no_insured,
                                    RCONF052 deposits_domestic_no_uninsured,
                                    RCON6631 deposits_domestic_non_interest,
                                    RCON2210 demand_deposits,
                                    RCON1797+RCON5367+RCON5368+RCON1460+RCONF158 secured_by_residential_real_estate,
                                    RCONF160+RCONF161+RCONF159 secured_by_nonresidential_real_estate,
                                    RCONB538+RCONB539+RCONK137+RCONK207 consumer_loans,
                                    RCON0010 cash_due_from_depositories,
                                    RCON0090 balance_due_from_fed,
                                    IDRSSD 
                                 from call_1_",dp," "))


  call_2 <- dbGetQuery(con_call,paste0("select 
                                        FDIC_Certificate_Number,
                                        Financial_Institution_Name,
                                        Financial_Institution_State,
                                        data_period,
                                        RIADC017 data_processing_expenses,
                                        RIAD4135 salaries_employee_benefits,
                                        RIAD4150 no_of_full_time_employees,
                                        IDRSSD 
                                     from call_2_",dp," "))
  
  
  ubpr_data <- merge(ubpr_data,call_2,by="IDRSSD")
  ubpr_data <- merge(ubpr_data,call_1,by="IDRSSD")
  ubpr_data <- data.table(ubpr_data)

  upbr[[i]] <- ubpr_data

  i=i+1
}

upbr <- rbindlist(upbr,fill=T)

dbDisconnect(con_call)
dbDisconnect(con_ubpr)

upbr[,wa_maturiy_loans:=(re_loans_maturity_less_than_3_months*0.25+re_loans_maturity_3_mn_1yr*0.75+re_loans_maturity_1_to_3_yr*2+re_loans_maturity_3_to_5_yr*4+re_loans_maturity_5_to_15_yr*10+re_loans_maturity_more_than_15_yr*20 + other_loans_maturity_less_than_3_months*0.25+other_loans_maturity_3_mn_1yr*0.75+other_loans_maturity_1_to_3_yr*2+other_loans_maturity_3_to_5_yr*4+other_loans_maturity_5_to_15_yr*10+other_loans_maturity_more_than_15_yr*20)/(re_loans_maturity_less_than_3_months+re_loans_maturity_3_mn_1yr+re_loans_maturity_1_to_3_yr+re_loans_maturity_3_to_5_yr+re_loans_maturity_5_to_15_yr+re_loans_maturity_more_than_15_yr+other_loans_maturity_less_than_3_months+other_loans_maturity_3_mn_1yr+other_loans_maturity_1_to_3_yr+other_loans_maturity_3_to_5_yr+other_loans_maturity_5_to_15_yr+other_loans_maturity_more_than_15_yr)]

upbr[,wa_maturiy_assets:=(assets_maturity_less_than_3_months*0.25+assets_maturity_3_mn_1yr*0.75+assets_maturity_1_to_3_yr*2+assets_maturity_3_to_5_yr*4+assets_maturity_5_to_15_yr*10+assets_maturity_more_than_15_yr*20)/(assets_maturity_less_than_3_months+assets_maturity_3_mn_1yr+assets_maturity_1_to_3_yr+assets_maturity_3_to_5_yr+assets_maturity_5_to_15_yr+assets_maturity_more_than_15_yr)]

upbr[,wa_maturiy_security:=(security_maturity_less_than_3_months*0.25+security_maturity_3_mn_1yr*0.75+security_maturity_1_to_3_yr*2+security_maturity_3_to_5_yr*4+security_maturity_5_to_15_yr*10+security_maturity_more_than_15_yr*20)/(security_maturity_less_than_3_months+security_maturity_3_mn_1yr+security_maturity_1_to_3_yr+security_maturity_3_to_5_yr+security_maturity_5_to_15_yr+security_maturity_more_than_15_yr)]

# upbr[,interest_deposits:=total_assets*interest_expense_assets/total_deposits]
upbr[,time_deposit_frac:=time_deposits/total_deposits]
upbr[,deposits_over_1_yr_frac:=deposits_over_1_yr/total_deposits]
upbr[,deposits_over_3_yr_frac:=deposits_over_3_yr/total_deposits]
upbr[,pastdue_nonaccrual_loans_to_loans:=pastdue_nonaccrual_loans*100/total_loans]
upbr[,re_loans_sfr_assets:=re_loans_sfr/total_assets]
upbr[,data_processing_expenses_nii:=((data_processing_expenses*100)/total_assets)/nii_assets]
upbr[,ci_assets:=commercial_industrial_loans/total_assets]
upbr[,uninsured_deposits_assets:=deposits_domestic_uninsured/total_assets]
upbr_2016 <- upbr[data_period=="12312016"]
upbr_2016 <- merge(upbr_2016,jumbo_100k_by_lender,by.x="IDRSSD",by.y="RSSD")
upbr_2016[,re_loan_frac_assets:=sfr_first_lein_ma*100/total_assets]
upbr_2016[,jumbo_frac_assets:=total_jumbo_loan_amt*100/(total_assets)]
upbr_2016[,re_loan_dependance:=ntile(re_loan_frac_assets,100)]
upbr_2016[,jumbo_dependance:=ntile(jumbo_frac_assets,4)]
upbr_2016[,jumbo_dependance:=ifelse(jumbo_frac_assets>15 & jumbo_frac_assets<25,5,
                                    ifelse(jumbo_frac_assets>=25,6,jumbo_dependance))]

upbr_2016[,jumbo_7590:=ntile(jumbo_frac_assets,100)]
upbr_2016[,jumbo_7590:=ifelse(jumbo_7590<75,1,ifelse(jumbo_7590<90,2,3))]

upbr_2016[,loans_assets:=total_loans/total_assets]
upbr_2016[,deposits_assets:=total_deposits/total_assets]
upbr_2016[,securities_assets:=total_securities/total_assets]

ggplot(upbr_2016[jumbo_frac_assets<50],aes(x=jumbo_frac_assets,fill=factor(jumbo_7590)))+geom_histogram(alpha=0.5,bins=60)

high_jumbo_cutoff <- 4:6
library(DescTools)

round_values <- function(x) {
  ifelse(x > 100, round(x, 0), ifelse(x > 10, round(x, 1), round(x, 2)))
}

upbr_2016[!is.na(jumbo_7590) ,.N,by=jumbo_7590]
##    jumbo_7590    N
## 1:          1 2684
## 2:          2  540
## 3:          3  396
columns_to_include <- c('total_assets','loans_assets','deposits_assets','re_loan_frac_assets','jumbo_frac_assets','securities_assets','nii_assets','interest_expense_assets_1',"HHI","N") #,'deposits_state_hhi','deposits_college_hhi','deposits_income_hhi'

# result_table <- bank_wa_mean_data[!is.na(bank_size), lapply(.SD, function(x) list(Mean = mean(x, na.rm = TRUE))), by = bank_size, .SDcols = columns_to_include]
result_table <- upbr_2016[!is.na(jumbo_7590) , lapply(.SD, function(x) {
  x_winsorized <- Winsorize(x, probs = c(0.05, 0.95), na.rm = TRUE)
  list(Winsorized_Mean = mean(x_winsorized, na.rm = TRUE))
}), by = jumbo_7590, .SDcols = columns_to_include]

setorder(result_table,jumbo_7590)

transposed_result_table <- t(result_table)

colnames(transposed_result_table) <- unlist(transposed_result_table[1,])

transposed_result_table <- transposed_result_table[-1,]

transposed_result_table <- cbind(rownames(transposed_result_table), transposed_result_table)

transposed_result_table <- as.data.table(transposed_result_table)

transposed_result_table <- data.frame(lapply(transposed_result_table, function(x) unlist(x, use.names = FALSE)))

transposed_result_table <- data.table(transposed_result_table)

numeric_cols <- names(transposed_result_table)[sapply(transposed_result_table, is.numeric)]

transposed_result_table[, (numeric_cols) := lapply(.SD, round_values), .SDcols = numeric_cols]

# print(transposed_result_table)

stargazer(transposed_result_table,summary=F,type="text")
## 
## ======================================================
##               V1               X1      X2       X3    
## ------------------------------------------------------
## 1        total_assets        743,364 903,420 1,061,157
## 2        loans_assets         0.660   0.740    0.770  
## 3       deposits_assets       0.840   0.830    0.820  
## 4     re_loan_frac_assets    15.400  21.400   28.600  
## 5      jumbo_frac_assets      2.980   9.370   23.600  
## 6      securities_assets      0.200   0.130    0.110  
## 7         nii_assets          3.380   3.450    3.340  
## 8  interest_expense_assets_1  0.390   0.460    0.510  
## 9             HHI             0.350   0.340    0.380  
## 10             N             32.900  49.300   74.300  
## ------------------------------------------------------
hmda[,jumbo:=ifelse(dist_gse_limit>1,1,ifelse(dist_gse_limit< -1,0,NA))]
hmda_post[,jumbo:=ifelse(dist_gse_limit>1,1,ifelse(dist_gse_limit< -1,0,NA))]


hmda_1_summary <- hmda[actiontaken==1 & purposeofloan %in% c(1,3) & typeofloan==1 ,.(no_loans=.N,total_loan_amt=sum(amountofloan)),
                             by=.(RSSD,asofdate,jumbo)]

hmda_2_summary <- hmda_post[actiontaken==1 & purposeofloan %in% c(1,3) & typeofloan==1   ,.(no_loans=.N,total_loan_amt=sum(amountofloan)),
                             by=.(RSSD,asofdate,jumbo)]


hmda_summary <- rbind(hmda_1_summary,hmda_2_summary)

hmda_summary <- merge(hmda_summary,upbr_2016[,c("IDRSSD","jumbo_dependance","re_loan_frac_assets","jumbo_frac_assets","total_assets","jumbo_7590")],
                      by.x="RSSD",by.y="IDRSSD",all.x=T)

hmda_summary[,new_loans_assets:=total_loan_amt/total_assets]

hmda_summary[, normalized_amount := total_loan_amt / total_loan_amt[asofdate == 2016], by = .(RSSD,jumbo)]

hmda_summary[,log_total_loan_amt:=log(1+total_loan_amt)]

hmda_summary[,high_jumbo_assets:=ifelse(jumbo_dependance %in% high_jumbo_cutoff,1,0)]
hmda_summary[,high_jumbo_assets2:=ifelse(jumbo_dependance %in% 5:6,1,0)]
hmda_summary_plot <- hmda_summary[asofdate<2022]
# hmda_summary_plot[,high_jumbo_assets:=ifelse(jumbo_dependance %in% high_jumbo_cutoff,1,0)]
hmda_summary_plot <- hmda_summary_plot[,.(normalized_amount=mean(normalized_amount,na.rm=T)),by=.(asofdate,jumbo_7590,jumbo)]
g1 <- ggplot(hmda_summary_plot[jumbo==1 & !is.na(jumbo_7590)],
       aes(x=asofdate,y=normalized_amount,color=factor(jumbo_7590)))+
  geom_line()+
  theme(legend.position="bottom")+ggtitle("Jumbo")

g2 <- ggplot(hmda_summary_plot[jumbo==0 & !is.na(jumbo_7590)],
       aes(x=asofdate,y=normalized_amount,color=factor(jumbo_7590)))+
  geom_line()+
  theme(legend.position="bottom")+ggtitle("Non-Jumbo")

library(gridExtra)
grid.arrange(g1,g2,nrow=1)

r <- list()
r[[1]] <- felm(log_total_loan_amt~log(jumbo_frac_assets+0.00001)*I(asofdate>2017)|RSSD+asofdate|0|RSSD,data=hmda_summary[asofdate %in% 2012:2021 & jumbo==1 & total_assets<010e7])
r[[2]] <- felm(log_total_loan_amt~factor(jumbo_7590)*I(asofdate>2017)|RSSD+asofdate|0|RSSD,data=hmda_summary[asofdate %in% 2012:2021 & jumbo==1 & total_assets<010e7])
r[[3]] <- felm(log_total_loan_amt~log(jumbo_frac_assets+0.00001)*I(asofdate>2017)|RSSD+asofdate|0|RSSD,data=hmda_summary[asofdate %in% 2012:2021 & jumbo==0 & total_assets<10e7])
r[[4]] <- felm(log_total_loan_amt~factor(jumbo_7590)*I(asofdate>2017)|RSSD+asofdate|0|RSSD,data=hmda_summary[asofdate %in% 2012:2021 & jumbo==0 & total_assets<10e7])

stargazer(r,type="text",no.space = T,column.labels = c("New Purchases","Refinacnes"),column.separate = c(2,2),omit.stat = "ser")
## 
## =========================================================================================
##                                                             Dependent variable:          
##                                                   ---------------------------------------
##                                                             log_total_loan_amt           
##                                                      New Purchases        Refinacnes     
##                                                      (1)       (2)       (3)       (4)   
## -----------------------------------------------------------------------------------------
## log(jumbo_frac_assets + 1e-05)                                                           
##                                                    (0.000)             (0.000)           
## factor(jumbo_7590)2                                                                      
##                                                              (0.000)             (0.000) 
## factor(jumbo_7590)3                                                                      
##                                                              (0.000)             (0.000) 
## I(asofdate > 2017)                                                                       
##                                                    (0.000)   (0.000)   (0.000)   (0.000) 
## log(jumbo_frac_assets + 1e-05):I(asofdate > 2017) -0.149***           -0.061***          
##                                                    (0.019)             (0.018)           
## factor(jumbo_7590)2:I(asofdate > 2017)                      -0.230***            -0.072* 
##                                                              (0.055)             (0.043) 
## factor(jumbo_7590)3:I(asofdate > 2017)                      -0.266***           -0.233***
##                                                              (0.061)             (0.056) 
## -----------------------------------------------------------------------------------------
## Observations                                       23,452    23,452    27,139    27,139  
## R2                                                  0.835     0.835     0.907     0.907  
## Adjusted R2                                         0.805     0.805     0.893     0.893  
## =========================================================================================
## Note:                                                         *p<0.1; **p<0.05; ***p<0.01
r <- list()

r[[1]] <- felm(log_total_loan_amt~high_jumbo_assets*factor(asofdate)|RSSD+asofdate|0|RSSD,data=hmda_summary[jumbo==1])
r[[2]] <- felm(log_total_loan_amt~high_jumbo_assets*factor(asofdate)|RSSD+asofdate|0|RSSD,data=hmda_summary[jumbo==0])


g1 <- coef_plot_1reg(r[[1]],"high_jumbo_assets:factor(asofdate)",2012)+ggtitle("log(Total loan volume), Jumbo")
g2 <- coef_plot_1reg(r[[2]],"high_jumbo_assets:factor(asofdate)",2012)+ggtitle("log(Total loan volume), Non-jumbo")
grid.arrange(g1,g2,nrow=1)

hmda_merged <- rbind(hmda[actiontaken %in% c(1,3) & asofdate %in% 2012:2022,
                          c("actiontaken","amountofloan","applicantincome","asofdate","purposeofloan","dist_gse_limit","RSSD","county","typeofloan")],
                     hmda_post[actiontaken %in% c(1,3) & asofdate %in% 2012:2022,
                               c("actiontaken","amountofloan","applicantincome","asofdate","purposeofloan","dist_gse_limit","RSSD","county","typeofloan")])

hmda_merged <- merge(hmda_merged,
                     upbr_2016[,c("IDRSSD","jumbo_dependance","re_loan_frac_assets","jumbo_frac_assets","total_assets","jumbo_7590")],
                     by.x="RSSD",by.y="IDRSSD",all.x=T)

hmda_merged[,county_year:=paste(county,asofdate)]

# hmda_merged[,bank_county:=paste(county,RSSD)]

hmda_merged <- hmda_merged[applicantincome>0 & amountofloan>0]

hmda_merged[,loan_to_income:=amountofloan/applicantincome]

hmda_merged[,high_jumbo_assets:=ifelse(jumbo_dependance %in% high_jumbo_cutoff,1,0)]
hmda_merged[,high_jumbo_assets2:=ifelse(jumbo_dependance %in% 5:6,1,0)]

hmda_merged[,approved:=ifelse(actiontaken==1,1,ifelse(actiontaken==3,0,NA))]

# hmda_merged[,jumbo_dependance_cat:=ifelse(jumbo_dependance<4,1,jumbo_dependance)]
hmda_merged_jumbo <- hmda_merged[dist_gse_limit>10 & dist_gse_limit<750]
hmda_merged_jumbo[,loan_amount_bin:=ntile(amountofloan,10),by=county_year]
hmda_merged_jumbo[,county_year_amt_bin:=paste(county_year,loan_amount_bin)]
hmda_merged_jumbo[,bank_county:=paste(county,RSSD)]
r <- list()

r[[1]] <- felm(approved~high_jumbo_assets*I(asofdate>2016)+log(applicantincome)+log(amountofloan)|county_year_amt_bin+RSSD|0|county,             data=hmda_merged_jumbo[purposeofloan==1 & total_assets < 1e9])

r[[2]] <- felm(approved~factor(jumbo_7590)*I(asofdate>2016)+log(applicantincome)+log(amountofloan)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_jumbo[purposeofloan==1 & total_assets < 1e9])

r[[3]] <- felm(approved~log(0.01+jumbo_frac_assets)*I(asofdate>2016)+log(applicantincome)+log(amountofloan)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_jumbo[purposeofloan==1  & total_assets < 1e9])

r[[4]] <- felm(approved~high_jumbo_assets*I(asofdate>2016)+log(applicantincome)+log(amountofloan)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_jumbo[purposeofloan==3 & total_assets < 1e9])

r[[5]] <- felm(approved~factor(jumbo_7590)*I(asofdate>2016)+log(applicantincome)+log(amountofloan)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_jumbo[purposeofloan==3 & total_assets < 1e9])

r[[6]] <- felm(approved~log(0.01+jumbo_frac_assets)*I(asofdate>2016)+log(applicantincome)+log(amountofloan)|county_year_amt_bin+RSSD|0|county,      data=hmda_merged_jumbo[purposeofloan==3  & total_assets < 1e9])

stargazer(r,type="text",omit.stat = "ser")
## 
## ============================================================================================================
##                                                                      Dependent variable:                    
##                                                  -----------------------------------------------------------
##                                                                           approved                          
##                                                     (1)       (2)       (3)       (4)       (5)       (6)   
## ------------------------------------------------------------------------------------------------------------
## high_jumbo_assets                                                                                           
##                                                   (0.000)                       (0.000)                     
##                                                                                                             
## factor(jumbo_7590)2                                                                                         
##                                                             (0.000)                       (0.000)           
##                                                                                                             
## factor(jumbo_7590)3                                                                                         
##                                                             (0.000)                       (0.000)           
##                                                                                                             
## log(0.01 + jumbo_frac_assets)                                                                               
##                                                                       (0.000)                       (0.000) 
##                                                                                                             
## I(asofdate > 2016)                                                                                          
##                                                   (0.000)   (0.000)   (0.000)   (0.000)   (0.000)   (0.000) 
##                                                                                                             
## log(applicantincome)                             0.082***  0.082***  0.082***  0.130***  0.130***  0.130*** 
##                                                   (0.002)   (0.002)   (0.002)   (0.002)   (0.002)   (0.002) 
##                                                                                                             
## log(amountofloan)                                -0.105*** -0.105*** -0.105*** -0.177*** -0.177*** -0.179***
##                                                   (0.011)   (0.011)   (0.011)   (0.018)   (0.019)   (0.018) 
##                                                                                                             
## high_jumbo_assets:I(asofdate > 2016)             -0.044***                     -0.077***                    
##                                                   (0.004)                       (0.004)                     
##                                                                                                             
## factor(jumbo_7590)2:I(asofdate > 2016)                     -0.045***                     -0.087***          
##                                                             (0.004)                       (0.006)           
##                                                                                                             
## factor(jumbo_7590)3:I(asofdate > 2016)                     -0.042***                     -0.066***          
##                                                             (0.004)                       (0.004)           
##                                                                                                             
## log(0.01 + jumbo_frac_assets):I(asofdate > 2016)                     -0.009***                     -0.025***
##                                                                       (0.001)                       (0.002) 
##                                                                                                             
## ------------------------------------------------------------------------------------------------------------
## Observations                                      810,667   810,667   810,667   686,656   686,656   686,656 
## R2                                                 0.190     0.190     0.189     0.246     0.246     0.245  
## Adjusted R2                                        0.082     0.082     0.081     0.137     0.137     0.136  
## ============================================================================================================
## Note:                                                                            *p<0.1; **p<0.05; ***p<0.01
r <- list()

r[[1]] <- felm(approved~high_jumbo_assets*factor(asofdate)+log(applicantincome)+log(amountofloan)|county_year_amt_bin+RSSD|0|county,             data=hmda_merged_jumbo[purposeofloan==1 & asofdate %in% 2014:2021 & typeofloan==1])


r[[2]] <- felm(approved~high_jumbo_assets*factor(asofdate)+log(applicantincome)+log(amountofloan)|county_year_amt_bin+RSSD|0|county,             data=hmda_merged_jumbo[purposeofloan==3 & asofdate %in% 2014:2021 & typeofloan==1 ])

g1 <- coef_plot_1reg(r[[1]],"high_jumbo_assets:factor(asofdate)",2014)+ggtitle("Jumbo, Approval; New purchases")
g2 <- coef_plot_1reg(r[[2]],"high_jumbo_assets:factor(asofdate)",2014)+ggtitle("Jumbo, Approval; Refinances")

grid.arrange(g1,g2,nrow=1)

r <- list()

r[[1]] <- felm(loan_to_income~high_jumbo_assets*factor(asofdate)+log(applicantincome)|county_year_amt_bin+bank_county|0|county,             data=hmda_merged_jumbo[purposeofloan==1 & asofdate %in% 2014:2021 & typeofloan==1  & loan_to_income>1 & loan_to_income<10])


r[[2]] <- felm(loan_to_income~high_jumbo_assets*factor(asofdate)+log(applicantincome)|county_year_amt_bin+bank_county|0|county,             data=hmda_merged_jumbo[purposeofloan==3 & asofdate %in% 2014:2021 & typeofloan==1  & loan_to_income>1 & loan_to_income<10])

g1 <- coef_plot_1reg(r[[1]],"high_jumbo_assets:factor(asofdate)",2014)+ggtitle("Jumbo, Loan-to-income; New purchases")
g2 <- coef_plot_1reg(r[[2]],"high_jumbo_assets:factor(asofdate)",2014)+ggtitle("Jumbo, Loan-to-income; Refinances")

grid.arrange(g1,g2,nrow=1)

hmda_merged_conforming <- hmda_merged[dist_gse_limit< -10 & dist_gse_limit> -110]
hmda_merged_conforming[,loan_amount_bin:=ntile(amountofloan,10),by=county_year]
hmda_merged_conforming[,county_year_amt_bin:=paste(county_year,loan_amount_bin)]
r <- list()

r[[1]] <- felm(approved~high_jumbo_assets*factor(asofdate)+log(applicantincome)+log(amountofloan)|county_year_amt_bin+RSSD|0|county,             data=hmda_merged_conforming[purposeofloan==1 & asofdate %in% 2014:2021  & typeofloan==1 ])


r[[2]] <- felm(approved~high_jumbo_assets*factor(asofdate)+log(applicantincome)+log(amountofloan)|county_year_amt_bin+RSSD|0|county,             data=hmda_merged_conforming[purposeofloan==3 & asofdate %in% 2014:2021 & typeofloan==1 ])

g1 <- coef_plot_1reg(r[[1]],"high_jumbo_assets:factor(asofdate)",2014)+ggtitle("Conforming, Approval, New Purchase")
g2 <- coef_plot_1reg(r[[2]],"high_jumbo_assets:factor(asofdate)",2014)+ggtitle("Conforming, Approval, Refinance")

grid.arrange(g1,g2,nrow=1)

r <- list()

r[[1]] <- felm(loan_to_income~high_jumbo_assets*factor(asofdate)+log(applicantincome)|county_year_amt_bin+RSSD|0|county,             data=hmda_merged_conforming[purposeofloan==1 & asofdate %in% 2014:2021  & typeofloan==1  & loan_to_income>1 & loan_to_income<5])


r[[2]] <- felm(loan_to_income~high_jumbo_assets*factor(asofdate)+log(applicantincome)|county_year_amt_bin+RSSD|0|county,             data=hmda_merged_conforming[purposeofloan==3 & asofdate %in% 2014:2021 & typeofloan==1  & loan_to_income>1 & loan_to_income<5])

g1 <- coef_plot_1reg(r[[1]],"high_jumbo_assets:factor(asofdate)",2014)+ggtitle("Conforming, Loan-to-income, New Purchase")
g2 <- coef_plot_1reg(r[[2]],"high_jumbo_assets:factor(asofdate)",2014)+ggtitle("Conforming, Loan-to-income, Refinances")

grid.arrange(g1,g2,nrow=1)

upbr_merged <- merge(upbr,upbr_2016[,c("IDRSSD","jumbo_dependance","re_loan_frac_assets",
                                       "jumbo_frac_assets","jumbo_7590")],
                     by.x="IDRSSD",by.y="IDRSSD")

upbr_merged[,data_period:=as.Date(data_period,format="%m%d%Y")]

upbr_merged[,qtrs:=((year(data_period) - 2017) * 4) + quarter(data_period) -4 ]

upbr_merged[,re_loan_frac_assets:=sfr_first_lein_ma*100/total_assets]

upbr_merged[,unrealized_loss_equity:=(htm_value-htm_cost)/total_equity]

upbr_merged[,equity_assets:=total_equity/total_assets]
upbr_merged[,loans_assets:=total_loans/total_assets]
upbr_merged[,other_loans_assets:=(total_loans-sfr_first_lein_ma)/total_assets]
upbr_merged[,other_loans:=(total_loans-sfr_first_lein_ma)]

upbr_merged[,real_estate_loans_assets:=real_estate_loans/total_assets]
upbr_merged[,commercial_industrial_loans:=commercial_industrial_loans/total_assets]
upbr_merged[,cre_construction_land_dev_loans_assets:=cre_construction_land_dev_loans/total_assets]
upbr_merged[,mbs_assets:=mortgage_backed_securities/total_assets]
upbr_merged[,govt_assets:=govt_agency_securities/total_assets]
upbr_merged[,sfr_first_lein_ma_assets:=sfr_first_lein_ma/total_assets]
upbr_merged[,deposits_assets:=total_deposits/total_assets]
upbr_merged[,securities_assets:=total_securities/total_assets]
upbr_merged[,cash_assets:=cash/total_assets]

upbr_merged[,log_total_assets:=log(total_assets)]
upbr_merged[,log_maturity_security:=log(wa_maturiy_security)]
upbr_merged[,log_maturity_assets:=log(wa_maturiy_assets)]
upbr_merged[,log_salaries_employee_benefits:=log(1+salaries_employee_benefits)]
upbr_merged[,log_no_of_full_time_employees:=log(1+no_of_full_time_employees)]
upbr_merged[,rwa_assets:=risk_weighted_assets/total_assets]
upbr_merged[,high_jumbo_assets:=ifelse(jumbo_dependance %in% high_jumbo_cutoff,1,0)]
dep_var <- "mtg_delinq_pct"
formula <- as.formula(paste( dep_var,"~ high_jumbo_assets * factor(qtrs) + log(total_assets) + equity_assets | IDRSSD + data_period | 0 | IDRSSD"))
lower_bound <- quantile(upbr_merged$mtg_delinq_pct, probs = 0.01, na.rm = TRUE)
upper_bound <- quantile(upbr_merged$mtg_delinq_pct, probs = 0.99, na.rm = TRUE)
filtered_data <- upbr_merged %>%
      filter((mtg_delinq_pct > lower_bound & mtg_delinq_pct < upper_bound) & qtrs <= 16 & qtrs>= -12)
coef_plot_1reg(felm(formula, data = filtered_data[qtrs <= 16 & qtrs>= -12]),"high_jumbo_assets:factor(qtrs)",-12)+ggtitle(dep_var)

dep_var <- "mtg_delinq_pct"
formula <- as.formula(paste( dep_var,"~ high_jumbo_assets * factor(qtrs) + log(total_assets) + equity_assets | IDRSSD + data_period | 0 | IDRSSD"))
lower_bound <- quantile(upbr_merged$mtg_delinq_pct, probs = 0.01, na.rm = TRUE)
upper_bound <- quantile(upbr_merged$mtg_delinq_pct, probs = 0.99, na.rm = TRUE)
filtered_data <- upbr_merged %>%
      filter((mtg_delinq_pct > lower_bound & mtg_delinq_pct < upper_bound) & qtrs <= 16 & qtrs>= -12 & total_assets>1e6 & total_assets<1e7)
coef_plot_1reg(felm(formula, data = filtered_data[qtrs <= 16 & qtrs>= -12]),"high_jumbo_assets:factor(qtrs)",-12)+ggtitle(dep_var)

run_bank_fe_regression  <- function(dep_var_name,min_qt,max_qt,sample_frac) {
formula <- as.formula(paste(dep_var_name, "~ high_jumbo_assets * factor(qtrs) + log(total_assets) + equity_assets | IDRSSD + data_period | 0 | IDRSSD"))
  
  lower_bound <- quantile(upbr_merged[[dep_var_name]], probs = 0.01, na.rm = TRUE)
  upper_bound <- quantile(upbr_merged[[dep_var_name]], probs = 0.99, na.rm = TRUE)
  
  filtered_data <- upbr_merged %>%
    filter((.data[[dep_var_name]] > lower_bound & .data[[dep_var_name]] < upper_bound) & qtrs <= max_qt & qtrs >= min_qt)
  
  rssd_list <- unique(filtered_data$IDRSSD)
  rssd_sample <- sample(rssd_list,size=length(rssd_list)*sample_frac,replace = F)
  
  plot_result <- coef_plot_1reg(felm(formula, data = filtered_data[IDRSSD %in% rssd_sample]), "high_jumbo_assets:factor(qtrs)", min_qt) + ggtitle(dep_var_name)
  
  return(plot_result)
}
dependent_vars <- c("loans_assets", "re_loans_sfr_assets", "other_loans_assets",
                     "nii_assets", "securities_assets","interest_expense_assets_2", 
                    "personnel_expense_to_assets","assets_per_employee",
                    "wa_maturiy_assets","log_no_of_full_time_employees","total_capital_ratio",
                    "roe","unused_commitments_assets_total","uninsured_deposits_assets",
                    "unused_commitments_assets_cre","unused_commitments_assets_sfr_construction",
                    "ci_assets","deposits_assets")

for(dv in dependent_vars) {
  print(run_bank_fe_regression(dv,-7,8,1))
}

# creates a data.table with county-level house prices (zhvi) for years 2015 and 2022

zrd <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Zillow Research Data/County_zhvi_uc_sfr_tier_0.33_0.67_sm_sa_month.csv")


zrd[,fips:=paste0(str_pad(StateCodeFIPS,2,pad="0"),str_pad(MunicipalCodeFIPS,3,pad="0"))]
zrd[,c("RegionID","RegionName","SizeRank","StateName","RegionType","State","Metro","StateCodeFIPS","MunicipalCodeFIPS")] <- NULL
zrd <- melt(zrd,id.vars = c("fips"))
zrd$month <- as.Date(as.character(zrd$variable),origin = "1970-01-01")
zrd$variable <- NULL
names(zrd) <- c("county","zhvi","month")
zrd[,asofdate:=year(month)]

zrd <- zrd[,.(zhvi=mean(zhvi,na.rm=T)),by=.(county,asofdate)]
zrd[, rank := frank(-zhvi, ties.method = "min"), by = asofdate]



hmda_county <- hmda[actiontaken==1 & purposeofloan %in% c(1) & typeofloan==1 & dist_gse_limit>10,.(amountofloan=sum(amountofloan,na.rm=T)),
                    by=.(RSSD,asofdate,county)]
temp <- hmda_post[actiontaken==1 & purposeofloan %in% c(1) & typeofloan==1 & dist_gse_limit>10,.(amountofloan=sum(amountofloan,na.rm=T)),
                    by=.(RSSD,asofdate,county)]

hmda_county <- rbind(hmda_county,temp)

hmda_county[, total_loan_by_rssd_asofdate := sum(amountofloan), by = .(RSSD, asofdate)]
hmda_county[, market_share := amountofloan / total_loan_by_rssd_asofdate]
hmda_county[, squared_market_share := market_share^2, by = .(RSSD, asofdate, county)]

hmda_county <- merge(hmda_county, zrd[,c("rank","county","asofdate","zhvi")],by=c("county","asofdate"))

# Step 3: Aggregate to calculate HHI for each RSSD-asofdate
HHI <- hmda_county[market_share>0, .(HHI = sum(squared_market_share),.N,zhvi=mean(zhvi),rank=mean(rank)), by = .(RSSD, asofdate)]

HHI <- merge(HHI,upbr_2016[,c("IDRSSD","jumbo_dependance","re_loan_frac_assets","jumbo_frac_assets","total_assets","jumbo_7590")],
                      by.x="RSSD",by.y="IDRSSD",all.x=T)

HHI[,high_jumbo_assets:=ifelse(jumbo_dependance %in% high_jumbo_cutoff,1,0)]



# set.seed(1982)
rssd_list <- unique(HHI$RSSD)
rssd_sample <- sample(rssd_list,size=length(rssd_list)*1,replace = F)


r <- list()
r[[1]] <- felm(HHI~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI[RSSD %in% rssd_sample])
r[[2]] <- felm(log(N)~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI[RSSD %in% rssd_sample])
# r[[3]] <- felm(log(rank)~factor(jumbo_7590)*I(asofdate>2016)|RSSD+asofdate|0|RSSD,data=HHI[RSSD %in% rssd_sample])

stargazer(r,type="text",no.space = T,column.labels = c("New Purchases","Refinacnes"),omit.stat = "ser")
## 
## ===================================================================
##                                            Dependent variable:     
##                                        ----------------------------
##                                              HHI          log(N)   
##                                         New Purchases   Refinacnes 
##                                              (1)           (2)     
## -------------------------------------------------------------------
## factor(jumbo_7590)2                                                
##                                            (0.000)       (0.000)   
## factor(jumbo_7590)3                                                
##                                            (0.000)       (0.000)   
## I(asofdate > 2016)                                                 
##                                            (0.000)       (0.000)   
## factor(jumbo_7590)2:I(asofdate > 2016)    0.043***       -0.075**  
##                                            (0.010)       (0.030)   
## factor(jumbo_7590)3:I(asofdate > 2016)    0.073***      -0.135***  
##                                            (0.010)       (0.034)   
## -------------------------------------------------------------------
## Observations                               20,901         20,901   
## R2                                          0.647         0.842    
## Adjusted R2                                 0.578         0.811    
## ===================================================================
## Note:                                   *p<0.1; **p<0.05; ***p<0.01
r[[3]] <-felm(HHI~high_jumbo_assets*factor(asofdate)|RSSD+asofdate|0|RSSD,data=HHI[RSSD %in% rssd_sample])
r[[4]] <-felm(log(N)~high_jumbo_assets*factor(asofdate)|RSSD+asofdate|0|RSSD,data=HHI[RSSD %in% rssd_sample])
# r[[6]] <-felm(log(rank)~high_jumbo_assets*factor(asofdate)|RSSD+asofdate|0|RSSD,data=HHI[RSSD %in% rssd_sample])

coef_plot_1reg(r[[3]],"high_jumbo_assets:factor(asofdate)",2012)+ggtitle("HHI, Jumbo")

coef_plot_1reg(r[[4]],"high_jumbo_assets:factor(asofdate)",2012)+ggtitle("No counties, Jumbo")

# coef_plot_1reg(r[[6]],"high_jumbo_assets:factor(asofdate)",2012)