rm(list=ls())
library(RPostgres)
library(data.table)
library(getPass)
library(lfe)
library(stargazer)
library(ggplot2)
library(fst)
library(stringr)
library(stringi)
library(lubridate)
files = paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/Ultimate Panel Data/",c("2014.fst","2015.fst","2016.fst","2017.fst","2018.fst","2019.fst"))

panel = lapply(files, read_fst, as.data.table = TRUE,columns=c("asofdate","respondentid","rssd","agencycode","reportername","topholderrssdid","parentrssd")) 
panel <- do.call(rbind , panel)

panel[,rssd:=as.numeric(rssd)]
panel[,parentrssd:=as.numeric(parentrssd)]
panel[,topholderrssdid:=as.numeric(topholderrssdid)]
panel[,asofdate:=as.integer(asofdate)]
panel[,hmda_id:=ifelse(asofdate<=2017,paste0(agencycode,"-",respondentid),respondentid)]

panel <- panel[!duplicated(panel[,c("hmda_id","asofdate")])]

panelcomplete1 <- panel[,c("hmda_id","rssd","parentrssd","topholderrssdid")]
panelcomplete1 <- panelcomplete1[!duplicated(panelcomplete1[,c("hmda_id")])]



files = paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/Ultimate Panel Data/",paste0(as.character(2004:2013),".fst"))

panel = lapply(files, read_fst, as.data.table = TRUE,columns=c("asofdate","respondentid","rssd","agencycode","parentidentifier")) 
panel <- do.call(rbind , panel)

panel[,rssd:=as.numeric(rssd)]
panel[,parentrssd:=as.numeric(parentidentifier)]
panel[,asofdate:=as.integer(asofdate)]
panel[,hmda_id:=ifelse(asofdate<=2017,paste0(agencycode,"-",respondentid),respondentid)]

panel <- panel[!duplicated(panel[,c("hmda_id","asofdate")])]

panel <- panel[,c("hmda_id","rssd","parentrssd")]
panel[,topholderrssdid:=NA]
panel <- panel[!duplicated(panel[,c("hmda_id")])]

panel <- panel[!hmda_id %in% panelcomplete1$hmda_id]

panelcomplete1 <- rbind(panelcomplete1,panel)



files = paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/Ultimate Panel Data/",paste0(as.character(1995:2019),".fst"))

panel = lapply(files, read_fst, as.data.table = TRUE,columns=c("asofdate","respondentid","agencycode")) 
panel <- do.call(rbind , panel)

panel[,asofdate:=as.integer(asofdate)]
panel[,hmda_id:=ifelse(asofdate<=2017,paste0(agencycode,"-",respondentid),respondentid)]

panel <- panel[!duplicated(panel[,c("hmda_id","asofdate")])]

panel <- merge(panel,panelcomplete1,by="hmda_id",all.x = T)

panel[is.na(panel)]<-0

panel[,unique_id:=ifelse(parentrssd>0,parentrssd,ifelse(rssd>0,rssd,hmda_id))]

panel <- panel[!duplicated(panel[,c("hmda_id")])]
setkey(panel,hmda_id)

rm(panelcomplete1)

files <- NULL
files  <- list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/pre2004/OO_NP/",full.names = TRUE)
files  <- c(files,list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/pre2004/OO_RF/",full.names = TRUE))
files  <- c(files,list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/OO_NP/",full.names = TRUE))
files  <- c(files,list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/OO_RF/",full.names = TRUE))


hmda = lapply(files, read_fst, as.data.table = TRUE, columns=c("asofdate","respondentid","agencycode","actiontaken","censustract","purposeofloan"))
hmda <- do.call(rbind , hmda)

hmda <- hmda[actiontaken=="1"]

hmda[,hmda_id:=ifelse(asofdate<=2017,paste0(agencycode,"-",respondentid),respondentid)]

hmda <- hmda[nchar(censustract)==11]

hmda[,countycode:=substr(censustract,1,5)]

setkey(hmda,countycode)

cbsa_fips <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/cbsa_countyfips.csv")
cbsa_fips[,fips:=ifelse(nchar(fips)==4,paste0("0",fips),paste0(fips))]
setkey(cbsa_fips,fips)

hmda <- merge(hmda,cbsa_fips,by.x="countycode",by.y="fips",all.x=T)

setkey(hmda,hmda_id)

hmda <- merge(hmda,panel[,c("hmda_id","unique_id")],by=c("hmda_id"))
yearrank <- hmda[,.N,by=.(asofdate,cbsa,unique_id)]
yearrank[,rank:=frankv(N,order=-1),by=.(asofdate,cbsa)]
yearrank[,rank:=ifelse(rank<=5,"top 5",ifelse(rank<=10,"top 5 to 10","other"))]
yearrank <- yearrank[asofdate %in% 1999:2021 & !is.na(cbsa)]

# yearranktop5 <- yearrank[rank=="top 5"]
hhi <- NULL
for(yr in unique(yearrank$asofdate)) {
  for(c in unique(yearrank$cbsa)) {
    temp <- yearrank[cbsa==c & asofdate==yr]
    temp[,ms2:=N/sum(N)]
    temp[,ms2:=ms2*ms2]
    h <-c(c,yr, sum(temp$ms2))
    if(is.null(hhi)) {
      hhi <- h
    } else {
      hhi <- rbind(hhi,h)
    }
  }
}

hhi <- as.data.table(hhi)
names(hhi) <- c("cbsa","asofdate","hhi")

yearrank <- yearrank[,.(N=sum(N)),by=.(asofdate,cbsa,rank)]
yearrank <- dcast(yearrank,asofdate+cbsa~rank)
yearrank[,top5pct:=`top 5`/(other+`top 5`+`top 5 to 10`)]
yearrank[,top10pct:=(`top 5`+`top 5 to 10`)/(other+`top 5`+`top 5 to 10`)]
yearrank <- yearrank[,c("asofdate","cbsa","top5pct","top10pct")]



yearranklag <- yearrank[,c("asofdate","cbsa","top5pct","top10pct")]
yearranklag[,asofdate:=asofdate+1]
names(yearranklag) <- c("asofdate","cbsa","top5pct_1","top10pct_1")
yearrank <- merge(yearrank,yearranklag,by=c("cbsa","asofdate"))

yearrank <- merge(yearrank,hhi,by=c("asofdate","cbsa"))

yearrank[,top5change:=log(top5pct/top5pct_1)]
yearrank[,top10change:=log(top10pct/top10pct_1)]
freddie <- list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Freddie/Acq",full.names = TRUE)
freddie = lapply(freddie, read_fst,as.data.table=T, columns = c("fico","dt_first_pi","cd_msa","ltv","dti","orig_upb","int_rt","prop_type","zipcode","id_loan","orig_loan_term","seller_name","loan_purpose","cltv"))
freddie <- do.call(rbind , freddie)

freddie <- freddie[orig_loan_term==360 & prop_type=="SF"]
freddie[,loanyr:=year(dt_first_pi)]
freddie[,msa:=cd_msa]
freddie[,c("orig_loan_term","dt_first_pi","prop_type","cd_msa"):=list(NULL)]
freddie[,seller_name:= ifelse(seller_name %in% c("JPMORGAN CHASE BANK, NA","JPMORGAN CHASE BANK, NATIONAL ASSOCIATION","CHASE MANHATTAN MORTGAGE CORPORATION","CHASE HOME FINANCE LLC","CHASE HOME FINANCE","CHASE HOME FINANCE, LLC","JPMORGAN CHASE BANK, NATIONAL ASSOCIATION","JPMORGAN CHASE BANK, N.A."),"JPMORGAN CHASE BANK, NA",seller_name)]
freddie[,seller_name:= ifelse(seller_name %in%  c("CITIMORTGAGE, INC.","ABN AMRO, NKA CITIMORTGAGE INC.","CITIMORTGAGE, INC."),"CITIMORTGAGE, INC.",seller_name)]
freddie[,seller_name:= ifelse(seller_name %in% c("WELLS FARGO HOME MORTGAGE, INC.","WELLS FARGO BANK, N.A."),"WELLS FARGO BANK, N.A.",seller_name)]

freddie[,bank:=seller_name]
# freddie[,bank:=ifelse(bank %in% unique(cbsa_bnk$bank),bank,"other")]
freddie[,freddie:=1]
freddie[,newpurchase:=ifelse(loan_purpose=="P",1,0)]
freddie[,ltvorg:=ltv]
freddie[,ltv:=cltv]
fannie <- list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Fannie/Acq",full.names = TRUE)
fannie = lapply(fannie, read_fst,as.data.table=T, columns = c("loan_identifier","seller_name","original_interest_rate","original_upb","original_loan_term","origination_date","original_ltv","original_dti","credit_score","property_type","property_state","zip_code","msa","loan_purpose","original_cltv"))
fannie <- do.call(rbind , fannie)

fannie <- fannie[original_loan_term==360 & property_type=="SF"]
fannie[,loanyr:=year(origination_date)]
fannie[,seller_name:= ifelse(seller_name %in% c("JPMORGAN CHASE BANK, NA","JPMORGAN CHASE BANK, NATIONAL ASSOCIATION","CHASE MANHATTAN MORTGAGE CORPORATION","CHASE HOME FINANCE LLC","CHASE HOME FINANCE","CHASE HOME FINANCE, LLC","JPMORGAN CHASE BANK, NATIONAL ASSOCIATION","JPMORGAN CHASE BANK, N.A."),"JPMORGAN CHASE BANK, NA",seller_name)]
fannie[,seller_name:= ifelse(seller_name %in%  c("CITIMORTGAGE, INC.","ABN AMRO, NKA CITIMORTGAGE INC.","CITIMORTGAGE, INC."),"CITIMORTGAGE, INC.",seller_name)]
fannie[,seller_name:= ifelse(seller_name %in% c("WELLS FARGO HOME MORTGAGE, INC.","WELLS FARGO BANK, N.A."),"WELLS FARGO BANK, N.A.",seller_name)]

fannie[,bank:=seller_name]
# fannie[,bank:=ifelse(bank %in% unique(cbsa_bnk$bank),bank,"other")]
fannie[,freddie:=0]
fannie[,ltvorg:=original_ltv]
setnames(fannie,"zip_code","zipcode")
setnames(fannie,"original_ltv","ltv")
setnames(fannie,"original_upb","orig_upb")
setnames(fannie,"original_interest_rate","int_rt")
setnames(fannie,"loan_identifier","id_loan")
setnames(fannie,"credit_score","fico")
setnames(fannie,"original_dti","dti")
setnames(fannie,"original_cltv","cltv")

fannie[,c("property_type","property_state","original_loan_term","origination_date"):=list(NULL)]
fannie[,loan_purpose:=ifelse(loan_purpose=="R","N",loan_purpose)]

fannie <- fannie[loan_purpose != "U"]
fannie[,id_loan:=as.character(id_loan)]

fannie[,newpurchase:=ifelse(loan_purpose=="P",1,0)]
fannie[,ltv:=cltv]

1 GSE

1.1 Top 5 - Level

regsample <- rbind(freddie,fannie)

regsample <- merge(regsample,yearrank,by.x=c("msa","loanyr"),by.y=c("cbsa","asofdate"))


regsample[,top5bin:=floor(top5pct*100/5)*5]
regsample[,top10bin:=floor(top10pct*100/5)*5]
regsample[,top5changebin:=floor(top5change*100/10)*10]
regsample[,hhibin:=floor(hhi*100/5)*5]
regsample[,hhibin:=ifelse(hhibin>15,20,hhibin)]



r <- list()
r[[1]] <- felm(int_rt~factor(top5bin)+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==1])
r[[2]] <- felm(int_rt~factor(top5bin)+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==0])
r[[3]] <- felm(int_rt~factor(top5changebin)+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==1 & abs(top5changebin)<=30])
r[[4]] <- felm(int_rt~factor(top5changebin)+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==0  & abs(top5changebin)<=30])

.coef_plot_2reg(r[[1]],"New purchases",r[[2]],"Refinances","factor(top5bin)",15)

1.2 Top 5 -Change

.coef_plot_2reg(r[[3]],"New purchases",r[[4]],"Refinances","factor(top5changebin)",-30)

1.3 HHI - Level

r[[5]] <- felm(int_rt~factor(hhibin)+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==1])
r[[6]] <- felm(int_rt~factor(hhibin)+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==0])

.coef_plot_2reg(r[[5]],"New purchases",r[[6]],"Refinances","factor(hhibin)",0)

r[[7]] <- felm(int_rt~hhi+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==1])

.printtable(r[[7]])
## 
## =========================================
##                   Dependent variable:    
##               ---------------------------
## -----------------------------------------
## hhi                    -1.121***         
##                         (0.176)          
## fico                  -0.00003***        
##                        (0.00000)         
## ltv                    0.004***          
##                        (0.0001)          
## dti                    0.0001***         
##                        (0.00001)         
## log(orig_upb)          -0.239***         
##                         (0.005)          
## freddie                  0.006           
##                         (0.004)          
## newpurchase                              
##                         (0.000)          
## -----------------------------------------
##                                          
## Observations          10,813,455         
## Adjusted R2              0.910           
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01
## 

1.4 Moodys

moodys <- read_fst("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Moodys/0001/LoanChars.fst",as.data.table = TRUE, columns=c("loanid","loanoriginationdate","zipcode","originalloanbalance","originalcltv","state","originator","armflag","originalfico","originalterm","originalltv","documentationtype","originalinterestrate","purposetype","assettype")) 

cbsa <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/ZIP_CBSA.csv")
cbsa[,ZIP:=ifelse(nchar(ZIP)==3,paste0("00",ZIP),ifelse(nchar(ZIP)==4,paste0("0",ZIP),paste0(ZIP)))]
setorder(cbsa,ZIP,-RES_RATIO)
cbsa <- cbsa[!duplicated(cbsa[,c("ZIP")])]
cbsa[,c("RES_RATIO","BUS_RATIO","OTH_RATIO","TOT_RATIO"):=list(NULL)]
names(cbsa) <- c("zipcode","msa")
moodys <- merge(moodys,cbsa,by=c("zipcode"))

# moodys <- moodys[originalterm==360 & armflag=="F"]
moodys[,loanyr:=as.numeric(substr(loanoriginationdate,1,4))]
moodys[,seller_name:= originator]
moodys[,seller_name:= ifelse(seller_name %in% c("JPMORGAN CHASE BANK, NA","JPMORGAN CHASE BANK, NATIONAL ASSOCIATION","CHASE MANHATTAN MORTGAGE CORPORATION","CHASE HOME FINANCE LLC","CHASE HOME FINANCE","CHASE HOME FINANCE, LLC","JPMORGAN CHASE BANK, NATIONAL ASSOCIATION","JPMORGAN CHASE BANK, N.A.","JP MORGAN CHASE BANK NA","CHASE MANHATTAN MORTGAGE CORP"),"JPMORGAN CHASE BANK, NA",seller_name)]
moodys[,seller_name:= ifelse(seller_name %in%  c("B OF A"),"BANK OF AMERICA, N.A.",seller_name)]
moodys[,seller_name:= ifelse(seller_name %in% c("WELLS FARGO HOME MORTGAGE, INC.","WELLS FARGO BANK, N.A.","WELLS FARGO BANK N.A"," WELLS FARGO HOME MTG, INC"),"WELLS FARGO BANK, N.A.",seller_name)]

moodys[,int_rt:=originalinterestrate]
moodys[,dti:=0]
moodys[,ltv:=originalcltv]
moodys[,ltvorg:=originalltv]
moodys[,fico:=originalfico]
moodys[,orig_upb:= originalloanbalance]

# moodys[,c("originalinterestrate","originalltv","originalfico","originalloanbalance","msacode","csacode","divcode","loanoriginationdate","armflag","originalterm"):=list(NULL)]

moodys[,fulldocumentation:=ifelse(documentationtype=="FU",1,0)]
gc()
##              used    (Mb)  gc trigger    (Mb)   max used    (Mb)
## Ncells   39152162  2091.0    61864524  3304.0   43297122  2312.4
## Vcells 6026742338 45980.4 10084986618 76942.4 7813466244 59612.1
moodys[,bank:=seller_name]
# moodys[,bank:=ifelse(bank %in% unique(cbsa_bnk$bank),bank,"other")]
moodys[,newpurchase:=ifelse(purposetype=="PUR",1,0)]

moodys <- moodys[assettype != "UN"]
regsample <- merge(moodys,yearrank,by.x=c("msa","loanyr"),by.y=c("cbsa","asofdate"))

regsample[,bank_msa:=paste(seller_name,msa)]
regsample[,assettype:=ifelse(assettype=="Prime","0Prime",assettype)]
regsample[,top5bin:=floor(top5pct*100/5)*5]
regsample[,top10bin:=floor(top10pct*100/5)*5]
regsample[,top5changebin:=floor(top5change*100/10)*10]


r <- list()
r[[1]] <- felm(int_rt~factor(top5bin)+fico+ltv+fulldocumentation+log(orig_upb)+factor(assettype)|seller_name+loanyr+msa|0|msa,data=regsample[originalterm==360 & armflag=="F" & newpurchase==1])  
r[[2]] <- felm(int_rt~factor(top5bin)+fico+ltv+fulldocumentation+log(orig_upb)+factor(assettype)|seller_name+loanyr+msa|0|msa,data=regsample[originalterm==360 & armflag=="F"  & newpurchase==0])
  
r[[3]] <- felm(int_rt~factor(top5changebin)+fico+ltv+fulldocumentation+log(orig_upb)+factor(assettype)|seller_name+loanyr+msa|0|msa,data=regsample[originalterm==360 & armflag=="F" & newpurchase==1  & abs(top5changebin)<=30])  
r[[4]] <- felm(int_rt~factor(top5changebin)+fico+ltv+fulldocumentation+log(orig_upb)+factor(assettype)|seller_name+loanyr+msa|0|msa,data=regsample[originalterm==360 & armflag=="F"  & newpurchase==0  & abs(top5changebin)<=30])
# .printtable(r)

.coef_plot_2reg(r[[1]],"New purchases",r[[2]],"Refinances","factor(top5bin)",15)

.coef_plot_2reg(r[[3]],"New purchases",r[[4]],"Refinances","factor(top5changebin)",-30)