rm(list=ls())
library(RPostgres)
library(data.table)
library(getPass)
library(lfe)
library(stargazer)
library(ggplot2)
library(fst)
library(stringr)
library(stringi)
library(lubridate)
files = paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/Ultimate Panel Data/",c("2014.fst","2015.fst","2016.fst","2017.fst","2018.fst","2019.fst"))
panel = lapply(files, read_fst, as.data.table = TRUE,columns=c("asofdate","respondentid","rssd","agencycode","reportername","topholderrssdid","parentrssd"))
panel <- do.call(rbind , panel)
panel[,rssd:=as.numeric(rssd)]
panel[,parentrssd:=as.numeric(parentrssd)]
panel[,topholderrssdid:=as.numeric(topholderrssdid)]
panel[,asofdate:=as.integer(asofdate)]
panel[,hmda_id:=ifelse(asofdate<=2017,paste0(agencycode,"-",respondentid),respondentid)]
panel <- panel[!duplicated(panel[,c("hmda_id","asofdate")])]
panelcomplete1 <- panel[,c("hmda_id","rssd","parentrssd","topholderrssdid")]
panelcomplete1 <- panelcomplete1[!duplicated(panelcomplete1[,c("hmda_id")])]
files = paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/Ultimate Panel Data/",paste0(as.character(2004:2013),".fst"))
panel = lapply(files, read_fst, as.data.table = TRUE,columns=c("asofdate","respondentid","rssd","agencycode","parentidentifier"))
panel <- do.call(rbind , panel)
panel[,rssd:=as.numeric(rssd)]
panel[,parentrssd:=as.numeric(parentidentifier)]
panel[,asofdate:=as.integer(asofdate)]
panel[,hmda_id:=ifelse(asofdate<=2017,paste0(agencycode,"-",respondentid),respondentid)]
panel <- panel[!duplicated(panel[,c("hmda_id","asofdate")])]
panel <- panel[,c("hmda_id","rssd","parentrssd")]
panel[,topholderrssdid:=NA]
panel <- panel[!duplicated(panel[,c("hmda_id")])]
panel <- panel[!hmda_id %in% panelcomplete1$hmda_id]
panelcomplete1 <- rbind(panelcomplete1,panel)
files = paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/Ultimate Panel Data/",paste0(as.character(1995:2019),".fst"))
panel = lapply(files, read_fst, as.data.table = TRUE,columns=c("asofdate","respondentid","agencycode"))
panel <- do.call(rbind , panel)
panel[,asofdate:=as.integer(asofdate)]
panel[,hmda_id:=ifelse(asofdate<=2017,paste0(agencycode,"-",respondentid),respondentid)]
panel <- panel[!duplicated(panel[,c("hmda_id","asofdate")])]
panel <- merge(panel,panelcomplete1,by="hmda_id",all.x = T)
panel[is.na(panel)]<-0
panel[,unique_id:=ifelse(parentrssd>0,parentrssd,ifelse(rssd>0,rssd,hmda_id))]
panel <- panel[!duplicated(panel[,c("hmda_id")])]
setkey(panel,hmda_id)
rm(panelcomplete1)
files <- NULL
files <- list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/pre2004/OO_NP/",full.names = TRUE)
files <- c(files,list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/pre2004/OO_RF/",full.names = TRUE))
files <- c(files,list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/OO_NP/",full.names = TRUE))
files <- c(files,list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/OO_RF/",full.names = TRUE))
hmda = lapply(files, read_fst, as.data.table = TRUE, columns=c("asofdate","respondentid","agencycode","actiontaken","censustract","purposeofloan"))
hmda <- do.call(rbind , hmda)
hmda <- hmda[actiontaken=="1"]
hmda[,hmda_id:=ifelse(asofdate<=2017,paste0(agencycode,"-",respondentid),respondentid)]
hmda <- hmda[nchar(censustract)==11]
hmda[,countycode:=substr(censustract,1,5)]
setkey(hmda,countycode)
cbsa_fips <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/cbsa_countyfips.csv")
cbsa_fips[,fips:=ifelse(nchar(fips)==4,paste0("0",fips),paste0(fips))]
setkey(cbsa_fips,fips)
hmda <- merge(hmda,cbsa_fips,by.x="countycode",by.y="fips",all.x=T)
setkey(hmda,hmda_id)
hmda <- merge(hmda,panel[,c("hmda_id","unique_id")],by=c("hmda_id"))
yearrank <- hmda[,.N,by=.(asofdate,cbsa,unique_id)]
yearrank[,rank:=frankv(N,order=-1),by=.(asofdate,cbsa)]
yearrank[,rank:=ifelse(rank<=5,"top 5",ifelse(rank<=10,"top 5 to 10","other"))]
yearrank <- yearrank[asofdate %in% 1999:2021 & !is.na(cbsa)]
# yearranktop5 <- yearrank[rank=="top 5"]
hhi <- NULL
for(yr in unique(yearrank$asofdate)) {
for(c in unique(yearrank$cbsa)) {
temp <- yearrank[cbsa==c & asofdate==yr]
temp[,ms2:=N/sum(N)]
temp[,ms2:=ms2*ms2]
h <-c(c,yr, sum(temp$ms2))
if(is.null(hhi)) {
hhi <- h
} else {
hhi <- rbind(hhi,h)
}
}
}
hhi <- as.data.table(hhi)
names(hhi) <- c("cbsa","asofdate","hhi")
yearrank <- yearrank[,.(N=sum(N)),by=.(asofdate,cbsa,rank)]
yearrank <- dcast(yearrank,asofdate+cbsa~rank)
yearrank[,top5pct:=`top 5`/(other+`top 5`+`top 5 to 10`)]
yearrank[,top10pct:=(`top 5`+`top 5 to 10`)/(other+`top 5`+`top 5 to 10`)]
yearrank <- yearrank[,c("asofdate","cbsa","top5pct","top10pct")]
yearranklag <- yearrank[,c("asofdate","cbsa","top5pct","top10pct")]
yearranklag[,asofdate:=asofdate+1]
names(yearranklag) <- c("asofdate","cbsa","top5pct_1","top10pct_1")
yearrank <- merge(yearrank,yearranklag,by=c("cbsa","asofdate"))
yearrank <- merge(yearrank,hhi,by=c("asofdate","cbsa"))
yearrank[,top5change:=log(top5pct/top5pct_1)]
yearrank[,top10change:=log(top10pct/top10pct_1)]
freddie <- list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Freddie/Acq",full.names = TRUE)
freddie = lapply(freddie, read_fst,as.data.table=T, columns = c("fico","dt_first_pi","cd_msa","ltv","dti","orig_upb","int_rt","prop_type","zipcode","id_loan","orig_loan_term","seller_name","loan_purpose","cltv"))
freddie <- do.call(rbind , freddie)
freddie <- freddie[orig_loan_term==360 & prop_type=="SF"]
freddie[,loanyr:=year(dt_first_pi)]
freddie[,msa:=cd_msa]
freddie[,c("orig_loan_term","dt_first_pi","prop_type","cd_msa"):=list(NULL)]
freddie[,seller_name:= ifelse(seller_name %in% c("JPMORGAN CHASE BANK, NA","JPMORGAN CHASE BANK, NATIONAL ASSOCIATION","CHASE MANHATTAN MORTGAGE CORPORATION","CHASE HOME FINANCE LLC","CHASE HOME FINANCE","CHASE HOME FINANCE, LLC","JPMORGAN CHASE BANK, NATIONAL ASSOCIATION","JPMORGAN CHASE BANK, N.A."),"JPMORGAN CHASE BANK, NA",seller_name)]
freddie[,seller_name:= ifelse(seller_name %in% c("CITIMORTGAGE, INC.","ABN AMRO, NKA CITIMORTGAGE INC.","CITIMORTGAGE, INC."),"CITIMORTGAGE, INC.",seller_name)]
freddie[,seller_name:= ifelse(seller_name %in% c("WELLS FARGO HOME MORTGAGE, INC.","WELLS FARGO BANK, N.A."),"WELLS FARGO BANK, N.A.",seller_name)]
freddie[,bank:=seller_name]
# freddie[,bank:=ifelse(bank %in% unique(cbsa_bnk$bank),bank,"other")]
freddie[,freddie:=1]
freddie[,newpurchase:=ifelse(loan_purpose=="P",1,0)]
freddie[,ltvorg:=ltv]
freddie[,ltv:=cltv]
fannie <- list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Fannie/Acq",full.names = TRUE)
fannie = lapply(fannie, read_fst,as.data.table=T, columns = c("loan_identifier","seller_name","original_interest_rate","original_upb","original_loan_term","origination_date","original_ltv","original_dti","credit_score","property_type","property_state","zip_code","msa","loan_purpose","original_cltv"))
fannie <- do.call(rbind , fannie)
fannie <- fannie[original_loan_term==360 & property_type=="SF"]
fannie[,loanyr:=year(origination_date)]
fannie[,seller_name:= ifelse(seller_name %in% c("JPMORGAN CHASE BANK, NA","JPMORGAN CHASE BANK, NATIONAL ASSOCIATION","CHASE MANHATTAN MORTGAGE CORPORATION","CHASE HOME FINANCE LLC","CHASE HOME FINANCE","CHASE HOME FINANCE, LLC","JPMORGAN CHASE BANK, NATIONAL ASSOCIATION","JPMORGAN CHASE BANK, N.A."),"JPMORGAN CHASE BANK, NA",seller_name)]
fannie[,seller_name:= ifelse(seller_name %in% c("CITIMORTGAGE, INC.","ABN AMRO, NKA CITIMORTGAGE INC.","CITIMORTGAGE, INC."),"CITIMORTGAGE, INC.",seller_name)]
fannie[,seller_name:= ifelse(seller_name %in% c("WELLS FARGO HOME MORTGAGE, INC.","WELLS FARGO BANK, N.A."),"WELLS FARGO BANK, N.A.",seller_name)]
fannie[,bank:=seller_name]
# fannie[,bank:=ifelse(bank %in% unique(cbsa_bnk$bank),bank,"other")]
fannie[,freddie:=0]
fannie[,ltvorg:=original_ltv]
setnames(fannie,"zip_code","zipcode")
setnames(fannie,"original_ltv","ltv")
setnames(fannie,"original_upb","orig_upb")
setnames(fannie,"original_interest_rate","int_rt")
setnames(fannie,"loan_identifier","id_loan")
setnames(fannie,"credit_score","fico")
setnames(fannie,"original_dti","dti")
setnames(fannie,"original_cltv","cltv")
fannie[,c("property_type","property_state","original_loan_term","origination_date"):=list(NULL)]
fannie[,loan_purpose:=ifelse(loan_purpose=="R","N",loan_purpose)]
fannie <- fannie[loan_purpose != "U"]
fannie[,id_loan:=as.character(id_loan)]
fannie[,newpurchase:=ifelse(loan_purpose=="P",1,0)]
fannie[,ltv:=cltv]
GSE
Top 5 - Level
regsample <- rbind(freddie,fannie)
regsample <- merge(regsample,yearrank,by.x=c("msa","loanyr"),by.y=c("cbsa","asofdate"))
regsample[,top5bin:=floor(top5pct*100/5)*5]
regsample[,top10bin:=floor(top10pct*100/5)*5]
regsample[,top5changebin:=floor(top5change*100/10)*10]
regsample[,hhibin:=floor(hhi*100/5)*5]
regsample[,hhibin:=ifelse(hhibin>15,20,hhibin)]
r <- list()
r[[1]] <- felm(int_rt~factor(top5bin)+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==1])
r[[2]] <- felm(int_rt~factor(top5bin)+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==0])
r[[3]] <- felm(int_rt~factor(top5changebin)+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==1 & abs(top5changebin)<=30])
r[[4]] <- felm(int_rt~factor(top5changebin)+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==0 & abs(top5changebin)<=30])
.coef_plot_2reg(r[[1]],"New purchases",r[[2]],"Refinances","factor(top5bin)",15)

Top 5 -Change
.coef_plot_2reg(r[[3]],"New purchases",r[[4]],"Refinances","factor(top5changebin)",-30)

HHI - Level
r[[5]] <- felm(int_rt~factor(hhibin)+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==1])
r[[6]] <- felm(int_rt~factor(hhibin)+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==0])
.coef_plot_2reg(r[[5]],"New purchases",r[[6]],"Refinances","factor(hhibin)",0)

r[[7]] <- felm(int_rt~hhi+fico+ltv+dti+log(orig_upb)+freddie+newpurchase|seller_name+loanyr+msa|0|msa,data=regsample[newpurchase==1])
.printtable(r[[7]])
##
## =========================================
## Dependent variable:
## ---------------------------
## -----------------------------------------
## hhi -1.121***
## (0.176)
## fico -0.00003***
## (0.00000)
## ltv 0.004***
## (0.0001)
## dti 0.0001***
## (0.00001)
## log(orig_upb) -0.239***
## (0.005)
## freddie 0.006
## (0.004)
## newpurchase
## (0.000)
## -----------------------------------------
##
## Observations 10,813,455
## Adjusted R2 0.910
## =========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
##
Moodys
moodys <- read_fst("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Moodys/0001/LoanChars.fst",as.data.table = TRUE, columns=c("loanid","loanoriginationdate","zipcode","originalloanbalance","originalcltv","state","originator","armflag","originalfico","originalterm","originalltv","documentationtype","originalinterestrate","purposetype","assettype"))
cbsa <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/ZIP_CBSA.csv")
cbsa[,ZIP:=ifelse(nchar(ZIP)==3,paste0("00",ZIP),ifelse(nchar(ZIP)==4,paste0("0",ZIP),paste0(ZIP)))]
setorder(cbsa,ZIP,-RES_RATIO)
cbsa <- cbsa[!duplicated(cbsa[,c("ZIP")])]
cbsa[,c("RES_RATIO","BUS_RATIO","OTH_RATIO","TOT_RATIO"):=list(NULL)]
names(cbsa) <- c("zipcode","msa")
moodys <- merge(moodys,cbsa,by=c("zipcode"))
# moodys <- moodys[originalterm==360 & armflag=="F"]
moodys[,loanyr:=as.numeric(substr(loanoriginationdate,1,4))]
moodys[,seller_name:= originator]
moodys[,seller_name:= ifelse(seller_name %in% c("JPMORGAN CHASE BANK, NA","JPMORGAN CHASE BANK, NATIONAL ASSOCIATION","CHASE MANHATTAN MORTGAGE CORPORATION","CHASE HOME FINANCE LLC","CHASE HOME FINANCE","CHASE HOME FINANCE, LLC","JPMORGAN CHASE BANK, NATIONAL ASSOCIATION","JPMORGAN CHASE BANK, N.A.","JP MORGAN CHASE BANK NA","CHASE MANHATTAN MORTGAGE CORP"),"JPMORGAN CHASE BANK, NA",seller_name)]
moodys[,seller_name:= ifelse(seller_name %in% c("B OF A"),"BANK OF AMERICA, N.A.",seller_name)]
moodys[,seller_name:= ifelse(seller_name %in% c("WELLS FARGO HOME MORTGAGE, INC.","WELLS FARGO BANK, N.A.","WELLS FARGO BANK N.A"," WELLS FARGO HOME MTG, INC"),"WELLS FARGO BANK, N.A.",seller_name)]
moodys[,int_rt:=originalinterestrate]
moodys[,dti:=0]
moodys[,ltv:=originalcltv]
moodys[,ltvorg:=originalltv]
moodys[,fico:=originalfico]
moodys[,orig_upb:= originalloanbalance]
# moodys[,c("originalinterestrate","originalltv","originalfico","originalloanbalance","msacode","csacode","divcode","loanoriginationdate","armflag","originalterm"):=list(NULL)]
moodys[,fulldocumentation:=ifelse(documentationtype=="FU",1,0)]
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 39152162 2091.0 61864524 3304.0 43297122 2312.4
## Vcells 6026742338 45980.4 10084986618 76942.4 7813466244 59612.1
moodys[,bank:=seller_name]
# moodys[,bank:=ifelse(bank %in% unique(cbsa_bnk$bank),bank,"other")]
moodys[,newpurchase:=ifelse(purposetype=="PUR",1,0)]
moodys <- moodys[assettype != "UN"]
regsample <- merge(moodys,yearrank,by.x=c("msa","loanyr"),by.y=c("cbsa","asofdate"))
regsample[,bank_msa:=paste(seller_name,msa)]
regsample[,assettype:=ifelse(assettype=="Prime","0Prime",assettype)]
regsample[,top5bin:=floor(top5pct*100/5)*5]
regsample[,top10bin:=floor(top10pct*100/5)*5]
regsample[,top5changebin:=floor(top5change*100/10)*10]
r <- list()
r[[1]] <- felm(int_rt~factor(top5bin)+fico+ltv+fulldocumentation+log(orig_upb)+factor(assettype)|seller_name+loanyr+msa|0|msa,data=regsample[originalterm==360 & armflag=="F" & newpurchase==1])
r[[2]] <- felm(int_rt~factor(top5bin)+fico+ltv+fulldocumentation+log(orig_upb)+factor(assettype)|seller_name+loanyr+msa|0|msa,data=regsample[originalterm==360 & armflag=="F" & newpurchase==0])
r[[3]] <- felm(int_rt~factor(top5changebin)+fico+ltv+fulldocumentation+log(orig_upb)+factor(assettype)|seller_name+loanyr+msa|0|msa,data=regsample[originalterm==360 & armflag=="F" & newpurchase==1 & abs(top5changebin)<=30])
r[[4]] <- felm(int_rt~factor(top5changebin)+fico+ltv+fulldocumentation+log(orig_upb)+factor(assettype)|seller_name+loanyr+msa|0|msa,data=regsample[originalterm==360 & armflag=="F" & newpurchase==0 & abs(top5changebin)<=30])
# .printtable(r)
.coef_plot_2reg(r[[1]],"New purchases",r[[2]],"Refinances","factor(top5bin)",15)

.coef_plot_2reg(r[[3]],"New purchases",r[[4]],"Refinances","factor(top5changebin)",-30)
