This paper compares the lending practices of mortgage brokers and retail lenders. We find that mortgage brokers are more likely to push borrowers to the limits permitted under regulations and guidelines. This may not be in the best interest of the borrowers, as we find that mortgages originated by brokers are 70% more likely to be delinquent conditional on observable borrower and loan characteristics.
Can the state-level differences pre-2007 predict broker concentration in states. If so, can that be a valid instrument?
can the big-4 share pre financial crisis predict broker originations later?
rm(list=ls())
library(data.table)
library(fst)
# library(MonetDBLite)
library(RSQLite)
library(DBI)
library(dplyr)
library(stringr)
library(readxl)
library(ggplot2)
library(lfe)
library(stargazer)
library(zipcodeR)
fannie_con <- dbConnect(RSQLite::SQLite(), "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/GSE db/Fannie/fannie.db")
freddie_con <- dbConnect(RSQLite::SQLite(), "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/GSE db/Freddie/freddie.db")
yrs <- 2015:2022
gse_limit_files <- list.files("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/GSE Limits",full.names = T,pattern = ".xls")
gse_limits <- list()
i=1
for(fl in gse_limit_files) {
yr <- substr(fl,117,120)
if(yr<=2014) {
temp <- read_xls(fl,sheet = 1,skip=2,col_names = F)
} else{
temp <- read_xlsx(fl,sheet = 1,skip=2,col_names = F)
}
names(temp) <- c("statefips","countyfips","countyname","cbsa","statecode","gse_limit","gse_limit_2","gse_limit_3","gse_limit_4")
temp <- data.table(temp)
temp[,year:=yr]
gse_limits[[i]] <- temp
i=i+1
}
gse_limits <- rbindlist(gse_limits)
gse_limits <- gse_limits[,.(gse_limit=median(gse_limit,na.rm=T)),by=year]
gse_limits[,year:=as.numeric(year)]
# gc()
# i=1
# freddie_per <- list()
# for(yr in yrs) {
# freddie_per[[i]] <- data.table(dbGetQuery(freddie_con,paste0("select distinct LoanSequenceNumber from performance_",yr," where CurrentLoanDelinquencyStatus in ('2','3','4','5','6')"))) #'1','2',
# i=i+1
# }
# freddie_per <- rbindlist(freddie_per)
# freddie_per <- data.table(freddie_per)
# saveRDS(freddie_per,"freddie_per.rds")
freddie_per <- readRDS("freddie_per.rds")
# gc()
# i=1
# fannie_per <- list()
# for(yr in yrs) {
# fannie_per[[i]] <- data.table(dbGetQuery(fannie_con,paste0("select distinct LoanIdentifier as LoanSequenceNumber from performance_",yr," where CurrentLoanDelinquencyStatus in ('2','3','4','5','6')"))) #'1','2',
# i=i+1
# }
# fannie_per <- rbindlist(fannie_per)
# fannie_per <- data.table(fannie_per)
# fannie_per[,LoanSequenceNumber:=as.character(LoanSequenceNumber)]
# saveRDS(fannie_per,"fannie_per.rds")
fannie_per <- readRDS("fannie_per.rds")
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 4908531 262.2 7552382 403.4 7552382 403.4
## Vcells 8900114 68.0 12773082 97.5 9952075 76.0
i=1
fannie <- list()
for(yr in yrs) {
fannie[[i]] <- data.table(dbGetQuery(fannie_con,paste0("select
LoanIdentifier LoanSequenceNumber,
Channel,
Debt_To_Income_DTI_ DTI,
LoanPurpose,
ZipCodeShort PostalCode,
BorrowerCreditScoreatOrigination CreditScore,
OriginalUPB,
OriginalLoanTerm,
OriginalLoantoValueRatio_LTV_ LTV,
OriginalCombinedLoantoValueRatio_CLTV_ CLTV,
FirstPaymentDate,
OriginalInterestRate,
NumberofBorrowers,
year
from origination_",yr," where OccupancyStatus='P' and PropertyType='SF' and NumberofUnits=1")))
fannie[[i]][,year:=yr]
i=i+1
}
fannie <- rbindlist(fannie)
fannie <- data.table(fannie)
fannie[,LoanSequenceNumber:=as.character(LoanSequenceNumber)]
fannie[,delinq60days:=ifelse(LoanSequenceNumber %in% fannie_per$LoanSequenceNumber,1,0)]
fannie[,freddie:=0]
fannie[,LoanPurpose:=ifelse(LoanPurpose=="R","N",LoanPurpose)]
fannie[,PostalCode:=PostalCode*100]
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 16799633 897.2 35961024 1920.6 16816077 898.1
## Vcells 190512093 1453.5 313654230 2393.0 261311858 1993.7
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 16799815 897.3 35961024 1920.6 16856853 900.3
## Vcells 190508370 1453.5 313654230 2393.0 261311858 1993.7
i=1
freddie <- list()
for(yr in yrs) {
freddie[[i]] <- data.table(dbGetQuery(freddie_con,paste0("select
LoanSequenceNumber,
Channel,
DTI,
LoanPurpose,
PostalCode,
CreditScore,
OriginalUPB,
OriginalLoanTerm,
LTV,
CLTV,
FirstPaymentDate,
OriginalInterestRate,
NumberofBorrowers
from origination_",yr," where OccupancyStatus='P' and PropertyType='SF' and NumberofUnits=1")))
freddie[[i]][,year:=yr]
i=i+1
}
freddie <- rbindlist(freddie)
freddie <- data.table(freddie)
freddie[,delinq60days:=ifelse(LoanSequenceNumber %in% freddie_per$LoanSequenceNumber,1,0)]
freddie[,freddie:=1]
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 26195370 1399.0 49064687 2620.4 26219572 1400.3
## Vcells 324781444 2477.9 542285709 4137.4 487099964 3716.3
gse <- rbind(fannie,freddie)
gse[,broker:=ifelse(Channel=="B",1,ifelse(Channel=="R",0,NA))]
gse[,bkr_cl:=ifelse(Channel %in% c("B","C"),1,0)]
gse[,dist_45:=45-DTI]
gse[,dist_50:=50-DTI]
gse <- merge(gse,gse_limits,by="year")
gse[,dist_gse_limit:=gse_limit-OriginalUPB]
gse[,zip_yr:=paste(PostalCode,year)]
gse[,amt:=floor(OriginalUPB/10000)*10]
gse[,rt:=floor(OriginalInterestRate*10)/10]
gse[,purposeofloan:=ifelse(LoanPurpose=="P",1,ifelse(LoanPurpose=="C",32,ifelse(LoanPurpose=="N",31,0)))]
gse[,term:=floor(OriginalLoanTerm/10)*10]
gse[,di:=ifelse(DTI %in% 37:49,DTI,ifelse(DTI<20,20,ifelse(DTI<30,30,ifelse(DTI<36,36,ifelse(DTI %in% 50:60,60,NA)))))]
gse[,match_string:=paste(PostalCode,amt,rt,term,purposeofloan,di,year,freddie)]
gse_duplicated_match_strings <- unique(gse[duplicated(gse[,c("match_string")])]$match_string)
ggplot(gse[DTI <= 55 & DTI > 20 & !is.na(broker)], aes(x = DTI, fill = factor(broker))) +
geom_density(alpha = 0.5, bw = 1) +
labs(x = "Debt-to-income", y = "Density") +
scale_fill_manual(
values = c("0" = "steelblue2", "1" = "red4"), # Customize colors as needed
labels = c("1" = "Mortgage Brokers", "0" = "Retail Lenders")
) +
theme(legend.title = element_blank())+
theme_minimal()
Robustness test: Use HMDA data to show that there is not such excessive bunching at the cutoffs; less likely to deny due to DTI.
ggplot(gse[dist_gse_limit< 50000 & dist_gse_limit> -100 & !is.na(broker) & year<2020],
aes(x=dist_gse_limit,fill=factor(broker))) +
geom_density(alpha = 0.5, bw = 5000) +
labs(x = "Distance to GSE Limit", y = "Density") +
scale_fill_manual(
name = "",
values = c("0" = "steelblue2", "1" = "red4"), # Customize colors as needed
labels = c("1" = "Mortgage Brokers", "0" = "Retail Lenders")
) +
theme(legend.title = element_blank())+
theme_minimal()
# ggplot(gse[dist_gse_limit< 50000 & dist_gse_limit> -100 & !is.na(broker) & year<2020],aes(x=dist_gse_limit,fill=factor(broker)))+geom_density(alpha=0.25,bw=5000)
r <- list()
r[[1]] <- felm(dist_gse_limit~broker+CreditScore+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie|zip_yr+LoanPurpose|0|PostalCode,gse[dist_gse_limit< 100000 & dist_gse_limit>0 ])
r[[2]] <- felm(dist_45~broker+CreditScore+log(OriginalUPB)+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie|zip_yr+LoanPurpose|0|PostalCode,gse[DTI>40 & DTI<=45 ])
r[[3]] <- felm(dist_50~broker+CreditScore+log(OriginalUPB)+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie|zip_yr+LoanPurpose|0|PostalCode,gse[DTI>45 & DTI<=50 ])
stargazer(r,type="text",no.space = T,omit.stat = "ser")
##
## =========================================================
## Dependent variable:
## ------------------------------------
## dist_gse_limit dist_45 dist_50
## (1) (2) (3)
## ---------------------------------------------------------
## broker -2,513.481*** -0.016*** -0.029***
## (115.719) (0.003) (0.004)
## CreditScore -0.627 0.0003*** 0.00004**
## (0.408) (0.00002) (0.00002)
## log(OriginalUPB) -0.012*** -0.043***
## (0.003) (0.003)
## OriginalLoanTerm -14.750*** -0.0003*** -0.0004***
## (0.799) (0.00001) (0.00002)
## LTV 1,042.235*** -0.002*** -0.003***
## (62.864) (0.0005) (0.001)
## CLTV -913.086*** 0.002*** 0.006***
## (56.194) (0.0005) (0.001)
## OriginalInterestRate 982.258*** -0.049*** -0.018***
## (124.331) (0.002) (0.002)
## freddie -376.593*** -0.065*** -0.068***
## (69.397) (0.002) (0.003)
## ---------------------------------------------------------
## Observations 1,782,540 3,081,018 1,627,106
## R2 0.046 0.008 0.013
## Adjusted R2 0.043 0.006 0.009
## =========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
r <- list()
r[[1]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[2]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[3]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("C") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[4]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("N") & OriginalUPB<1000000 & OriginalUPB>100000])
stargazer(r,type="text",no.space = T,omit.stat = "ser")
##
## ==================================================================
## Dependent variable:
## ---------------------------------------------
## delinq60days
## (1) (2) (3) (4)
## ------------------------------------------------------------------
## broker 0.002*** 0.004*** 0.002*** 0.001***
## (0.0002) (0.0003) (0.0002) (0.0002)
## CreditScore -0.00004*** -0.00003*** -0.0001*** -0.0001***
## (0.00000) (0.00000) (0.00001) (0.00000)
## DTI 0.0001*** 0.001*** 0.0004*** 0.0001***
## (0.00000) (0.00002) (0.00001) (0.00000)
## LTV 0.0003*** 0.0004*** 0.0002*** 0.0002***
## (0.00001) (0.00001) (0.00001) (0.00001)
## log(OriginalUPB) -0.005*** -0.006*** -0.003*** -0.002***
## (0.0003) (0.0004) (0.0003) (0.0002)
## OriginalLoanTerm 0.00001*** 0.00001*** 0.00002*** 0.00000***
## (0.00000) (0.00000) (0.00000) (0.00000)
## OriginalInterestRate 0.004*** 0.004*** 0.003*** 0.007***
## (0.0001) (0.0002) (0.0002) (0.0002)
## factor(LoanPurpose)N -0.002***
## (0.0001)
## factor(LoanPurpose)P -0.001***
## (0.0002)
## freddie 0.021*** 0.028*** 0.022*** 0.013***
## (0.0004) (0.001) (0.001) (0.0003)
## ------------------------------------------------------------------
## Observations 14,379,499 4,884,857 4,073,531 5,421,111
## R2 0.020 0.024 0.020 0.025
## Adjusted R2 0.020 0.023 0.018 0.024
## ==================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
r <- list()
r[[1]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2015])
r[[2]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2016])
r[[3]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2017])
r[[4]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2018])
r[[5]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2019])
r[[6]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2020])
r[[7]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2021])
stargazer(r,type="text",no.space = T,omit.stat="ser")
##
## ======================================================================================================
## Dependent variable:
## ---------------------------------------------------------------------------------
## delinq60days
## (1) (2) (3) (4) (5) (6) (7)
## ------------------------------------------------------------------------------------------------------
## broker 0.003*** 0.003*** 0.005*** 0.006*** 0.002*** 0.002*** 0.005***
## (0.001) (0.001) (0.001) (0.001) (0.001) (0.0002) (0.0002)
## CreditScore -0.0002*** -0.0001*** -0.0001*** -0.00003*** -0.00003*** -0.00004*** -0.00003***
## (0.00002) (0.00001) (0.00001) (0.00000) (0.00000) (0.00000) (0.00000)
## DTI 0.0001*** 0.0001*** 0.0001*** 0.0001*** 0.0004*** 0.0005*** 0.0003***
## (0.00000) (0.00000) (0.00000) (0.00001) (0.00002) (0.00001) (0.00001)
## LTV 0.0002*** 0.0002*** 0.0002*** 0.0003*** 0.001*** 0.0002*** 0.0002***
## (0.00002) (0.00002) (0.00002) (0.00002) (0.00002) (0.00001) (0.00001)
## log(OriginalUPB) -0.002*** 0.0001 -0.0004 -0.004*** -0.004*** -0.003*** -0.003***
## (0.001) (0.001) (0.001) (0.001) (0.001) (0.0003) (0.0002)
## OriginalLoanTerm 0.00002*** 0.00000 -0.00002*** -0.00001*** 0.00003*** -0.00000*** -0.00002***
## (0.00000) (0.00000) (0.00000) (0.00000) (0.00000) (0.00000) (0.00000)
## OriginalInterestRate 0.004*** 0.011*** 0.019*** 0.018*** 0.005*** 0.011*** 0.012***
## (0.001) (0.001) (0.001) (0.001) (0.0005) (0.0003) (0.0003)
## factor(LoanPurpose)N -0.002*** -0.003*** -0.0001 0.004*** -0.005*** 0.001*** 0.001***
## (0.0004) (0.0004) (0.0004) (0.001) (0.001) (0.0002) (0.0001)
## factor(LoanPurpose)P -0.002*** -0.001** 0.001** 0.001** -0.006*** 0.006*** 0.003***
## (0.001) (0.001) (0.001) (0.001) (0.001) (0.0003) (0.0003)
## ------------------------------------------------------------------------------------------------------
## Observations 1,112,187 1,372,291 1,079,768 937,591 1,252,911 3,597,081 3,683,207
## R2 0.025 0.020 0.019 0.011 0.012 0.009 0.007
## Adjusted R2 0.024 0.020 0.018 0.011 0.011 0.009 0.007
## ======================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
mb_data <- readRDS("zip_mortgage_broker_stats.rds")
mb_data[,ZIP3:=as.numeric(ZIP)]
mb_data[,ZIP3:=floor(ZIP3/100)*100]
mb_data_1 <- mb_data[is.finite(brokers_per_home),
.(brokers_per_home=mean(brokers_per_home,na.rm=T)),
by=ZIP3]
mb_data_2 <- mb_data[is.finite(brokers_per_mortgage),
.(brokers_per_mortgage = mean(brokers_per_mortgage,na.rm=T),
brokers_per_single_family_unit = mean(brokers_per_single_family_unit,na.rm=T),
brokers_per_occupied_unit = mean(brokers_per_occupied_unit,na.rm=T)),
by=ZIP3]
mb_data <- merge(mb_data_1,mb_data_2,by="ZIP3",all.x=T,all.y=T)
gse <- merge(gse,mb_data,by.x="PostalCode",by.y="ZIP3",all.x=T)
r <- list()
r[[1]] <- felm(dist_gse_limit~broker*log(1+brokers_per_mortgage)+CreditScore+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie|zip_yr+LoanPurpose|0|PostalCode,gse[dist_gse_limit< 100000 & dist_gse_limit>0 ])
r[[2]] <- felm(dist_45~broker*log(1+brokers_per_mortgage)+CreditScore+log(OriginalUPB)+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie|zip_yr+LoanPurpose|0|PostalCode,gse[DTI>40 & DTI<=45 ])
r[[3]] <- felm(dist_50~broker*log(1+brokers_per_mortgage)+CreditScore+log(OriginalUPB)+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie|zip_yr+LoanPurpose|0|PostalCode,gse[DTI>45 & DTI<=50 ])
stargazer(r,type="text",no.space = T,omit.stat = "ser")
##
## =========================================================================
## Dependent variable:
## ------------------------------------
## dist_gse_limit dist_45 dist_50
## (1) (2) (3)
## -------------------------------------------------------------------------
## broker -2,232.625*** -0.009* -0.022***
## (176.989) (0.005) (0.006)
## log(1 + brokers_per_mortgage)
## (0.000) (0.000) (0.000)
## CreditScore -0.662 0.0004*** 0.00005**
## (0.412) (0.00002) (0.00002)
## log(OriginalUPB) -0.012*** -0.043***
## (0.003) (0.003)
## OriginalLoanTerm -14.858*** -0.0003*** -0.0004***
## (0.801) (0.00001) (0.00002)
## LTV 1,040.490*** -0.002*** -0.003**
## (63.104) (0.0005) (0.001)
## CLTV -912.298*** 0.002*** 0.006***
## (56.411) (0.0005) (0.001)
## OriginalInterestRate 985.513*** -0.049*** -0.018***
## (124.922) (0.002) (0.002)
## freddie -374.736*** -0.065*** -0.068***
## (69.653) (0.002) (0.003)
## broker:log(1 + brokers_per_mortgage) -153.482** -0.004** -0.004
## (69.631) (0.002) (0.002)
## -------------------------------------------------------------------------
## Observations 1,773,330 3,029,576 1,604,370
## R2 0.046 0.008 0.013
## Adjusted R2 0.042 0.006 0.009
## =========================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
r <- list()
r[[1]] <- felm(delinq60days~broker*log(1+brokers_per_mortgage)+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[2]] <- felm(delinq60days~broker*log(1+brokers_per_mortgage)+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[3]] <- felm(delinq60days~broker*log(1+brokers_per_mortgage)+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("C") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[4]] <- felm(delinq60days~broker*log(1+brokers_per_mortgage)+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("N") & OriginalUPB<1000000 & OriginalUPB>100000])
stargazer(r,type="text",no.space = T,omit.stat = "ser")
##
## ==================================================================================
## Dependent variable:
## ---------------------------------------------
## delinq60days
## (1) (2) (3) (4)
## ----------------------------------------------------------------------------------
## broker 0.002*** 0.003*** 0.002*** 0.001***
## (0.0003) (0.0005) (0.0004) (0.0003)
## log(1 + brokers_per_mortgage)
## (0.000) (0.000) (0.000) (0.000)
## CreditScore -0.00004*** -0.00003*** -0.0001*** -0.0001***
## (0.00000) (0.00000) (0.00000) (0.00000)
## DTI 0.0001*** 0.001*** 0.0004*** 0.0001***
## (0.00000) (0.00002) (0.00001) (0.00000)
## LTV 0.0003*** 0.0004*** 0.0002*** 0.0002***
## (0.00001) (0.00001) (0.00001) (0.00001)
## log(OriginalUPB) -0.005*** -0.006*** -0.003*** -0.002***
## (0.0003) (0.0004) (0.0003) (0.0003)
## OriginalLoanTerm 0.00001*** 0.00001*** 0.00002*** 0.00000***
## (0.00000) (0.00000) (0.00000) (0.00000)
## OriginalInterestRate 0.004*** 0.004*** 0.003*** 0.007***
## (0.0001) (0.0002) (0.0002) (0.0002)
## factor(LoanPurpose)N -0.002***
## (0.0001)
## factor(LoanPurpose)P -0.001***
## (0.0003)
## freddie 0.021*** 0.028*** 0.022*** 0.013***
## (0.0005) (0.001) (0.001) (0.0003)
## broker:log(1 + brokers_per_mortgage) 0.0003 0.0005* 0.0003* -0.0002*
## (0.0002) (0.0002) (0.0002) (0.0001)
## ----------------------------------------------------------------------------------
## Observations 14,157,708 4,794,224 4,014,591 5,348,893
## R2 0.020 0.024 0.020 0.025
## Adjusted R2 0.020 0.023 0.018 0.024
## ==================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
hmda_con <- dbConnect(RSQLite::SQLite(), "C:/Users/dratnadiwakara2/Downloads/HMDA/hmda.db")
tract_zip <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/TRACT_ZIP_032016.csv")
setorder(tract_zip,-RES_RATIO)
tract_zip <- tract_zip[!duplicated(tract_zip[,c("TRACT")])]
tract_zip <- tract_zip[,c("TRACT","ZIP")]
tract_zip[,TRACT:=as.character(TRACT)]
tract_zip[,TRACT:=str_pad(TRACT,11,"left","0")]
tract_zip[,ZIP3:=floor(ZIP/100)*100]
yrs <- as.character(2018:2021)
hmda <- list()
i=1
for(yr in yrs) {
print(yr)
hmda[[i]] <- data.table(dbGetQuery(hmda_con,
paste0("select
asofdate,
debt_to_income_ratio,
amountofloan,
submission_of_application,
initially_payable_to_institution,
purposeofloan,
actiontaken,
censustract,
interest_rate,
rate_spread,
loan_term,
applicant_age,
property_value,
applicantincome,
applicantrace1,
total_loan_costs,
origination_charges,
typeofpurchaser,
co_applicant_age
from lar_",yr," where
occupancy_type=1 and
derived_dwelling_category='Single Family (1-4 Units):Site-Built' and
typeofpurchaser in (2,3) ")))
i=i+1
}
## [1] "2018"
## [1] "2019"
## [1] "2020"
## [1] "2021"
hmda <- rbindlist(hmda)
hmda <- data.table(hmda)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 45321047 2420.5 91813432 4903.4 49569801 2647.4
## Vcells 2160026068 16479.7 3468155997 26460.0 2890007631 22049.1
hmda <- merge(hmda,tract_zip,by.x="censustract",by.y="TRACT")
hmda[,amt:=floor(amountofloan/10)*10]
hmda[,rt:=floor(interest_rate*10)/10]
hmda[,term:=floor(loan_term/10)*10]
hmda[,freddie:=ifelse(typeofpurchaser==3,1,0)]
hmda[,match_string:=paste(ZIP3,amt,rt,term,purposeofloan,debt_to_income_ratio,asofdate,freddie)]
hmda_duplicated_match_strings <- unique(hmda[duplicated(hmda[,c("match_string")])]$match_string)
hmda_gse <- merge(hmda[!match_string %in% hmda_duplicated_match_strings],
gse[!match_string %in% gse_duplicated_match_strings],
by="match_string")
setnames(hmda_gse,"freddie.x","freddie")
hmda_gse[,county_year:=paste(substr(censustract,1,5),year)]
hmda_gse[,joint_applicant:=ifelse(!is.na(co_applicant_age),1,0)]
hmda_gse[,loan_cost_frac:=total_loan_costs/(amountofloan*1000)]
hmda_gse <- hmda_gse[applicantincome>0 ]
hmda_gse[,rate_spread:=as.numeric(rate_spread)]
r <- list()
r[[1]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[2]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("P") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[3]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("C") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[4]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("N") & OriginalUPB<1000000 & OriginalUPB>100000])
stargazer(r,type="text",no.space = T,omit.stat = "ser")
##
## ====================================================================
## Dependent variable:
## -----------------------------------------------
## delinq60days
## (1) (2) (3) (4)
## --------------------------------------------------------------------
## broker 0.005*** 0.005*** 0.004*** 0.004***
## (0.0004) (0.001) (0.001) (0.0005)
## CreditScore -0.00003*** -0.00002*** -0.0001*** -0.00004***
## (0.00000) (0.00000) (0.00002) (0.00001)
## DTI 0.0002*** 0.001*** 0.0004*** 0.0001***
## (0.00002) (0.00003) (0.00004) (0.00001)
## LTV 0.0003*** 0.001*** 0.0001*** 0.0002***
## (0.00001) (0.00002) (0.00002) (0.00002)
## log(OriginalUPB) 0.003*** -0.005*** 0.003*** 0.003***
## (0.001) (0.001) (0.001) (0.001)
## OriginalLoanTerm -0.00004*** -0.00005*** -0.00002*** -0.00002***
## (0.00000) (0.00001) (0.00000) (0.00000)
## OriginalInterestRate 0.021*** 0.027*** 0.016*** 0.016***
## (0.001) (0.001) (0.001) (0.001)
## factor(LoanPurpose)N 0.0002
## (0.0003)
## factor(LoanPurpose)P 0.002***
## (0.0005)
## freddie 0.019*** 0.023*** 0.020*** 0.007***
## (0.001) (0.001) (0.001) (0.001)
## applicant_age -0.0001*** 0.0001** -0.0003*** -0.00003**
## (0.00001) (0.00002) (0.00002) (0.00002)
## log(applicantincome) -0.009*** -0.004*** -0.008*** -0.006***
## (0.0004) (0.001) (0.001) (0.0005)
## --------------------------------------------------------------------
## Observations 1,642,625 534,501 480,449 627,675
## R2 0.032 0.046 0.053 0.040
## Adjusted R2 0.025 0.026 0.032 0.025
## ====================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
r <- list()
r[[1]] <- felm(loan_cost_frac~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+factor(LoanPurpose)+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & loan_cost_frac>0.01 & loan_cost_frac<0.1])
r[[2]] <- felm(OriginalInterestRate~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & loan_cost_frac>0.01 & loan_cost_frac<0.1])
r[[3]] <- felm(rate_spread~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+factor(LoanPurpose)+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & loan_cost_frac>0.01 & loan_cost_frac<0.1 & abs(rate_spread)<5])
stargazer(r,type="text",no.space = T,omit.stat = "ser")
##
## ====================================================================
## Dependent variable:
## -----------------------------------------------
## loan_cost_frac OriginalInterestRate rate_spread
## (1) (2) (3)
## --------------------------------------------------------------------
## broker 0.001*** -0.000*** -0.065***
## (0.0001) (0.000) (0.003)
## CreditScore -0.00000*** 0.000*** -0.0002***
## (0.00000) (0.000) (0.00001)
## DTI 0.00000*** 0.000*** 0.002***
## (0.00000) (0.000) (0.0002)
## LTV 0.0001*** 0.000*** 0.008***
## (0.00000) (0.000) (0.0001)
## log(OriginalUPB) -0.010*** -0.000*** -0.348***
## (0.0001) (0.000) (0.004)
## OriginalLoanTerm 0.00000*** 0.000 -0.0002***
## (0.00000) (0.000) (0.00001)
## OriginalInterestRate 1.000***
## (0.000)
## factor(LoanPurpose)N -0.003*** -0.000*** -0.201***
## (0.00004) (0.000) (0.002)
## factor(LoanPurpose)P -0.003*** -0.000*** -0.212***
## (0.0001) (0.000) (0.004)
## freddie -0.013*** -0.000*** -0.346***
## (0.0001) (0.000) (0.006)
## applicant_age 0.00003*** -0.000*** -0.0005***
## (0.00000) (0.000) (0.0001)
## log(applicantincome) -0.0004*** 0.000*** 0.065***
## (0.00003) (0.000) (0.002)
## --------------------------------------------------------------------
## Observations 1,227,156 1,227,156 1,212,487
## R2 0.339 1.000 0.283
## Adjusted R2 0.332 1.000 0.277
## ====================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
r <- list()
r[[1]] <- felm(dist_gse_limit~broker+CreditScore+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie+applicant_age|zip_yr+LoanPurpose|0|PostalCode,hmda_gse[dist_gse_limit< 100000 & dist_gse_limit>0 ])
r[[2]] <- felm(dist_45~broker+CreditScore+log(OriginalUPB)+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie+applicant_age|zip_yr+LoanPurpose|0|PostalCode,hmda_gse[DTI>40 & DTI<=45 ])
r[[3]] <- felm(dist_50~broker+CreditScore+log(OriginalUPB)+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie+applicant_age|zip_yr+LoanPurpose|0|PostalCode,hmda_gse[DTI>44 & DTI<=50 ])
stargazer(r,type="text",no.space = T,omit.stat = "ser")
##
## ========================================================
## Dependent variable:
## -----------------------------------
## dist_gse_limit dist_45 dist_50
## (1) (2) (3)
## --------------------------------------------------------
## broker -2,074.354*** -0.013* -0.004
## (186.911) (0.007) (0.009)
## CreditScore 0.253 0.0002*** -0.001***
## (0.480) (0.00004) (0.0003)
## log(OriginalUPB) -0.004 -0.130***
## (0.007) (0.010)
## OriginalLoanTerm -5.225*** -0.0002*** -0.0001**
## (1.541) (0.00004) (0.0001)
## LTV 1,236.256*** -0.002*** 0.004
## (40.611) (0.001) (0.005)
## CLTV -1,082.361*** 0.002** 0.008
## (31.968) (0.001) (0.005)
## OriginalInterestRate 1,057.029*** -0.046*** 0.166***
## (244.916) (0.006) (0.012)
## freddie -4,672.006*** 0.022** 0.818***
## (585.379) (0.009) (0.018)
## applicant_age -29.867*** -0.002*** -0.004***
## (9.086) (0.0002) (0.0003)
## --------------------------------------------------------
## Observations 180,262 318,946 257,990
## R2 0.047 0.015 0.057
## Adjusted R2 0.031 0.004 0.044
## ========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01