This paper compares the lending practices of mortgage brokers and retail lenders. We find that mortgage brokers are more likely to push borrowers to the limits permitted under regulations and guidelines. This may not be in the best interest of the borrowers, as we find that mortgages originated by brokers are 70% more likely to be delinquent conditional on observable borrower and loan characteristics.

Can the state-level differences pre-2007 predict broker concentration in states. If so, can that be a valid instrument?

can the big-4 share pre financial crisis predict broker originations later?

rm(list=ls())
library(data.table)
library(fst)
# library(MonetDBLite)
library(RSQLite)
library(DBI)  
library(dplyr)
library(stringr)
library(readxl)
library(ggplot2)
library(lfe)
library(stargazer)
library(zipcodeR)

fannie_con <- dbConnect(RSQLite::SQLite(), "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/GSE db/Fannie/fannie.db")
freddie_con <- dbConnect(RSQLite::SQLite(), "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/GSE db/Freddie/freddie.db")
yrs <- 2015:2022
gse_limit_files <- list.files("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/GSE Limits",full.names = T,pattern = ".xls")
gse_limits <- list()
i=1
for(fl in gse_limit_files) {
  yr <- substr(fl,117,120)
  if(yr<=2014) {
    temp <- read_xls(fl,sheet = 1,skip=2,col_names = F)  
  } else{
    temp <- read_xlsx(fl,sheet = 1,skip=2,col_names = F)  
  }
  
  names(temp) <- c("statefips","countyfips","countyname","cbsa","statecode","gse_limit","gse_limit_2","gse_limit_3","gse_limit_4")
  temp <- data.table(temp)
  temp[,year:=yr]
  gse_limits[[i]] <- temp
  i=i+1
}

gse_limits <- rbindlist(gse_limits)
gse_limits <- gse_limits[,.(gse_limit=median(gse_limit,na.rm=T)),by=year]
gse_limits[,year:=as.numeric(year)]
# gc()
# i=1
# freddie_per <- list()
# for(yr in yrs) {
#   freddie_per[[i]] <- data.table(dbGetQuery(freddie_con,paste0("select distinct LoanSequenceNumber from performance_",yr," where CurrentLoanDelinquencyStatus in ('2','3','4','5','6')")))  #'1','2',
#   i=i+1
# }
# freddie_per <- rbindlist(freddie_per)
# freddie_per <- data.table(freddie_per)
# saveRDS(freddie_per,"freddie_per.rds")

freddie_per <- readRDS("freddie_per.rds")
# gc()
# i=1
# fannie_per <- list()
# for(yr in yrs) {
#   fannie_per[[i]] <- data.table(dbGetQuery(fannie_con,paste0("select distinct LoanIdentifier as LoanSequenceNumber from performance_",yr," where CurrentLoanDelinquencyStatus in ('2','3','4','5','6')")))  #'1','2',
#   i=i+1
# }
# fannie_per <- rbindlist(fannie_per)
# fannie_per <- data.table(fannie_per)
# fannie_per[,LoanSequenceNumber:=as.character(LoanSequenceNumber)]
# saveRDS(fannie_per,"fannie_per.rds")
fannie_per <- readRDS("fannie_per.rds")
gc()
##           used  (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 4908531 262.2    7552382 403.4  7552382 403.4
## Vcells 8900114  68.0   12773082  97.5  9952075  76.0
i=1
fannie <- list()
for(yr in yrs) {
  fannie[[i]] <- data.table(dbGetQuery(fannie_con,paste0("select
                                                             LoanIdentifier LoanSequenceNumber,
                                                             Channel,
                                                             Debt_To_Income_DTI_ DTI,
                                                             LoanPurpose,
                                                             ZipCodeShort PostalCode,
                                                             BorrowerCreditScoreatOrigination CreditScore,
                                                             OriginalUPB,
                                                             OriginalLoanTerm,
                                                             OriginalLoantoValueRatio_LTV_ LTV,
                                                             OriginalCombinedLoantoValueRatio_CLTV_ CLTV,
                                                             FirstPaymentDate,
                                                             OriginalInterestRate,
                                                             NumberofBorrowers,
                                                             year
                                                           from origination_",yr," where OccupancyStatus='P' and PropertyType='SF' and NumberofUnits=1")))  
  fannie[[i]][,year:=yr]
  i=i+1
}
fannie <- rbindlist(fannie)
fannie <- data.table(fannie)
fannie[,LoanSequenceNumber:=as.character(LoanSequenceNumber)]
fannie[,delinq60days:=ifelse(LoanSequenceNumber %in% fannie_per$LoanSequenceNumber,1,0)]
fannie[,freddie:=0]
fannie[,LoanPurpose:=ifelse(LoanPurpose=="R","N",LoanPurpose)]
fannie[,PostalCode:=PostalCode*100]
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells  16799633  897.2   35961024 1920.6  16816077  898.1
## Vcells 190512093 1453.5  313654230 2393.0 261311858 1993.7
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells  16799815  897.3   35961024 1920.6  16856853  900.3
## Vcells 190508370 1453.5  313654230 2393.0 261311858 1993.7
i=1
freddie <- list()
for(yr in yrs) {
  freddie[[i]] <- data.table(dbGetQuery(freddie_con,paste0("select
                                                             LoanSequenceNumber,
                                                             Channel,
                                                             DTI,
                                                             LoanPurpose,
                                                             PostalCode,
                                                             CreditScore,
                                                             OriginalUPB,
                                                             OriginalLoanTerm,
                                                             LTV,
                                                             CLTV,
                                                             FirstPaymentDate,
                                                             OriginalInterestRate,
                                                             NumberofBorrowers
                                                           from origination_",yr," where OccupancyStatus='P' and PropertyType='SF' and NumberofUnits=1")))  
  freddie[[i]][,year:=yr]
  i=i+1
}
freddie <- rbindlist(freddie)
freddie <- data.table(freddie)
freddie[,delinq60days:=ifelse(LoanSequenceNumber %in% freddie_per$LoanSequenceNumber,1,0)]
freddie[,freddie:=1]
gc()
##             used   (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells  26195370 1399.0   49064687 2620.4  26219572 1400.3
## Vcells 324781444 2477.9  542285709 4137.4 487099964 3716.3
gse <- rbind(fannie,freddie)


gse[,broker:=ifelse(Channel=="B",1,ifelse(Channel=="R",0,NA))]
gse[,bkr_cl:=ifelse(Channel %in% c("B","C"),1,0)]

gse[,dist_45:=45-DTI]
gse[,dist_50:=50-DTI]


gse <- merge(gse,gse_limits,by="year")
gse[,dist_gse_limit:=gse_limit-OriginalUPB]

gse[,zip_yr:=paste(PostalCode,year)]

gse[,amt:=floor(OriginalUPB/10000)*10]
gse[,rt:=floor(OriginalInterestRate*10)/10]
gse[,purposeofloan:=ifelse(LoanPurpose=="P",1,ifelse(LoanPurpose=="C",32,ifelse(LoanPurpose=="N",31,0)))]
gse[,term:=floor(OriginalLoanTerm/10)*10]
gse[,di:=ifelse(DTI %in% 37:49,DTI,ifelse(DTI<20,20,ifelse(DTI<30,30,ifelse(DTI<36,36,ifelse(DTI %in% 50:60,60,NA)))))]


gse[,match_string:=paste(PostalCode,amt,rt,term,purposeofloan,di,year,freddie)]

gse_duplicated_match_strings <- unique(gse[duplicated(gse[,c("match_string")])]$match_string)
ggplot(gse[DTI <= 55 & DTI > 20 & !is.na(broker)], aes(x = DTI, fill = factor(broker))) +
  geom_density(alpha = 0.5, bw = 1) +
  labs(x = "Debt-to-income", y = "Density") +
  scale_fill_manual(
    values = c("0" = "steelblue2", "1" = "red4"), # Customize colors as needed
    labels = c("1" = "Mortgage Brokers", "0" = "Retail Lenders")
  ) +
  theme(legend.title = element_blank())+
  theme_minimal()

Robustness test: Use HMDA data to show that there is not such excessive bunching at the cutoffs; less likely to deny due to DTI.

ggplot(gse[dist_gse_limit< 50000 & dist_gse_limit> -100   & !is.na(broker) & year<2020],
       aes(x=dist_gse_limit,fill=factor(broker))) +
  geom_density(alpha = 0.5, bw = 5000) +
  labs(x = "Distance to GSE Limit", y = "Density") +
  scale_fill_manual(
    name = "",
    values = c("0" = "steelblue2", "1" = "red4"), # Customize colors as needed
    labels = c("1" = "Mortgage Brokers", "0" = "Retail Lenders")
  ) +
  theme(legend.title = element_blank())+
  theme_minimal()

# ggplot(gse[dist_gse_limit< 50000 & dist_gse_limit> -100   & !is.na(broker) & year<2020],aes(x=dist_gse_limit,fill=factor(broker)))+geom_density(alpha=0.25,bw=5000)
r <- list()

r[[1]] <- felm(dist_gse_limit~broker+CreditScore+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie|zip_yr+LoanPurpose|0|PostalCode,gse[dist_gse_limit< 100000 & dist_gse_limit>0 ])
r[[2]] <- felm(dist_45~broker+CreditScore+log(OriginalUPB)+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie|zip_yr+LoanPurpose|0|PostalCode,gse[DTI>40 & DTI<=45 ])
r[[3]] <- felm(dist_50~broker+CreditScore+log(OriginalUPB)+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie|zip_yr+LoanPurpose|0|PostalCode,gse[DTI>45 & DTI<=50 ])

stargazer(r,type="text",no.space = T,omit.stat = "ser")
## 
## =========================================================
##                              Dependent variable:         
##                      ------------------------------------
##                      dist_gse_limit  dist_45    dist_50  
##                           (1)          (2)        (3)    
## ---------------------------------------------------------
## broker               -2,513.481***  -0.016***  -0.029*** 
##                        (115.719)     (0.003)    (0.004)  
## CreditScore              -0.627     0.0003***  0.00004** 
##                         (0.408)     (0.00002)  (0.00002) 
## log(OriginalUPB)                    -0.012***  -0.043*** 
##                                      (0.003)    (0.003)  
## OriginalLoanTerm       -14.750***   -0.0003*** -0.0004***
##                         (0.799)     (0.00001)  (0.00002) 
## LTV                   1,042.235***  -0.002***  -0.003*** 
##                         (62.864)     (0.0005)   (0.001)  
## CLTV                  -913.086***    0.002***   0.006*** 
##                         (56.194)     (0.0005)   (0.001)  
## OriginalInterestRate   982.258***   -0.049***  -0.018*** 
##                        (124.331)     (0.002)    (0.002)  
## freddie               -376.593***   -0.065***  -0.068*** 
##                         (69.397)     (0.002)    (0.003)  
## ---------------------------------------------------------
## Observations           1,782,540    3,081,018  1,627,106 
## R2                       0.046        0.008      0.013   
## Adjusted R2              0.043        0.006      0.009   
## =========================================================
## Note:                         *p<0.1; **p<0.05; ***p<0.01
r <- list()

r[[1]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[2]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[3]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("C") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[4]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("N") & OriginalUPB<1000000 & OriginalUPB>100000])


stargazer(r,type="text",no.space = T,omit.stat = "ser")
## 
## ==================================================================
##                                   Dependent variable:             
##                      ---------------------------------------------
##                                      delinq60days                 
##                          (1)         (2)        (3)        (4)    
## ------------------------------------------------------------------
## broker                0.002***    0.004***    0.002***   0.001*** 
##                       (0.0002)    (0.0003)    (0.0002)   (0.0002) 
## CreditScore          -0.00004*** -0.00003*** -0.0001*** -0.0001***
##                       (0.00000)   (0.00000)  (0.00001)  (0.00000) 
## DTI                   0.0001***   0.001***   0.0004***  0.0001*** 
##                       (0.00000)   (0.00002)  (0.00001)  (0.00000) 
## LTV                   0.0003***   0.0004***  0.0002***  0.0002*** 
##                       (0.00001)   (0.00001)  (0.00001)  (0.00001) 
## log(OriginalUPB)      -0.005***   -0.006***  -0.003***  -0.002*** 
##                       (0.0003)    (0.0004)    (0.0003)   (0.0002) 
## OriginalLoanTerm     0.00001***  0.00001***  0.00002*** 0.00000***
##                       (0.00000)   (0.00000)  (0.00000)  (0.00000) 
## OriginalInterestRate  0.004***    0.004***    0.003***   0.007*** 
##                       (0.0001)    (0.0002)    (0.0002)   (0.0002) 
## factor(LoanPurpose)N  -0.002***                                   
##                       (0.0001)                                    
## factor(LoanPurpose)P  -0.001***                                   
##                       (0.0002)                                    
## freddie               0.021***    0.028***    0.022***   0.013*** 
##                       (0.0004)     (0.001)    (0.001)    (0.0003) 
## ------------------------------------------------------------------
## Observations         14,379,499   4,884,857  4,073,531  5,421,111 
## R2                      0.020       0.024      0.020      0.025   
## Adjusted R2             0.020       0.023      0.018      0.024   
## ==================================================================
## Note:                                  *p<0.1; **p<0.05; ***p<0.01
r <- list()

r[[1]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2015])
r[[2]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2016])
r[[3]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2017])
r[[4]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2018])
r[[5]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2019])
r[[6]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2020])
r[[7]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & year==2021])

stargazer(r,type="text",no.space = T,omit.stat="ser")
## 
## ======================================================================================================
##                                                     Dependent variable:                               
##                      ---------------------------------------------------------------------------------
##                                                        delinq60days                                   
##                         (1)        (2)         (3)         (4)         (5)         (6)         (7)    
## ------------------------------------------------------------------------------------------------------
## broker                0.003***   0.003***   0.005***    0.006***    0.002***    0.002***    0.005***  
##                       (0.001)    (0.001)     (0.001)     (0.001)     (0.001)    (0.0002)    (0.0002)  
## CreditScore          -0.0002*** -0.0001*** -0.0001***  -0.00003*** -0.00003*** -0.00004*** -0.00003***
##                      (0.00002)  (0.00001)   (0.00001)   (0.00000)   (0.00000)   (0.00000)   (0.00000) 
## DTI                  0.0001***  0.0001***   0.0001***   0.0001***   0.0004***   0.0005***   0.0003*** 
##                      (0.00000)  (0.00000)   (0.00000)   (0.00001)   (0.00002)   (0.00001)   (0.00001) 
## LTV                  0.0002***  0.0002***   0.0002***   0.0003***   0.001***    0.0002***   0.0002*** 
##                      (0.00002)  (0.00002)   (0.00002)   (0.00002)   (0.00002)   (0.00001)   (0.00001) 
## log(OriginalUPB)     -0.002***    0.0001     -0.0004    -0.004***   -0.004***   -0.003***   -0.003*** 
##                       (0.001)    (0.001)     (0.001)     (0.001)     (0.001)    (0.0003)    (0.0002)  
## OriginalLoanTerm     0.00002***  0.00000   -0.00002*** -0.00001*** 0.00003***  -0.00000*** -0.00002***
##                      (0.00000)  (0.00000)   (0.00000)   (0.00000)   (0.00000)   (0.00000)   (0.00000) 
## OriginalInterestRate  0.004***   0.011***   0.019***    0.018***    0.005***    0.011***    0.012***  
##                       (0.001)    (0.001)     (0.001)     (0.001)    (0.0005)    (0.0003)    (0.0003)  
## factor(LoanPurpose)N -0.002***  -0.003***    -0.0001    0.004***    -0.005***   0.001***    0.001***  
##                       (0.0004)   (0.0004)   (0.0004)     (0.001)     (0.001)    (0.0002)    (0.0001)  
## factor(LoanPurpose)P -0.002***   -0.001**    0.001**     0.001**    -0.006***   0.006***    0.003***  
##                       (0.001)    (0.001)     (0.001)     (0.001)     (0.001)    (0.0003)    (0.0003)  
## ------------------------------------------------------------------------------------------------------
## Observations         1,112,187  1,372,291   1,079,768    937,591    1,252,911   3,597,081   3,683,207 
## R2                     0.025      0.020       0.019       0.011       0.012       0.009       0.007   
## Adjusted R2            0.024      0.020       0.018       0.011       0.011       0.009       0.007   
## ======================================================================================================
## Note:                                                                      *p<0.1; **p<0.05; ***p<0.01
mb_data <- readRDS("zip_mortgage_broker_stats.rds")
mb_data[,ZIP3:=as.numeric(ZIP)]
mb_data[,ZIP3:=floor(ZIP3/100)*100]

mb_data_1 <- mb_data[is.finite(brokers_per_home),
                    .(brokers_per_home=mean(brokers_per_home,na.rm=T)),
                   by=ZIP3]

mb_data_2 <- mb_data[is.finite(brokers_per_mortgage),
                      .(brokers_per_mortgage = mean(brokers_per_mortgage,na.rm=T),
                        brokers_per_single_family_unit = mean(brokers_per_single_family_unit,na.rm=T),
                        brokers_per_occupied_unit = mean(brokers_per_occupied_unit,na.rm=T)),
                   by=ZIP3]

mb_data <- merge(mb_data_1,mb_data_2,by="ZIP3",all.x=T,all.y=T)
gse <- merge(gse,mb_data,by.x="PostalCode",by.y="ZIP3",all.x=T)
r <- list()

r[[1]] <- felm(dist_gse_limit~broker*log(1+brokers_per_mortgage)+CreditScore+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie|zip_yr+LoanPurpose|0|PostalCode,gse[dist_gse_limit< 100000 & dist_gse_limit>0 ])
r[[2]] <- felm(dist_45~broker*log(1+brokers_per_mortgage)+CreditScore+log(OriginalUPB)+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie|zip_yr+LoanPurpose|0|PostalCode,gse[DTI>40 & DTI<=45 ])
r[[3]] <- felm(dist_50~broker*log(1+brokers_per_mortgage)+CreditScore+log(OriginalUPB)+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie|zip_yr+LoanPurpose|0|PostalCode,gse[DTI>45 & DTI<=50 ])

stargazer(r,type="text",no.space = T,omit.stat = "ser")
## 
## =========================================================================
##                                              Dependent variable:         
##                                      ------------------------------------
##                                      dist_gse_limit  dist_45    dist_50  
##                                           (1)          (2)        (3)    
## -------------------------------------------------------------------------
## broker                               -2,232.625***   -0.009*   -0.022*** 
##                                        (176.989)     (0.005)    (0.006)  
## log(1 + brokers_per_mortgage)                                            
##                                         (0.000)      (0.000)    (0.000)  
## CreditScore                              -0.662     0.0004***  0.00005** 
##                                         (0.412)     (0.00002)  (0.00002) 
## log(OriginalUPB)                                    -0.012***  -0.043*** 
##                                                      (0.003)    (0.003)  
## OriginalLoanTerm                       -14.858***   -0.0003*** -0.0004***
##                                         (0.801)     (0.00001)  (0.00002) 
## LTV                                   1,040.490***  -0.002***   -0.003** 
##                                         (63.104)     (0.0005)   (0.001)  
## CLTV                                  -912.298***    0.002***   0.006*** 
##                                         (56.411)     (0.0005)   (0.001)  
## OriginalInterestRate                   985.513***   -0.049***  -0.018*** 
##                                        (124.922)     (0.002)    (0.002)  
## freddie                               -374.736***   -0.065***  -0.068*** 
##                                         (69.653)     (0.002)    (0.003)  
## broker:log(1 + brokers_per_mortgage)   -153.482**    -0.004**    -0.004  
##                                         (69.631)     (0.002)    (0.002)  
## -------------------------------------------------------------------------
## Observations                           1,773,330    3,029,576  1,604,370 
## R2                                       0.046        0.008      0.013   
## Adjusted R2                              0.042        0.006      0.009   
## =========================================================================
## Note:                                         *p<0.1; **p<0.05; ***p<0.01
r <- list()

r[[1]] <- felm(delinq60days~broker*log(1+brokers_per_mortgage)+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[2]] <- felm(delinq60days~broker*log(1+brokers_per_mortgage)+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("P") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[3]] <- felm(delinq60days~broker*log(1+brokers_per_mortgage)+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("C") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[4]] <- felm(delinq60days~broker*log(1+brokers_per_mortgage)+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie|zip_yr|0|PostalCode,data=gse[LoanPurpose %in% c("N") & OriginalUPB<1000000 & OriginalUPB>100000])


stargazer(r,type="text",no.space = T,omit.stat = "ser")
## 
## ==================================================================================
##                                                   Dependent variable:             
##                                      ---------------------------------------------
##                                                      delinq60days                 
##                                          (1)         (2)        (3)        (4)    
## ----------------------------------------------------------------------------------
## broker                                0.002***    0.003***    0.002***   0.001*** 
##                                       (0.0003)    (0.0005)    (0.0004)   (0.0003) 
## log(1 + brokers_per_mortgage)                                                     
##                                        (0.000)     (0.000)    (0.000)    (0.000)  
## CreditScore                          -0.00004*** -0.00003*** -0.0001*** -0.0001***
##                                       (0.00000)   (0.00000)  (0.00000)  (0.00000) 
## DTI                                   0.0001***   0.001***   0.0004***  0.0001*** 
##                                       (0.00000)   (0.00002)  (0.00001)  (0.00000) 
## LTV                                   0.0003***   0.0004***  0.0002***  0.0002*** 
##                                       (0.00001)   (0.00001)  (0.00001)  (0.00001) 
## log(OriginalUPB)                      -0.005***   -0.006***  -0.003***  -0.002*** 
##                                       (0.0003)    (0.0004)    (0.0003)   (0.0003) 
## OriginalLoanTerm                     0.00001***  0.00001***  0.00002*** 0.00000***
##                                       (0.00000)   (0.00000)  (0.00000)  (0.00000) 
## OriginalInterestRate                  0.004***    0.004***    0.003***   0.007*** 
##                                       (0.0001)    (0.0002)    (0.0002)   (0.0002) 
## factor(LoanPurpose)N                  -0.002***                                   
##                                       (0.0001)                                    
## factor(LoanPurpose)P                  -0.001***                                   
##                                       (0.0003)                                    
## freddie                               0.021***    0.028***    0.022***   0.013*** 
##                                       (0.0005)     (0.001)    (0.001)    (0.0003) 
## broker:log(1 + brokers_per_mortgage)   0.0003      0.0005*    0.0003*    -0.0002* 
##                                       (0.0002)    (0.0002)    (0.0002)   (0.0001) 
## ----------------------------------------------------------------------------------
## Observations                         14,157,708   4,794,224  4,014,591  5,348,893 
## R2                                      0.020       0.024      0.020      0.025   
## Adjusted R2                             0.020       0.023      0.018      0.024   
## ==================================================================================
## Note:                                                  *p<0.1; **p<0.05; ***p<0.01
hmda_con <- dbConnect(RSQLite::SQLite(), "C:/Users/dratnadiwakara2/Downloads/HMDA/hmda.db")
tract_zip <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/TRACT_ZIP_032016.csv")
setorder(tract_zip,-RES_RATIO)
tract_zip <- tract_zip[!duplicated(tract_zip[,c("TRACT")])]
tract_zip <- tract_zip[,c("TRACT","ZIP")]
tract_zip[,TRACT:=as.character(TRACT)]
tract_zip[,TRACT:=str_pad(TRACT,11,"left","0")]
tract_zip[,ZIP3:=floor(ZIP/100)*100]
yrs <- as.character(2018:2021)
hmda <- list()
i=1
for(yr in yrs) {
  print(yr)
  hmda[[i]] <- data.table(dbGetQuery(hmda_con,
                                     paste0("select 
                                                asofdate,
                                                debt_to_income_ratio,
                                                amountofloan,
                                                submission_of_application,
                                                initially_payable_to_institution,
                                                purposeofloan,
                                                actiontaken,
                                                censustract,
                                                interest_rate,
                                                rate_spread,
                                                loan_term,
                                                applicant_age,
                                                property_value,
                                                applicantincome,
                                                applicantrace1,
                                                total_loan_costs,
                                                origination_charges,
                                                typeofpurchaser,
                                                co_applicant_age
                                            from lar_",yr," where 
                                            occupancy_type=1 and 
                                            derived_dwelling_category='Single Family (1-4 Units):Site-Built' and
                                            typeofpurchaser in (2,3)  ")))
  i=i+1
}
## [1] "2018"
## [1] "2019"
## [1] "2020"
## [1] "2021"
hmda <- rbindlist(hmda)
hmda <- data.table(hmda)
gc()
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   45321047  2420.5   91813432  4903.4   49569801  2647.4
## Vcells 2160026068 16479.7 3468155997 26460.0 2890007631 22049.1
hmda <- merge(hmda,tract_zip,by.x="censustract",by.y="TRACT")

hmda[,amt:=floor(amountofloan/10)*10]
hmda[,rt:=floor(interest_rate*10)/10]
hmda[,term:=floor(loan_term/10)*10]
hmda[,freddie:=ifelse(typeofpurchaser==3,1,0)]

hmda[,match_string:=paste(ZIP3,amt,rt,term,purposeofloan,debt_to_income_ratio,asofdate,freddie)]

hmda_duplicated_match_strings <- unique(hmda[duplicated(hmda[,c("match_string")])]$match_string)
hmda_gse <- merge(hmda[!match_string %in% hmda_duplicated_match_strings],
                  gse[!match_string %in% gse_duplicated_match_strings],
                  by="match_string")
setnames(hmda_gse,"freddie.x","freddie")



hmda_gse[,county_year:=paste(substr(censustract,1,5),year)]

hmda_gse[,joint_applicant:=ifelse(!is.na(co_applicant_age),1,0)]

hmda_gse[,loan_cost_frac:=total_loan_costs/(amountofloan*1000)]

hmda_gse <- hmda_gse[applicantincome>0 ]

hmda_gse[,rate_spread:=as.numeric(rate_spread)]
r <- list()

r[[1]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[2]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("P") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[3]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("C") & OriginalUPB<1000000 & OriginalUPB>100000])
r[[4]] <- felm(delinq60days~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("N") & OriginalUPB<1000000 & OriginalUPB>100000])


stargazer(r,type="text",no.space = T,omit.stat = "ser")
## 
## ====================================================================
##                                    Dependent variable:              
##                      -----------------------------------------------
##                                       delinq60days                  
##                          (1)         (2)         (3)         (4)    
## --------------------------------------------------------------------
## broker                0.005***    0.005***    0.004***    0.004***  
##                       (0.0004)     (0.001)     (0.001)    (0.0005)  
## CreditScore          -0.00003*** -0.00002*** -0.0001***  -0.00004***
##                       (0.00000)   (0.00000)   (0.00002)   (0.00001) 
## DTI                   0.0002***   0.001***    0.0004***   0.0001*** 
##                       (0.00002)   (0.00003)   (0.00004)   (0.00001) 
## LTV                   0.0003***   0.001***    0.0001***   0.0002*** 
##                       (0.00001)   (0.00002)   (0.00002)   (0.00002) 
## log(OriginalUPB)      0.003***    -0.005***   0.003***    0.003***  
##                        (0.001)     (0.001)     (0.001)     (0.001)  
## OriginalLoanTerm     -0.00004*** -0.00005*** -0.00002*** -0.00002***
##                       (0.00000)   (0.00001)   (0.00000)   (0.00000) 
## OriginalInterestRate  0.021***    0.027***    0.016***    0.016***  
##                        (0.001)     (0.001)     (0.001)     (0.001)  
## factor(LoanPurpose)N   0.0002                                       
##                       (0.0003)                                      
## factor(LoanPurpose)P  0.002***                                      
##                       (0.0005)                                      
## freddie               0.019***    0.023***    0.020***    0.007***  
##                        (0.001)     (0.001)     (0.001)     (0.001)  
## applicant_age        -0.0001***   0.0001**   -0.0003***  -0.00003** 
##                       (0.00001)   (0.00002)   (0.00002)   (0.00002) 
## log(applicantincome)  -0.009***   -0.004***   -0.008***   -0.006*** 
##                       (0.0004)     (0.001)     (0.001)    (0.0005)  
## --------------------------------------------------------------------
## Observations          1,642,625    534,501     480,449     627,675  
## R2                      0.032       0.046       0.053       0.040   
## Adjusted R2             0.025       0.026       0.032       0.025   
## ====================================================================
## Note:                                    *p<0.1; **p<0.05; ***p<0.01
r <- list()

r[[1]] <- felm(loan_cost_frac~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+factor(LoanPurpose)+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & loan_cost_frac>0.01 & loan_cost_frac<0.1])

r[[2]] <- felm(OriginalInterestRate~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+OriginalInterestRate+factor(LoanPurpose)+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & loan_cost_frac>0.01 & loan_cost_frac<0.1])

r[[3]] <- felm(rate_spread~broker+CreditScore+DTI+LTV+log(OriginalUPB)+OriginalLoanTerm+factor(LoanPurpose)+freddie+applicant_age+log(applicantincome)|county_year+applicantrace1|0|PostalCode,data=hmda_gse[LoanPurpose %in% c("P","C","N") & OriginalUPB<1000000 & OriginalUPB>100000 & loan_cost_frac>0.01 & loan_cost_frac<0.1 & abs(rate_spread)<5])

stargazer(r,type="text",no.space = T,omit.stat = "ser")
## 
## ====================================================================
##                                    Dependent variable:              
##                      -----------------------------------------------
##                      loan_cost_frac OriginalInterestRate rate_spread
##                           (1)               (2)              (3)    
## --------------------------------------------------------------------
## broker                  0.001***         -0.000***        -0.065*** 
##                         (0.0001)          (0.000)          (0.003)  
## CreditScore           -0.00000***         0.000***       -0.0002*** 
##                        (0.00000)          (0.000)         (0.00001) 
## DTI                    0.00000***         0.000***        0.002***  
##                        (0.00000)          (0.000)         (0.0002)  
## LTV                    0.0001***          0.000***        0.008***  
##                        (0.00000)          (0.000)         (0.0001)  
## log(OriginalUPB)       -0.010***         -0.000***        -0.348*** 
##                         (0.0001)          (0.000)          (0.004)  
## OriginalLoanTerm       0.00000***          0.000         -0.0002*** 
##                        (0.00000)          (0.000)         (0.00001) 
## OriginalInterestRate                      1.000***                  
##                                           (0.000)                   
## factor(LoanPurpose)N   -0.003***         -0.000***        -0.201*** 
##                        (0.00004)          (0.000)          (0.002)  
## factor(LoanPurpose)P   -0.003***         -0.000***        -0.212*** 
##                         (0.0001)          (0.000)          (0.004)  
## freddie                -0.013***         -0.000***        -0.346*** 
##                         (0.0001)          (0.000)          (0.006)  
## applicant_age          0.00003***        -0.000***       -0.0005*** 
##                        (0.00000)          (0.000)         (0.0001)  
## log(applicantincome)   -0.0004***         0.000***        0.065***  
##                        (0.00003)          (0.000)          (0.002)  
## --------------------------------------------------------------------
## Observations           1,227,156         1,227,156        1,212,487 
## R2                       0.339             1.000            0.283   
## Adjusted R2              0.332             1.000            0.277   
## ====================================================================
## Note:                                    *p<0.1; **p<0.05; ***p<0.01
r <- list()

r[[1]] <- felm(dist_gse_limit~broker+CreditScore+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie+applicant_age|zip_yr+LoanPurpose|0|PostalCode,hmda_gse[dist_gse_limit< 100000 & dist_gse_limit>0 ])
r[[2]] <- felm(dist_45~broker+CreditScore+log(OriginalUPB)+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie+applicant_age|zip_yr+LoanPurpose|0|PostalCode,hmda_gse[DTI>40 & DTI<=45 ])
r[[3]] <- felm(dist_50~broker+CreditScore+log(OriginalUPB)+OriginalLoanTerm+LTV+CLTV+OriginalInterestRate+freddie+applicant_age|zip_yr+LoanPurpose|0|PostalCode,hmda_gse[DTI>44 & DTI<=50 ])

stargazer(r,type="text",no.space = T,omit.stat = "ser")
## 
## ========================================================
##                              Dependent variable:        
##                      -----------------------------------
##                      dist_gse_limit  dist_45    dist_50 
##                           (1)          (2)        (3)   
## --------------------------------------------------------
## broker               -2,074.354***   -0.013*    -0.004  
##                        (186.911)     (0.007)    (0.009) 
## CreditScore              0.253      0.0002***  -0.001***
##                         (0.480)     (0.00004)  (0.0003) 
## log(OriginalUPB)                      -0.004   -0.130***
##                                      (0.007)    (0.010) 
## OriginalLoanTerm       -5.225***    -0.0002*** -0.0001**
##                         (1.541)     (0.00004)  (0.0001) 
## LTV                   1,236.256***  -0.002***    0.004  
##                         (40.611)     (0.001)    (0.005) 
## CLTV                 -1,082.361***   0.002**     0.008  
##                         (31.968)     (0.001)    (0.005) 
## OriginalInterestRate  1,057.029***  -0.046***  0.166*** 
##                        (244.916)     (0.006)    (0.012) 
## freddie              -4,672.006***   0.022**   0.818*** 
##                        (585.379)     (0.009)    (0.018) 
## applicant_age          -29.867***   -0.002***  -0.004***
##                         (9.086)      (0.0002)  (0.0003) 
## --------------------------------------------------------
## Observations            180,262      318,946    257,990 
## R2                       0.047        0.015      0.057  
## Adjusted R2              0.031        0.004      0.044  
## ========================================================
## Note:                        *p<0.1; **p<0.05; ***p<0.01