rm(list=ls())
library(data.table)
library(RSQLite)
library(DBI)
library(dplyr)
library(stargazer)
library(lfe)
library(ggplot2)
library(tidyverse)
library(stringr)

1 Documentation

dbdir = "/data/hmda.db"
con <- dbConnect(RSQLite::SQLite(),dbdir)
lar <- data.table(dbGetQuery(con,"
                            SELECT asofdate,respondentid,censustract,conforming_loan_limit,derived_race,derived_ethnicity,typeofpurchaser,typeofloan,purposeofloan,amountofloan,combined_loan_to_value_ratio,interest_rate,total_loan_costs,total_points_and_fees,origination_charges,discount_points,lender_credits,loan_term,property_value,occupancy,applicantincome,debt_to_income_ratio,applicant_age
                            FROM lar 
                            where actiontaken=1 
                            and propertytype=1
                            and lienstatus=1
                            and business_or_commercial_purpose=2
                            and hoepastatus in (2,3)
                            and occupancy=1"))
lar <- lar %>% mutate_at(.vars  =vars(interest_rate,combined_loan_to_value_ratio,property_value,total_loan_costs,total_points_and_fees,origination_charges,discount_points,loan_term,lender_credits),.funs= as.numeric)
lar[,applicant_age:=ifelse(applicant_age %in% c("<25"),20,
                            ifelse(applicant_age %in% c("25-34"),30,
                                   ifelse(applicant_age %in% c("35-44"),40,
                                          ifelse(applicant_age %in% c("45-54"),50,
                                                 ifelse(applicant_age %in% c("55-64"),60,
                                                        ifelse(applicant_age %in% c("65-74"),70,
                                                               ifelse(applicant_age %in% c(">74"),80,NA)))))))]




lar[,debt_to_income_ratio:=ifelse(debt_to_income_ratio %in% c("<20%"),0.15,
                            ifelse(debt_to_income_ratio %in% c("20%-<30%"),0.25,
                                   ifelse(debt_to_income_ratio %in% c("30%-<36%"),0.33,
                                          ifelse(debt_to_income_ratio %in% c("50%-60%"),0.55,
                                                 ifelse(debt_to_income_ratio %in% c(">60%"),0.65,
                                                        ifelse(debt_to_income_ratio %in% c("Exempt"),NA,debt_to_income_ratio))))))]
lar[,debt_to_income_ratio:=as.numeric(debt_to_income_ratio)]
lar[,debt_to_income_ratio:=ifelse(debt_to_income_ratio>1,debt_to_income_ratio/100,debt_to_income_ratio)]

lar[,loan_term:=ifelse(loan_term>=100 & loan_term<=120,120,
                       ifelse(loan_term>120 & loan_term<= 180,180,
                              ifelse(loan_term>180 & loan_term <= 240,240,
                                     ifelse(loan_term>240 & loan_term<=300,300,
                                            ifelse(loan_term>300 & loan_term<=400,360,loan_term)))))]

lar <- lar[loan_term>=120 & loan_term<=360]

lar[,conforming:=ifelse(conforming_loan_limit %in% c("C","U"),1,0)]
lar[,newpurchase:=ifelse(purposeofloan== 1,1,0)]
lar[,refinance:=ifelse(purposeofloan== 31,1,0)]
lar[,cashoutrefinance:=ifelse(purposeofloan== 32,1,0)]
lar[,white:=ifelse(derived_race=="White",1,0)]
lar[,costfrac:=total_loan_costs/(amountofloan*1000)]
lar[,origfeefrac:=origination_charges/(amountofloan*1000)]
lar[,amt100k:=amountofloan/100]
lar[,county_year:=paste(substr(censustract,1,5),asofdate)]
lar[,tract_year:=paste(censustract,asofdate)]
lar[,tract_bank:=paste(censustract,respondentid)]
lar[,raceethnicity:=ifelse(derived_race %in% c("White") & derived_ethnicity %in% c("Hispanic or Latino"),"Hispanic",
                    ifelse(derived_race %in% c("Race Not Available","Native Hawaiian or Other Pacific Islander","Joint","Free Form Text Only","American Indian or Alaska Native","2 or more minority races"),"0ther",derived_race))]
acs <- fread("/data/acs_2018_educ.csv")
acs[,GEOID:=as.character(GEOID)]
acs[,GEOID:=str_pad(GEOID,11,pad="0")]

lar <- merge(lar,acs,by.x="censustract",by.y="GEOID")
stargazer(lar[, c("asofdate","conforming","newpurchase","refinance","cashoutrefinance","white","amountofloan","combined_loan_to_value_ratio","interest_rate","total_loan_costs","total_points_and_fees","origination_charges","discount_points","lender_credits","loan_term","property_value","applicantincome","debt_to_income_ratio","applicant_age","costfrac","collegefraction_adults")], type = "text", summary.stat = c("mean", "sd","p25","median","p75","n"),notes = "",digits = 4)
## 
## ==========================================================================================================
## Statistic                        Mean        St. Dev.      Pctl(25)      Median      Pctl(75)       N     
## ----------------------------------------------------------------------------------------------------------
## asofdate                      2,018.5630      0.4960        2,018        2,019        2,019     12,302,554
## conforming                      0.9411        0.2354          1            1            1       12,302,554
## newpurchase                     0.5788        0.4938          0            1            1       12,302,554
## refinance                       0.2022        0.4016          0            0            0       12,302,554
## cashoutrefinance                0.1787        0.3831          0            0            0       12,302,554
## white                           0.7258        0.4461          0            1            1       12,302,554
## amountofloan                   277.5110      243.3015        155          225          335      12,302,554
## combined_loan_to_value_ratio   95.9614     18,854.7300     70.0000      80.0000      95.0000    11,775,278
## interest_rate                   4.4918       108.4403       3.8750       4.3750       4.7500    12,262,047
## total_loan_costs              4,990.0350   15,781.3500    2,751.7500   4,082.5500   6,294.1000  11,533,731
## total_points_and_fees         3,302.3710    4,304.1550    1,195.0000   2,196.5500   4,237.5000    57,901  
## origination_charges           1,861.5570    2,678.3050     700.0000    1,245.0000   2,311.7000  11,523,176
## discount_points               2,064.5890    2,951.9840     562.5000    1,344.8000   2,764.8700  4,220,640 
## lender_credits                2,097.7970  1,918,876.0000   92.5000      455.5000    1,336.5720  4,653,900 
## loan_term                      336.5434      59.3454         360          360          360      12,302,554
## property_value               389,693.0000 2,175,562.0000 195,000.0000 285,000.0000 435,000.0000 12,101,451
## applicantincome                121.8229     1,457.0100     56.0000      84.0000      128.0000   11,835,142
## debt_to_income_ratio            0.3660        0.1051        0.2500       0.3800       0.4400    11,785,698
## applicant_age                  45.9469       14.6155       30.0000      40.0000      60.0000    12,296,493
## costfrac                        0.0224        0.0769        0.0105       0.0189       0.0310    11,533,731
## collegefraction_adults          0.2758        0.0717        0.2310       0.2803       0.3252    12,279,537
## ----------------------------------------------------------------------------------------------------------
## 
templar <- lar[applicantincome>0 & newpurchase==1 & typeofloan==1 & loan_term==360 & combined_loan_to_value_ratio==80 & amountofloan>=100 & amountofloan<=400 & conforming==1]

templar[,amountbin:=floor(amountofloan/25)*25]

templar <- templar[raceethnicity %in% c("Hispanic","White","Black or African American"),.(cost=mean(costfrac,na.rm=T)),by=.(amountbin,raceethnicity)]
ggplot(data=templar,aes(x=amountbin,y=cost,color=raceethnicity))+geom_line()+theme_minimal()+theme(legend.position = "bottom",legend.title = element_blank())+labs(x="Loan amount $'000",y="Closing costs as a % of loan amount")

r <- list()

r[[1]] <-felm(I(costfrac*100)~log(amt100k)+interest_rate+log(applicantincome)+log(applicant_age)+factor(raceethnicity)+loan_term+combined_loan_to_value_ratio+debt_to_income_ratio+conforming|censustract+asofdate|0|censustract,data=lar[applicantincome>0 & newpurchase==1 & typeofloan==1 & loan_term==360])
r[[2]] <-felm(I(costfrac*100)~log(amt100k)+interest_rate+log(applicantincome)+log(applicant_age)+factor(raceethnicity)+loan_term+combined_loan_to_value_ratio+debt_to_income_ratio+conforming+cashoutrefinance|censustract+asofdate|0|censustract,data=lar[applicantincome>0 & newpurchase==0 & typeofloan==1  & loan_term==360])

stargazer(r,no.space = T,align = T,omit.stat=c("ser","f", "rsq"),style = "qje",omit.table.layout = "n", dep.var.labels.include = FALSE,type="text")
## 
## ===================================================================
##                                                   (1)       (2)    
## -------------------------------------------------------------------
## log(amt100k)                                   -2.118*** -1.604*** 
##                                                 (0.054)   (0.034)  
## interest_rate                                    0.004     0.0001  
##                                                 (0.003)   (0.0001) 
## log(applicantincome)                           0.551***   0.157*** 
##                                                 (0.047)   (0.022)  
## log(applicant_age)                             -0.041**   0.054*** 
##                                                 (0.020)   (0.010)  
## factor(raceethnicity)Asian                      -0.002   -0.173*** 
##                                                 (0.034)   (0.007)  
## factor(raceethnicity)Black or African American  0.043*    0.019**  
##                                                 (0.023)   (0.008)  
## factor(raceethnicity)Hispanic                   0.051**  -0.075*** 
##                                                 (0.021)   (0.006)  
## factor(raceethnicity)White                     -0.071**  -0.161*** 
##                                                 (0.031)   (0.003)  
## loan_term                                                          
##                                                 (0.000)   (0.000)  
## combined_loan_to_value_ratio                   -0.000**  -0.00000**
##                                                 (0.000)  (0.00000) 
## debt_to_income_ratio                           1.469***   1.117*** 
##                                                 (0.076)   (0.066)  
## conforming                                     -0.607*** -0.628*** 
##                                                 (0.011)   (0.011)  
## cashoutrefinance                                          0.212*** 
##                                                           (0.003)  
## N                                              4,129,272 2,266,431 
## Adjusted R2                                      0.007     0.117   
## ===================================================================
r <- list()

r[[1]] <-felm(I(costfrac*100)~log(amt100k)+interest_rate+log(applicantincome)+log(applicant_age)+factor(raceethnicity)+loan_term+combined_loan_to_value_ratio+debt_to_income_ratio+conforming+log(collegefraction_adults)|county_year|0|censustract,data=lar[applicantincome>0 & newpurchase==1 & typeofloan==1 & loan_term==360 & collegefraction_adults>0])
r[[2]] <-felm(I(costfrac*100)~log(amt100k)+interest_rate+log(applicantincome)+log(applicant_age)+factor(raceethnicity)+loan_term+combined_loan_to_value_ratio+debt_to_income_ratio+conforming+cashoutrefinance+log(collegefraction_adults)|county_year|0|censustract,data=lar[applicantincome>0 & newpurchase==0 & typeofloan==1  & loan_term==360 & collegefraction_adults>0])

stargazer(r,no.space = T,align = T,omit.stat=c("ser","f", "rsq"),style = "qje",omit.table.layout = "n", dep.var.labels.include = FALSE,type="text")
## 
## ==================================================================
##                                                   (1)       (2)   
## ------------------------------------------------------------------
## log(amt100k)                                   -2.086*** -1.633***
##                                                 (0.074)   (0.030) 
## interest_rate                                    0.004    0.0001  
##                                                 (0.003)  (0.0001) 
## log(applicantincome)                           0.562***  0.150*** 
##                                                 (0.058)   (0.022) 
## log(applicant_age)                              -0.007   0.055*** 
##                                                 (0.007)   (0.009) 
## factor(raceethnicity)Asian                      -0.017   -0.202***
##                                                 (0.043)   (0.007) 
## factor(raceethnicity)Black or African American   0.034   0.059*** 
##                                                 (0.032)   (0.007) 
## factor(raceethnicity)Hispanic                    0.045   -0.062***
##                                                 (0.033)   (0.005) 
## factor(raceethnicity)White                     -0.074**  -0.169***
##                                                 (0.035)   (0.003) 
## loan_term                                                         
##                                                 (0.000)   (0.000) 
## combined_loan_to_value_ratio                   -0.000**  -0.00000*
##                                                 (0.000)  (0.00000)
## debt_to_income_ratio                           1.487***  1.119*** 
##                                                 (0.078)   (0.063) 
## conforming                                     -0.631*** -0.702***
##                                                 (0.012)   (0.010) 
## cashoutrefinance                                         0.215*** 
##                                                           (0.003) 
## log(collegefraction_adults)                    -0.060*** -0.046***
##                                                 (0.007)   (0.008) 
## N                                              4,125,808 2,265,385
## Adjusted R2                                      0.005     0.110  
## ==================================================================