Documentation
dbdir = "/data/hmda.db"
con <- dbConnect(RSQLite::SQLite(),dbdir)
lar <- data.table(dbGetQuery(con,"
SELECT asofdate,respondentid,censustract,conforming_loan_limit,derived_race,derived_ethnicity,typeofpurchaser,typeofloan,purposeofloan,amountofloan,combined_loan_to_value_ratio,interest_rate,total_loan_costs,total_points_and_fees,origination_charges,discount_points,lender_credits,loan_term,property_value,occupancy,applicantincome,debt_to_income_ratio,applicant_age
FROM lar
where actiontaken=1
and propertytype=1
and lienstatus=1
and business_or_commercial_purpose=2
and hoepastatus in (2,3)
and occupancy=1"))
lar <- lar %>% mutate_at(.vars =vars(interest_rate,combined_loan_to_value_ratio,property_value,total_loan_costs,total_points_and_fees,origination_charges,discount_points,loan_term,lender_credits),.funs= as.numeric)
lar[,applicant_age:=ifelse(applicant_age %in% c("<25"),20,
ifelse(applicant_age %in% c("25-34"),30,
ifelse(applicant_age %in% c("35-44"),40,
ifelse(applicant_age %in% c("45-54"),50,
ifelse(applicant_age %in% c("55-64"),60,
ifelse(applicant_age %in% c("65-74"),70,
ifelse(applicant_age %in% c(">74"),80,NA)))))))]
lar[,debt_to_income_ratio:=ifelse(debt_to_income_ratio %in% c("<20%"),0.15,
ifelse(debt_to_income_ratio %in% c("20%-<30%"),0.25,
ifelse(debt_to_income_ratio %in% c("30%-<36%"),0.33,
ifelse(debt_to_income_ratio %in% c("50%-60%"),0.55,
ifelse(debt_to_income_ratio %in% c(">60%"),0.65,
ifelse(debt_to_income_ratio %in% c("Exempt"),NA,debt_to_income_ratio))))))]
lar[,debt_to_income_ratio:=as.numeric(debt_to_income_ratio)]
lar[,debt_to_income_ratio:=ifelse(debt_to_income_ratio>1,debt_to_income_ratio/100,debt_to_income_ratio)]
lar[,loan_term:=ifelse(loan_term>=100 & loan_term<=120,120,
ifelse(loan_term>120 & loan_term<= 180,180,
ifelse(loan_term>180 & loan_term <= 240,240,
ifelse(loan_term>240 & loan_term<=300,300,
ifelse(loan_term>300 & loan_term<=400,360,loan_term)))))]
lar <- lar[loan_term>=120 & loan_term<=360]
lar[,conforming:=ifelse(conforming_loan_limit %in% c("C","U"),1,0)]
lar[,newpurchase:=ifelse(purposeofloan== 1,1,0)]
lar[,refinance:=ifelse(purposeofloan== 31,1,0)]
lar[,cashoutrefinance:=ifelse(purposeofloan== 32,1,0)]
lar[,white:=ifelse(derived_race=="White",1,0)]
lar[,costfrac:=total_loan_costs/(amountofloan*1000)]
lar[,origfeefrac:=origination_charges/(amountofloan*1000)]
lar[,amt100k:=amountofloan/100]
lar[,county_year:=paste(substr(censustract,1,5),asofdate)]
lar[,tract_year:=paste(censustract,asofdate)]
lar[,tract_bank:=paste(censustract,respondentid)]
lar[,raceethnicity:=ifelse(derived_race %in% c("White") & derived_ethnicity %in% c("Hispanic or Latino"),"Hispanic",
ifelse(derived_race %in% c("Race Not Available","Native Hawaiian or Other Pacific Islander","Joint","Free Form Text Only","American Indian or Alaska Native","2 or more minority races"),"0ther",derived_race))]
acs <- fread("/data/acs_2018_educ.csv")
acs[,GEOID:=as.character(GEOID)]
acs[,GEOID:=str_pad(GEOID,11,pad="0")]
lar <- merge(lar,acs,by.x="censustract",by.y="GEOID")
stargazer(lar[, c("asofdate","conforming","newpurchase","refinance","cashoutrefinance","white","amountofloan","combined_loan_to_value_ratio","interest_rate","total_loan_costs","total_points_and_fees","origination_charges","discount_points","lender_credits","loan_term","property_value","applicantincome","debt_to_income_ratio","applicant_age","costfrac","collegefraction_adults")], type = "text", summary.stat = c("mean", "sd","p25","median","p75","n"),notes = "",digits = 4)
##
## ==========================================================================================================
## Statistic Mean St. Dev. Pctl(25) Median Pctl(75) N
## ----------------------------------------------------------------------------------------------------------
## asofdate 2,018.5630 0.4960 2,018 2,019 2,019 12,302,554
## conforming 0.9411 0.2354 1 1 1 12,302,554
## newpurchase 0.5788 0.4938 0 1 1 12,302,554
## refinance 0.2022 0.4016 0 0 0 12,302,554
## cashoutrefinance 0.1787 0.3831 0 0 0 12,302,554
## white 0.7258 0.4461 0 1 1 12,302,554
## amountofloan 277.5110 243.3015 155 225 335 12,302,554
## combined_loan_to_value_ratio 95.9614 18,854.7300 70.0000 80.0000 95.0000 11,775,278
## interest_rate 4.4918 108.4403 3.8750 4.3750 4.7500 12,262,047
## total_loan_costs 4,990.0350 15,781.3500 2,751.7500 4,082.5500 6,294.1000 11,533,731
## total_points_and_fees 3,302.3710 4,304.1550 1,195.0000 2,196.5500 4,237.5000 57,901
## origination_charges 1,861.5570 2,678.3050 700.0000 1,245.0000 2,311.7000 11,523,176
## discount_points 2,064.5890 2,951.9840 562.5000 1,344.8000 2,764.8700 4,220,640
## lender_credits 2,097.7970 1,918,876.0000 92.5000 455.5000 1,336.5720 4,653,900
## loan_term 336.5434 59.3454 360 360 360 12,302,554
## property_value 389,693.0000 2,175,562.0000 195,000.0000 285,000.0000 435,000.0000 12,101,451
## applicantincome 121.8229 1,457.0100 56.0000 84.0000 128.0000 11,835,142
## debt_to_income_ratio 0.3660 0.1051 0.2500 0.3800 0.4400 11,785,698
## applicant_age 45.9469 14.6155 30.0000 40.0000 60.0000 12,296,493
## costfrac 0.0224 0.0769 0.0105 0.0189 0.0310 11,533,731
## collegefraction_adults 0.2758 0.0717 0.2310 0.2803 0.3252 12,279,537
## ----------------------------------------------------------------------------------------------------------
##
templar <- lar[applicantincome>0 & newpurchase==1 & typeofloan==1 & loan_term==360 & combined_loan_to_value_ratio==80 & amountofloan>=100 & amountofloan<=400 & conforming==1]
templar[,amountbin:=floor(amountofloan/25)*25]
templar <- templar[raceethnicity %in% c("Hispanic","White","Black or African American"),.(cost=mean(costfrac,na.rm=T)),by=.(amountbin,raceethnicity)]
ggplot(data=templar,aes(x=amountbin,y=cost,color=raceethnicity))+geom_line()+theme_minimal()+theme(legend.position = "bottom",legend.title = element_blank())+labs(x="Loan amount $'000",y="Closing costs as a % of loan amount")

r <- list()
r[[1]] <-felm(I(costfrac*100)~log(amt100k)+interest_rate+log(applicantincome)+log(applicant_age)+factor(raceethnicity)+loan_term+combined_loan_to_value_ratio+debt_to_income_ratio+conforming|censustract+asofdate|0|censustract,data=lar[applicantincome>0 & newpurchase==1 & typeofloan==1 & loan_term==360])
r[[2]] <-felm(I(costfrac*100)~log(amt100k)+interest_rate+log(applicantincome)+log(applicant_age)+factor(raceethnicity)+loan_term+combined_loan_to_value_ratio+debt_to_income_ratio+conforming+cashoutrefinance|censustract+asofdate|0|censustract,data=lar[applicantincome>0 & newpurchase==0 & typeofloan==1 & loan_term==360])
stargazer(r,no.space = T,align = T,omit.stat=c("ser","f", "rsq"),style = "qje",omit.table.layout = "n", dep.var.labels.include = FALSE,type="text")
##
## ===================================================================
## (1) (2)
## -------------------------------------------------------------------
## log(amt100k) -2.118*** -1.604***
## (0.054) (0.034)
## interest_rate 0.004 0.0001
## (0.003) (0.0001)
## log(applicantincome) 0.551*** 0.157***
## (0.047) (0.022)
## log(applicant_age) -0.041** 0.054***
## (0.020) (0.010)
## factor(raceethnicity)Asian -0.002 -0.173***
## (0.034) (0.007)
## factor(raceethnicity)Black or African American 0.043* 0.019**
## (0.023) (0.008)
## factor(raceethnicity)Hispanic 0.051** -0.075***
## (0.021) (0.006)
## factor(raceethnicity)White -0.071** -0.161***
## (0.031) (0.003)
## loan_term
## (0.000) (0.000)
## combined_loan_to_value_ratio -0.000** -0.00000**
## (0.000) (0.00000)
## debt_to_income_ratio 1.469*** 1.117***
## (0.076) (0.066)
## conforming -0.607*** -0.628***
## (0.011) (0.011)
## cashoutrefinance 0.212***
## (0.003)
## N 4,129,272 2,266,431
## Adjusted R2 0.007 0.117
## ===================================================================
r <- list()
r[[1]] <-felm(I(costfrac*100)~log(amt100k)+interest_rate+log(applicantincome)+log(applicant_age)+factor(raceethnicity)+loan_term+combined_loan_to_value_ratio+debt_to_income_ratio+conforming+log(collegefraction_adults)|county_year|0|censustract,data=lar[applicantincome>0 & newpurchase==1 & typeofloan==1 & loan_term==360 & collegefraction_adults>0])
r[[2]] <-felm(I(costfrac*100)~log(amt100k)+interest_rate+log(applicantincome)+log(applicant_age)+factor(raceethnicity)+loan_term+combined_loan_to_value_ratio+debt_to_income_ratio+conforming+cashoutrefinance+log(collegefraction_adults)|county_year|0|censustract,data=lar[applicantincome>0 & newpurchase==0 & typeofloan==1 & loan_term==360 & collegefraction_adults>0])
stargazer(r,no.space = T,align = T,omit.stat=c("ser","f", "rsq"),style = "qje",omit.table.layout = "n", dep.var.labels.include = FALSE,type="text")
##
## ==================================================================
## (1) (2)
## ------------------------------------------------------------------
## log(amt100k) -2.086*** -1.633***
## (0.074) (0.030)
## interest_rate 0.004 0.0001
## (0.003) (0.0001)
## log(applicantincome) 0.562*** 0.150***
## (0.058) (0.022)
## log(applicant_age) -0.007 0.055***
## (0.007) (0.009)
## factor(raceethnicity)Asian -0.017 -0.202***
## (0.043) (0.007)
## factor(raceethnicity)Black or African American 0.034 0.059***
## (0.032) (0.007)
## factor(raceethnicity)Hispanic 0.045 -0.062***
## (0.033) (0.005)
## factor(raceethnicity)White -0.074** -0.169***
## (0.035) (0.003)
## loan_term
## (0.000) (0.000)
## combined_loan_to_value_ratio -0.000** -0.00000*
## (0.000) (0.00000)
## debt_to_income_ratio 1.487*** 1.119***
## (0.078) (0.063)
## conforming -0.631*** -0.702***
## (0.012) (0.010)
## cashoutrefinance 0.215***
## (0.003)
## log(collegefraction_adults) -0.060*** -0.046***
## (0.007) (0.008)
## N 4,125,808 2,265,385
## Adjusted R2 0.005 0.110
## ==================================================================