rm(list=ls())
library(RPostgres)
library(data.table)
library(getPass)
library(lfe)
library(stargazer)
library(ggplot2)
library(fst)
library(stringr)
library(stringi)
library(lubridate)
library(FinCal)
library(knitr)

source("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/functions.R")
files = paste0("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/Ultimate Panel Data/",c("2018.fst","2019.fst"))

panel = lapply(files, read_fst, as.data.table = TRUE) 
panel <- do.call(rbind , panel)
setorder(panel,-asofdate)

panel <- panel[!duplicated(panel[,c("respondentid")])]
panel[,totalassets:=as.numeric(totalassets)]
panel <- panel[,c("respondentid","rssd","totalassets")]


fintech <- c(3914377,3870099,3881554,4437716,3958157,3870679,5134115) 
bank <- c(3284070,480228,339858,112837,476810,3378018,277053,193247,598534,3202962,255574,723112,3072606,2712969,
          4114567,146672,739560,258771,413208,852218, 1583845,2489805,212465,617677,694904,817824,238791,233031,
          911973,443205,817477,619877,451965)
mtg <- c(3876998,4327938,4320416,4437873,3955754,3876112,3870606,3871751,3844465,4186573,3955923,3871519,3955651,
         3861163,3954355,3247123,3966116,1569885,4325019,4438209,3842407,3955491,4186591,3873456,3897500,3877575,
         2888798,3332935,3875179,5023545,3875692,1072246,583295,3870651,3913099,fintech)

bigbanks <- c(497404,112837,504713,852218,817824,480228,476810,451965)


files <- NULL
files  <- c(files,list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/OO_NP/",full.names = TRUE))
# files  <- c(files,list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/HMDA/OO_RF/",full.names = TRUE))

files <- files[substr(files,100,103) %in% c("2018","2019")] #c("2014","2015","2016","2017","2018")

hmda = lapply(files, read_fst, as.data.table = TRUE,
              columns=c("asofdate","respondentid","agencycode","state","actiontaken","censustract","applicantrace1","applicantincome","purposeofloan","amountofloan","interest_rate","ratespread","loan_term","property_value","debt_to_income_ratio","applicant_age","denialreason1","denialreason2","conforming_loan_limit","combined_loan_to_value_ratio","typeofloan","typeofpurchaser","co_applicant_race_1"))
hmda <- do.call(rbind , hmda)

hmda[,approved:=ifelse(actiontaken %in% c(1,2),1,0)]
hmda[,nonwhite:=ifelse(applicantrace1==5,0,1)]
hmda[,tract_yr:=paste(censustract,asofdate)]

hmda[,interest_rate:=as.numeric(interest_rate)]
hmda[,loan_term:=as.numeric(loan_term)]


hmda[,newpurchase:=ifelse(purposeofloan==1,1,0)]


hmda[,joint:=ifelse(co_applicant_race_1 %in% c(7,8),0,1)]

setkey(hmda,censustract)


hmda[,combined_loan_to_value_ratio:=as.numeric(combined_loan_to_value_ratio)]

hmda[,applicant_age:=ifelse(applicant_age %in% c("<25"),20,
                            ifelse(applicant_age %in% c("25-34"),30,
                                   ifelse(applicant_age %in% c("35-44"),40,
                                          ifelse(applicant_age %in% c("45-54"),50,
                                                 ifelse(applicant_age %in% c("55-64"),60,
                                                        ifelse(applicant_age %in% c("65-74"),70,
                                                               ifelse(applicant_age %in% c(">74"),80,NA)))))))]


hmda[,dti:=ifelse(debt_to_income_ratio %in% c("<20%"),0.15,
                            ifelse(debt_to_income_ratio %in% c("20%-<30%"),0.25,
                                   ifelse(debt_to_income_ratio %in% c("30%-<36%"),0.33,
                                          ifelse(debt_to_income_ratio %in% c("50%-60%"),0.55,
                                                 ifelse(debt_to_income_ratio %in% c(">60%"),0.65,
                                                        ifelse(debt_to_income_ratio %in% c("Exempt"),NA,debt_to_income_ratio))))))]
hmda[,dti:=as.numeric(dti)]
hmda[,dti:=ifelse(dti>1,dti/100,dti)]


hmda[,race:=ifelse(applicantrace1=="5","white",ifelse(applicantrace1=="3","black","0ther"))]

hmda[,dtigt43:=ifelse(debt_to_income_ratio %in% as.character(44:49),1,ifelse(debt_to_income_ratio %in% as.character(37:43),0,NA))]

hmda[,denied:=ifelse(actiontaken %in% c(3,7,5),1,0)]
hmda <- hmda[amountofloan>0 & applicantincome>0]


hmda[,loantoincome:=amountofloan/applicantincome]

hmda <- merge(hmda,panel,by="respondentid")

hmda[,bank:=ifelse(rssd %in% bank,1,ifelse(rssd %in% mtg,0,NA))]

hmda[,countycode:=substr(censustract,1,5)]
hmda[,county_yr:=paste(countycode,asofdate)]
hmda[,sold:=ifelse(typeofpurchaser>0,1,0)]
hmda[,soldgse:=ifelse(typeofpurchaser %in% c(1,3),1,0)]

hmda[,dti:=dti*100]

1 Research Question

How did DTI constraints impact migration? We test wheter households who lived in areas where DTI constraints more likely to bind migrated to areas where DTI constraints are less likely to bind.

2 DTI Caps and Mortgage Lending

The following figures shows that the DTI caps have a significant on mortgage credit. These plots use 2018 and 2019 HMDA data (prior data does not have DTI information). Each figure below plots the \(\beta\) estimates of the following regression for the sample indicated in the title.

\[ denied = \Sigma_{dti} \beta_{dti} * dti + log(loan amount) + log(income)+age+race+CountyFE+LenderFE+YearFE\]


Mean denial rate for conforming mortgages 9.6%
Mean denial rate for non-conforming mortgages 10.7%

Exact values for dti is only available if the dti is between 36 and 49. Other dti values are grouped into categories.

2.1 Conforming Mortgages 1

r <- list()
r[[1]] <- felm(denied~factor(dti)+log(amountofloan)+log(applicantincome)+applicant_age+factor(race)|countycode+asofdate+respondentid|0|countycode,hmda[actiontaken<=3 & conforming_loan_limit=="C"])
r[[2]] <- felm(denied~factor(dti)+log(amountofloan)+log(applicantincome)+applicant_age+factor(race)|countycode+asofdate+respondentid|0|countycode,hmda[actiontaken<=3 & conforming_loan_limit!="C"])
r[[3]] <- felm(denied~factor(dti)+log(amountofloan)+log(applicantincome)+applicant_age+factor(race)|countycode+asofdate+respondentid|0|countycode,hmda[actiontaken<=3 & conforming_loan_limit=="C" & dti %in% 36:49])
r[[4]] <- felm(denied~factor(dti)+log(amountofloan)+log(applicantincome)+applicant_age+factor(race)|countycode+asofdate+respondentid|0|countycode,hmda[actiontaken<=3 & conforming_loan_limit!="C" & dti %in% 36:49])

.coef_plot_1reg_line(r[[1]],"factor(dti)",15)

2.2 Conforming Mortgages 2

This plot zooms in on dti 36 to 49 range

.coef_plot_1reg_line(r[[3]],"factor(dti)",36)

2.3 Non-conforming Mortgages 1

.coef_plot_1reg_line(r[[2]],"factor(dti)",15)

2.4 Non-conforming Mortgages 2

This plot zooms in on dti 36 to 49 range

.coef_plot_1reg_line(r[[4]],"factor(dti)",36)

3 DTI Caps became a thing after the financial crisis

The figure below plots the distribution of dti for mortgages sold to freddie mac. It shows that maximum dti was capped at 50% after the financial crisis.

Several regulations.
  • QRM
  • QM
  • GSE Eligibility criteria
freddie <- list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Freddie/Acq",full.names = TRUE)
freddie = lapply(freddie, read_fst,as.data.table=T, columns = c("fico","dt_first_pi","dti","prop_type","zipcode","orig_loan_term","loan_purpose"))
freddie <- do.call(rbind , freddie)

freddie <- freddie[orig_loan_term==360 & prop_type=="SF" & loan_purpose=="P"]
freddie[,loanyr:=year(dt_first_pi)]

ggplot()+geom_density(data=freddie[loanyr>=2000 & dti<=65],aes(x=dti),fill="royalblue",alpha=0.4)+
  geom_vline(xintercept=50,color="darkred")+theme_minimal()+
  facet_wrap(~loanyr)+labs(x="Debt-to-income",y="Density")

zhvi <- readRDS("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Zillow Research Data/County_Zhvi_SingleFamilyResidence.rds")
zhvi <- as.data.table(zhvi)
zhvi[,year:=year(month)]
zhvi <- zhvi[,.(zhvi=mean(zhvi,na.rm=T)),by=.(fips,year)]
names(zhvi) <- c("fips","year","zhvi")
zhvi[,fips:=as.numeric(fips)]
zhvi <- zhvi[year %in% 2004:2017]

agi <- list.files(pattern="*.fst",path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/IRS SOI",full.names = TRUE)
agi = lapply(agi, read_fst, as.data.table = TRUE)
agi <- do.call(rbind , agi)
temp <- agi[year==2016]
temp[,year:=2017]
agi <- rbind(agi,temp)
temp <- agi[year==2005]
temp[,year:=2004]
agi <- rbind(agi,temp)
agi[,zipcode:=as.numeric(zipcode)]

ziptofips <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/county_zip.csv")
ziptofips <- ziptofips[,c("COUNTY","ZIP","RES_RATIO")]
agi <- merge(agi,ziptofips,by.x=c("zipcode"),by.y="ZIP",allow.cartesian=TRUE)
agi[,agi:=agi*RES_RATIO]
agi <- agi[,.(income=sum(agi)),by=.(COUNTY,year)]

zhvi <- merge(zhvi,agi,by.x=c("fips","year"),by.y=c("COUNTY","year"))

zhvi[,pricetoincome:=zhvi/income]
zhvi <- zhvi[income>10000 & income<1000000]

unemp <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/publicdata/LaborForceData/blsunempdata.csv",select = c("state","county","year","unemprate"))
unemp[,state:=str_pad(state,2,side="left",pad="0")]
unemp[,county:=str_pad(county,3,side="left",pad="0")]
unemp[,fips:=paste0(state,county)]
unemp[,c("state","county"):=list(NULL)]
unemp[,fips:=as.numeric(fips)]

4 House price-to-income ratio

We use county-level house price-to-income ratio as a measure of how binding the dti constraints would be for the borrowers. The borrowers who live in high house price-to-income ratio are more likely to be credit constrained.

The figure below shows the geographic distribution of the price-to-income ratio in 2010. House price data is from Zillow (ZHVI) and income data is from Individual Income Tax Statistics from IRS.

4.1 Map

## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US States", layer: "cb_2014_us_state_20m"
## with 52 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US Counties\cb_2013_us_county_20m", layer: "cb_2013_us_county_20m"
## with 3221 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER

4.2 Time series

ptoi <- zhvi[,.(pricetoincome=mean(pricetoincome,na.rm=T)),by=year]
ggplot(ptoi)+geom_line(aes(x=year,y=pricetoincome))+theme_minimal()+labs(x="",y="House Price/Income")

5 Migration Data

We obtained county-level U.S. Population migration data from IRS (https://www.irs.gov/statistics/soi-tax-stats-migration-data). The figure below shows the time series of total outmigration.

outmigration <- read_fst("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/IRS Migration Data/out_migration_1997_2017.fst",as.data.table = T)
outmigration <- outmigration[fromcounty != "000"]
outmigration[,fromcounty:=paste0(fromstate,fromcounty)]
outmigration[,tocounty:=paste0(tostate,tocounty)]
outmigration <- outmigration[fromcounty==tocounty | tocounty=="96000"]
outmigration[,samecounty:=ifelse(fromcounty==tocounty,1,0)]
outmigration[,c("fromstate","tostate","file","statecode","desc","tocounty","noexceptions","agi"):=list(NULL)]
outmigration <- outmigration[fromyear>=2003]
outmigration[,fromcounty:=as.numeric(fromcounty)]

outmigration <- dcast(outmigration,fromcounty+fromyear~samecounty,fun.aggregate = sum,value.var = "noreturns")
outmigration <- outmigration[!is.na(fromcounty) & fromcounty!= 1000]
names(outmigration) <- c("fromcounty","fromyear","nomigrate","nostay")
fracmigrate <- outmigration[,.(nomigrate=sum(nomigrate,na.rm=T)/1e6),by=fromyear]
ggplot(fracmigrate)+geom_line(aes(x=fromyear,y=nomigrate))+theme_minimal()+labs(x="",y="Millions of households")+ylim(c(0,11))

outmigration <- merge(outmigration,zhvi,by.x=c("fromcounty","fromyear"),by.y=c("fips","year"))
outmigration <- outmigration[!duplicated(outmigration)]

outmigration <- merge(outmigration,unemp,by.x=c("fromcounty","fromyear"),by.y=c("fips","year"),all.x=T)
outmigration[,fracmigrated:=nomigrate/(nomigrate+nostay)]

6 Out-migration county-year panel

First, we constructed a county-year panel using the migration data. Each row in this panel indicate total out-migration from the county in the given year.

We run the following regression using this panel.
\[log(No\text{ }migrated) = \Sigma_t\beta_t log(Price\text{ }to\text{ }income) \times t + UnempRate+log(income)+log(houseprice)+CountyFE+YearFE\] The figure below plots the \(\beta_t\) estimates. The results suggest that outmigration from high price-to-income counties increased post DTI constraints.

r <- felm(log(nomigrate)~log(pricetoincome)*factor(fromyear)+unemprate+log(income)+log(zhvi)|fromcounty+fromyear|0|fromcounty,data=outmigration[fromyear>=2005])
.coef_plot_1reg_line(r,"log(pricetoincome):factor(fromyear)",2005)

outmigration <- read_fst("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/IRS Migration Data/out_migration_1997_2017.fst",as.data.table = T)
outmigration[,fromcounty:=paste0(fromstate,fromcounty)]
outmigration[,tocounty:=paste0(tostate,tocounty)]
outmigration <- outmigration[fromcounty!="01000" & tocounty != "000" & !tocounty %in% c("96000","01000","97000","98000","97001","97003","59001","59003","59004","59005","59002","59006","59007","59008","59009")]
outmigration[,c("fromstate","tostate","file","statecode","desc"):=list(NULL)]
outmigration <- outmigration[fromyear>=2003]
outmigration[,samecounty:=ifelse(fromcounty==tocounty,1,0)]
outmigration[,fromcounty:=as.numeric(fromcounty)]
outmigration[,tocounty:=as.numeric(tocounty)]

outmigration <- outmigration[fromcounty != tocounty]
outmigration[,fromto:=paste(fromcounty,tocounty)]

outmigration <- merge(outmigration,zhvi,by.x=c("fromcounty","fromyear"),by.y=c("fips","year"))
outmigration <- outmigration[!duplicated(outmigration)]

fromtocount <- outmigration[,.N,by=fromto]
outmigration <- outmigration[fromto %in% fromtocount[N>=3]$fromto]

outmigration <- merge(outmigration,unemp,by.x=c("fromcounty","fromyear"),by.y=c("fips","year"),all.x=T)

names(zhvi) <- c("fips","year","tozhvi","toagi","topricetoincome")
outmigration <- merge(outmigration,zhvi,by.x=c("tocounty","fromyear"),by.y=c("fips","year"),all.x=T)
names(zhvi) <- c("fips","year","zhvi","income","pricetoincome")
outmigration[,lowhouseprice:=ifelse(tozhvi<zhvi*1.1,1,0)]
outmigration[,lowpricetoincome:=ifelse(topricetoincome<pricetoincome,1,0)]

7 From County - To County - Year Panel

Next, we test whether households from high price-to-income counties are more likely to migrate to lower house price counties after the DTI constrations. To this end, we constructed a from county-to county-year panel. Each row indcates the number of households migrated from county a to county b in year t. Using this panel we run the following regression.


\[log(No\text{ }migrated) = \Sigma_t\beta_t \times \color{red}{low\text{ }price\text{ }county} \times log(Price\text{ }to\text{ }income) \times t + UnempRate+log(income)+log(houseprice)+FromCountyFE+ToCountyFE+YearFE\]
low price county takes the value 1 if the house price in ‘to county’ is less than that of the ‘from county’.

The results suggest that outmigration from high price-to-income counties is higher if the house price in the ‘to county’ is lower.

r <- felm(log(noreturns)~log(pricetoincome)*lowhouseprice*factor(fromyear)+unemprate+log(income)+log(zhvi)|fromcounty+tocounty+fromyear|0|fromcounty+fromyear,data=outmigration[fromyear>=2007])
.coef_plot_1reg_line(r,"log(pricetoincome):lowhouseprice:factor(fromyear)",2007)