rm(list=ls())
library(data.table)
## Warning: package 'data.table' was built under R version 3.6.3
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 3.6.3
library(DBI)
## Warning: package 'DBI' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(lfe)
## Warning: package 'lfe' was built under R version 3.6.3
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.6.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages ------ tidyverse 1.3.0 --
## v tibble  3.0.0     v purrr   0.3.3
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts --------- tidyverse_conflicts() --
## x dplyr::between()   masks data.table::between()
## x tidyr::expand()    masks Matrix::expand()
## x dplyr::filter()    masks stats::filter()
## x dplyr::first()     masks data.table::first()
## x dplyr::lag()       masks stats::lag()
## x dplyr::last()      masks data.table::last()
## x tidyr::pack()      masks Matrix::pack()
## x purrr::transpose() masks data.table::transpose()
## x tidyr::unpack()    masks Matrix::unpack()
library(stringr)
library(fst)
## Warning: package 'fst' was built under R version 3.6.3
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.6.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.6.3
## rgdal: version: 1.4-8, (SVN revision 845)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/dratnadiwakara2/Documents/R/R-3.6.1/library/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/dratnadiwakara2/Documents/R/R-3.6.1/library/rgdal/proj
##  Linking to sp version: 1.4-1
library(rgeos)
## Warning: package 'rgeos' was built under R version 3.6.3
## rgeos version: 0.5-2, (SVN revision 621)
##  GEOS runtime version: 3.6.1-CAPI-1.10.1 
##  Linking to sp version: 1.4-1 
##  Polygon checking: TRUE

Number of Institutional SFR Purchases

# Load the SFR homes table and draw a bar chart of its row count per `year`.
sfr_homes <- data.table(readRDS("Cloud Lab/sfr_homes.rds"))
ggplot(data = sfr_homes[, .N, by = year]) +
  geom_col(mapping = aes(x = year, y = N))

Number of Single Family Homes owned by institutions

# Load the SFR counts and total `no_sfr_homes` within each `yr`
# (missing counts are ignored), then plot the yearly totals as a line.
sfr_count <- readRDS("Cloud Lab/sfr_count.rds")
sfr_count_summary <- sfr_count[
  ,
  .(no_sfr_homes = sum(no_sfr_homes, na.rm = TRUE)),
  by = yr
]
ggplot(sfr_count_summary, aes(x = yr, y = no_sfr_homes)) +
  geom_line()

# Tract-level ACS data and the tract-to-ZIP crosswalk.
acs_data <- fread("Cloud Lab/acs_tract_data.csv")
tract_zip <- fread("Cloud Lab/TRACT_ZIP_032016.csv")

# Attach every crosswalk row to its tract; a tract may match several ZIPs,
# hence allow.cartesian.
acs_data <- merge(
  acs_data, tract_zip,
  by.x = "censustract", by.y = "TRACT",
  allow.cartesian = TRUE
)

# RES_RATIO-weighted total of a tract-level count, ignoring missing products.
res_weighted_sum <- function(weight, value) {
  sum(weight * value, na.rm = TRUE)
}

# Collapse to one row per (ZIP, year): the "median_*"/value columns are simple
# (unweighted) means across contributing tracts, the count columns are
# RES_RATIO-weighted sums.
acs_data <- acs_data[
  ,
  .(
    median_age = mean(median_age, na.rm = TRUE),
    median_income = mean(median_income, na.rm = TRUE),
    house_value = mean(house_value, na.rm = TRUE),
    median_year_structure_built = mean(median_year_structure_built, na.rm = TRUE),
    owner_occupied = res_weighted_sum(RES_RATIO, owner_occupied),
    renter_occupied = res_weighted_sum(RES_RATIO, renter_occupied),
    no_of_homes = res_weighted_sum(RES_RATIO, no_of_homes),
    population = res_weighted_sum(RES_RATIO, population),
    white_population = res_weighted_sum(RES_RATIO, white_population),
    college_educated_no = res_weighted_sum(RES_RATIO, college_educated_no)
  ),
  by = .(ZIP, year)
]

# Five-period lags and growth rates of the ZIP-level ACS series.
#
# The lag is positional (k rows back within each ZIP), so the rows must be in
# chronological order first; the aggregation above does not guarantee that.
# data.table::shift is used instead of lag() so the result does not depend on
# dplyr masking stats::lag.
# NOTE(review): a 5-row lag equals a 5-year lag only if each ZIP has exactly
# one row per consecutive year -- confirm against the ACS extract.
setorder(acs_data, ZIP, year)
acs_data[, population_lag5 := shift(population, n = 5L, type = "lag"), by = ZIP]
acs_data[, college_educated_no_lag5 := shift(college_educated_no, n = 5L, type = "lag"), by = ZIP]
acs_data[, median_income_lag := shift(median_income, n = 5L, type = "lag"), by = ZIP]

# Growth relative to the lagged value (NA where no lag is available).
acs_data[, population_gr := (population / population_lag5) - 1]
acs_data[, college_educated_gr := (college_educated_no / college_educated_no_lag5) - 1]
acs_data[, income_gr := (median_income / median_income_lag) - 1]

# Convert zipcode to integer so it can be joined to the ZIP column of acs_data;
# values that cannot be parsed become NA (with a coercion warning).
sfr_count[, zipcode := as.integer(zipcode)]
## Warning in eval(jsub, SDenv, parent.frame()): NAs introduced by coercion
# Keep every ACS (ZIP, year) row and attach SFR counts where they exist.
sfr_count_merged <- merge(
  sfr_count, acs_data,
  by.x = c("zipcode", "yr"),
  by.y = c("ZIP", "year"),
  all.y = TRUE
)

# ZIP-years without an SFR record are treated as having zero SFR homes.
sfr_count_merged[, no_sfr_homes := ifelse(is.na(no_sfr_homes), 0, no_sfr_homes)]

# Share of homes that are SFR; rows where the share is not finite
# (e.g. no_of_homes is 0 or missing) are dropped.
sfr_count_merged[, frac_sfr := no_sfr_homes / no_of_homes]
sfr_count_merged <- sfr_count_merged[is.finite(frac_sfr)]

# Zero-padded 5-character ZIP used as the join key for the map polygons.
sfr_count_merged[
  ,
  id := str_pad(as.character(zipcode), width = 5, side = "left", pad = "0")
]

# Coarse category of the SFR share.
sfr_count_merged[
  ,
  sfr_frac_cat := ifelse(
    frac_sfr > 0.03, "3% or more",
    ifelse(
      frac_sfr > 0.01, "1% to 3%",
      ifelse(frac_sfr > 0.0, "0% to 1%", "No SFR")
    )
  )
]
# ZIPs with more than 10 SFR homes in 2019; used for both the county list and
# the padded ZIP subset below.
sfr_zips_2019 <- sfr_count_merged[yr == 2019 & no_sfr_homes > 10]$zipcode

# ZIP-to-county crosswalk reduced to one county per ZIP (the one with the
# highest RES_RATIO), then restricted to the SFR ZIPs.
counties <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/ZIP_COUNTY_092016.csv")
setorder(counties, ZIP, -RES_RATIO)
counties <- unique(counties, by = "ZIP")
counties <- counties[ZIP %in% sfr_zips_2019]

# Same ZIPs as zero-padded 5-character strings.
zipcode_subset <- str_pad(
  as.character(sfr_zips_2019),
  width = 5, side = "left", pad = "0"
)
# Read the ZCTA shapefile layer and flatten it to a polygon-vertex table whose
# `id` column is the GEOID10 value.
us_zip <- readOGR(
  dsn = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Shapefiles/US Zip",
  layer = "tl_2019_us_zcta510"
)
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US Zip", layer: "tl_2019_us_zcta510"
## with 33144 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND10 AWATER10
us_zip <- data.table(fortify(us_zip, region = "GEOID10"))



# Read the state boundary layer, flatten it to a polygon-vertex table keyed by
# GEOID, and store the id as a number so it can be compared with numeric codes.
us_states <- readOGR(
  dsn = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Shapefiles/US States",
  layer = "cb_2014_us_state_20m"
)
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US States", layer: "cb_2014_us_state_20m"
## with 52 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER
## Warning in readOGR("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana
## State University/Raw Data/Shapefiles/US States", : Z-dimension discarded
us_states <- fortify(us_states, region = "GEOID")
us_states <- data.table(us_states)
us_states[, id := as.numeric(id)]


# Read the county boundary layer, flatten it to a polygon-vertex table keyed by
# GEOID, and keep only the counties present in the `counties` crosswalk.
us_counties <- readOGR(
  dsn = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Shapefiles/US Counties/cb_2013_us_county_20m",
  layer = "cb_2013_us_county_20m"
)
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US Counties\cb_2013_us_county_20m", layer: "cb_2013_us_county_20m"
## with 3221 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND AWATER
us_counties <- data.table(fortify(us_counties, region = "GEOID"))

us_counties <- us_counties[as.integer(id) %in% counties$COUNTY]
# Attach the 2019 SFR measures (ZIPs with more than 10 SFR homes) to the ZIP
# polygons, keeping only polygons that have a match.
us_zip_map <- merge(
  us_zip,
  sfr_count_merged[
    yr == 2019 & no_sfr_homes > 10,
    .(id, frac_sfr, sfr_frac_cat, no_sfr_homes)
  ],
  by = "id"
)

# Drop ZIPs starting with "99", vertices outside the long < -50 / lat > 25
# window, and rows without an SFR category.
us_zip_map <- us_zip_map[
  substr(id, 1, 2) != "99" &
    long < (-50) &
    lat > 25 &
    !is.na(sfr_frac_cat)
]
# Map layers, bottom to top:
#   1. counties from the crosswalk (west of longitude -130 removed), gold fill
#   2. SFR ZIP polygons, blue fill
#   3. state outlines, excluding state ids 2, 15 and 72
# NOTE(review): every layer sets `fill` as a constant outside aes(), so the
# fill scale and the fill legend title below appear to have no visible effect.
map1 <- ggplot() +
  geom_polygon(
    data = us_counties[long > (-130)],
    aes(x = long, y = lat, group = group),
    fill = "goldenrod1", color = NA
  ) +
  geom_polygon(
    data = us_zip_map,
    aes(x = long, y = lat, group = group),
    color = NA, fill = "dodgerblue"
  ) +
  # scale_fill_manual(values = c("darkturquoise", "dodgerblue", "dodgerblue4","white"))+
  scale_fill_gradientn(
    colors = c(
      "ivory1", "honeydew1", "darkseagreen3",
      "darkturquoise", "dodgerblue", "dodgerblue4"
    )
  ) +
  geom_polygon(
    data = us_states[!(id %in% c(2, 15, 72))],
    aes(x = long, y = lat, group = group),
    fill = NA, color = "gray50"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "bottom",
    panel.grid = element_blank()
  ) +
  guides(fill = guide_legend(title = "Fraction of SFR homes"))

# ggsave("map2.png",map1,width = 11, height = 8)
alt text here

alt text here

SFR Homes vs. Other Homes

We were able to match 165,294 SFR homes to the ZTRAX data; the comparison group contains roughly 12 million other homes.

# Home characteristics, restricted to the SFR ZIP subset.
home_chars <- readRDS("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Projects/ZTRAX AVM/home_char.rds")
home_chars_zip <- home_chars[propertyzip %in% zipcode_subset]

# sfr = 1 when the parcel appears in the SFR homes table, 0 otherwise.
home_chars_zip[, sfr := as.numeric(importparcelid %in% sfr_homes$importparcelid)]

# Add the area measures, matched on rowid.
areas <- readRDS("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Projects/ZTRAX AVM/areas.rds")
home_chars_zip <- merge(home_chars_zip, areas, by = "rowid")

# Keep homes with a positive lot size, 500-10000 sqft of building area
# (exclusive bounds), built after 1920, with 1 to 10 bedrooms.
home_chars_zip <- home_chars_zip[
  lotsizeacres > 0 &
    buildingareasqft > 500 &
    buildingareasqft < 10000 &
    yearbuilt > 1920 &
    totalbedrooms %in% 1:10
]

This table shows the key characteristics of the two samples. Column 1 reports non-SFR homes; column 2 reports SFR homes.

# Mean, median and standard deviation of each home characteristic, computed
# separately for non-SFR (sfr = 0) and SFR (sfr = 1) homes; NAs are ignored.
# A named list replaces the deprecated funs(); the resulting column names
# (<variable>_mean, <variable>_median, <variable>_sd) are unchanged.
summary_stats <-
  home_chars_zip[, c("lotsizeacres", "yearbuilt", "noofstories", "totalbedrooms", "buildingareasqft", "sfr")] %>%
  group_by(sfr) %>%
  summarise_all(list(mean = mean, median = median, sd = sd), na.rm = TRUE)
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
# Print transposed: one row per statistic, one column per sfr group.
print(t(summary_stats))
##                                 [,1]         [,2]
## sfr                        0.0000000    1.0000000
## lotsizeacres_mean        110.6067205   32.8598838
## yearbuilt_mean          1983.3307768 1993.1486745
## noofstories_mean           1.3191327    1.4174492
## totalbedrooms_mean         3.2736025    3.3541871
## buildingareasqft_mean   1981.8753083 1921.6952581
## lotsizeacres_median        0.2090000    0.1840000
## yearbuilt_median        1987.0000000 2000.0000000
## noofstories_median         1.0000000    1.0000000
## totalbedrooms_median       3.0000000    3.0000000
## buildingareasqft_median 1760.0000000 1792.0000000
## lotsizeacres_sd         4950.5187483  815.9185597
## yearbuilt_sd              22.6218059   19.5057974
## noofstories_sd             0.5200679    0.5927055
## totalbedrooms_sd           0.8092896    0.6696256
## buildingareasqft_sd      944.4253040  706.8738659

These regressions examine how home characteristics differ between SFR and non-SFR homes.

# One model per home characteristic, each regressed on the SFR indicator and
# the remaining characteristics. Fixed effects follow the `|`: ZIP and year
# built, except in the year-built model, which uses ZIP only.
r <- list(
  felm(
    log(lotsizeacres) ~ sfr + log(buildingareasqft) + totalbedrooms | propertyzip + yearbuilt,
    data = home_chars_zip
  ),
  felm(
    log(yearbuilt) ~ sfr + log(buildingareasqft) + totalbedrooms + log(lotsizeacres) | propertyzip,
    data = home_chars_zip
  ),
  felm(
    totalbedrooms ~ sfr + log(buildingareasqft) + log(lotsizeacres) | propertyzip + yearbuilt,
    data = home_chars_zip
  ),
  felm(
    log(buildingareasqft) ~ sfr + totalbedrooms + log(lotsizeacres) | propertyzip + yearbuilt,
    data = home_chars_zip
  )
)

stargazer(r, type = "text", no.space = TRUE, omit.stat = "ser")
## 
## ==========================================================================================
##                                               Dependent variable:                         
##                       --------------------------------------------------------------------
##                       log(lotsizeacres) log(yearbuilt) totalbedrooms log(buildingareasqft)
##                              (1)             (2)            (3)               (4)         
## ------------------------------------------------------------------------------------------
## sfr                       -0.162***        0.003***      0.088***          -0.069***      
##                            (0.002)        (0.00002)       (0.001)           (0.001)       
## log(buildingareasqft)     0.865***         0.010***      1.224***                         
##                            (0.001)        (0.00001)       (0.001)                         
## totalbedrooms             -0.060***        0.001***                        0.217***       
##                           (0.0003)        (0.00000)                        (0.0001)       
## log(lotsizeacres)                         -0.002***      -0.042***         0.109***       
##                                           (0.00000)      (0.0002)          (0.0001)       
## ------------------------------------------------------------------------------------------
## Observations             12,258,469       12,258,469    12,258,469        12,258,469      
## R2                          0.702           0.608          0.467             0.664        
## Adjusted R2                 0.702           0.607          0.466             0.664        
## ==========================================================================================
## Note:                                                          *p<0.1; **p<0.05; ***p<0.01
# All crosswalk rows (every ZIP) that fall in a county containing an SFR ZIP.
counties2 <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/ZIP_COUNTY_092016.csv")
counties2 <- counties2[COUNTY %in% counties$COUNTY]

# Indicators on the ACS panel:
#   sfr_zip    = 1 if the ZIP itself is in the SFR ZIP subset
#   sfr_county = 1 if the ZIP lies in a county that contains an SFR ZIP
acs_data[, sfr_zip := as.numeric(ZIP %in% as.numeric(zipcode_subset))]
acs_data[, sfr_county := as.numeric(ZIP %in% as.numeric(counties2$ZIP))]
# 2019 ACS summary statistics (mean, median, sd; NAs ignored).
# A named list replaces the deprecated funs(); output column names
# (<variable>_mean, <variable>_median, <variable>_sd) are unchanged.

# Within SFR counties: SFR ZIPs vs. other ZIPs.
summary_stats_acs <-
  acs_data[year == 2019 & sfr_county == 1, c("median_age", "median_income", "house_value", "median_year_structure_built", "no_of_homes", "population", "population_gr", "income_gr", "sfr_zip")] %>%
  group_by(sfr_zip) %>%
  summarise_all(list(mean = mean, median = median, sd = sd), na.rm = TRUE)

# print(t(summary_stats_acs))

# All ZIPs: ZIPs in SFR counties vs. ZIPs elsewhere. This overwrites the
# table computed just above.
summary_stats_acs <-
  acs_data[year == 2019, c("median_age", "median_income", "house_value", "median_year_structure_built", "no_of_homes", "population", "population_gr", "income_gr", "sfr_county")] %>%
  group_by(sfr_county) %>%
  summarise_all(list(mean = mean, median = median, sd = sd), na.rm = TRUE)

# print(t(summary_stats_acs))
# ZIP-to-county lookup with one county per ZIP: the county with the highest
# RES_RATIO for that ZIP.
counties3 <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/ZIP_COUNTY_092016.csv")
setorder(counties3, ZIP, -RES_RATIO)
# Deduplicate the table that was just sorted. The previous code took
# counties2[!duplicated(ZIP)] here, which threw away the sort and silently
# limited the lookup to counties containing SFR ZIPs.
counties3 <- counties3[!duplicated(ZIP)]
# Zillow home value indices: average the monthly values to one row per
# (zipcode, year).
zhvi <- readRDS("Cloud Lab/zip_zhvi.rds")
zhvi[, year := year(month)]
zhvi <- zhvi[
  ,
  .(
    zhvi_uc_sfr_tier_33_67 = mean(zhvi_uc_sfr_tier_33_67, na.rm = TRUE),
    zhvi_bdrmcnt_3 = mean(zhvi_bdrmcnt_3, na.rm = TRUE)
  ),
  by = .(zipcode, year)
]

# The lags below are positional (k rows back within each zipcode), so sort
# chronologically first; the aggregation above does not guarantee row order.
# data.table::shift is used instead of lag() so the result does not depend on
# dplyr masking stats::lag.
# NOTE(review): a k-row lag equals a k-year lag only if each zipcode has one
# row for every consecutive year -- confirm against the source data.
setorder(zhvi, zipcode, year)

zhvi[, zhvi_uc_sfr_tier_33_67_lag3 := shift(zhvi_uc_sfr_tier_33_67, n = 3L, type = "lag"), by = zipcode]
zhvi[, zhvi_uc_sfr_tier_33_67_lag10 := shift(zhvi_uc_sfr_tier_33_67, n = 10L, type = "lag"), by = zipcode]

zhvi[, zhvi_bdrmcnt_3_lag3 := shift(zhvi_bdrmcnt_3, n = 3L, type = "lag"), by = zipcode]
zhvi[, zhvi_bdrmcnt_3_lag10 := shift(zhvi_bdrmcnt_3, n = 10L, type = "lag"), by = zipcode]

# 2022 cross-section with the county of each zipcode attached.
zhvi_2022 <- zhvi[year == 2022]

zhvi_2022 <- merge(zhvi_2022, counties3, by.x = "zipcode", by.y = "ZIP")
# Zipcode-to-state lookup downloaded from GitHub (requires network access).
# NOTE(review): rows are not deduplicated; if a zipcode appears more than once
# in this file, later merges on zipcode will repeat rows -- confirm.
geodata <- fread("https://raw.githubusercontent.com/scpike/us-state-county-zip/master/geo-data.csv")
geodata <- geodata[, .(zipcode, state_abbr)]
# Non-numeric zipcodes become NA (with a coercion warning).
geodata[, zipcode := as.integer(zipcode)]
## Warning in eval(jsub, SDenv, parent.frame()): NAs introduced by coercion
# Attach the 2019 SFR and ACS measures to the 2022 price cross-section.
zhvi_2022 <- merge(zhvi_2022, sfr_count_merged[yr == 2019], by = "zipcode")

# Price growth expressed as the ratio of the 2022 value to its 3- and
# 10-period lag.
zhvi_2022[
  ,
  `:=`(
    zhvi_bdrmcnt_3_gr3 = zhvi_bdrmcnt_3 / zhvi_bdrmcnt_3_lag3,
    zhvi_bdrmcnt_3_gr10 = zhvi_bdrmcnt_3 / zhvi_bdrmcnt_3_lag10,
    zhvi_uc_sfr_tier_33_67_gr3 = zhvi_uc_sfr_tier_33_67 / zhvi_uc_sfr_tier_33_67_lag3,
    zhvi_uc_sfr_tier_33_67_gr10 = zhvi_uc_sfr_tier_33_67 / zhvi_uc_sfr_tier_33_67_lag10
  )
]

# SFR exposure bins: 0 for zipcodes without SFR homes, quintiles (1-5) of
# frac_sfr for the rest.
temp <- zhvi_2022[no_sfr_homes == 0]
temp[, frac_sfr_bin := 0]
zhvi_2022 <- zhvi_2022[no_sfr_homes > 0]
zhvi_2022[, frac_sfr_bin := ntile(frac_sfr, 5)]
zhvi_2022 <- rbind(temp, zhvi_2022)

# Add the state abbreviation and the derived control variables.
zhvi_2022 <- merge(zhvi_2022, geodata, by = "zipcode")
zhvi_2022[
  ,
  `:=`(
    house_age = 2022 - median_year_structure_built,
    renter_frac = renter_occupied / (owner_occupied + renter_occupied),
    white_frac = white_population / population,
    college_frac = college_educated_no / population
  )
]

Impact on House Price Growth

# Right-hand side shared by all models, in felm's four-part form:
# covariates | fixed effects (COUNTY) | no instruments (0) | cluster (COUNTY).
control_vars <- "factor(frac_sfr_bin)+log(median_income)+log(house_value)+log(no_of_homes)+log(population)+log(1+house_age)+renter_frac+white_frac+college_frac+median_age+population_gr+college_educated_gr+income_gr|COUNTY|0|COUNTY"

# Log price growth over 10 and 3 periods for each of the two ZHVI series.
r <- list(
  felm(as.formula(paste0("log(zhvi_bdrmcnt_3_gr10)~", control_vars)), data = zhvi_2022),
  felm(as.formula(paste0("log(zhvi_bdrmcnt_3_gr3)~", control_vars)), data = zhvi_2022),
  felm(as.formula(paste0("log(zhvi_uc_sfr_tier_33_67_gr10)~", control_vars)), data = zhvi_2022),
  felm(as.formula(paste0("log(zhvi_uc_sfr_tier_33_67_gr3)~", control_vars)), data = zhvi_2022)
)

stargazer(
  r,
  type = "text",
  no.space = TRUE,
  dep.var.labels.include = FALSE,
  omit.stat = "ser"
)
## 
## =============================================================
##                                 Dependent variable:          
##                       ---------------------------------------
##                          (1)       (2)       (3)       (4)   
## -------------------------------------------------------------
## factor(frac_sfr_bin)1  -0.007    0.006**   -0.004    0.007** 
##                        (0.007)   (0.003)   (0.007)   (0.003) 
## factor(frac_sfr_bin)2   0.007   0.015***   0.011**  0.016*** 
##                        (0.005)   (0.003)   (0.005)   (0.003) 
## factor(frac_sfr_bin)3  0.020**  0.018***  0.025***  0.020*** 
##                        (0.008)   (0.004)   (0.008)   (0.003) 
## factor(frac_sfr_bin)4 0.050***  0.025***  0.050***  0.024*** 
##                        (0.009)   (0.004)   (0.008)   (0.004) 
## factor(frac_sfr_bin)5 0.078***  0.032***  0.073***  0.031*** 
##                        (0.011)   (0.004)   (0.010)   (0.004) 
## log(median_income)    -0.144***  -0.025*  -0.183***  -0.026* 
##                        (0.049)   (0.013)   (0.045)   (0.014) 
## log(house_value)       -0.018   -0.040***  -0.012   -0.036***
##                        (0.026)   (0.010)   (0.024)   (0.010) 
## log(no_of_homes)      -0.071**   0.021**   -0.013   0.036*** 
##                        (0.034)   (0.010)   (0.031)   (0.009) 
## log(population)        0.071**  -0.022**    0.011   -0.037***
##                        (0.034)   (0.010)   (0.031)   (0.009) 
## log(1 + house_age)    0.018***  0.005***  0.018***   0.004** 
##                        (0.005)   (0.002)   (0.005)   (0.002) 
## renter_frac            -0.045   -0.066***  -0.035   -0.063***
##                        (0.036)   (0.013)   (0.037)   (0.013) 
## white_frac            -0.143*** -0.100*** -0.135*** -0.105***
##                        (0.035)   (0.015)   (0.033)   (0.015) 
## college_frac            0.072   0.119***    0.066    0.086*  
##                        (0.094)   (0.045)   (0.093)   (0.045) 
## median_age            -0.003***  -0.0001  -0.004***  -0.0004 
##                        (0.001)  (0.0004)   (0.001)  (0.0004) 
## population_gr         -0.099***  -0.007   -0.080**   -0.004  
##                        (0.033)   (0.018)   (0.031)   (0.017) 
## college_educated_gr    0.036*    -0.002     0.021    -0.001  
##                        (0.019)   (0.010)   (0.017)   (0.009) 
## income_gr             0.210***   -0.007   0.192***    0.001  
##                        (0.035)   (0.015)   (0.034)   (0.015) 
## -------------------------------------------------------------
## Observations            5,928     6,079     6,202     6,315  
## R2                      0.801     0.714     0.793     0.698  
## Adjusted R2             0.790     0.698     0.782     0.682  
## =============================================================
## Note:                             *p<0.1; **p<0.05; ***p<0.01

Impact on House Price Growth

Subset: Zipcodes in counties where there are zipcodes with SFR homes.

# Right-hand side shared by all models, in felm's four-part form:
# covariates | fixed effects (COUNTY) | no instruments (0) | cluster (COUNTY).
control_vars <- "factor(frac_sfr_bin)+log(median_income)+log(house_value)+log(no_of_homes)+log(population)+log(1+house_age)+renter_frac+white_frac+college_frac+median_age+population_gr+college_educated_gr+income_gr|COUNTY|0|COUNTY"

# Estimation sample: zipcodes located in counties that contain an SFR ZIP.
sfr_county_sample <- zhvi_2022[zipcode %in% counties2$ZIP]

# Log price growth over 10 and 3 periods for each of the two ZHVI series.
r <- list(
  felm(as.formula(paste0("log(zhvi_bdrmcnt_3_gr10)~", control_vars)), data = sfr_county_sample),
  felm(as.formula(paste0("log(zhvi_bdrmcnt_3_gr3)~", control_vars)), data = sfr_county_sample),
  felm(as.formula(paste0("log(zhvi_uc_sfr_tier_33_67_gr10)~", control_vars)), data = sfr_county_sample),
  felm(as.formula(paste0("log(zhvi_uc_sfr_tier_33_67_gr3)~", control_vars)), data = sfr_county_sample)
)

stargazer(
  r,
  type = "text",
  no.space = TRUE,
  dep.var.labels.include = FALSE,
  omit.stat = "ser"
)
## 
## =============================================================
##                                 Dependent variable:          
##                       ---------------------------------------
##                          (1)       (2)       (3)       (4)   
## -------------------------------------------------------------
## factor(frac_sfr_bin)1  -0.007    0.006**   -0.004    0.007** 
##                        (0.007)   (0.003)   (0.007)   (0.003) 
## factor(frac_sfr_bin)2   0.007   0.015***   0.011**  0.016*** 
##                        (0.005)   (0.003)   (0.005)   (0.003) 
## factor(frac_sfr_bin)3  0.020**  0.018***  0.025***  0.020*** 
##                        (0.008)   (0.004)   (0.008)   (0.003) 
## factor(frac_sfr_bin)4 0.050***  0.025***  0.050***  0.024*** 
##                        (0.009)   (0.004)   (0.008)   (0.004) 
## factor(frac_sfr_bin)5 0.078***  0.032***  0.073***  0.031*** 
##                        (0.011)   (0.004)   (0.010)   (0.004) 
## log(median_income)    -0.144***  -0.025*  -0.183***  -0.026* 
##                        (0.049)   (0.013)   (0.045)   (0.014) 
## log(house_value)       -0.018   -0.040***  -0.012   -0.036***
##                        (0.026)   (0.010)   (0.024)   (0.010) 
## log(no_of_homes)      -0.071**   0.021**   -0.013   0.036*** 
##                        (0.034)   (0.010)   (0.031)   (0.009) 
## log(population)        0.071**  -0.022**    0.011   -0.037***
##                        (0.034)   (0.010)   (0.031)   (0.009) 
## log(1 + house_age)    0.018***  0.005***  0.018***   0.004** 
##                        (0.005)   (0.002)   (0.005)   (0.002) 
## renter_frac            -0.045   -0.066***  -0.035   -0.063***
##                        (0.036)   (0.013)   (0.037)   (0.013) 
## white_frac            -0.143*** -0.100*** -0.135*** -0.105***
##                        (0.035)   (0.015)   (0.033)   (0.015) 
## college_frac            0.072   0.119***    0.066    0.086*  
##                        (0.094)   (0.045)   (0.093)   (0.045) 
## median_age            -0.003***  -0.0001  -0.004***  -0.0004 
##                        (0.001)  (0.0004)   (0.001)  (0.0004) 
## population_gr         -0.099***  -0.007   -0.080**   -0.004  
##                        (0.033)   (0.018)   (0.031)   (0.017) 
## college_educated_gr    0.036*    -0.002     0.021    -0.001  
##                        (0.019)   (0.010)   (0.017)   (0.009) 
## income_gr             0.210***   -0.007   0.192***    0.001  
##                        (0.035)   (0.015)   (0.034)   (0.015) 
## -------------------------------------------------------------
## Observations            5,928     6,079     6,202     6,315  
## R2                      0.801     0.714     0.793     0.698  
## Adjusted R2             0.790     0.698     0.782     0.682  
## =============================================================
## Note:                             *p<0.1; **p<0.05; ***p<0.01
# Zillow observed rent index: average the monthly values to one row per
# (zipcode, year).
zori <- data.table(readRDS("Cloud Lab/Zip_ZORI_AllHomes.rds"))

zori[, year := year(month)]
zori <- zori[
  ,
  .(Zip_ZORI_AllHomes = mean(Zip_ZORI_AllHomes, na.rm = TRUE)),
  by = .(zipcode, year)
]

# The lags below are positional (k rows back within each zipcode), so sort
# chronologically first; the aggregation above does not guarantee row order.
# data.table::shift is used instead of lag() so the result does not depend on
# dplyr masking stats::lag.
# NOTE(review): a k-row lag equals a k-year lag only if each zipcode has one
# row for every consecutive year -- confirm against the source data.
setorder(zori, zipcode, year)

zori[, Zip_ZORI_AllHomes_lag3 := shift(Zip_ZORI_AllHomes, n = 3L, type = "lag"), by = zipcode]
zori[, Zip_ZORI_AllHomes_lag5 := shift(Zip_ZORI_AllHomes, n = 5L, type = "lag"), by = zipcode]

# 2022 cross-section with the 2019 SFR and ACS measures attached.
zori_2022 <- zori[year == 2022]

zori_2022 <- merge(zori_2022, sfr_count_merged[yr == 2019], by = "zipcode")

# Rent growth expressed as the ratio of the 2022 value to its lag.
zori_2022[, Zip_ZORI_AllHomes_gr3 := Zip_ZORI_AllHomes / Zip_ZORI_AllHomes_lag3]
zori_2022[, Zip_ZORI_AllHomes_gr5 := Zip_ZORI_AllHomes / Zip_ZORI_AllHomes_lag5]

# SFR exposure bins: 0 for zipcodes without SFR homes, quintiles (1-5) of
# frac_sfr for the rest.
temp <- zori_2022[no_sfr_homes == 0]
temp <- temp[, frac_sfr_bin := 0]
zori_2022 <- zori_2022[no_sfr_homes > 0]
zori_2022[, frac_sfr_bin := ntile(frac_sfr, 5)]
zori_2022 <- rbind(temp, zori_2022)

# Add the state abbreviation and the derived control variables.
zori_2022 <- merge(zori_2022, geodata, by = "zipcode")
zori_2022[, house_age := 2022 - median_year_structure_built]
zori_2022[, renter_frac := renter_occupied / (owner_occupied + renter_occupied)]
zori_2022[, white_frac := white_population / population]
zori_2022[, college_frac := college_educated_no / population]

# Attach the county of each zipcode (used for fixed effects and clustering).
zori_2022 <- merge(zori_2022, counties3, by.x = "zipcode", by.y = "ZIP")

Impact on Rent Growth

# Right-hand side shared by both models, in felm's four-part form:
# covariates | fixed effects (COUNTY) | no instruments (0) | cluster (COUNTY).
control_vars <- "factor(frac_sfr_bin)+log(median_income)+log(house_value)+log(no_of_homes)+log(population)+log(1+house_age)+renter_frac+white_frac+college_frac+median_age+population_gr+college_educated_gr+income_gr|COUNTY|0|COUNTY"

# Log rent growth over 5 periods (model 1) and 3 periods (model 2).
r <- list(
  felm(as.formula(paste0("log(Zip_ZORI_AllHomes_gr5)~", control_vars)), data = zori_2022),
  felm(as.formula(paste0("log(Zip_ZORI_AllHomes_gr3)~", control_vars)), data = zori_2022)
)

stargazer(
  r,
  type = "text",
  no.space = TRUE,
  dep.var.labels.include = FALSE,
  omit.stat = "ser"
)
## 
## ==================================================
##                           Dependent variable:     
##                       ----------------------------
##                            (1)            (2)     
## --------------------------------------------------
## factor(frac_sfr_bin)1     0.001         -0.005    
##                          (0.008)        (0.008)   
## factor(frac_sfr_bin)2     0.007          0.001    
##                          (0.009)        (0.009)   
## factor(frac_sfr_bin)3     0.008         -0.002    
##                          (0.009)        (0.009)   
## factor(frac_sfr_bin)4     0.010         -0.002    
##                          (0.009)        (0.008)   
## factor(frac_sfr_bin)5     0.017*        -0.0004   
##                          (0.010)        (0.008)   
## log(median_income)        -0.022        -0.011    
##                          (0.018)        (0.013)   
## log(house_value)         -0.033*        -0.016    
##                          (0.018)        (0.013)   
## log(no_of_homes)          0.037          0.043    
##                          (0.030)        (0.030)   
## log(population)           -0.043        -0.045    
##                          (0.030)        (0.029)   
## log(1 + house_age)        -0.004       -0.006***  
##                          (0.002)        (0.002)   
## renter_frac             -0.092***      -0.104***  
##                          (0.029)        (0.030)   
## white_frac               -0.038*       -0.041***  
##                          (0.022)        (0.015)   
## college_frac              -0.043        -0.013    
##                          (0.077)        (0.060)   
## median_age                -0.001        -0.001    
##                          (0.001)        (0.001)   
## population_gr           -0.098***      -0.065***  
##                          (0.027)        (0.020)   
## college_educated_gr      0.043**        0.036**   
##                          (0.019)        (0.015)   
## income_gr                 0.024         -0.015    
##                          (0.025)        (0.018)   
## --------------------------------------------------
## Observations              1,671          1,671    
## R2                        0.805          0.810    
## Adjusted R2               0.781          0.786    
## ==================================================
## Note:                  *p<0.1; **p<0.05; ***p<0.01

Impact on House Price Growth

Subset: Only zipcodes with rent data available

# Right-hand side shared by all models, in felm's four-part form:
# covariates | fixed effects (COUNTY) | no instruments (0) | cluster (COUNTY).
control_vars <- "factor(frac_sfr_bin)+log(median_income)+log(house_value)+log(no_of_homes)+log(population)+log(1+house_age)+renter_frac+white_frac+college_frac+median_age+population_gr+college_educated_gr+income_gr|COUNTY|0|COUNTY"

# Estimation sample: zipcodes that also appear in the rent data set.
rent_sample <- zhvi_2022[zipcode %in% zori_2022$zipcode]

# Log price growth over 10 and 3 periods for each of the two ZHVI series.
r <- list(
  felm(as.formula(paste0("log(zhvi_bdrmcnt_3_gr10)~", control_vars)), data = rent_sample),
  felm(as.formula(paste0("log(zhvi_bdrmcnt_3_gr3)~", control_vars)), data = rent_sample),
  felm(as.formula(paste0("log(zhvi_uc_sfr_tier_33_67_gr10)~", control_vars)), data = rent_sample),
  felm(as.formula(paste0("log(zhvi_uc_sfr_tier_33_67_gr3)~", control_vars)), data = rent_sample)
)

stargazer(
  r,
  type = "text",
  no.space = TRUE,
  dep.var.labels.include = FALSE,
  omit.stat = "ser"
)
## 
## =============================================================
##                                 Dependent variable:          
##                       ---------------------------------------
##                          (1)       (2)       (3)       (4)   
## -------------------------------------------------------------
## factor(frac_sfr_bin)1  -0.007     0.003     0.002     0.006  
##                        (0.012)   (0.008)   (0.013)   (0.007) 
## factor(frac_sfr_bin)2  -0.005     0.010     0.001     0.010  
##                        (0.015)   (0.009)   (0.016)   (0.009) 
## factor(frac_sfr_bin)3   0.015     0.012     0.015     0.013  
##                        (0.016)   (0.008)   (0.016)   (0.009) 
## factor(frac_sfr_bin)4  0.044**    0.012    0.037*     0.009  
##                        (0.020)   (0.009)   (0.021)   (0.009) 
## factor(frac_sfr_bin)5 0.072***   0.018**   0.057**    0.014  
##                        (0.027)   (0.009)   (0.027)   (0.009) 
## log(median_income)     -0.071    -0.011    -0.109*   -0.019  
##                        (0.067)   (0.018)   (0.062)   (0.019) 
## log(house_value)      -0.137*** -0.054*** -0.155*** -0.053***
##                        (0.029)   (0.014)   (0.028)   (0.015) 
## log(no_of_homes)      -0.188***  -0.035*  -0.083**   -0.013  
##                        (0.038)   (0.020)   (0.036)   (0.019) 
## log(population)       0.163***    0.028    0.061*     0.007  
##                        (0.036)   (0.018)   (0.036)   (0.017) 
## log(1 + house_age)     0.020**    0.003   0.020***    0.002  
##                        (0.008)   (0.003)   (0.006)   (0.003) 
## renter_frac             0.052   -0.054**    0.030   -0.073***
##                        (0.066)   (0.022)   (0.065)   (0.021) 
## white_frac            -0.165*** -0.089*** -0.160*** -0.102***
##                        (0.061)   (0.022)   (0.058)   (0.023) 
## college_frac          -0.333***   0.096   -0.485***   0.022  
##                        (0.113)   (0.072)   (0.113)   (0.074) 
## median_age              0.001    0.001**   -0.0005   0.001*  
##                        (0.002)   (0.001)   (0.001)   (0.001) 
## population_gr         -0.336*** -0.068*** -0.330*** -0.063***
##                        (0.044)   (0.020)   (0.047)   (0.021) 
## college_educated_gr   0.106***   0.018*   0.121***   0.023*  
##                        (0.028)   (0.011)   (0.028)   (0.012) 
## income_gr             0.203***   -0.020   0.249***   -0.005  
##                        (0.061)   (0.021)   (0.066)   (0.022) 
## -------------------------------------------------------------
## Observations            1,654     1,662     1,662     1,667  
## R2                      0.854     0.850     0.854     0.836  
## Adjusted R2             0.835     0.832     0.835     0.816  
## =============================================================
## Note:                             *p<0.1; **p<0.05; ***p<0.01