# NOTE: the former `rm(list = ls())` call was removed. It only deletes user
# objects (attached packages, options and the working directory are left
# untouched), so it does not provide a clean session and silently wipes the
# workspace of anyone who sources this file. Run or knit the script in a fresh
# R session instead.
library(data.table)
## Warning: package 'data.table' was built under R version 3.6.3
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 3.6.3
library(DBI)
## Warning: package 'DBI' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(lfe)
## Warning: package 'lfe' was built under R version 3.6.3
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.6.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages ------ tidyverse 1.3.0 --
## v tibble 3.0.0 v purrr 0.3.3
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts --------- tidyverse_conflicts() --
## x dplyr::between() masks data.table::between()
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks data.table::first()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks data.table::last()
## x tidyr::pack() masks Matrix::pack()
## x purrr::transpose() masks data.table::transpose()
## x tidyr::unpack() masks Matrix::unpack()
library(stringr)
library(fst)
## Warning: package 'fst' was built under R version 3.6.3
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.6.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.6.3
## rgdal: version: 1.4-8, (SVN revision 845)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/dratnadiwakara2/Documents/R/R-3.6.1/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/dratnadiwakara2/Documents/R/R-3.6.1/library/rgdal/proj
## Linking to sp version: 1.4-1
library(rgeos)
## Warning: package 'rgeos' was built under R version 3.6.3
## rgeos version: 0.5-2, (SVN revision 621)
## GEOS runtime version: 3.6.1-CAPI-1.10.1
## Linking to sp version: 1.4-1
## Polygon checking: TRUE
# Load the SFR homes table and plot the number of rows per year.
sfr_homes <- readRDS("Cloud Lab/sfr_homes.rds")
sfr_homes <- data.table(sfr_homes)
ggplot(data = sfr_homes[, .N, by = year]) +
  geom_col(aes(x = year, y = N))

# Load the SFR counts, total them by year (ignoring missing counts) and plot
# the yearly totals as a line.
sfr_count <- readRDS("Cloud Lab/sfr_count.rds")
sfr_count_summary <- sfr_count[
  ,
  .(no_sfr_homes = sum(no_sfr_homes, na.rm = TRUE)),
  by = yr
]
ggplot(sfr_count_summary, aes(x = yr, y = no_sfr_homes)) +
  geom_line()
# Read the tract-level ACS data and the tract-to-ZIP crosswalk, join them on
# the census tract id, and collapse the result to one row per ZIP and year.
acs_data <- fread("Cloud Lab/acs_tract_data.csv")
tract_zip <- fread("Cloud Lab/TRACT_ZIP_032016.csv")
acs_data <- merge(
  acs_data,
  tract_zip,
  by.x = "censustract",
  by.y = "TRACT",
  allow.cartesian = TRUE
)

# Median-type variables are plain (unweighted) means over the matched tracts;
# count variables are summed after scaling each tract by its RES_RATIO.
acs_data <- acs_data[
  ,
  .(
    median_age = mean(median_age, na.rm = TRUE),
    median_income = mean(median_income, na.rm = TRUE),
    house_value = mean(house_value, na.rm = TRUE),
    median_year_structure_built = mean(median_year_structure_built, na.rm = TRUE),
    owner_occupied = sum(RES_RATIO * owner_occupied, na.rm = TRUE),
    renter_occupied = sum(RES_RATIO * renter_occupied, na.rm = TRUE),
    no_of_homes = sum(RES_RATIO * no_of_homes, na.rm = TRUE),
    population = sum(RES_RATIO * population, na.rm = TRUE),
    white_population = sum(RES_RATIO * white_population, na.rm = TRUE),
    college_educated_no = sum(RES_RATIO * college_educated_no, na.rm = TRUE)
  ),
  by = .(ZIP, year)
]
# Five-period lags and growth rates of population, college-educated count and
# median income, computed within each ZIP.
#
# The lag is positional (5 rows back), so rows must be in ascending year order
# within each ZIP. The grouped aggregation that builds acs_data returns groups
# in order of first appearance, which does not guarantee that, so sort
# explicitly first. data.table::shift() is used instead of lag() so the result
# no longer depends on dplyr::lag() masking stats::lag().
# NOTE(review): a 5-row lag equals a 5-year lag only if every ZIP has exactly
# one row per consecutive year -- confirm against the ACS extract.
setorder(acs_data, ZIP, year)
acs_data[, population_lag5 := shift(population, 5L), by = ZIP]
acs_data[, college_educated_no_lag5 := shift(college_educated_no, 5L), by = ZIP]
acs_data[, median_income_lag := shift(median_income, 5L), by = ZIP]

# Growth rate = current / lagged - 1 (NA for the first five rows of each ZIP).
acs_data[, population_gr := (population / population_lag5) - 1]
acs_data[, college_educated_gr := (college_educated_no / college_educated_no_lag5) - 1]
acs_data[, income_gr := (median_income / median_income_lag) - 1]
# Attach the SFR counts to every ZIP-year of the ACS panel. all.y = TRUE keeps
# ACS rows that have no SFR record.
# as.integer() turns non-numeric zipcodes into NA and emits the warning
# "NAs introduced by coercion".
sfr_count[, zipcode := as.integer(zipcode)]
sfr_count_merged <- merge(
  sfr_count,
  acs_data,
  by.x = c("zipcode", "yr"),
  by.y = c("ZIP", "year"),
  all.y = TRUE
)

# ZIP-years without an SFR record count as zero SFR homes.
sfr_count_merged[, no_sfr_homes := ifelse(is.na(no_sfr_homes), 0, no_sfr_homes)]

# Share of homes that are SFR; rows whose share is NA, NaN or infinite
# (e.g. no_of_homes equal to zero) are dropped.
sfr_count_merged[, frac_sfr := no_sfr_homes / no_of_homes]
sfr_count_merged <- sfr_count_merged[is.finite(frac_sfr)]

# Zero-padded, 5-character ZIP string.
sfr_count_merged[
  ,
  id := str_pad(as.character(zipcode), width = 5, side = "left", pad = "0")
]

# Coarse category of the SFR share (frac_sfr is finite at this point, so every
# row matches exactly one branch).
sfr_count_merged[
  ,
  sfr_frac_cat := case_when(
    frac_sfr > 0.03 ~ "3% or more",
    frac_sfr > 0.01 ~ "1% to 3%",
    frac_sfr > 0.0 ~ "0% to 1%",
    TRUE ~ "No SFR"
  )
]
# ZIP-to-county crosswalk reduced to one county per ZIP (the one with the
# largest RES_RATIO), then restricted to ZIPs with more than 10 SFR homes in
# 2019.
counties <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/ZIP_COUNTY_092016.csv")
setorder(counties, ZIP, -RES_RATIO)
counties <- counties[!duplicated(ZIP)]
counties <- counties[
  ZIP %in% sfr_count_merged[yr == 2019 & no_sfr_homes > 10]$zipcode
]

# The same set of ZIPs as zero-padded 5-character strings.
zipcode_subset <- str_pad(
  as.character(sfr_count_merged[yr == 2019 & no_sfr_homes > 10]$zipcode),
  width = 5,
  side = "left",
  pad = "0"
)
# Read the ZCTA, state and county shapefiles and flatten each one with
# fortify() into a data.table of polygon vertices.

# ZIP (ZCTA) polygons; the region id is GEOID10.
us_zip <- readOGR(
  "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Shapefiles/US Zip",
  "tl_2019_us_zcta510"
)
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US Zip", layer: "tl_2019_us_zcta510"
## with 33144 features
## It has 9 fields
## Integer64 fields read as strings: ALAND10 AWATER10
us_zip <- data.table(fortify(us_zip, region = "GEOID10"))

# State outlines; the region id is GEOID, converted to numeric.
us_states <- readOGR(
  "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Shapefiles/US States",
  "cb_2014_us_state_20m"
)
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US States", layer: "cb_2014_us_state_20m"
## with 52 features
## It has 9 fields
## Integer64 fields read as strings: ALAND AWATER
## Warning in readOGR("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana
## State University/Raw Data/Shapefiles/US States", : Z-dimension discarded
us_states <- fortify(us_states, region = "GEOID")
us_states <- data.table(us_states)
us_states[, id := as.numeric(id)]

# County polygons, kept only for counties present in the `counties` crosswalk.
us_counties <- readOGR(
  "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Shapefiles/US Counties/cb_2013_us_county_20m",
  "cb_2013_us_county_20m"
)
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\dratnadiwakara2\Documents\OneDrive - Louisiana State University\Raw Data\Shapefiles\US Counties\cb_2013_us_county_20m", layer: "cb_2013_us_county_20m"
## with 3221 features
## It has 9 fields
## Integer64 fields read as strings: ALAND AWATER
us_counties <- data.table(fortify(us_counties, region = "GEOID"))
us_counties <- us_counties[as.integer(id) %in% counties$COUNTY]
# Polygons of ZIPs with more than 10 SFR homes in 2019, joined to their SFR
# measures. ZIPs whose id starts with "99" and vertices outside the
# long < -50 / lat > 25 window are dropped.
us_zip_map <- merge(
  us_zip,
  sfr_count_merged[
    yr == 2019 & no_sfr_homes > 10,
    c("id", "frac_sfr", "sfr_frac_cat", "no_sfr_homes")
  ],
  by = "id"
)
us_zip_map <- us_zip_map[
  substr(id, 1, 2) != "99" & long < (-50) & lat > 25 & !is.na(sfr_frac_cat)
]

# Layers, bottom to top: counties containing those ZIPs (gold), the ZIPs
# themselves (blue), state outlines excluding ids 2, 15 and 72 (gray).
# NOTE(review): every layer sets `fill` as a constant rather than mapping it in
# aes(), so the fill scale and the fill legend title below appear to have no
# visible effect -- confirm whether a mapped fill was intended.
map1 <- ggplot() +
  geom_polygon(
    data = us_counties[long > (-130)],
    aes(x = long, y = lat, group = group),
    fill = "goldenrod1",
    color = NA
  ) +
  geom_polygon(
    data = us_zip_map,
    aes(x = long, y = lat, group = group),
    color = NA,
    fill = "dodgerblue"
  ) +
  # scale_fill_manual(values = c("darkturquoise", "dodgerblue", "dodgerblue4","white"))+
  scale_fill_gradientn(
    colors = c(
      "ivory1", "honeydew1", "darkseagreen3",
      "darkturquoise", "dodgerblue", "dodgerblue4"
    )
  ) +
  geom_polygon(
    data = us_states[!us_states$id %in% c(2, 15, 72)],
    aes(x = long, y = lat, group = group),
    fill = NA,
    color = "gray50"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "bottom",
    panel.grid = element_blank()
  ) +
  guides(fill = guide_legend(title = "Fraction of SFR homes"))
# ggsave("map2.png",map1,width = 11, height = 8)
alt text here
We were able to match 165,294 SFR homes to the ZTRAX data; the comparison sample contains roughly 12 million other (non-SFR) homes.
# Home characteristics for the ZIPs in zipcode_subset, with an SFR indicator
# (1 when the parcel id appears in sfr_homes, 0 otherwise).
home_chars <- readRDS("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Projects/ZTRAX AVM/home_char.rds")
home_chars_zip <- home_chars[propertyzip %in% zipcode_subset]
home_chars_zip[, sfr := as.numeric(importparcelid %in% sfr_homes$importparcelid)]

# Add the area variables (joined on rowid; only matched rows are kept).
areas <- readRDS("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Projects/ZTRAX AVM/areas.rds")
home_chars_zip <- merge(home_chars_zip, areas, by = "rowid")

# Keep homes with a positive lot size, 500-10,000 sq ft of building area
# (exclusive bounds), built after 1920, with 1 to 10 bedrooms.
home_chars_zip <- home_chars_zip[
  lotsizeacres > 0 &
    buildingareasqft > 500 &
    buildingareasqft < 10000 &
    yearbuilt > 1920 &
    totalbedrooms %in% 1:10
]
This table shows the key characteristics of the two samples: column 1 is non-SFR homes (sfr = 0) and column 2 is SFR homes (sfr = 1).
# Mean, median and standard deviation of the home characteristics, computed
# separately for non-SFR (sfr = 0) and SFR (sfr = 1) homes; missing values are
# ignored. The result is printed transposed: one row per statistic, one column
# per group.
# A named list replaces funs(), which is deprecated as of dplyr 0.8.0; the
# output columns keep the same `<variable>_<statistic>` names.
summary_stats <-
  home_chars_zip[, c("lotsizeacres", "yearbuilt", "noofstories", "totalbedrooms", "buildingareasqft", "sfr")] %>%
  group_by(sfr) %>%
  summarise_all(list(mean = mean, median = median, sd = sd), na.rm = TRUE)
print(t(summary_stats))
## [,1] [,2]
## sfr 0.0000000 1.0000000
## lotsizeacres_mean 110.6067205 32.8598838
## yearbuilt_mean 1983.3307768 1993.1486745
## noofstories_mean 1.3191327 1.4174492
## totalbedrooms_mean 3.2736025 3.3541871
## buildingareasqft_mean 1981.8753083 1921.6952581
## lotsizeacres_median 0.2090000 0.1840000
## yearbuilt_median 1987.0000000 2000.0000000
## noofstories_median 1.0000000 1.0000000
## totalbedrooms_median 3.0000000 3.0000000
## buildingareasqft_median 1760.0000000 1792.0000000
## lotsizeacres_sd 4950.5187483 815.9185597
## yearbuilt_sd 22.6218059 19.5057974
## noofstories_sd 0.5200679 0.5927055
## totalbedrooms_sd 0.8092896 0.6696256
## buildingareasqft_sd 944.4253040 706.8738659
These regressions examine how home characteristics differ between SFR and non-SFR homes.
# Regress each home characteristic on the SFR indicator and the remaining
# characteristics. Fixed effects: ZIP and year built, except in the year-built
# regression, which uses ZIP only.
r <- list(
  felm(
    log(lotsizeacres) ~ sfr + log(buildingareasqft) + totalbedrooms | propertyzip + yearbuilt,
    data = home_chars_zip
  ),
  felm(
    log(yearbuilt) ~ sfr + log(buildingareasqft) + totalbedrooms + log(lotsizeacres) | propertyzip,
    data = home_chars_zip
  ),
  felm(
    totalbedrooms ~ sfr + log(buildingareasqft) + log(lotsizeacres) | propertyzip + yearbuilt,
    data = home_chars_zip
  ),
  felm(
    log(buildingareasqft) ~ sfr + totalbedrooms + log(lotsizeacres) | propertyzip + yearbuilt,
    data = home_chars_zip
  )
)
stargazer(r, type = "text", no.space = TRUE, omit.stat = "ser")
##
## ==========================================================================================
## Dependent variable:
## --------------------------------------------------------------------
## log(lotsizeacres) log(yearbuilt) totalbedrooms log(buildingareasqft)
## (1) (2) (3) (4)
## ------------------------------------------------------------------------------------------
## sfr -0.162*** 0.003*** 0.088*** -0.069***
## (0.002) (0.00002) (0.001) (0.001)
## log(buildingareasqft) 0.865*** 0.010*** 1.224***
## (0.001) (0.00001) (0.001)
## totalbedrooms -0.060*** 0.001*** 0.217***
## (0.0003) (0.00000) (0.0001)
## log(lotsizeacres) -0.002*** -0.042*** 0.109***
## (0.00000) (0.0002) (0.0001)
## ------------------------------------------------------------------------------------------
## Observations 12,258,469 12,258,469 12,258,469 12,258,469
## R2 0.702 0.608 0.467 0.664
## Adjusted R2 0.702 0.607 0.466 0.664
## ==========================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Re-read the ZIP-to-county crosswalk and keep every ZIP row belonging to a
# county that is present in `counties`.
counties2 <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/ZIP_COUNTY_092016.csv")
counties2 <- counties2[COUNTY %in% counties$COUNTY]

# Indicators: ZIP is in zipcode_subset / ZIP appears in counties2.
acs_data[, sfr_zip := ifelse(ZIP %in% as.numeric(zipcode_subset), 1, 0)]
acs_data[, sfr_county := ifelse(ZIP %in% as.numeric(counties2$ZIP), 1, 0)]

# 2019 ACS summary statistics (mean, median, sd; missing values ignored).
# A named list replaces funs(), which is deprecated as of dplyr 0.8.0; the
# output columns keep the same `<variable>_<statistic>` names.
# First comparison: within sfr_county == 1, ZIPs with sfr_zip = 1 vs 0.
summary_stats_acs <-
  acs_data[year == 2019 & sfr_county == 1, c("median_age", "median_income", "house_value", "median_year_structure_built", "no_of_homes", "population", "population_gr", "income_gr", "sfr_zip")] %>%
  group_by(sfr_zip) %>%
  summarise_all(list(mean = mean, median = median, sd = sd), na.rm = TRUE)
# print(t(summary_stats_acs))

# Second comparison: all 2019 ZIPs, sfr_county = 1 vs 0. This overwrites the
# result of the first comparison.
summary_stats_acs <-
  acs_data[year == 2019, c("median_age", "median_income", "house_value", "median_year_structure_built", "no_of_homes", "population", "population_gr", "income_gr", "sfr_county")] %>%
  group_by(sfr_county) %>%
  summarise_all(list(mean = mean, median = median, sd = sd), na.rm = TRUE)
# print(t(summary_stats_acs))
# ZIP-to-county crosswalk with exactly one row per ZIP: after sorting by
# descending RES_RATIO within ZIP, the first row per ZIP is the county with the
# largest RES_RATIO.
#
# Fix: the de-duplication previously read from `counties2` instead of the
# table sorted here. That discarded the sort (so an arbitrary county was kept
# per ZIP) and restricted the crosswalk to the counties in `counties2`, which
# in turn restricted every data set merged with `counties3`.
# NOTE(review): the regression tables printed later in this file were
# generated before this fix; the "full sample" and county-subset house-price
# tables are identical there, presumably because of this bug. Re-run to refresh
# them.
counties3 <- fread("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Crosswalk Files/ZIP_COUNTY_092016.csv")
setorder(counties3, ZIP, -RES_RATIO)
counties3 <- counties3[!duplicated(ZIP)]
# Collapse the ZHVI series to yearly means per ZIP, add 3- and 10-period lags,
# and keep the 2022 cross-section with its county attached.
zhvi <- readRDS("Cloud Lab/zip_zhvi.rds")
zhvi[, year := year(month)]
zhvi <- zhvi[
  ,
  .(
    zhvi_uc_sfr_tier_33_67 = mean(zhvi_uc_sfr_tier_33_67, na.rm = TRUE),
    zhvi_bdrmcnt_3 = mean(zhvi_bdrmcnt_3, na.rm = TRUE)
  ),
  by = .(zipcode, year)
]

# The lags are positional, so rows must be in ascending year order within each
# ZIP. The grouped aggregation returns groups in order of first appearance,
# which does not guarantee that, so sort explicitly first.
# data.table::shift() is used instead of lag() so the result no longer depends
# on dplyr::lag() masking stats::lag().
# NOTE(review): an n-row lag equals an n-year lag only if every ZIP has one row
# per consecutive year -- confirm against the ZHVI file.
setorder(zhvi, zipcode, year)
zhvi[, zhvi_uc_sfr_tier_33_67_lag3 := shift(zhvi_uc_sfr_tier_33_67, 3L), by = zipcode]
zhvi[, zhvi_uc_sfr_tier_33_67_lag10 := shift(zhvi_uc_sfr_tier_33_67, 10L), by = zipcode]
zhvi[, zhvi_bdrmcnt_3_lag3 := shift(zhvi_bdrmcnt_3, 3L), by = zipcode]
zhvi[, zhvi_bdrmcnt_3_lag10 := shift(zhvi_bdrmcnt_3, 10L), by = zipcode]

zhvi_2022 <- zhvi[year == 2022]
zhvi_2022 <- merge(zhvi_2022, counties3, by.x = "zipcode", by.y = "ZIP")
# ZIP-to-state lookup downloaded from GitHub; only the zipcode and state
# abbreviation columns are kept.
geodata <- fread("https://raw.githubusercontent.com/scpike/us-state-county-zip/master/geo-data.csv")
geodata <- geodata[, c("zipcode", "state_abbr")]
# as.integer() turns non-numeric zipcodes into NA and emits the warning
# "NAs introduced by coercion".
geodata[, zipcode := as.integer(zipcode)]

# Attach the 2019 SFR/ACS variables to the 2022 ZHVI cross-section.
zhvi_2022 <- merge(zhvi_2022, sfr_count_merged[yr == 2019], by = "zipcode")

# Price ratios relative to the 3- and 10-period lags (current / lagged).
zhvi_2022[, `:=`(
  zhvi_bdrmcnt_3_gr3 = zhvi_bdrmcnt_3 / zhvi_bdrmcnt_3_lag3,
  zhvi_bdrmcnt_3_gr10 = zhvi_bdrmcnt_3 / zhvi_bdrmcnt_3_lag10,
  zhvi_uc_sfr_tier_33_67_gr3 = zhvi_uc_sfr_tier_33_67 / zhvi_uc_sfr_tier_33_67_lag3,
  zhvi_uc_sfr_tier_33_67_gr10 = zhvi_uc_sfr_tier_33_67 / zhvi_uc_sfr_tier_33_67_lag10
)]

# SFR-share bin: 0 for ZIPs with no SFR homes, quintile (1-5) of frac_sfr for
# ZIPs with at least one SFR home.
temp <- zhvi_2022[no_sfr_homes == 0][, frac_sfr_bin := 0]
zhvi_2022 <- zhvi_2022[no_sfr_homes > 0]
zhvi_2022[, frac_sfr_bin := ntile(frac_sfr, 5)]
zhvi_2022 <- rbind(temp, zhvi_2022)

# Add the state abbreviation and the derived control variables.
zhvi_2022 <- merge(zhvi_2022, geodata, by = "zipcode")
zhvi_2022[, `:=`(
  house_age = 2022 - median_year_structure_built,
  renter_frac = renter_occupied / (owner_occupied + renter_occupied),
  white_frac = white_population / population,
  college_frac = college_educated_no / population
)]
# House-price regressions on the SFR-share bins and ZIP-level controls, with
# county fixed effects and standard errors clustered by county.
# Columns: (1) 3-bedroom ZHVI, 10-period ratio; (2) 3-bedroom ZHVI, 3-period
# ratio; (3) mid-tier SFR ZHVI, 10-period ratio; (4) mid-tier SFR ZHVI,
# 3-period ratio.
control_vars <- "factor(frac_sfr_bin)+log(median_income)+log(house_value)+log(no_of_homes)+log(population)+log(1+house_age)+renter_frac+white_frac+college_frac+median_age+population_gr+college_educated_gr+income_gr|COUNTY|0|COUNTY"
r <- list(
  felm(as.formula(paste0("log(zhvi_bdrmcnt_3_gr10)~", control_vars)), data = zhvi_2022), # [zipcode %in% zori_2022$zipcode]
  felm(as.formula(paste0("log(zhvi_bdrmcnt_3_gr3)~", control_vars)), data = zhvi_2022),
  felm(as.formula(paste0("log(zhvi_uc_sfr_tier_33_67_gr10)~", control_vars)), data = zhvi_2022),
  felm(as.formula(paste0("log(zhvi_uc_sfr_tier_33_67_gr3)~", control_vars)), data = zhvi_2022)
)
stargazer(r, type = "text", no.space = TRUE, dep.var.labels.include = FALSE, omit.stat = "ser")
##
## =============================================================
## Dependent variable:
## ---------------------------------------
## (1) (2) (3) (4)
## -------------------------------------------------------------
## factor(frac_sfr_bin)1 -0.007 0.006** -0.004 0.007**
## (0.007) (0.003) (0.007) (0.003)
## factor(frac_sfr_bin)2 0.007 0.015*** 0.011** 0.016***
## (0.005) (0.003) (0.005) (0.003)
## factor(frac_sfr_bin)3 0.020** 0.018*** 0.025*** 0.020***
## (0.008) (0.004) (0.008) (0.003)
## factor(frac_sfr_bin)4 0.050*** 0.025*** 0.050*** 0.024***
## (0.009) (0.004) (0.008) (0.004)
## factor(frac_sfr_bin)5 0.078*** 0.032*** 0.073*** 0.031***
## (0.011) (0.004) (0.010) (0.004)
## log(median_income) -0.144*** -0.025* -0.183*** -0.026*
## (0.049) (0.013) (0.045) (0.014)
## log(house_value) -0.018 -0.040*** -0.012 -0.036***
## (0.026) (0.010) (0.024) (0.010)
## log(no_of_homes) -0.071** 0.021** -0.013 0.036***
## (0.034) (0.010) (0.031) (0.009)
## log(population) 0.071** -0.022** 0.011 -0.037***
## (0.034) (0.010) (0.031) (0.009)
## log(1 + house_age) 0.018*** 0.005*** 0.018*** 0.004**
## (0.005) (0.002) (0.005) (0.002)
## renter_frac -0.045 -0.066*** -0.035 -0.063***
## (0.036) (0.013) (0.037) (0.013)
## white_frac -0.143*** -0.100*** -0.135*** -0.105***
## (0.035) (0.015) (0.033) (0.015)
## college_frac 0.072 0.119*** 0.066 0.086*
## (0.094) (0.045) (0.093) (0.045)
## median_age -0.003*** -0.0001 -0.004*** -0.0004
## (0.001) (0.0004) (0.001) (0.0004)
## population_gr -0.099*** -0.007 -0.080** -0.004
## (0.033) (0.018) (0.031) (0.017)
## college_educated_gr 0.036* -0.002 0.021 -0.001
## (0.019) (0.010) (0.017) (0.009)
## income_gr 0.210*** -0.007 0.192*** 0.001
## (0.035) (0.015) (0.034) (0.015)
## -------------------------------------------------------------
## Observations 5,928 6,079 6,202 6,315
## R2 0.801 0.714 0.793 0.698
## Adjusted R2 0.790 0.698 0.782 0.682
## =============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Subset: ZIP codes in counties that contain at least one ZIP code with more than 10 SFR homes in 2019.
# Same house-price specifications, estimated only on ZIPs that appear in
# counties2.
control_vars <- "factor(frac_sfr_bin)+log(median_income)+log(house_value)+log(no_of_homes)+log(population)+log(1+house_age)+renter_frac+white_frac+college_frac+median_age+population_gr+college_educated_gr+income_gr|COUNTY|0|COUNTY"
r <- list(
  felm(as.formula(paste0("log(zhvi_bdrmcnt_3_gr10)~", control_vars)), data = zhvi_2022[zipcode %in% counties2$ZIP]), # [zipcode %in% zori_2022$zipcode]
  felm(as.formula(paste0("log(zhvi_bdrmcnt_3_gr3)~", control_vars)), data = zhvi_2022[zipcode %in% counties2$ZIP]),
  felm(as.formula(paste0("log(zhvi_uc_sfr_tier_33_67_gr10)~", control_vars)), data = zhvi_2022[zipcode %in% counties2$ZIP]),
  felm(as.formula(paste0("log(zhvi_uc_sfr_tier_33_67_gr3)~", control_vars)), data = zhvi_2022[zipcode %in% counties2$ZIP])
)
stargazer(r, type = "text", no.space = TRUE, dep.var.labels.include = FALSE, omit.stat = "ser")
##
## =============================================================
## Dependent variable:
## ---------------------------------------
## (1) (2) (3) (4)
## -------------------------------------------------------------
## factor(frac_sfr_bin)1 -0.007 0.006** -0.004 0.007**
## (0.007) (0.003) (0.007) (0.003)
## factor(frac_sfr_bin)2 0.007 0.015*** 0.011** 0.016***
## (0.005) (0.003) (0.005) (0.003)
## factor(frac_sfr_bin)3 0.020** 0.018*** 0.025*** 0.020***
## (0.008) (0.004) (0.008) (0.003)
## factor(frac_sfr_bin)4 0.050*** 0.025*** 0.050*** 0.024***
## (0.009) (0.004) (0.008) (0.004)
## factor(frac_sfr_bin)5 0.078*** 0.032*** 0.073*** 0.031***
## (0.011) (0.004) (0.010) (0.004)
## log(median_income) -0.144*** -0.025* -0.183*** -0.026*
## (0.049) (0.013) (0.045) (0.014)
## log(house_value) -0.018 -0.040*** -0.012 -0.036***
## (0.026) (0.010) (0.024) (0.010)
## log(no_of_homes) -0.071** 0.021** -0.013 0.036***
## (0.034) (0.010) (0.031) (0.009)
## log(population) 0.071** -0.022** 0.011 -0.037***
## (0.034) (0.010) (0.031) (0.009)
## log(1 + house_age) 0.018*** 0.005*** 0.018*** 0.004**
## (0.005) (0.002) (0.005) (0.002)
## renter_frac -0.045 -0.066*** -0.035 -0.063***
## (0.036) (0.013) (0.037) (0.013)
## white_frac -0.143*** -0.100*** -0.135*** -0.105***
## (0.035) (0.015) (0.033) (0.015)
## college_frac 0.072 0.119*** 0.066 0.086*
## (0.094) (0.045) (0.093) (0.045)
## median_age -0.003*** -0.0001 -0.004*** -0.0004
## (0.001) (0.0004) (0.001) (0.0004)
## population_gr -0.099*** -0.007 -0.080** -0.004
## (0.033) (0.018) (0.031) (0.017)
## college_educated_gr 0.036* -0.002 0.021 -0.001
## (0.019) (0.010) (0.017) (0.009)
## income_gr 0.210*** -0.007 0.192*** 0.001
## (0.035) (0.015) (0.034) (0.015)
## -------------------------------------------------------------
## Observations 5,928 6,079 6,202 6,315
## R2 0.801 0.714 0.793 0.698
## Adjusted R2 0.790 0.698 0.782 0.682
## =============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Collapse the ZORI rent index to yearly means per ZIP, add 3- and 5-period
# lags, and keep the 2022 cross-section.
zori <- data.table(readRDS("Cloud Lab/Zip_ZORI_AllHomes.rds"))
zori[, year := year(month)]
zori <- zori[
  ,
  .(Zip_ZORI_AllHomes = mean(Zip_ZORI_AllHomes, na.rm = TRUE)),
  by = .(zipcode, year)
]

# The lags are positional, so rows must be in ascending year order within each
# ZIP. The grouped aggregation returns groups in order of first appearance,
# which does not guarantee that, so sort explicitly first.
# data.table::shift() is used instead of lag() so the result no longer depends
# on dplyr::lag() masking stats::lag().
# NOTE(review): an n-row lag equals an n-year lag only if every ZIP has one row
# per consecutive year -- confirm against the ZORI file.
setorder(zori, zipcode, year)
zori[, Zip_ZORI_AllHomes_lag3 := shift(Zip_ZORI_AllHomes, 3L), by = zipcode]
zori[, Zip_ZORI_AllHomes_lag5 := shift(Zip_ZORI_AllHomes, 5L), by = zipcode]

zori_2022 <- zori[year == 2022]
# Attach the 2019 SFR/ACS variables to the 2022 rent cross-section.
zori_2022 <- merge(zori_2022, sfr_count_merged[yr == 2019], by = "zipcode")

# Rent ratios relative to the 3- and 5-period lags (current / lagged).
zori_2022[, `:=`(
  Zip_ZORI_AllHomes_gr3 = Zip_ZORI_AllHomes / Zip_ZORI_AllHomes_lag3,
  Zip_ZORI_AllHomes_gr5 = Zip_ZORI_AllHomes / Zip_ZORI_AllHomes_lag5
)]

# SFR-share bin: 0 for ZIPs with no SFR homes, quintile (1-5) of frac_sfr for
# ZIPs with at least one SFR home.
temp <- zori_2022[no_sfr_homes == 0][, frac_sfr_bin := 0]
zori_2022 <- zori_2022[no_sfr_homes > 0]
zori_2022[, frac_sfr_bin := ntile(frac_sfr, 5)]
zori_2022 <- rbind(temp, zori_2022)

# Add the state abbreviation, the derived control variables and the county.
zori_2022 <- merge(zori_2022, geodata, by = "zipcode")
zori_2022[, `:=`(
  house_age = 2022 - median_year_structure_built,
  renter_frac = renter_occupied / (owner_occupied + renter_occupied),
  white_frac = white_population / population,
  college_frac = college_educated_no / population
)]
zori_2022 <- merge(zori_2022, counties3, by.x = "zipcode", by.y = "ZIP")
# Rent regressions on the SFR-share bins and ZIP-level controls, with county
# fixed effects and standard errors clustered by county.
# Columns: (1) 5-period rent ratio; (2) 3-period rent ratio.
control_vars <- "factor(frac_sfr_bin)+log(median_income)+log(house_value)+log(no_of_homes)+log(population)+log(1+house_age)+renter_frac+white_frac+college_frac+median_age+population_gr+college_educated_gr+income_gr|COUNTY|0|COUNTY"
r <- list(
  felm(as.formula(paste0("log(Zip_ZORI_AllHomes_gr5)~", control_vars)), data = zori_2022),
  felm(as.formula(paste0("log(Zip_ZORI_AllHomes_gr3)~", control_vars)), data = zori_2022)
)
stargazer(r, type = "text", no.space = TRUE, dep.var.labels.include = FALSE, omit.stat = "ser")
##
## ==================================================
## Dependent variable:
## ----------------------------
## (1) (2)
## --------------------------------------------------
## factor(frac_sfr_bin)1 0.001 -0.005
## (0.008) (0.008)
## factor(frac_sfr_bin)2 0.007 0.001
## (0.009) (0.009)
## factor(frac_sfr_bin)3 0.008 -0.002
## (0.009) (0.009)
## factor(frac_sfr_bin)4 0.010 -0.002
## (0.009) (0.008)
## factor(frac_sfr_bin)5 0.017* -0.0004
## (0.010) (0.008)
## log(median_income) -0.022 -0.011
## (0.018) (0.013)
## log(house_value) -0.033* -0.016
## (0.018) (0.013)
## log(no_of_homes) 0.037 0.043
## (0.030) (0.030)
## log(population) -0.043 -0.045
## (0.030) (0.029)
## log(1 + house_age) -0.004 -0.006***
## (0.002) (0.002)
## renter_frac -0.092*** -0.104***
## (0.029) (0.030)
## white_frac -0.038* -0.041***
## (0.022) (0.015)
## college_frac -0.043 -0.013
## (0.077) (0.060)
## median_age -0.001 -0.001
## (0.001) (0.001)
## population_gr -0.098*** -0.065***
## (0.027) (0.020)
## college_educated_gr 0.043** 0.036**
## (0.019) (0.015)
## income_gr 0.024 -0.015
## (0.025) (0.018)
## --------------------------------------------------
## Observations 1,671 1,671
## R2 0.805 0.810
## Adjusted R2 0.781 0.786
## ==================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Subset: only ZIP codes for which rent (ZORI) data are available.
# Same house-price specifications, estimated only on ZIPs that are also
# present in the rent sample (zori_2022).
control_vars <- "factor(frac_sfr_bin)+log(median_income)+log(house_value)+log(no_of_homes)+log(population)+log(1+house_age)+renter_frac+white_frac+college_frac+median_age+population_gr+college_educated_gr+income_gr|COUNTY|0|COUNTY"
r <- list(
  felm(as.formula(paste0("log(zhvi_bdrmcnt_3_gr10)~", control_vars)), data = zhvi_2022[zipcode %in% zori_2022$zipcode]), # [zipcode %in% zori_2022$zipcode]
  felm(as.formula(paste0("log(zhvi_bdrmcnt_3_gr3)~", control_vars)), data = zhvi_2022[zipcode %in% zori_2022$zipcode]),
  felm(as.formula(paste0("log(zhvi_uc_sfr_tier_33_67_gr10)~", control_vars)), data = zhvi_2022[zipcode %in% zori_2022$zipcode]),
  felm(as.formula(paste0("log(zhvi_uc_sfr_tier_33_67_gr3)~", control_vars)), data = zhvi_2022[zipcode %in% zori_2022$zipcode])
)
stargazer(r, type = "text", no.space = TRUE, dep.var.labels.include = FALSE, omit.stat = "ser")
##
## =============================================================
## Dependent variable:
## ---------------------------------------
## (1) (2) (3) (4)
## -------------------------------------------------------------
## factor(frac_sfr_bin)1 -0.007 0.003 0.002 0.006
## (0.012) (0.008) (0.013) (0.007)
## factor(frac_sfr_bin)2 -0.005 0.010 0.001 0.010
## (0.015) (0.009) (0.016) (0.009)
## factor(frac_sfr_bin)3 0.015 0.012 0.015 0.013
## (0.016) (0.008) (0.016) (0.009)
## factor(frac_sfr_bin)4 0.044** 0.012 0.037* 0.009
## (0.020) (0.009) (0.021) (0.009)
## factor(frac_sfr_bin)5 0.072*** 0.018** 0.057** 0.014
## (0.027) (0.009) (0.027) (0.009)
## log(median_income) -0.071 -0.011 -0.109* -0.019
## (0.067) (0.018) (0.062) (0.019)
## log(house_value) -0.137*** -0.054*** -0.155*** -0.053***
## (0.029) (0.014) (0.028) (0.015)
## log(no_of_homes) -0.188*** -0.035* -0.083** -0.013
## (0.038) (0.020) (0.036) (0.019)
## log(population) 0.163*** 0.028 0.061* 0.007
## (0.036) (0.018) (0.036) (0.017)
## log(1 + house_age) 0.020** 0.003 0.020*** 0.002
## (0.008) (0.003) (0.006) (0.003)
## renter_frac 0.052 -0.054** 0.030 -0.073***
## (0.066) (0.022) (0.065) (0.021)
## white_frac -0.165*** -0.089*** -0.160*** -0.102***
## (0.061) (0.022) (0.058) (0.023)
## college_frac -0.333*** 0.096 -0.485*** 0.022
## (0.113) (0.072) (0.113) (0.074)
## median_age 0.001 0.001** -0.0005 0.001*
## (0.002) (0.001) (0.001) (0.001)
## population_gr -0.336*** -0.068*** -0.330*** -0.063***
## (0.044) (0.020) (0.047) (0.021)
## college_educated_gr 0.106*** 0.018* 0.121*** 0.023*
## (0.028) (0.011) (0.028) (0.012)
## income_gr 0.203*** -0.020 0.249*** -0.005
## (0.061) (0.021) (0.066) (0.022)
## -------------------------------------------------------------
## Observations 1,654 1,662 1,662 1,667
## R2 0.854 0.850 0.854 0.836
## Adjusted R2 0.835 0.832 0.835 0.816
## =============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01