# Load packages
library(pacman)
## Warning: package 'pacman' was built under R version 4.4.2
p_load(tidyverse, lubridate, usmap, gridExtra, stringr, readxl, plot3D,
cowplot, reshape2, scales, broom, data.table, ggplot2, stargazer,
foreign, ggthemes, ggforce, ggridges, latex2exp, viridis, extrafont,
kableExtra, snakecase, janitor)
# Load housing data
housingdata <- readRDS("testdata20250121.RDS")
Research question: Does GDP growth affect housing prices? Treatment variable is real county-level GDP growth in percentage, and the outcome variable is housing sale price.
Understanding the link between economic growth and housing prices is critical for policymakers, investors, and homeowners. Economic expansion can increase income levels, employment, and demand for housing, driving up prices. However, supply constraints, interest rates, and migration patterns may moderate or amplify these effects.Housing markets typically respond to local economic conditions such as employment, local industries, and neighborhood-level economic factors, and county GDP capture more relevant variations in economic activity that impact housing prices.
library(ggdag)
## Warning: package 'ggdag' was built under R version 4.4.2
##
## Attaching package: 'ggdag'
## The following object is masked from 'package:stats':
##
## filter
library(dagitty)
## Warning: package 'dagitty' was built under R version 4.4.2
# Define the DAG
dag <- dagitty("
dag {
county_GDP_Growth -> Housing_Prices
county_GDP_Growth -> Income -> Housing_Prices
Population_Growth -> county_GDP_Growth
Population_Growth -> Housing_Prices
Interest_Rates -> Housing_Prices
Labor_Market_Conditions -> county_GDP_Growth
Labor_Market_Conditions -> Housing_Prices
Housing_Supply -> Housing_Prices
Housing_Prices -> county_GDP_Growth
}")
ggdag(dag, layout = "tree", text = FALSE) + # Use a layout algorithm
theme_minimal() +
geom_dag_text(aes(label = name), size = 2, color = "white", nudge_x = 0, nudge_y = 0) +
ggtitle("Causal DAG for State GDP Growth and Housing Prices") +
theme(plot.title = element_text(size = 10))
Population, interest rates, and labor supply are confounders since they can influence both GDP growth and housing prices. Higher GDP can be linked with increased income, which affects housing demand and prices.At the same time, reverse causality is a concern because while higher GDP can drive higher housing prices, high housing sales also drive GDP.
Identification:
sale_amount ~ land_square_footage + bedrooms_all_buildings + number_of_bathrooms + AC_presence + stories_number + lagged_county_GDP_growth | fips_county_code+year
housingdata$sale_date <- as.character(housingdata$sale_date)
# Extract year, month, and day
housingdata$year <- substr(housingdata$sale_date, 1, 4)
housingdata$month <- substr(housingdata$sale_date, 5, 6)
housingdata$day <- substr(housingdata$sale_date, 7, 8)
housing_dates <- housingdata %>% select(c("sale_date", "year", "month", "day"))
head(housing_dates, 10)
## sale_date year month day
## <char> <char> <char> <char>
## 1: 20150129 2015 01 29
## 2: 20090904 2009 09 04
## 3: 20130228 2013 02 28
## 4: 20080825 2008 08 25
## 5: 20140225 2014 02 25
## 6: 20180111 2018 01 11
## 7: 20150408 2015 04 08
## 8: 20150623 2015 06 23
## 9: 20090528 2009 05 28
## 10: 20120131 2012 01 31
housingdata$year <- as.numeric(housingdata$year)
housingdata$year_built <- as.numeric(housingdata$year_built)
summary(housingdata$year)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2008 2012 2015 2015 2017 2019
summary(housingdata$year_built)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1700 1965 1986 1981 2001 2019
quantile(housingdata$year_built, probs = seq(0, 1, 0.25))
## 0% 25% 50% 75% 100%
## 1700 1965 1986 2001 2019
quantile(housingdata$year, probs = seq(0, 1, 0.25))
## 0% 25% 50% 75% 100%
## 2008 2012 2015 2017 2019
quantiles <- quantile(housingdata$year_built, probs = c(0, 0.25, 0.5, 0.75, 1), na.rm = TRUE)
housingdata$year_built_grp <- cut(housingdata$year_built,
breaks = quantiles,
labels = c("1", "2", "3", "4"),
include.lowest = TRUE)
table(housingdata$year_built_grp)
##
## 1 2 3 4
## 588505 622996 565824 573223
remove_top_1_percent <- function(x) {
threshold <- quantile(x, probs = 0.99, na.rm = TRUE) # Compute 99th percentile
x[x >= threshold] <- NA # Replace outliers with NA
return(x)
}
# Apply the function to the specified variables
cleanhousingdata <- housingdata %>%
mutate(across(c(bedrooms_all_buildings, number_of_bathrooms, number_of_fireplaces, land_square_footage), remove_top_1_percent)) %>%
drop_na() # Remove rows with NA values
summary(cleanhousingdata)
## sale_amount sale_date x y
## Min. :1.000e+02 Length:2249257 Min. :-124.57 Min. :26.32
## 1st Qu.:1.770e+05 Class :character 1st Qu.:-118.32 1st Qu.:33.51
## Median :2.800e+05 Mode :character Median :-104.77 Median :34.35
## Mean :4.142e+05 Mean :-102.84 Mean :35.85
## 3rd Qu.:4.550e+05 3rd Qu.: -84.55 3rd Qu.:38.33
## Max. :2.730e+09 Max. : -73.91 Max. :48.97
## fips_county_code year_built bedrooms_all_buildings number_of_bathrooms
## Min. : 1101 Min. :1700 Min. :1.000 Min. :0.010
## 1st Qu.: 6081 1st Qu.:1965 1st Qu.:3.000 1st Qu.:2.000
## Median :12091 Median :1985 Median :3.000 Median :2.000
## Mean :20499 Mean :1981 Mean :3.366 Mean :2.402
## 3rd Qu.:40101 3rd Qu.:2001 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :53071 Max. :2019 Max. :5.000 Max. :5.400
## number_of_fireplaces stories_number land_square_footage abbr
## Min. :1.000 Min. : 0.500 Min. : 1 Length:2249257
## 1st Qu.:1.000 1st Qu.: 1.000 1st Qu.: 6764 Class :character
## Median :1.000 Median : 1.000 Median : 9309 Mode :character
## Mean :1.084 Mean : 1.361 Mean : 20019
## 3rd Qu.:1.000 3rd Qu.: 2.000 3rd Qu.: 15899
## Max. :2.000 Max. :175.000 Max. :389760
## AC_presence year month day
## Min. :0.0000 Min. :2008 Length:2249257 Length:2249257
## 1st Qu.:0.0000 1st Qu.:2012 Class :character Class :character
## Median :1.0000 Median :2015 Mode :character Mode :character
## Mean :0.6097 Mean :2015
## 3rd Qu.:1.0000 3rd Qu.:2017
## Max. :1.0000 Max. :2019
## year_built_grp
## 1:568753
## 2:602880
## 3:540377
## 4:537247
##
##
hpi <- read_excel("hpi_po_us_and_census.xls")
cleanhpi <- hpi %>%
filter(division!= "USA")
start_year <- min(housingdata$year, na.rm = TRUE)
cleanhpi <- cleanhpi %>%
group_by(division) %>%
mutate(base_index = index_po_seasonally_adjusted[year == start_year & qtr == 1],
hpi = (index_po_seasonally_adjusted / base_index) * 100) %>%
ungroup()
head(cleanhpi)
## # A tibble: 6 × 7
## division year qtr index_po_not_seasonal…¹ index_po_seasonally_…² base_index
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 DV_ENC 1991 1 100 100 180.
## 2 DV_ENC 1991 2 101. 101. 180.
## 3 DV_ENC 1991 3 102. 101. 180.
## 4 DV_ENC 1991 4 103. 102. 180.
## 5 DV_ENC 1992 1 104. 104. 180.
## 6 DV_ENC 1992 2 105. 105. 180.
## # ℹ abbreviated names: ¹​index_po_not_seasonally_adjusted,
## # ²​index_po_seasonally_adjusted
## # ℹ 1 more variable: hpi <dbl>
cleanhousingdata <- cleanhousingdata %>%
mutate(
qtr = case_when(
month %in% c("01", "02", "03") ~ 1, # First quarter (January to March)
month %in% c("04", "05", "06") ~ 2, # Second quarter (April to June)
month %in% c("07", "08", "09") ~ 3, # Third quarter (July to September)
month %in% c("10", "11", "12") ~ 4 # Fourth quarter (October to December)
),
division = case_when(
abbr %in% c("CT", 'ME', 'MA', 'NH', 'RO', 'VT') ~ "DV_NE",
abbr %in% c("IN", 'IL', 'MI', 'OH', 'WI') ~ "DV_ENC",
abbr %in% c("ND", 'SD', 'NE', 'KS', 'MN', 'IA', 'MO') ~ "DV_WNC",
abbr %in% c("MT", 'ID', 'WY', 'NV', 'AZ', 'NM', 'CO','UT') ~ "DV_MT",
abbr %in% c("WA", 'OR', 'CA', 'AK', 'HI') ~ "DV_PAC",
abbr %in% c("NY", 'PA', 'NJ') ~ "DV_MA",
abbr %in% c("WV", 'MD', 'DE', 'VA', 'NC', 'SC','GA','FL','DC') ~ "DV_SA",
abbr %in% c("KY", 'TN', 'MS', 'AL', 'MN', 'IA', 'MO') ~ "DV_ESC",
abbr %in% c("OK", 'TX', 'AR', 'LA') ~ "DV_WSC"
)
)
cleanhousingHPI <- cleanhousingdata %>%
left_join(cleanhpi, by = c('division', 'year', 'qtr')) %>%
mutate(dlog_sale_amount = log(sale_amount * 100 / hpi))
head(cleanhousingHPI)
## sale_amount sale_date x y fips_county_code year_built
## <num> <char> <num> <num> <int> <num>
## 1: 104000 20150129 -86.19826 32.41438 1101 2009
## 2: 180501 20090904 -86.20467 32.32170 1101 1986
## 3: 140000 20130228 -86.13278 32.34974 1101 2006
## 4: 245916 20080825 -86.17190 32.40677 1101 2000
## 5: 60000 20140225 -86.27811 32.39384 1101 1940
## 6: 108150 20180111 -86.12927 32.34984 1101 2006
## bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
## <int> <num> <int>
## 1: 3 2.0 1
## 2: 3 2.0 1
## 3: 3 2.0 1
## 4: 3 2.0 1
## 5: 2 1.5 1
## 6: 3 2.0 1
## stories_number land_square_footage abbr AC_presence year month day
## <num> <int> <char> <num> <num> <char> <char>
## 1: 1 12855 AL 0 2015 01 29
## 2: 1 16583 AL 0 2009 09 04
## 3: 1 6216 AL 0 2013 02 28
## 4: 1 8372 AL 0 2008 08 25
## 5: 1 21928 AL 0 2014 02 25
## 6: 1 4713 AL 0 2018 01 11
## year_built_grp qtr division index_po_not_seasonally_adjusted
## <fctr> <num> <char> <num>
## 1: 4 1 DV_ESC 193.48
## 2: 2 3 DV_ESC 187.69
## 3: 4 1 DV_ESC 181.11
## 4: 3 3 DV_ESC 193.03
## 5: 1 1 DV_ESC 186.10
## 6: 4 1 DV_ESC 224.79
## index_po_seasonally_adjusted base_index hpi dlog_sale_amount
## <num> <num> <num> <num>
## 1: 195.20 195.53 99.83123 11.55384
## 2: 186.02 195.53 95.13630 12.15335
## 3: 183.18 195.53 93.68383 11.91464
## 4: 191.54 195.53 97.95939 12.43336
## 5: 187.99 195.53 96.14381 11.04142
## 6: 226.27 195.53 115.72137 11.44526
county_GDP <- read.csv("CAGDP9__ALL_AREAS_2001_2023.csv") # downloaded from https://www.bea.gov/data/gdp/gdp-county-metro-and-other-areas
head(county_GDP)
## GeoFIPS GeoName Region TableName LineCode IndustryClassification
## 1 00000 United States * NA CAGDP9 1 ...
## 2 00000 United States * NA CAGDP9 2 ...
## 3 00000 United States * NA CAGDP9 3 11
## 4 00000 United States * NA CAGDP9 6 21
## 5 00000 United States * NA CAGDP9 10 22
## 6 00000 United States * NA CAGDP9 11 23
## Description
## 1 All industry total
## 2 Private industries
## 3 Agriculture, forestry, fishing and hunting
## 4 Mining, quarrying, and oil and gas extraction
## 5 Utilities
## 6 Construction
## Unit X2001 X2002 X2003
## 1 Thousands of chained 2017 dollars 14230726000 14472712000 14877312000
## 2 Thousands of chained 2017 dollars 12041461000 12245105000 12616710000
## 3 Thousands of chained 2017 dollars 127095000 131323000 141451000
## 4 Thousands of chained 2017 dollars 175810000 181209000 159545000
## 5 Thousands of chained 2017 dollars 233878000 239465000 236504000
## 6 Thousands of chained 2017 dollars 909395000 878484000 896733000
## X2004 X2005 X2006 X2007 X2008 X2009
## 1 15449757000 15987957000 16433148000 16762445000 16781485000 16349110000
## 2 13159877000 13667016000 14095064000 14398387000 14372138000 13941688000
## 3 153419000 159554000 161687000 144468000 144066000 159975000
## 4 159750000 161563000 186486000 201455000 194981000 224143000
## 5 251198000 239788000 254326000 260531000 276326000 256992000
## 6 930317000 934014000 913050000 888205000 801629000 686678000
## X2010 X2011 X2012 X2013 X2014 X2015
## 1 16789750000 17052410000 17442759000 17812167000 18261714000 18799622000
## 2 14366648000 14634972000 15045864000 15417297000 15860786000 16394174000
## 3 155952000 150224000 145700000 160433000 158986000 171593000
## 4 199079000 207047000 232219000 241753000 267428000 285933000
## 5 290760000 301868000 306229000 304002000 292730000 296934000
## 6 656758000 648944000 670176000 697567000 721220000 764407000
## X2016 X2017 X2018 X2019 X2020 X2021
## 1 19141672000 19612102000 20193896000 20715671000 20267585000 21494798000
## 2 16710707000 17156255000 17711775000 18216263000 17789792000 19004921000
## 3 181537000 176840000 184105000 172480000 174902000 185699000
## 4 261614000 267302000 277013000 312457000 304491000 277667000
## 5 313557000 313711000 309269000 312519000 331357000 321279000
## 6 804359000 840220000 863755000 882288000 862678000 887608000
## X2022 X2023
## 1 22034828000 22671096000
## 2 19506244000 20092919000
## 3 185649000 192667000
## 4 251747000 336302000
## 5 326953000 342977000
## 6 839967000 821062000
colnames(county_GDP) <- gsub("^X", "", colnames(county_GDP))
unique(county_GDP$Description)
## [1] "All industry total "
## [2] " Private industries "
## [3] " Agriculture, forestry, fishing and hunting "
## [4] " Mining, quarrying, and oil and gas extraction "
## [5] " Utilities "
## [6] " Construction "
## [7] " Manufacturing "
## [8] " Durable goods manufacturing "
## [9] " Nondurable goods manufacturing "
## [10] " Wholesale trade "
## [11] " Retail trade "
## [12] " Transportation and warehousing "
## [13] " Information "
## [14] " Finance, insurance, real estate, rental, and leasing "
## [15] " Finance and insurance "
## [16] " Real estate and rental and leasing "
## [17] " Professional and business services "
## [18] " Professional, scientific, and technical services "
## [19] " Management of companies and enterprises "
## [20] " Administrative and support and waste management and remediation services "
## [21] " Educational services, health care, and social assistance "
## [22] " Educational services "
## [23] " Health care and social assistance "
## [24] " Arts, entertainment, recreation, accommodation, and food services "
## [25] " Arts, entertainment, and recreation "
## [26] " Accommodation and food services "
## [27] " Other services (except government and government enterprises) "
## [28] "Government and government enterprises "
## [29] "Natural resources and mining "
## [30] "Trade "
## [31] "Transportation and utilities "
## [32] "Manufacturing and information "
## [33] "Private goods-producing industries 2/"
## [34] "Private services-providing industries 3/"
## [35] ""
county_GDP_clean <- county_GDP %>%
select(-c("GeoName", "Region", "TableName", "LineCode", "IndustryClassification", "Unit")) %>%
filter(Description == "All industry total ") %>%
select(-Description) %>%
mutate(across(starts_with("2"), ~ as.numeric(.)))
## Warning: There were 23 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(starts_with("2"), ~as.numeric(.))`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 22 remaining warnings.
# Ensure only the year columns are selected for GDP growth calculation
year_columns <- grep("^2", colnames(county_GDP_clean), value = TRUE) # Columns starting with "2" (e.g., 2013, 2014)
# Identify the county code column
county_code_column <- "GeoFIPS" # Adjust if your county code column has a different name
# Create a new dataframe to store the growth values
GDP_growth <- county_GDP_clean[, c(county_code_column, year_columns)] # Keep county code and year columns
# Loop through the year columns (starting from the second year)
for (i in 2:length(year_columns)) {
GDP_growth[, paste0("Growth_", year_columns[i])] <-
(county_GDP_clean[, year_columns[i]] - county_GDP_clean[, year_columns[i - 1]]) /
county_GDP_clean[, year_columns[i - 1]]
}
GDP_growth_only <- GDP_growth[, c(county_code_column, grep("^Growth", colnames(GDP_growth), value = TRUE))]
head(GDP_growth_only)
## GeoFIPS Growth_2002 Growth_2003 Growth_2004 Growth_2005 Growth_2006
## 1 00000 0.01700447 0.02795606 0.03847772 3.483550e-02 0.027845396
## 2 01000 0.02363979 0.03253245 0.06669167 3.861089e-02 0.013573748
## 3 01001 0.03735550 0.02900563 0.15480200 1.058026e-02 0.067085703
## 4 01003 0.05108749 0.06140797 0.10047300 1.098696e-01 0.028251361
## 5 01005 -0.01714853 0.02810943 0.08378639 6.986655e-05 -0.023382165
## 6 01007 0.01316053 0.04838200 0.04589706 1.718075e-03 -0.006227022
## Growth_2007 Growth_2008 Growth_2009 Growth_2010 Growth_2011 Growth_2012
## 1 0.02003858 1.135872e-03 -0.02576500 0.02695193 0.015644069 0.02289113
## 2 0.01051212 -5.237230e-03 -0.03371375 0.02710156 0.009615559 0.01143487
## 3 0.01693207 -8.795678e-02 0.02515675 0.07063659 0.060254491 0.15574675
## 4 0.02931997 -3.366169e-02 -0.04876943 0.01452880 -0.013669136 0.01146138
## 5 -0.03267907 -5.981153e-02 -0.02023018 0.01631280 -0.059960213 -0.03387160
## 6 0.06630980 4.437959e-05 -0.02201652 0.08199828 0.017552213 -0.00773134
## Growth_2013 Growth_2014 Growth_2015 Growth_2016 Growth_2017 Growth_2018
## 1 0.021178301 0.025238198 0.02945550 0.01819451 0.02457622 0.02966505
## 2 0.016140350 -0.004314198 0.01397713 0.01872452 0.01762958 0.01935825
## 3 -0.062798242 0.007175474 0.08348704 0.04554809 -0.04533391 0.01417031
## 4 0.052683661 0.022256467 0.04682811 0.05262911 0.03787070 0.05098802
## 5 0.125377005 -0.049217811 -0.02324537 -0.01319369 -0.01166267 0.01616649
## 6 0.001707844 -0.009117362 -0.03032869 0.01918625 0.01308887 -0.02360224
## Growth_2019 Growth_2020 Growth_2021 Growth_2022 Growth_2023
## 1 0.02583825 -0.021630291 0.060550529 0.025123753 0.02887556
## 2 0.02021662 -0.013246261 0.051454996 0.020664475 0.02849730
## 3 -0.02658355 0.004002264 -0.006283991 0.092021836 0.02645812
## 4 0.04976492 -0.004406941 0.073623201 0.050441086 0.03174627
## 5 -0.01548427 -0.039285386 0.028927093 -0.011256687 -0.03874069
## 6 0.11114692 0.044329930 -0.001772834 -0.005221433 0.01948757
colnames(GDP_growth_only) <- gsub("^Growth_", "", colnames(GDP_growth_only))
head(GDP_growth_only)
## GeoFIPS 2002 2003 2004 2005 2006
## 1 00000 0.01700447 0.02795606 0.03847772 3.483550e-02 0.027845396
## 2 01000 0.02363979 0.03253245 0.06669167 3.861089e-02 0.013573748
## 3 01001 0.03735550 0.02900563 0.15480200 1.058026e-02 0.067085703
## 4 01003 0.05108749 0.06140797 0.10047300 1.098696e-01 0.028251361
## 5 01005 -0.01714853 0.02810943 0.08378639 6.986655e-05 -0.023382165
## 6 01007 0.01316053 0.04838200 0.04589706 1.718075e-03 -0.006227022
## 2007 2008 2009 2010 2011 2012
## 1 0.02003858 1.135872e-03 -0.02576500 0.02695193 0.015644069 0.02289113
## 2 0.01051212 -5.237230e-03 -0.03371375 0.02710156 0.009615559 0.01143487
## 3 0.01693207 -8.795678e-02 0.02515675 0.07063659 0.060254491 0.15574675
## 4 0.02931997 -3.366169e-02 -0.04876943 0.01452880 -0.013669136 0.01146138
## 5 -0.03267907 -5.981153e-02 -0.02023018 0.01631280 -0.059960213 -0.03387160
## 6 0.06630980 4.437959e-05 -0.02201652 0.08199828 0.017552213 -0.00773134
## 2013 2014 2015 2016 2017 2018
## 1 0.021178301 0.025238198 0.02945550 0.01819451 0.02457622 0.02966505
## 2 0.016140350 -0.004314198 0.01397713 0.01872452 0.01762958 0.01935825
## 3 -0.062798242 0.007175474 0.08348704 0.04554809 -0.04533391 0.01417031
## 4 0.052683661 0.022256467 0.04682811 0.05262911 0.03787070 0.05098802
## 5 0.125377005 -0.049217811 -0.02324537 -0.01319369 -0.01166267 0.01616649
## 6 0.001707844 -0.009117362 -0.03032869 0.01918625 0.01308887 -0.02360224
## 2019 2020 2021 2022 2023
## 1 0.02583825 -0.021630291 0.060550529 0.025123753 0.02887556
## 2 0.02021662 -0.013246261 0.051454996 0.020664475 0.02849730
## 3 -0.02658355 0.004002264 -0.006283991 0.092021836 0.02645812
## 4 0.04976492 -0.004406941 0.073623201 0.050441086 0.03174627
## 5 -0.01548427 -0.039285386 0.028927093 -0.011256687 -0.03874069
## 6 0.11114692 0.044329930 -0.001772834 -0.005221433 0.01948757
GDP_growth_final <- GDP_growth_only %>%
pivot_longer(cols = -GeoFIPS,
names_to = "year",
values_to = "GDP_GrRt") %>%
mutate(year = as.numeric(year),
GeoFIPS = as.numeric(GeoFIPS))
head(cleanhousingHPI)
## sale_amount sale_date x y fips_county_code year_built
## <num> <char> <num> <num> <int> <num>
## 1: 104000 20150129 -86.19826 32.41438 1101 2009
## 2: 180501 20090904 -86.20467 32.32170 1101 1986
## 3: 140000 20130228 -86.13278 32.34974 1101 2006
## 4: 245916 20080825 -86.17190 32.40677 1101 2000
## 5: 60000 20140225 -86.27811 32.39384 1101 1940
## 6: 108150 20180111 -86.12927 32.34984 1101 2006
## bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
## <int> <num> <int>
## 1: 3 2.0 1
## 2: 3 2.0 1
## 3: 3 2.0 1
## 4: 3 2.0 1
## 5: 2 1.5 1
## 6: 3 2.0 1
## stories_number land_square_footage abbr AC_presence year month day
## <num> <int> <char> <num> <num> <char> <char>
## 1: 1 12855 AL 0 2015 01 29
## 2: 1 16583 AL 0 2009 09 04
## 3: 1 6216 AL 0 2013 02 28
## 4: 1 8372 AL 0 2008 08 25
## 5: 1 21928 AL 0 2014 02 25
## 6: 1 4713 AL 0 2018 01 11
## year_built_grp qtr division index_po_not_seasonally_adjusted
## <fctr> <num> <char> <num>
## 1: 4 1 DV_ESC 193.48
## 2: 2 3 DV_ESC 187.69
## 3: 4 1 DV_ESC 181.11
## 4: 3 3 DV_ESC 193.03
## 5: 1 1 DV_ESC 186.10
## 6: 4 1 DV_ESC 224.79
## index_po_seasonally_adjusted base_index hpi dlog_sale_amount
## <num> <num> <num> <num>
## 1: 195.20 195.53 99.83123 11.55384
## 2: 186.02 195.53 95.13630 12.15335
## 3: 183.18 195.53 93.68383 11.91464
## 4: 191.54 195.53 97.95939 12.43336
## 5: 187.99 195.53 96.14381 11.04142
## 6: 226.27 195.53 115.72137 11.44526
Housing_GDP <- cleanhousingHPI %>%
mutate(GeoFIPS = fips_county_code) %>%
left_join(GDP_growth_final, by = c("GeoFIPS", "year"))
head(Housing_GDP)
## sale_amount sale_date x y fips_county_code year_built
## <num> <char> <num> <num> <int> <num>
## 1: 104000 20150129 -86.19826 32.41438 1101 2009
## 2: 180501 20090904 -86.20467 32.32170 1101 1986
## 3: 140000 20130228 -86.13278 32.34974 1101 2006
## 4: 245916 20080825 -86.17190 32.40677 1101 2000
## 5: 60000 20140225 -86.27811 32.39384 1101 1940
## 6: 108150 20180111 -86.12927 32.34984 1101 2006
## bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
## <int> <num> <int>
## 1: 3 2.0 1
## 2: 3 2.0 1
## 3: 3 2.0 1
## 4: 3 2.0 1
## 5: 2 1.5 1
## 6: 3 2.0 1
## stories_number land_square_footage abbr AC_presence year month day
## <num> <int> <char> <num> <num> <char> <char>
## 1: 1 12855 AL 0 2015 01 29
## 2: 1 16583 AL 0 2009 09 04
## 3: 1 6216 AL 0 2013 02 28
## 4: 1 8372 AL 0 2008 08 25
## 5: 1 21928 AL 0 2014 02 25
## 6: 1 4713 AL 0 2018 01 11
## year_built_grp qtr division index_po_not_seasonally_adjusted
## <fctr> <num> <char> <num>
## 1: 4 1 DV_ESC 193.48
## 2: 2 3 DV_ESC 187.69
## 3: 4 1 DV_ESC 181.11
## 4: 3 3 DV_ESC 193.03
## 5: 1 1 DV_ESC 186.10
## 6: 4 1 DV_ESC 224.79
## index_po_seasonally_adjusted base_index hpi dlog_sale_amount GeoFIPS
## <num> <num> <num> <num> <num>
## 1: 195.20 195.53 99.83123 11.55384 1101
## 2: 186.02 195.53 95.13630 12.15335 1101
## 3: 183.18 195.53 93.68383 11.91464 1101
## 4: 191.54 195.53 97.95939 12.43336 1101
## 5: 187.99 195.53 96.14381 11.04142 1101
## 6: 226.27 195.53 115.72137 11.44526 1101
## GDP_GrRt
## <num>
## 1: 0.021559187
## 2: -0.045027726
## 3: 0.003560440
## 4: -0.010115864
## 5: -0.008824075
## 6: -0.002555574
state_year_means <- Housing_GDP %>%
group_by(abbr, year) %>%
summarize(state_GDP_GrRt = mean(GDP_GrRt, na.rm = TRUE)) %>%
rename("state" = "abbr") %>%
ungroup()
## `summarise()` has grouped output by 'abbr'. You can override using the
## `.groups` argument.
ggplot(state_year_means, aes(x = year, y = state_GDP_GrRt, group = state, color = state)) +
geom_line() +
theme_minimal() +
labs(title = "Mean GDP Growth by State and Year",
x = "Year",
y = "State Mean GDP Growth",
color = "State") +
theme(legend.position = "none")
plot_usmap(data = state_year_means %>% filter(year == 2013),
values = "state_GDP_GrRt", regions = "states") +
scale_fill_continuous(name = "GDP Growth", low = "blue", high = "red") +
labs(title = "State-Level GDP Growth (2020)") +
theme(legend.position = "right")
#install.packages("patchwork")
library(patchwork) # For arranging multiple plots
## Warning: package 'patchwork' was built under R version 4.4.2
##
## Attaching package: 'patchwork'
## The following object is masked from 'package:cowplot':
##
## align_plots
plots <- lapply(unique(state_year_means$year), function(y) {
plot_usmap(data = state_year_means %>% filter(year == y), values = "state_GDP_GrRt") +
scale_fill_continuous(name = "GDP Growth", low = "blue", high = "red") +
labs(title = paste("State-Level GDP Growth in", y))
})
# Arrange the maps side by side
wrap_plots(plots)
GDP growth rates have not been concentrating in specific states over the
years.
state_year_sale <- Housing_GDP %>%
group_by(abbr, year) %>%
summarize(state_housing = mean(sale_amount, na.rm = TRUE)) %>%
rename("state" = "abbr") %>%
ungroup()
## `summarise()` has grouped output by 'abbr'. You can override using the
## `.groups` argument.
ggplot(state_year_sale, aes(x = year, y = state_housing, group = state, color = state)) +
geom_line() +
theme_minimal() +
labs(title = "Mean housing sale amount by State and Year",
x = "Year",
y = "Mean housing sale amount",
color = "State") +
theme(legend.position = "none")
#iiii.
# Create the lagged variable
Housing_GDP <- Housing_GDP %>%
arrange(fips_county_code, year) %>%
group_by(fips_county_code) %>%
mutate(GDP_growth_lag = lag(GDP_GrRt, order_by = year)) %>%
ungroup()
# Load the necessary packages
library(xtable)
##
## Attaching package: 'xtable'
## The following objects are masked from 'package:ggdag':
##
## label, label<-
# Assuming your dataset is named `housing_data`
# Create summary statistics table
summary_stats <- summary(Housing_GDP[c("sale_amount", "land_square_footage", "bedrooms_all_buildings",
"number_of_bathrooms", "AC_presence", "stories_number",
"GDP_growth_lag")])
# Display the table
xtable(summary_stats, digits = 2)
## % latex table generated in R 4.4.1 by xtable 1.8-4 package
## % Wed Feb 19 11:07:50 2025
## \begin{table}[ht]
## \centering
## \begin{tabular}{rlllllll}
## \hline
## & sale\_amount & land\_square\_footage & bedrooms\_all\_buildings & number\_of\_bathrooms & AC\_presence & stories\_number & GDP\_growth\_lag \\
## \hline
## X & Min. :1.000e+02 & Min. : 1 & Min. :1.000 & Min. :0.010 & Min. :0.0000 & Min. : 0.500 & Min. :-0.579 \\
## X.1 & 1st Qu.:1.770e+05 & 1st Qu.: 6764 & 1st Qu.:3.000 & 1st Qu.:2.000 & 1st Qu.:0.0000 & 1st Qu.: 1.000 & 1st Qu.: 0.012 \\
## X.2 & Median :2.800e+05 & Median : 9309 & Median :3.000 & Median :2.000 & Median :1.0000 & Median : 1.000 & Median : 0.029 \\
## X.3 & Mean :4.142e+05 & Mean : 20019 & Mean :3.366 & Mean :2.402 & Mean :0.6097 & Mean : 1.361 & Mean : 0.028 \\
## X.4 & 3rd Qu.:4.550e+05 & 3rd Qu.: 15899 & 3rd Qu.:4.000 & 3rd Qu.:3.000 & 3rd Qu.:1.0000 & 3rd Qu.: 2.000 & 3rd Qu.: 0.045 \\
## X.5 & Max. :2.730e+09 & Max. :389760 & Max. :5.000 & Max. :5.400 & Max. :1.0000 & Max. :175.000 & Max. : 1.560 \\
## X.6 & & & & & & & NA's :15917 \\
## \hline
## \end{tabular}
## \end{table}
library(fixest)
##
## Attaching package: 'fixest'
## The following object is masked from 'package:scales':
##
## pvalue
# Estimate the fixed-effects regression model
model_feols <- feols(sale_amount ~ land_square_footage + bedrooms_all_buildings +
number_of_bathrooms + AC_presence + stories_number +
GDP_growth_lag | fips_county_code + year,
data = Housing_GDP,
cluster = ~abbr)
## NOTE: 15,917 observations removed because of NA values (RHS: 15,917).
# View the summary of the regression results
summary(model_feols)
## OLS estimation, Dep. Var.: sale_amount
## Observations: 2,233,340
## Fixed-effects: fips_county_code: 468, year: 12
## Standard-errors: Clustered (abbr)
## Estimate Std. Error t value Pr(>|t|)
## land_square_footage 0.296597 0.099104 2.992801 0.0061435 **
## bedrooms_all_buildings -14517.253156 6853.935549 -2.118090 0.0442839 *
## number_of_bathrooms 174289.500895 53157.694170 3.278726 0.0030619 **
## AC_presence -33510.463317 14918.901923 -2.246175 0.0337724 *
## stories_number 11498.650660 11128.646627 1.033248 0.3113803
## GDP_growth_lag 43328.212458 81245.945640 0.533297 0.5985377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 2,098,368.3 Adj. R2: 0.019654
## Within R2: 0.003467
# Load texreg for creating a LaTeX-style formatted table
library(texreg)
## Version: 1.39.4
## Date: 2024-07-23
## Author: Philip Leifeld (University of Manchester)
##
## Consider submitting praise using the praise or praise_interactive functions.
## Please cite the JSS article in your publications -- see citation("texreg").
##
## Attaching package: 'texreg'
## The following object is masked from 'package:tidyr':
##
## extract
# Create a nicely formatted table using texreg
screenreg(model_feols, digits = 3)
##
## =============================================
## Model 1
## ---------------------------------------------
## land_square_footage 0.297 **
## (0.099)
## bedrooms_all_buildings -14517.253 *
## (6853.936)
## number_of_bathrooms 174289.501 **
## (53157.694)
## AC_presence -33510.463 *
## (14918.902)
## stories_number 11498.651
## (11128.647)
## GDP_growth_lag 43328.212
## (81245.946)
## ---------------------------------------------
## Num. obs. 2233340
## Num. groups: fips_county_code 468
## Num. groups: year 12
## R^2 (full model) 0.020
## R^2 (proj model) 0.003
## Adj. R^2 (full model) 0.020
## Adj. R^2 (proj model) 0.003
## =============================================
## *** p < 0.001; ** p < 0.01; * p < 0.05