# Load packages
library(pacman)
## Warning: package 'pacman' was built under R version 4.4.2
p_load(tidyverse, lubridate, usmap, gridExtra, stringr, readxl, plot3D,
cowplot, reshape2, scales, broom, data.table, ggplot2, stargazer,
foreign, ggthemes, ggforce, ggridges, latex2exp, viridis, extrafont,
kableExtra, snakecase, janitor)
# Load housing data
housingdata <- readRDS("testdata20250121.RDS")

P1

i.

Research question: Does GDP growth affect housing prices? Treatment variable is real county-level GDP growth in percentage, and the outcome variable is housing sale price.

Understanding the link between economic growth and housing prices is critical for policymakers, investors, and homeowners. Economic expansion can increase income levels, employment, and demand for housing, driving up prices. However, supply constraints, interest rates, and migration patterns may moderate or amplify these effects.Housing markets typically respond to local economic conditions such as employment, local industries, and neighborhood-level economic factors, and county GDP capture more relevant variations in economic activity that impact housing prices.

ii.

library(ggdag)
## Warning: package 'ggdag' was built under R version 4.4.2
## 
## Attaching package: 'ggdag'
## The following object is masked from 'package:stats':
## 
##     filter
library(dagitty)
## Warning: package 'dagitty' was built under R version 4.4.2
# Define the DAG
dag <- dagitty("
dag {
  county_GDP_Growth -> Housing_Prices
  county_GDP_Growth -> Income -> Housing_Prices
  Population_Growth -> county_GDP_Growth
  Population_Growth -> Housing_Prices
  Interest_Rates -> Housing_Prices
  Labor_Market_Conditions -> county_GDP_Growth
  Labor_Market_Conditions -> Housing_Prices
  Housing_Supply -> Housing_Prices
  Housing_Prices -> county_GDP_Growth  
}")

ggdag(dag, layout = "tree", text = FALSE) +  # Use a layout algorithm
  theme_minimal() +
  geom_dag_text(aes(label = name), size = 2, color = "white", nudge_x = 0, nudge_y = 0) +
  ggtitle("Causal DAG for State GDP Growth and Housing Prices") +
  theme(plot.title = element_text(size = 10))

Population, interest rates, and labor supply are confounders since they can influence both GDP growth and housing prices. Higher GDP can be linked with increased income, which affects housing demand and prices.At the same time, reverse causality is a concern because while higher GDP can drive higher housing prices, high housing sales also drive GDP.

Identification:

sale_amount ~ land_square_footage + bedrooms_all_buildings + number_of_bathrooms + AC_presence + stories_number + lagged_county_GDP_growth | fips_county_code+year

P2

i.

housingdata$sale_date <- as.character(housingdata$sale_date)

# Extract year, month, and day
housingdata$year <- substr(housingdata$sale_date, 1, 4)
housingdata$month <- substr(housingdata$sale_date, 5, 6)
housingdata$day <- substr(housingdata$sale_date, 7, 8)

housing_dates <- housingdata %>% select(c("sale_date", "year", "month", "day"))

head(housing_dates, 10)
##     sale_date   year  month    day
##        <char> <char> <char> <char>
##  1:  20150129   2015     01     29
##  2:  20090904   2009     09     04
##  3:  20130228   2013     02     28
##  4:  20080825   2008     08     25
##  5:  20140225   2014     02     25
##  6:  20180111   2018     01     11
##  7:  20150408   2015     04     08
##  8:  20150623   2015     06     23
##  9:  20090528   2009     05     28
## 10:  20120131   2012     01     31
housingdata$year <- as.numeric(housingdata$year)
housingdata$year_built <- as.numeric(housingdata$year_built)

summary(housingdata$year)      
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2008    2012    2015    2015    2017    2019
summary(housingdata$year_built) 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1700    1965    1986    1981    2001    2019
quantile(housingdata$year_built, probs = seq(0, 1, 0.25)) 
##   0%  25%  50%  75% 100% 
## 1700 1965 1986 2001 2019
quantile(housingdata$year, probs = seq(0, 1, 0.25)) 
##   0%  25%  50%  75% 100% 
## 2008 2012 2015 2017 2019
quantiles <- quantile(housingdata$year_built, probs = c(0, 0.25, 0.5, 0.75, 1), na.rm = TRUE)

housingdata$year_built_grp <- cut(housingdata$year_built, 
                         breaks = quantiles, 
                         labels = c("1", "2", "3", "4"), 
                         include.lowest = TRUE)

table(housingdata$year_built_grp)
## 
##      1      2      3      4 
## 588505 622996 565824 573223
remove_top_1_percent <- function(x) {
  threshold <- quantile(x, probs = 0.99, na.rm = TRUE)  # Compute 99th percentile
  x[x >= threshold] <- NA  # Replace outliers with NA
  return(x)
}

# Apply the function to the specified variables
cleanhousingdata <- housingdata %>%
  mutate(across(c(bedrooms_all_buildings, number_of_bathrooms, number_of_fireplaces, land_square_footage), remove_top_1_percent)) %>%
  drop_na()  # Remove rows with NA values

summary(cleanhousingdata)
##   sale_amount         sale_date               x                 y        
##  Min.   :1.000e+02   Length:2249257     Min.   :-124.57   Min.   :26.32  
##  1st Qu.:1.770e+05   Class :character   1st Qu.:-118.32   1st Qu.:33.51  
##  Median :2.800e+05   Mode  :character   Median :-104.77   Median :34.35  
##  Mean   :4.142e+05                      Mean   :-102.84   Mean   :35.85  
##  3rd Qu.:4.550e+05                      3rd Qu.: -84.55   3rd Qu.:38.33  
##  Max.   :2.730e+09                      Max.   : -73.91   Max.   :48.97  
##  fips_county_code   year_built   bedrooms_all_buildings number_of_bathrooms
##  Min.   : 1101    Min.   :1700   Min.   :1.000          Min.   :0.010      
##  1st Qu.: 6081    1st Qu.:1965   1st Qu.:3.000          1st Qu.:2.000      
##  Median :12091    Median :1985   Median :3.000          Median :2.000      
##  Mean   :20499    Mean   :1981   Mean   :3.366          Mean   :2.402      
##  3rd Qu.:40101    3rd Qu.:2001   3rd Qu.:4.000          3rd Qu.:3.000      
##  Max.   :53071    Max.   :2019   Max.   :5.000          Max.   :5.400      
##  number_of_fireplaces stories_number    land_square_footage     abbr          
##  Min.   :1.000        Min.   :  0.500   Min.   :     1      Length:2249257    
##  1st Qu.:1.000        1st Qu.:  1.000   1st Qu.:  6764      Class :character  
##  Median :1.000        Median :  1.000   Median :  9309      Mode  :character  
##  Mean   :1.084        Mean   :  1.361   Mean   : 20019                        
##  3rd Qu.:1.000        3rd Qu.:  2.000   3rd Qu.: 15899                        
##  Max.   :2.000        Max.   :175.000   Max.   :389760                        
##   AC_presence          year         month               day           
##  Min.   :0.0000   Min.   :2008   Length:2249257     Length:2249257    
##  1st Qu.:0.0000   1st Qu.:2012   Class :character   Class :character  
##  Median :1.0000   Median :2015   Mode  :character   Mode  :character  
##  Mean   :0.6097   Mean   :2015                                        
##  3rd Qu.:1.0000   3rd Qu.:2017                                        
##  Max.   :1.0000   Max.   :2019                                        
##  year_built_grp
##  1:568753      
##  2:602880      
##  3:540377      
##  4:537247      
##                
## 
hpi <- read_excel("hpi_po_us_and_census.xls")

cleanhpi <- hpi %>%
filter(division!= "USA")

start_year <- min(housingdata$year, na.rm = TRUE)

cleanhpi <- cleanhpi %>%
  group_by(division) %>%
  mutate(base_index = index_po_seasonally_adjusted[year == start_year & qtr == 1],
         hpi = (index_po_seasonally_adjusted / base_index) * 100) %>%
  ungroup()

head(cleanhpi)
## # A tibble: 6 × 7
##   division  year   qtr index_po_not_seasonal…¹ index_po_seasonally_…² base_index
##   <chr>    <dbl> <dbl>                   <dbl>                  <dbl>      <dbl>
## 1 DV_ENC    1991     1                    100                    100        180.
## 2 DV_ENC    1991     2                    101.                   101.       180.
## 3 DV_ENC    1991     3                    102.                   101.       180.
## 4 DV_ENC    1991     4                    103.                   102.       180.
## 5 DV_ENC    1992     1                    104.                   104.       180.
## 6 DV_ENC    1992     2                    105.                   105.       180.
## # ℹ abbreviated names: ¹​index_po_not_seasonally_adjusted,
## #   ²​index_po_seasonally_adjusted
## # ℹ 1 more variable: hpi <dbl>
cleanhousingdata <- cleanhousingdata %>%
 mutate(
  qtr = case_when(
    month %in% c("01", "02", "03") ~ 1,  # First quarter (January to March)
    month %in% c("04", "05", "06") ~ 2,  # Second quarter (April to June)
    month %in% c("07", "08", "09") ~ 3,  # Third quarter (July to September)
    month %in% c("10", "11", "12") ~ 4   # Fourth quarter (October to December)
  ),
    division = case_when(
      abbr %in% c("CT", 'ME', 'MA', 'NH', 'RO', 'VT') ~ "DV_NE",
      abbr %in% c("IN", 'IL', 'MI', 'OH', 'WI') ~ "DV_ENC",
      abbr %in% c("ND", 'SD', 'NE', 'KS', 'MN', 'IA', 'MO') ~ "DV_WNC",
      abbr %in% c("MT", 'ID', 'WY', 'NV', 'AZ', 'NM', 'CO','UT') ~ "DV_MT",
      abbr %in% c("WA", 'OR', 'CA', 'AK', 'HI') ~ "DV_PAC",
      abbr %in% c("NY", 'PA', 'NJ') ~ "DV_MA",
      abbr %in% c("WV", 'MD', 'DE', 'VA', 'NC', 'SC','GA','FL','DC') ~ "DV_SA",
      abbr %in% c("KY", 'TN', 'MS', 'AL', 'MN', 'IA', 'MO') ~ "DV_ESC",
      abbr %in% c("OK", 'TX', 'AR', 'LA') ~ "DV_WSC"
    )
  )
cleanhousingHPI <- cleanhousingdata %>% 
left_join(cleanhpi, by = c('division', 'year', 'qtr')) %>%
mutate(dlog_sale_amount = log(sale_amount * 100 / hpi))


head(cleanhousingHPI)
##    sale_amount sale_date         x        y fips_county_code year_built
##          <num>    <char>     <num>    <num>            <int>      <num>
## 1:      104000  20150129 -86.19826 32.41438             1101       2009
## 2:      180501  20090904 -86.20467 32.32170             1101       1986
## 3:      140000  20130228 -86.13278 32.34974             1101       2006
## 4:      245916  20080825 -86.17190 32.40677             1101       2000
## 5:       60000  20140225 -86.27811 32.39384             1101       1940
## 6:      108150  20180111 -86.12927 32.34984             1101       2006
##    bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
##                     <int>               <num>                <int>
## 1:                      3                 2.0                    1
## 2:                      3                 2.0                    1
## 3:                      3                 2.0                    1
## 4:                      3                 2.0                    1
## 5:                      2                 1.5                    1
## 6:                      3                 2.0                    1
##    stories_number land_square_footage   abbr AC_presence  year  month    day
##             <num>               <int> <char>       <num> <num> <char> <char>
## 1:              1               12855     AL           0  2015     01     29
## 2:              1               16583     AL           0  2009     09     04
## 3:              1                6216     AL           0  2013     02     28
## 4:              1                8372     AL           0  2008     08     25
## 5:              1               21928     AL           0  2014     02     25
## 6:              1                4713     AL           0  2018     01     11
##    year_built_grp   qtr division index_po_not_seasonally_adjusted
##            <fctr> <num>   <char>                            <num>
## 1:              4     1   DV_ESC                           193.48
## 2:              2     3   DV_ESC                           187.69
## 3:              4     1   DV_ESC                           181.11
## 4:              3     3   DV_ESC                           193.03
## 5:              1     1   DV_ESC                           186.10
## 6:              4     1   DV_ESC                           224.79
##    index_po_seasonally_adjusted base_index       hpi dlog_sale_amount
##                           <num>      <num>     <num>            <num>
## 1:                       195.20     195.53  99.83123         11.55384
## 2:                       186.02     195.53  95.13630         12.15335
## 3:                       183.18     195.53  93.68383         11.91464
## 4:                       191.54     195.53  97.95939         12.43336
## 5:                       187.99     195.53  96.14381         11.04142
## 6:                       226.27     195.53 115.72137         11.44526
county_GDP <- read.csv("CAGDP9__ALL_AREAS_2001_2023.csv") # downloaded from https://www.bea.gov/data/gdp/gdp-county-metro-and-other-areas
head(county_GDP)
##   GeoFIPS         GeoName Region TableName LineCode IndustryClassification
## 1   00000 United States *     NA    CAGDP9        1                    ...
## 2   00000 United States *     NA    CAGDP9        2                    ...
## 3   00000 United States *     NA    CAGDP9        3                     11
## 4   00000 United States *     NA    CAGDP9        6                     21
## 5   00000 United States *     NA    CAGDP9       10                     22
## 6   00000 United States *     NA    CAGDP9       11                     23
##                                        Description
## 1                              All industry total 
## 2                              Private industries 
## 3      Agriculture, forestry, fishing and hunting 
## 4   Mining, quarrying, and oil and gas extraction 
## 5                                       Utilities 
## 6                                    Construction 
##                                Unit       X2001       X2002       X2003
## 1 Thousands of chained 2017 dollars 14230726000 14472712000 14877312000
## 2 Thousands of chained 2017 dollars 12041461000 12245105000 12616710000
## 3 Thousands of chained 2017 dollars   127095000   131323000   141451000
## 4 Thousands of chained 2017 dollars   175810000   181209000   159545000
## 5 Thousands of chained 2017 dollars   233878000   239465000   236504000
## 6 Thousands of chained 2017 dollars   909395000   878484000   896733000
##         X2004       X2005       X2006       X2007       X2008       X2009
## 1 15449757000 15987957000 16433148000 16762445000 16781485000 16349110000
## 2 13159877000 13667016000 14095064000 14398387000 14372138000 13941688000
## 3   153419000   159554000   161687000   144468000   144066000   159975000
## 4   159750000   161563000   186486000   201455000   194981000   224143000
## 5   251198000   239788000   254326000   260531000   276326000   256992000
## 6   930317000   934014000   913050000   888205000   801629000   686678000
##         X2010       X2011       X2012       X2013       X2014       X2015
## 1 16789750000 17052410000 17442759000 17812167000 18261714000 18799622000
## 2 14366648000 14634972000 15045864000 15417297000 15860786000 16394174000
## 3   155952000   150224000   145700000   160433000   158986000   171593000
## 4   199079000   207047000   232219000   241753000   267428000   285933000
## 5   290760000   301868000   306229000   304002000   292730000   296934000
## 6   656758000   648944000   670176000   697567000   721220000   764407000
##         X2016       X2017       X2018       X2019       X2020       X2021
## 1 19141672000 19612102000 20193896000 20715671000 20267585000 21494798000
## 2 16710707000 17156255000 17711775000 18216263000 17789792000 19004921000
## 3   181537000   176840000   184105000   172480000   174902000   185699000
## 4   261614000   267302000   277013000   312457000   304491000   277667000
## 5   313557000   313711000   309269000   312519000   331357000   321279000
## 6   804359000   840220000   863755000   882288000   862678000   887608000
##         X2022       X2023
## 1 22034828000 22671096000
## 2 19506244000 20092919000
## 3   185649000   192667000
## 4   251747000   336302000
## 5   326953000   342977000
## 6   839967000   821062000
colnames(county_GDP) <- gsub("^X", "", colnames(county_GDP))
unique(county_GDP$Description)
##  [1] "All industry total "                                                         
##  [2] " Private industries "                                                        
##  [3] "  Agriculture, forestry, fishing and hunting "                               
##  [4] "  Mining, quarrying, and oil and gas extraction "                            
##  [5] "  Utilities "                                                                
##  [6] "  Construction "                                                             
##  [7] "  Manufacturing "                                                            
##  [8] "   Durable goods manufacturing "                                             
##  [9] "   Nondurable goods manufacturing "                                          
## [10] "  Wholesale trade "                                                          
## [11] "  Retail trade "                                                             
## [12] "  Transportation and warehousing "                                           
## [13] "  Information "                                                              
## [14] "  Finance, insurance, real estate, rental, and leasing "                     
## [15] "   Finance and insurance "                                                   
## [16] "   Real estate and rental and leasing "                                      
## [17] "  Professional and business services "                                       
## [18] "   Professional, scientific, and technical services "                        
## [19] "   Management of companies and enterprises "                                 
## [20] "   Administrative and support and waste management and remediation services "
## [21] "  Educational services, health care, and social assistance "                 
## [22] "   Educational services "                                                    
## [23] "   Health care and social assistance "                                       
## [24] "  Arts, entertainment, recreation, accommodation, and food services "        
## [25] "   Arts, entertainment, and recreation "                                     
## [26] "   Accommodation and food services "                                         
## [27] "  Other services (except government and government enterprises) "            
## [28] "Government and government enterprises "                                      
## [29] "Natural resources and mining "                                               
## [30] "Trade "                                                                      
## [31] "Transportation and utilities "                                               
## [32] "Manufacturing and information "                                              
## [33] "Private goods-producing industries 2/"                                       
## [34] "Private services-providing industries 3/"                                    
## [35] ""
county_GDP_clean <- county_GDP %>%
  select(-c("GeoName", "Region", "TableName", "LineCode", "IndustryClassification", "Unit")) %>%
  filter(Description == "All industry total ") %>%
  select(-Description) %>%
  mutate(across(starts_with("2"), ~ as.numeric(.)))
## Warning: There were 23 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(starts_with("2"), ~as.numeric(.))`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 22 remaining warnings.
# Ensure only the year columns are selected for GDP growth calculation
year_columns <- grep("^2", colnames(county_GDP_clean), value = TRUE)  # Columns starting with "2" (e.g., 2013, 2014)

# Identify the county code column 
county_code_column <- "GeoFIPS"  # Adjust if your county code column has a different name

# Create a new dataframe to store the growth values
GDP_growth <- county_GDP_clean[, c(county_code_column, year_columns)]  # Keep county code and year columns

# Loop through the year columns (starting from the second year)
for (i in 2:length(year_columns)) {
  GDP_growth[, paste0("Growth_", year_columns[i])] <- 
    (county_GDP_clean[, year_columns[i]] - county_GDP_clean[, year_columns[i - 1]]) / 
    county_GDP_clean[, year_columns[i - 1]]
}

GDP_growth_only <- GDP_growth[, c(county_code_column, grep("^Growth", colnames(GDP_growth), value = TRUE))]
head(GDP_growth_only)
##   GeoFIPS Growth_2002 Growth_2003 Growth_2004  Growth_2005  Growth_2006
## 1   00000  0.01700447  0.02795606  0.03847772 3.483550e-02  0.027845396
## 2   01000  0.02363979  0.03253245  0.06669167 3.861089e-02  0.013573748
## 3   01001  0.03735550  0.02900563  0.15480200 1.058026e-02  0.067085703
## 4   01003  0.05108749  0.06140797  0.10047300 1.098696e-01  0.028251361
## 5   01005 -0.01714853  0.02810943  0.08378639 6.986655e-05 -0.023382165
## 6   01007  0.01316053  0.04838200  0.04589706 1.718075e-03 -0.006227022
##   Growth_2007   Growth_2008 Growth_2009 Growth_2010  Growth_2011 Growth_2012
## 1  0.02003858  1.135872e-03 -0.02576500  0.02695193  0.015644069  0.02289113
## 2  0.01051212 -5.237230e-03 -0.03371375  0.02710156  0.009615559  0.01143487
## 3  0.01693207 -8.795678e-02  0.02515675  0.07063659  0.060254491  0.15574675
## 4  0.02931997 -3.366169e-02 -0.04876943  0.01452880 -0.013669136  0.01146138
## 5 -0.03267907 -5.981153e-02 -0.02023018  0.01631280 -0.059960213 -0.03387160
## 6  0.06630980  4.437959e-05 -0.02201652  0.08199828  0.017552213 -0.00773134
##    Growth_2013  Growth_2014 Growth_2015 Growth_2016 Growth_2017 Growth_2018
## 1  0.021178301  0.025238198  0.02945550  0.01819451  0.02457622  0.02966505
## 2  0.016140350 -0.004314198  0.01397713  0.01872452  0.01762958  0.01935825
## 3 -0.062798242  0.007175474  0.08348704  0.04554809 -0.04533391  0.01417031
## 4  0.052683661  0.022256467  0.04682811  0.05262911  0.03787070  0.05098802
## 5  0.125377005 -0.049217811 -0.02324537 -0.01319369 -0.01166267  0.01616649
## 6  0.001707844 -0.009117362 -0.03032869  0.01918625  0.01308887 -0.02360224
##   Growth_2019  Growth_2020  Growth_2021  Growth_2022 Growth_2023
## 1  0.02583825 -0.021630291  0.060550529  0.025123753  0.02887556
## 2  0.02021662 -0.013246261  0.051454996  0.020664475  0.02849730
## 3 -0.02658355  0.004002264 -0.006283991  0.092021836  0.02645812
## 4  0.04976492 -0.004406941  0.073623201  0.050441086  0.03174627
## 5 -0.01548427 -0.039285386  0.028927093 -0.011256687 -0.03874069
## 6  0.11114692  0.044329930 -0.001772834 -0.005221433  0.01948757
colnames(GDP_growth_only) <- gsub("^Growth_", "", colnames(GDP_growth_only))
head(GDP_growth_only)
##   GeoFIPS        2002       2003       2004         2005         2006
## 1   00000  0.01700447 0.02795606 0.03847772 3.483550e-02  0.027845396
## 2   01000  0.02363979 0.03253245 0.06669167 3.861089e-02  0.013573748
## 3   01001  0.03735550 0.02900563 0.15480200 1.058026e-02  0.067085703
## 4   01003  0.05108749 0.06140797 0.10047300 1.098696e-01  0.028251361
## 5   01005 -0.01714853 0.02810943 0.08378639 6.986655e-05 -0.023382165
## 6   01007  0.01316053 0.04838200 0.04589706 1.718075e-03 -0.006227022
##          2007          2008        2009       2010         2011        2012
## 1  0.02003858  1.135872e-03 -0.02576500 0.02695193  0.015644069  0.02289113
## 2  0.01051212 -5.237230e-03 -0.03371375 0.02710156  0.009615559  0.01143487
## 3  0.01693207 -8.795678e-02  0.02515675 0.07063659  0.060254491  0.15574675
## 4  0.02931997 -3.366169e-02 -0.04876943 0.01452880 -0.013669136  0.01146138
## 5 -0.03267907 -5.981153e-02 -0.02023018 0.01631280 -0.059960213 -0.03387160
## 6  0.06630980  4.437959e-05 -0.02201652 0.08199828  0.017552213 -0.00773134
##           2013         2014        2015        2016        2017        2018
## 1  0.021178301  0.025238198  0.02945550  0.01819451  0.02457622  0.02966505
## 2  0.016140350 -0.004314198  0.01397713  0.01872452  0.01762958  0.01935825
## 3 -0.062798242  0.007175474  0.08348704  0.04554809 -0.04533391  0.01417031
## 4  0.052683661  0.022256467  0.04682811  0.05262911  0.03787070  0.05098802
## 5  0.125377005 -0.049217811 -0.02324537 -0.01319369 -0.01166267  0.01616649
## 6  0.001707844 -0.009117362 -0.03032869  0.01918625  0.01308887 -0.02360224
##          2019         2020         2021         2022        2023
## 1  0.02583825 -0.021630291  0.060550529  0.025123753  0.02887556
## 2  0.02021662 -0.013246261  0.051454996  0.020664475  0.02849730
## 3 -0.02658355  0.004002264 -0.006283991  0.092021836  0.02645812
## 4  0.04976492 -0.004406941  0.073623201  0.050441086  0.03174627
## 5 -0.01548427 -0.039285386  0.028927093 -0.011256687 -0.03874069
## 6  0.11114692  0.044329930 -0.001772834 -0.005221433  0.01948757
GDP_growth_final <- GDP_growth_only %>%
  pivot_longer(cols = -GeoFIPS,
               names_to = "year",
               values_to = "GDP_GrRt") %>%
  mutate(year = as.numeric(year),
         GeoFIPS = as.numeric(GeoFIPS))
head(cleanhousingHPI)
##    sale_amount sale_date         x        y fips_county_code year_built
##          <num>    <char>     <num>    <num>            <int>      <num>
## 1:      104000  20150129 -86.19826 32.41438             1101       2009
## 2:      180501  20090904 -86.20467 32.32170             1101       1986
## 3:      140000  20130228 -86.13278 32.34974             1101       2006
## 4:      245916  20080825 -86.17190 32.40677             1101       2000
## 5:       60000  20140225 -86.27811 32.39384             1101       1940
## 6:      108150  20180111 -86.12927 32.34984             1101       2006
##    bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
##                     <int>               <num>                <int>
## 1:                      3                 2.0                    1
## 2:                      3                 2.0                    1
## 3:                      3                 2.0                    1
## 4:                      3                 2.0                    1
## 5:                      2                 1.5                    1
## 6:                      3                 2.0                    1
##    stories_number land_square_footage   abbr AC_presence  year  month    day
##             <num>               <int> <char>       <num> <num> <char> <char>
## 1:              1               12855     AL           0  2015     01     29
## 2:              1               16583     AL           0  2009     09     04
## 3:              1                6216     AL           0  2013     02     28
## 4:              1                8372     AL           0  2008     08     25
## 5:              1               21928     AL           0  2014     02     25
## 6:              1                4713     AL           0  2018     01     11
##    year_built_grp   qtr division index_po_not_seasonally_adjusted
##            <fctr> <num>   <char>                            <num>
## 1:              4     1   DV_ESC                           193.48
## 2:              2     3   DV_ESC                           187.69
## 3:              4     1   DV_ESC                           181.11
## 4:              3     3   DV_ESC                           193.03
## 5:              1     1   DV_ESC                           186.10
## 6:              4     1   DV_ESC                           224.79
##    index_po_seasonally_adjusted base_index       hpi dlog_sale_amount
##                           <num>      <num>     <num>            <num>
## 1:                       195.20     195.53  99.83123         11.55384
## 2:                       186.02     195.53  95.13630         12.15335
## 3:                       183.18     195.53  93.68383         11.91464
## 4:                       191.54     195.53  97.95939         12.43336
## 5:                       187.99     195.53  96.14381         11.04142
## 6:                       226.27     195.53 115.72137         11.44526
Housing_GDP <- cleanhousingHPI %>%
  mutate(GeoFIPS = fips_county_code) %>%
  left_join(GDP_growth_final, by = c("GeoFIPS", "year"))

ii.

head(Housing_GDP)
##    sale_amount sale_date         x        y fips_county_code year_built
##          <num>    <char>     <num>    <num>            <int>      <num>
## 1:      104000  20150129 -86.19826 32.41438             1101       2009
## 2:      180501  20090904 -86.20467 32.32170             1101       1986
## 3:      140000  20130228 -86.13278 32.34974             1101       2006
## 4:      245916  20080825 -86.17190 32.40677             1101       2000
## 5:       60000  20140225 -86.27811 32.39384             1101       1940
## 6:      108150  20180111 -86.12927 32.34984             1101       2006
##    bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
##                     <int>               <num>                <int>
## 1:                      3                 2.0                    1
## 2:                      3                 2.0                    1
## 3:                      3                 2.0                    1
## 4:                      3                 2.0                    1
## 5:                      2                 1.5                    1
## 6:                      3                 2.0                    1
##    stories_number land_square_footage   abbr AC_presence  year  month    day
##             <num>               <int> <char>       <num> <num> <char> <char>
## 1:              1               12855     AL           0  2015     01     29
## 2:              1               16583     AL           0  2009     09     04
## 3:              1                6216     AL           0  2013     02     28
## 4:              1                8372     AL           0  2008     08     25
## 5:              1               21928     AL           0  2014     02     25
## 6:              1                4713     AL           0  2018     01     11
##    year_built_grp   qtr division index_po_not_seasonally_adjusted
##            <fctr> <num>   <char>                            <num>
## 1:              4     1   DV_ESC                           193.48
## 2:              2     3   DV_ESC                           187.69
## 3:              4     1   DV_ESC                           181.11
## 4:              3     3   DV_ESC                           193.03
## 5:              1     1   DV_ESC                           186.10
## 6:              4     1   DV_ESC                           224.79
##    index_po_seasonally_adjusted base_index       hpi dlog_sale_amount GeoFIPS
##                           <num>      <num>     <num>            <num>   <num>
## 1:                       195.20     195.53  99.83123         11.55384    1101
## 2:                       186.02     195.53  95.13630         12.15335    1101
## 3:                       183.18     195.53  93.68383         11.91464    1101
## 4:                       191.54     195.53  97.95939         12.43336    1101
## 5:                       187.99     195.53  96.14381         11.04142    1101
## 6:                       226.27     195.53 115.72137         11.44526    1101
##        GDP_GrRt
##           <num>
## 1:  0.021559187
## 2: -0.045027726
## 3:  0.003560440
## 4: -0.010115864
## 5: -0.008824075
## 6: -0.002555574
state_year_means <- Housing_GDP %>%
  group_by(abbr, year) %>%
  summarize(state_GDP_GrRt = mean(GDP_GrRt, na.rm = TRUE)) %>%
  rename("state" = "abbr") %>%
  ungroup()
## `summarise()` has grouped output by 'abbr'. You can override using the
## `.groups` argument.
ggplot(state_year_means, aes(x = year, y = state_GDP_GrRt, group = state, color = state)) +
  geom_line() +
  theme_minimal() +
  labs(title = "Mean GDP Growth by State and Year",
       x = "Year",
       y = "State Mean GDP Growth",
       color = "State") +
  theme(legend.position = "none")  

plot_usmap(data = state_year_means %>% filter(year == 2013), 
           values = "state_GDP_GrRt", regions = "states") +
  scale_fill_continuous(name = "GDP Growth", low = "blue", high = "red") +
  labs(title = "State-Level GDP Growth (2020)") +
  theme(legend.position = "right")

#install.packages("patchwork")
library(patchwork)  # For arranging multiple plots
## Warning: package 'patchwork' was built under R version 4.4.2
## 
## Attaching package: 'patchwork'
## The following object is masked from 'package:cowplot':
## 
##     align_plots
plots <- lapply(unique(state_year_means$year), function(y) {
  plot_usmap(data = state_year_means %>% filter(year == y), values = "state_GDP_GrRt") +
    scale_fill_continuous(name = "GDP Growth", low = "blue", high = "red") +
    labs(title = paste("State-Level GDP Growth in", y))
})

# Arrange the maps side by side
wrap_plots(plots)

GDP growth rates have not been concentrating in specific states over the years.

iii.

state_year_sale <- Housing_GDP %>%
  group_by(abbr, year) %>%
  summarize(state_housing = mean(sale_amount, na.rm = TRUE)) %>%
  rename("state" = "abbr") %>%
  ungroup()
## `summarise()` has grouped output by 'abbr'. You can override using the
## `.groups` argument.
ggplot(state_year_sale, aes(x = year, y = state_housing, group = state, color = state)) +
  geom_line() +
  theme_minimal() +
  labs(title = "Mean housing sale amount by State and Year",
       x = "Year",
       y = "Mean housing sale amount",
       color = "State") +
  theme(legend.position = "none")  

#iiii.

# Create the lagged variable
Housing_GDP <- Housing_GDP %>%
  arrange(fips_county_code, year) %>%  
  group_by(fips_county_code) %>%  
  mutate(GDP_growth_lag = lag(GDP_GrRt, order_by = year)) %>%  
  ungroup()  
# Load the necessary packages
library(xtable)
## 
## Attaching package: 'xtable'
## The following objects are masked from 'package:ggdag':
## 
##     label, label<-
# Assuming your dataset is named `housing_data`
# Create summary statistics table
summary_stats <- summary(Housing_GDP[c("sale_amount", "land_square_footage", "bedrooms_all_buildings", 
                                        "number_of_bathrooms", "AC_presence", "stories_number", 
                                        "GDP_growth_lag")])

# Display the table
xtable(summary_stats, digits = 2)
## % latex table generated in R 4.4.1 by xtable 1.8-4 package
## % Wed Feb 19 11:07:50 2025
## \begin{table}[ht]
## \centering
## \begin{tabular}{rlllllll}
##   \hline
##  &  sale\_amount & land\_square\_footage & bedrooms\_all\_buildings & number\_of\_bathrooms &  AC\_presence & stories\_number & GDP\_growth\_lag \\ 
##   \hline
## X & Min.   :1.000e+02   & Min.   :     1   & Min.   :1.000   & Min.   :0.010   & Min.   :0.0000   & Min.   :  0.500   & Min.   :-0.579   \\ 
##   X.1 & 1st Qu.:1.770e+05   & 1st Qu.:  6764   & 1st Qu.:3.000   & 1st Qu.:2.000   & 1st Qu.:0.0000   & 1st Qu.:  1.000   & 1st Qu.: 0.012   \\ 
##   X.2 & Median :2.800e+05   & Median :  9309   & Median :3.000   & Median :2.000   & Median :1.0000   & Median :  1.000   & Median : 0.029   \\ 
##   X.3 & Mean   :4.142e+05   & Mean   : 20019   & Mean   :3.366   & Mean   :2.402   & Mean   :0.6097   & Mean   :  1.361   & Mean   : 0.028   \\ 
##   X.4 & 3rd Qu.:4.550e+05   & 3rd Qu.: 15899   & 3rd Qu.:4.000   & 3rd Qu.:3.000   & 3rd Qu.:1.0000   & 3rd Qu.:  2.000   & 3rd Qu.: 0.045   \\ 
##   X.5 & Max.   :2.730e+09   & Max.   :389760   & Max.   :5.000   & Max.   :5.400   & Max.   :1.0000   & Max.   :175.000   & Max.   : 1.560   \\ 
##   X.6 &  &  &  &  &  &  & NA's   :15917   \\ 
##    \hline
## \end{tabular}
## \end{table}
library(fixest)
## 
## Attaching package: 'fixest'
## The following object is masked from 'package:scales':
## 
##     pvalue
# Estimate the fixed-effects regression model
model_feols <- feols(sale_amount ~ land_square_footage + bedrooms_all_buildings + 
                     number_of_bathrooms + AC_presence + stories_number + 
                     GDP_growth_lag | fips_county_code + year, 
                     data = Housing_GDP,
                     cluster = ~abbr)
## NOTE: 15,917 observations removed because of NA values (RHS: 15,917).
# View the summary of the regression results
summary(model_feols)
## OLS estimation, Dep. Var.: sale_amount
## Observations: 2,233,340
## Fixed-effects: fips_county_code: 468,  year: 12
## Standard-errors: Clustered (abbr) 
##                             Estimate   Std. Error   t value  Pr(>|t|)    
## land_square_footage         0.296597     0.099104  2.992801 0.0061435 ** 
## bedrooms_all_buildings -14517.253156  6853.935549 -2.118090 0.0442839 *  
## number_of_bathrooms    174289.500895 53157.694170  3.278726 0.0030619 ** 
## AC_presence            -33510.463317 14918.901923 -2.246175 0.0337724 *  
## stories_number          11498.650660 11128.646627  1.033248 0.3113803    
## GDP_growth_lag          43328.212458 81245.945640  0.533297 0.5985377    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 2,098,368.3     Adj. R2: 0.019654
##                     Within R2: 0.003467
# Load texreg for creating a LaTeX-style formatted table
library(texreg)
## Version:  1.39.4
## Date:     2024-07-23
## Author:   Philip Leifeld (University of Manchester)
## 
## Consider submitting praise using the praise or praise_interactive functions.
## Please cite the JSS article in your publications -- see citation("texreg").
## 
## Attaching package: 'texreg'
## The following object is masked from 'package:tidyr':
## 
##     extract
# Create a nicely formatted table using texreg
screenreg(model_feols, digits = 3)
## 
## =============================================
##                                Model 1       
## ---------------------------------------------
## land_square_footage                  0.297 **
##                                     (0.099)  
## bedrooms_all_buildings          -14517.253 * 
##                                  (6853.936)  
## number_of_bathrooms             174289.501 **
##                                 (53157.694)  
## AC_presence                     -33510.463 * 
##                                 (14918.902)  
## stories_number                   11498.651   
##                                 (11128.647)  
## GDP_growth_lag                   43328.212   
##                                 (81245.946)  
## ---------------------------------------------
## Num. obs.                      2233340       
## Num. groups: fips_county_code      468       
## Num. groups: year                   12       
## R^2 (full model)                     0.020   
## R^2 (proj model)                     0.003   
## Adj. R^2 (full model)                0.020   
## Adj. R^2 (proj model)                0.003   
## =============================================
## *** p < 0.001; ** p < 0.01; * p < 0.05