# Load housing data
housingdata <- readRDS("C:/Users/mohan/Dropbox/Mohan_files/530/PS2/testdata20250121.rds")
I am interested in estimating the causal effect of economic policy uncertainty (D) on housing prices (Y) at the county level. Specifically, I examine whether increases in state-level EPU lead to changes in housing prices due to increased economic uncertainty, which may affects consumer confidence, investment decisions, and mortgage lending.
Treatment Variable (D): Economic Policy Uncertainty (EPU) https://www.policyuncertainty.com/state_epu.html Outcome Variable (Y): housing prices (sale amount)
motivation: Economic policy uncertainty (EPU) can affect housing prices in two opposing ways. On one hand, higher EPU typically decreases housing prices by reducing buyer confidence, tightening credit markets, discouraging real estate investment, and causing stock market spillovers that lower household wealth and demand for housing. On the other hand, in some cases, EPU may increase housing prices if real estate is seen as a safe investment during uncertainty, if sellers delay listings due to expected policy changes, or if government stimulus (such as lower interest rates) boosts demand. While the negative effect is generally more dominant, the net impact depends on the broader economic and policy context.
CAUSAL PATHS
D → Y
D <- X → Y (backdoor path with confounder X)
D → Z <- Y (backdoor path with collider Z)
LINKS
X → D: Broader macroeconomic conditions (e.g., inflation, GDP growth, financial crises) influence economic policy uncertainty by shaping government policies, market expectations, and regulatory actions.
X → Y: The same macroeconomic conditions directly affect housing prices through interest rates, employment levels, and overall market stability.
D → Y: Economic policy uncertainty still impacts housing prices via multiple channels like credit availability, investment decisions, consumer confidence, and government responses.
D → Z: Higher Economic Policy Uncertainty (D) can increase negative media coverage and pessimism about the housing market. Uncertainty leads to more news articles, expert opinions, and consumer worries about potential housing market downturns.
Y → Z: When Housing Prices (Y) fall or rise dramatically, it generates media attention and influences public sentiment about the market.
Macroeconomic Conditions (X) is a confounder because it affects both the independent variable (D) and the dependent variable (Y).
Housing Market Sentiment (Z) is a collider because it is influenced by both D (Economic Policy Uncertainty) and Y (Housing Prices).
\[ Y_{it} = \beta_0 + \beta_1 D_{it} + \beta_2 X_{it} + \epsilon_{it} \] To correctly estimate the causal effect of D on Y, we do not include Z in the regression.
I use Real Gross Domestic Product (GDP) to represent Macroeconomic Conditions (X). https://www.bea.gov/data/gdp/gdp-state?utm_source=chatgpt.com I use EPU_Composite to represent Economic Policy Uncertainty (D). I use sale amount to represent Housing Prices (Y).
I will do state-level regression.
#Y
housingdata <- housingdata %>%
mutate(
sale_date = as.Date(sale_date, format = "%Y%m%d"),
year = year(sale_date),
month = month(sale_date),
day = day(sale_date)
)
Y <- housingdata %>%
group_by(abbr,year, month) %>%
summarise(sum_sale_amount = sum(sale_amount, na.rm = TRUE), .groups = "drop")
#D
library(readxl)
library(tidyr)
library(dplyr)
file_path <- "C:/Users/mohan/Dropbox/Mohan_files/530/PS2/State_Policy_Uncertainty.xlsx"
df <- read_excel(file_path)
df_D <- df %>%
pivot_longer(
cols = starts_with("EPU_Composite"),
names_to = "abbr",
values_to = "EPU_Composite"
) %>%
mutate(
abbr = gsub("EPU_Composite", "", abbr)
) %>%
select(year, month, abbr, EPU_Composite) %>%
filter(year >= 2008 & year <= 2019)
#X
file_path <- "C:/Users/mohan/Dropbox/Mohan_files/530/PS2/Table.csv"
df <- read_csv(file_path, skip = 3)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 183 Columns: 71
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): GeoFips, GeoName, Description
## dbl (68): LineCode, 2008:Q1, 2008:Q2, 2008:Q3, 2008:Q4, 2009:Q1, 2009:Q2, 20...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Filter rows where LineCode == 1 (Real GDP)
df_filtered <- df %>%
filter(LineCode == 1)
df_filtered <- df_filtered %>%
select(GeoName, starts_with("2008:Q"), starts_with("2009:Q"),
starts_with("2010:Q"), starts_with("2011:Q"), starts_with("2012:Q"),
starts_with("2013:Q"), starts_with("2014:Q"), starts_with("2015:Q"),
starts_with("2016:Q"), starts_with("2017:Q"), starts_with("2018:Q"),
starts_with("2019:Q"))
df_long <- df_filtered %>%
pivot_longer(
cols = -GeoName,
names_to = "quarter",
values_to = "real_GDP"
)
df_long <- df_long %>%
mutate(
year = as.numeric(substr(quarter, 1, 4)), # Extract year
start_month = case_when(
grepl("Q1", quarter) ~ 1,
grepl("Q2", quarter) ~ 4,
grepl("Q3", quarter) ~ 7,
grepl("Q4", quarter) ~ 10
)
) %>%
select(GeoName, year, start_month, real_GDP)
df_long <- df_long %>%
group_by(GeoName, year, real_GDP) %>%
expand_grid(month = 0:2) %>% # Expanding each quarter to three months
mutate(month = start_month + month) %>%
select(GeoName, year, month, real_GDP) %>%
ungroup()
state_abbr <- c(
"Alabama" = "AL", "Alaska" = "AK", "Arizona" = "AZ", "Arkansas" = "AR", "California" = "CA",
"Colorado" = "CO", "Connecticut" = "CT", "Delaware" = "DE", "District of Columbia" = "DC",
"Florida" = "FL", "Georgia" = "GA", "Hawaii" = "HI", "Idaho" = "ID", "Illinois" = "IL",
"Indiana" = "IN", "Iowa" = "IA", "Kansas" = "KS", "Kentucky" = "KY", "Louisiana" = "LA",
"Maine" = "ME", "Maryland" = "MD", "Massachusetts" = "MA", "Michigan" = "MI", "Minnesota" = "MN",
"Mississippi" = "MS", "Missouri" = "MO", "Montana" = "MT", "Nebraska" = "NE", "Nevada" = "NV",
"New Hampshire" = "NH", "New Jersey" = "NJ", "New Mexico" = "NM", "New York" = "NY", "North Carolina" = "NC",
"North Dakota" = "ND", "Ohio" = "OH", "Oklahoma" = "OK", "Oregon" = "OR", "Pennsylvania" = "PA",
"Rhode Island" = "RI", "South Carolina" = "SC", "South Dakota" = "SD", "Tennessee" = "TN", "Texas" = "TX",
"Utah" = "UT", "Vermont" = "VT", "Virginia" = "VA", "Washington" = "WA", "West Virginia" = "WV",
"Wisconsin" = "WI", "Wyoming" = "WY"
)
df_long <- df_long %>%
mutate(abbr = state_abbr[GeoName]) %>%
select(abbr, year, month, real_GDP)
df_long <- df_long %>%
filter(!is.na(abbr))
X <- df_long
merge_YDX1 <- merge(Y, df_D, by = c("abbr", "year", "month"), all.x = TRUE)
merge_YDX <- merge(merge_YDX1, X, by = c("abbr", "year", "month"), all.x = TRUE)
head(merge_YDX)
## abbr year month sum_sale_amount EPU_Composite real_GDP
## 1 AL 2008 1 10725984 361.9834 202569.5
## 2 AL 2008 10 8550649 418.2671 195912.7
## 3 AL 2008 11 9968059 431.8193 195912.7
## 4 AL 2008 12 5173831 302.5133 195912.7
## 5 AL 2008 2 4966820 167.7441 202569.5
## 6 AL 2008 3 13539832 149.0655 202569.5
state_epu_means <- merge_YDX %>%
group_by(abbr, year) %>%
summarise(mean_EPU = mean(EPU_Composite, na.rm = TRUE), .groups = "drop")
head(state_epu_means)
## # A tibble: 6 × 3
## abbr year mean_EPU
## <chr> <int> <dbl>
## 1 AL 2008 240.
## 2 AL 2009 224.
## 3 AL 2010 195.
## 4 AL 2011 205.
## 5 AL 2012 162.
## 6 AL 2013 130.
state_name_mapping <- data.frame(
abbr = state.abb,
state = state.name
)
state_epu_means <- state_epu_means %>%
left_join(state_name_mapping, by = "abbr") %>%
rename(state = state) %>%
select(state, year, mean_EPU) # Ensure correct column names
head(state_epu_means)
## # A tibble: 6 × 3
## state year mean_EPU
## <chr> <int> <dbl>
## 1 Alabama 2008 240.
## 2 Alabama 2009 224.
## 3 Alabama 2010 195.
## 4 Alabama 2011 205.
## 5 Alabama 2012 162.
## 6 Alabama 2013 130.
plot_epu_map <- function(year) {
data_year <- state_epu_means %>% filter(year == !!year)
if (nrow(data_year) == 0) {
warning(paste("No data available for year", year))
return(NULL)
}
# Plot the map
plot <- plot_usmap(data = data_year, values = "mean_EPU", regions = "states") +
scale_fill_continuous(low = "lightblue", high = "darkblue", name = "Mean EPU") +
theme_minimal() +
labs(title = paste("State-Level Mean EPU in", year))
return(plot)
}
# Generate maps for all years (2008-2019)
for (yr in unique(state_epu_means$year)) {
print(plot_epu_map(yr))
}
# iii
library(dplyr)
library(usmap)
library(ggplot2)
state_sale_means <- merge_YDX %>%
group_by(abbr, year) %>%
summarise(mean_sale = mean(sum_sale_amount, na.rm = TRUE), .groups = "drop")
head(state_sale_means)
## # A tibble: 6 × 3
## abbr year mean_sale
## <chr> <int> <dbl>
## 1 AL 2008 11970417.
## 2 AL 2009 9779739.
## 3 AL 2010 9912422.
## 4 AL 2011 6845122
## 5 AL 2012 6695630.
## 6 AL 2013 8509559.
state_name_mapping <- data.frame(
abbr = state.abb,
state = state.name
)
state_name_mapping <- rbind(state_name_mapping, data.frame(abbr = "DC", state = "District of Columbia"))
# Merge full state names into state_sale_means
state_sale_means <- state_sale_means %>%
left_join(state_name_mapping, by = "abbr") %>%
rename(state = state) %>%
select(state, year, mean_sale) # Ensure correct column names
head(state_sale_means)
## # A tibble: 6 × 3
## state year mean_sale
## <chr> <int> <dbl>
## 1 Alabama 2008 11970417.
## 2 Alabama 2009 9779739.
## 3 Alabama 2010 9912422.
## 4 Alabama 2011 6845122
## 5 Alabama 2012 6695630.
## 6 Alabama 2013 8509559.
plot_sale_map <- function(year) {
data_year <- state_sale_means %>% filter(year == !!year)
if (nrow(data_year) == 0) {
warning(paste("No data available for year", year))
return(NULL)
}
# Plot the map
plot <- plot_usmap(data = data_year, values = "mean_sale", regions = "states") +
scale_fill_continuous(low = "lightblue", high = "darkblue", name = "Mean Sales") +
theme_minimal() +
labs(title = paste("State-Level Mean Sales in", year))
return(plot)
}
# Generate maps for all years (2008-2019)
for (yr in unique(state_sale_means$year)) {
print(plot_sale_map(yr))
}
# iv
library(stargazer)
library(xtable)
## Warning: 程序包'xtable'是用R版本4.4.1 来建造的
library(kableExtra)
library(dplyr)
summary_data <- merge_YDX %>%
select(sum_sale_amount, real_GDP, EPU_Composite)
summary(summary_data)
## sum_sale_amount real_GDP EPU_Composite
## Min. :8.000e+03 Min. : 87746 Min. : 16.07
## 1st Qu.:5.943e+06 1st Qu.: 195019 1st Qu.:100.64
## Median :4.055e+07 Median : 353956 Median :137.34
## Mean :2.950e+08 Mean : 556766 Mean :153.75
## 3rd Qu.:1.851e+08 3rd Qu.: 652759 3rd Qu.:192.09
## Max. :1.357e+10 Max. :3024676 Max. :852.72
reg_model <- lm(sum_sale_amount ~ EPU_Composite + real_GDP, data = merge_YDX)
summary(reg_model)
##
## Call:
## lm(formula = sum_sale_amount ~ EPU_Composite + real_GDP, data = merge_YDX)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.454e+09 -2.102e+08 5.137e+07 1.571e+08 1.322e+10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.465e+08 2.620e+07 -9.409 <2e-16 ***
## EPU_Composite -7.222e+04 1.414e+05 -0.511 0.61
## real_GDP 9.926e+02 1.958e+01 50.688 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 638400000 on 3516 degrees of freedom
## Multiple R-squared: 0.4225, Adjusted R-squared: 0.4222
## F-statistic: 1286 on 2 and 3516 DF, p-value: < 2.2e-16