── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Import
# Load the New Jersey home-sales data from the synced Google Drive folder.
df <- read.csv(
  file = "~/Library/CloudStorage/GoogleDrive-sharmaar@bc.edu/My Drive/Econometrics/hurricane_HomePrices/New Jersey Data Final.csv"
)

# Visual overview of the freshly imported data frame.
vis_dat(df)
Data Cleaning
# Remove the lagged sale-assessment column: it was incorrectly created.
df[["Lag_sale_assessment"]] <- NULL

# Frequency table of `residential` before deciding whether to keep it.
table(df[["residential"]]) # only 1
1
8300
# Remove `residential` from the data frame.
df[["residential"]] <- NULL

# Element-wise comparison of the two time indicators, tabulated.
table(df[["Time_minus_1"]] == df[["Time"]]) # should not be equal
=================================================================
Statistic N Mean St. Dev. Min Max
-----------------------------------------------------------------
Year 8,300 2,012.413 1.286 2,010 2,014
sale_price 8,300 244,653.100 338,970.600 0 5,175,000
Price_Range 8,300 0.633 0.482 0 1
sale_assessment 8,300 357,128.700 410,426.500 0 6,540,600
Sale_Range 8,300 0.940 0.238 0 1
Treated 8,300 0.428 0.338 0.050 0.730
Time 8,300 0.565 0.496 0 1
Interaction 8,300 0.235 0.328 0.000 0.730
Time_Placebo 8,300 0.913 0.283 0 1
Interaction_Placebo 8,300 0.235 0.328 0.000 0.730
-----------------------------------------------------------------
# Base-graphics overview of sale_price across all observations.
hist(df$sale_price)
boxplot(df$sale_price)

# Histogram restricted to sales below 2,000,000, in bins of width 50,000.
ggplot(
  data = dplyr::filter(df, sale_price < 2000000),
  mapping = aes(x = sale_price)
) +
  geom_histogram(binwidth = 50000, color = "black", fill = "blue") +
  labs(
    title = "Histogram of Sale Prices",
    x = "Sale Price",
    y = "Count"
  ) +
  theme_minimal()
# Base-graphics overview of sale_assessment across all observations.
hist(df$sale_assessment)
boxplot(df$sale_assessment)

# Histogram restricted to assessments below 2,000,000, in bins of width 50,000.
ggplot(
  data = dplyr::filter(df, sale_assessment < 2000000),
  mapping = aes(x = sale_assessment)
) +
  geom_histogram(binwidth = 50000, color = "black", fill = "blue") +
  labs(
    title = "Histogram of Sale Assessment",
    x = "Sale Assessment",
    y = "Count"
  ) +
  theme_minimal()
Independent Variable
# Observation counts per calendar year.
table(df[["Year"]]) # 5 years (2010-2014)
2010 2011 2012 2013 2014
726 1512 1836 2056 2170
# Observation counts for each value of the Time indicator.
table(df[["Time"]]) # 3610 (0) vs 4690 (1)
0 1
3610 4690
# Observation counts for each value of Treated.
table(df[["Treated"]]) # 3687 (0.05) vs 4613 (0.73)
0.05 0.73
3687 4613
# Observation counts for each value of the Interaction column.
table(df[["Interaction"]])
0 0.05 0.73
3610 2161 2529
Diff in Diff 2 way table
# Two-way counts: rows are Treated values, columns are Time values.
table(df[["Treated"]], df[["Time"]])
0 1
0.05 1526 2161
0.73 2084 2529
# Mean sale price in every Treated x Time cell
# (rows: Treated values, columns: Time values); NAs are ignored.
tapply(
  X = df$sale_price,
  INDEX = list(df$Treated, df$Time),
  FUN = mean,
  na.rm = TRUE
)
0 1
0.05 201276.2 214105.4
0.73 267001.3 278513.6
Diff in Diff Regression
# Difference-in-differences regressions ----
# `Treated * Time` expands to Treated + Time + Treated:Time, so the
# Treated:Time coefficient is the diff-in-diff term in every model.

# Outcome: sale price ----
# Base regression
mod1a <- lm(sale_price ~ Treated * Time, data = df)
# Base regression + controls (area fixed effects)
mod1b <- lm(sale_price ~ Treated * Time + municipality_name, data = df)
# Base regression + controls (area fixed effects) + year fixed effects
mod1c <- lm(
  sale_price ~ Treated * Time + municipality_name + as.factor(Year),
  data = df
)

# Outcome: sale assessment ----
# Base regression
mod2a <- lm(sale_assessment ~ Treated * Time, data = df)
# Base regression + controls (area fixed effects)
mod2b <- lm(sale_assessment ~ Treated * Time + municipality_name, data = df)
# Base regression + controls (area fixed effects) + year fixed effects
mod2c <- lm(
  sale_assessment ~ Treated * Time + municipality_name + as.factor(Year),
  data = df
)

# Regression table ----
# Only the base and area-FE models are tabulated; mod1c and mod2c are not
# passed to stargazer. `keep` limits the printed rows to the DiD terms and
# the constant, hiding the municipality dummies.
stargazer(
  mod1a, mod1b, mod2a, mod2b,
  type = "text",
  keep = c("Treated", "Time", "Treated:Time", "Constant"),
  add.lines = list(c("Entity Fixed effects", "No", "Yes", "No", "Yes"))
)
Try a robustness test that removes outliers in the dependent variable, sale assessment.
Parallel trends charts
# Calculate average sale assessment by Treated status and Year
average_data <- df %>%
  group_by(Treated, Year) %>%
  summarize(
    avg_sale_assessment = mean(sale_assessment, na.rm = TRUE),
    .groups = "drop"
  )

# Plot the parallel trends: one line per Treated value across years
ggplot(
  data = average_data,
  mapping = aes(
    x = Year,
    y = avg_sale_assessment,
    color = as.factor(Treated),
    group = Treated
  )
) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and raises a warning.
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Parallel Trends: Sale Assessment Over Time",
    x = "Year",
    y = "Average Sale Assessment",
    color = "Treated"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Does treatment start in 2012 Q4? If so, plot the trends by quarter instead of by year.
# Calculate average sale assessment by Treated status and Quarter
average_data <- df %>%
  group_by(Treated, Quarter) %>%
  summarize(
    avg_sale_assessment = mean(sale_assessment, na.rm = TRUE),
    .groups = "drop"
  )

# Plot the parallel trends: one line per Treated value across quarters
ggplot(
  data = average_data,
  mapping = aes(
    x = Quarter,
    y = avg_sale_assessment,
    color = as.factor(Treated),
    group = Treated
  )
) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and raises a warning.
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Parallel Trends: Sale Assessment Over Time",
    # The x aesthetic is Quarter, so the axis is labelled accordingly.
    x = "Quarter",
    y = "Average Sale Assessment",
    color = "Treated"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")
Event Study
Step 1: Prepare the Data
The key is to calculate relative time periods (time to treatment) for each observation in your data. This is critical for an event study.
This step creates relative_time as the difference in years from 2012Q4. Negative values are pre-treatment periods, and positive values are post-treatment periods.
Explanation
strsplit(): Splits the Quarter column (e.g., "2010-Q3") into a list of ["2010", "Q3"].
sapply(..., `[`, 1): Extracts the first part of the split (year).
gsub("Q", "", ...): Removes the “Q” character from the second part of the split (e.g., "Q3" becomes "3").
as.numeric(): Converts the extracted strings to numeric values for calculations.
Step 2: Estimate the Event Study Model
Use the lm function to estimate dynamic treatment effects by including interaction terms for Treated and relative_time:
Create a chart to visualize the dynamic treatment effects.
# Event-study plot: one point per relative_time value at `estimate`, with
# error bars spanning the lower_ci and upper_ci columns.
ggplot(
  data = event_coefficients,
  mapping = aes(x = relative_time, y = estimate)
) +
  geom_point(color = "blue", size = 2) +
  geom_errorbar(
    mapping = aes(ymin = lower_ci, ymax = upper_ci),
    color = "blue",
    width = 0.2
  ) +
  # Zero effect line
  geom_hline(yintercept = 0, color = "black", linetype = "dashed") +
  # Shock period
  geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
  labs(
    title = "Event Study: Impact of Shock on Sale Assessment",
    x = "Relative Time (Quarters)",
    y = "Estimated Effect on Sale Assessment"
  ) +
  theme_minimal()