Preliminary

Set Up

# Clear every object from the current workspace before the analysis starts.
# NOTE(review): this removes objects only; attached packages and options stay
# as they are. Restarting the R session gives a more reliable clean slate.
rm(list = ls())

library("visdat")
library("stargazer")


Please cite as:

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.

 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

library("tidyverse")

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Import

# Location of the raw sales data. The default is the original Google Drive
# path; set the NJ_DATA_PATH environment variable to point at a copy stored
# elsewhere without editing this file.
data_path <- path.expand(Sys.getenv(
  "NJ_DATA_PATH",
  unset = "~/Library/CloudStorage/GoogleDrive-sharmaar@bc.edu/My Drive/Econometrics/hurricane_HomePrices/New Jersey Data Final.csv"
))

# Fail early with a readable message rather than read.csv()'s generic error.
if (!file.exists(data_path)) {
  stop("Data file not found: ", data_path, call. = FALSE)
}

df <- read.csv(data_path)

# Visual overview of the imported data frame (visdat)
vis_dat(df)

Data Cleaning

# Drop the lagged assessment column: it was created incorrectly.
df[["Lag_sale_assessment"]] <- NULL

# Tabulate `residential`; it takes the single value 1 in this data.
table(df[["residential"]])


   1 
8300

# `residential` is constant, so remove it from the data frame.
df[["residential"]] <- NULL

# Row-by-row comparison of Time_minus_1 with Time; the two columns are not
# supposed to coincide.
table(df[["Time_minus_1"]] == df[["Time"]])


FALSE  TRUE 
  494  7806

# Remove Time_minus_1 after the comparison with Time above.
df[["Time_minus_1"]] <- NULL

EDA

# Number of sales per county (two counties in the data)
table(df[["county_name"]])


ATLANTIC   SUSSEX 
    4613     3687

# One-way municipality counts (20 distinct municipalities), kept for reference:
# table(df$municipality_name)
# length(unique(df$municipality_name))

# Cross-tabulate municipality against county
table(df[["municipality_name"]], df[["county_name"]])

                     
                      ATLANTIC SUSSEX
  Andover Borough            0     23
  Andover Township           0    286
  Atlantic City            137      0
  Branchville Borough        0     46
  Byram Township             0    411
  Corbin City               42      0
  Franklin Borough           0    181
  Fredon Township            0    139
  Green Township             0    181
  Hamburg Borough            0     86
  Hopatcong Borough          0    863
  Linwood                  416      0
  Longport Borough         353      0
  Margate City            1430      0
  Northfield               443      0
  Ogdensburg Borough         0    103
  Somers Point             643      0
  Sparta Township            0   1162
  Stanhope Borough           0    206
  Ventnor City            1149      0

Treatment group: Atlantic County (impacted)
Control group: Sussex County (less impacted)

Outcome Variable

Sale Price
Sale Assessment

# Summary statistics table for df, printed as plain text
stargazer(df, type = "text")


=================================================================
Statistic             N      Mean      St. Dev.    Min     Max   
-----------------------------------------------------------------
Year                8,300  2,012.413     1.286    2,010   2,014  
sale_price          8,300 244,653.100 338,970.600   0   5,175,000
Price_Range         8,300    0.633       0.482      0       1    
sale_assessment     8,300 357,128.700 410,426.500   0   6,540,600
Sale_Range          8,300    0.940       0.238      0       1    
Treated             8,300    0.428       0.338    0.050   0.730  
Time                8,300    0.565       0.496      0       1    
Interaction         8,300    0.235       0.328    0.000   0.730  
Time_Placebo        8,300    0.913       0.283      0       1    
Interaction_Placebo 8,300    0.235       0.328    0.000   0.730  
-----------------------------------------------------------------

# ---- Sale price ----
# Base-graphics histogram and boxplot over the full range of sale prices
hist(df$sale_price)

boxplot(df$sale_price)

# Histogram restricted to sales under $2,000,000, in $50,000 bins
df |>
  dplyr::filter(sale_price < 2e6) |>
  ggplot(aes(x = sale_price)) +
  geom_histogram(binwidth = 5e4, fill = "blue", color = "black") +
  labs(x = "Sale Price", y = "Count", title = "Histogram of Sale Prices") +
  theme_minimal()

# ---- Sale assessment ----
# Same three views for the assessed value
hist(df$sale_assessment)

boxplot(df$sale_assessment)

# Histogram restricted to assessments under $2,000,000, in $50,000 bins
df |>
  dplyr::filter(sale_assessment < 2e6) |>
  ggplot(aes(x = sale_assessment)) +
  geom_histogram(binwidth = 5e4, fill = "blue", color = "black") +
  labs(
    x = "Sale Assessment",
    y = "Count",
    title = "Histogram of Sale Assessment"
  ) +
  theme_minimal()

Independent Variable

# Sales per calendar year; the data span five years, 2010-2014
table(df[["Year"]])


2010 2011 2012 2013 2014 
 726 1512 1836 2056 2170

# Time indicator: 3610 rows with Time = 0 versus 4690 rows with Time = 1
table(df[["Time"]])


   0    1 
3610 4690

# Treated takes two values: 0.05 (3687 rows) and 0.73 (4613 rows)
table(df[["Treated"]])


0.05 0.73 
3687 4613

# Frequency of each value of the Interaction column
table(df[["Interaction"]])


   0 0.05 0.73 
3610 2161 2529

Diff in Diff 2 way table

# Observation counts in each Treated x Time cell of the 2x2 design
table(df[["Treated"]], df[["Time"]])

      
          0    1
  0.05 1526 2161
  0.73 2084 2529

# Mean sale price within each Treated x Time cell (missing prices are ignored)
with(df, tapply(sale_price, list(Treated, Time), mean, na.rm = TRUE))

            0        1
0.05 201276.2 214105.4
0.73 267001.3 278513.6

Diff in Diff Regression

# Difference-in-differences regressions. For each outcome three nested
# specifications are fitted:
#   (a) Treated, Time and their interaction only
#   (b) (a) plus municipality dummies (area fixed effects)
#   (c) (b) plus calendar-year dummies
# NOTE(review): Treated appears to vary only by county, so it looks collinear
# with the municipality dummies; in (b) and (c) lm() would then drop one
# coefficient and the Treated main effect is not separately identified. Confirm
# before interpreting that row.

# ---- Outcome: sale price ----

mod1a <- lm(sale_price ~ Treated * Time, data = df)

mod1b <- lm(sale_price ~ Treated * Time + municipality_name, data = df)

mod1c <- lm(
  sale_price ~ Treated * Time + municipality_name + as.factor(Year),
  data = df
)

# ---- Outcome: sale assessment ----

mod2a <- lm(sale_assessment ~ Treated * Time, data = df)

mod2b <- lm(sale_assessment ~ Treated * Time + municipality_name, data = df)

mod2c <- lm(
  sale_assessment ~ Treated * Time + municipality_name + as.factor(Year),
  data = df
)






# Table without the year-fixed-effects columns (marked OMIT: not for the
# write-up). Only the DiD terms and the constant are displayed; the extra row
# records which columns include municipality dummies.
stargazer(
  mod1a, mod1b, mod2a, mod2b,
  type = "text",
  keep = c("Treated", "Time", "Treated:Time", "Constant"),
  add.lines = list(
    c("Entity Fixed effects", "No", "Yes", "No", "Yes")
  )
)


===========================================================================================================================
                                                              Dependent variable:                                          
                     ------------------------------------------------------------------------------------------------------
                                         sale_price                                       sale_assessment                  
                               (1)                       (2)                      (3)                       (4)            
---------------------------------------------------------------------------------------------------------------------------
Treated                   96,654.660***              138,037.400             180,106.400***            325,600.400***      
                           (16,720.120)             (99,630.480)              (19,981.760)             (110,510.100)       
                                                                                                                           
Time                        12,926.020               12,300.250              -41,067.800***            -39,625.000***      
                           (12,135.640)             (11,483.080)              (14,502.970)              (12,737.030)       
                                                                                                                           
Treated:Time                -1,936.686                8,022.315              74,951.640***             87,765.040***       
                           (22,156.440)             (20,974.130)              (26,478.560)              (23,264.510)       
                                                                                                                           
Constant                  196,443.400***             103,446.400             285,613.900***             137,615.100*       
                           (9,289.708)              (71,831.900)              (11,101.880)              (79,675.960)       
                                                                                                                           
---------------------------------------------------------------------------------------------------------------------------
Entity Fixed effects            No                       Yes                       No                       Yes            
Observations                  8,300                     8,300                    8,300                     8,300           
R2                            0.009                     0.117                    0.035                     0.259           
Adjusted R2                   0.009                     0.115                    0.034                     0.257           
Residual Std. Error  337,458.800 (df = 8296)   318,896.000 (df = 8278)  403,287.800 (df = 8296)   353,719.400 (df = 8278)  
F Statistic          25.841*** (df = 3; 8296) 52.321*** (df = 21; 8278) 99.801*** (df = 3; 8296) 137.868*** (df = 21; 8278)
===========================================================================================================================
Note:                                                                                           *p<0.1; **p<0.05; ***p<0.01

# Full table (marked PRESENT: the one to report). Columns 1-3 are the sale
# price models, columns 4-6 the sale assessment models. Only the DiD terms and
# the constant are displayed; the two extra rows record which fixed effects
# each column includes.
stargazer(
  mod1a, mod1b, mod1c, mod2a, mod2b, mod2c,
  type = "text",
  keep = c("Treated", "Time", "Treated:Time", "Constant"),
  add.lines = list(
    c("Entity Fixed effects", "No", "Yes", "Yes", "No", "Yes", "Yes"),
    c("Year Fixed effects", "No", "No", "Yes", "No", "No", "Yes")
  )
)


================================================================================================================================================================================
                                                                                         Dependent variable:                                                                    
                     -----------------------------------------------------------------------------------------------------------------------------------------------------------
                                                      sale_price                                                                 sale_assessment                                
                               (1)                       (2)                       (3)                      (4)                       (5)                        (6)            
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Treated                   96,654.660***              138,037.400               138,485.000             180,106.400***            325,600.400***             321,473.900***      
                           (16,720.120)             (99,630.480)              (99,561.320)              (19,981.760)             (110,510.100)              (110,065.400)       
                                                                                                                                                                                
Time                        12,926.020               12,300.250                -27,623.810             -41,067.800***            -39,625.000***             100,979.300***      
                           (12,135.640)             (11,483.080)              (19,506.820)              (14,502.970)              (12,737.030)               (21,564.850)       
                                                                                                                                                                                
Treated:Time                -1,936.686                8,022.315                 7,634.989              74,951.640***             87,765.040***              82,158.360***       
                           (22,156.440)             (20,974.130)              (20,973.620)              (26,478.560)              (23,264.510)               (23,186.400)       
                                                                                                                                                                                
Constant                  196,443.400***             103,446.400              132,521.700*             285,613.900***             137,615.100*              163,886.800**       
                           (9,289.708)              (71,831.900)              (72,578.040)              (11,101.880)              (79,675.960)               (80,235.250)       
                                                                                                                                                                                
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Entity Fixed effects            No                       Yes                       Yes                       No                       Yes                        Yes            
Year Fixed effects              No                       No                        Yes                       No                        No                        Yes            
Observations                  8,300                     8,300                     8,300                    8,300                     8,300                      8,300           
R2                            0.009                     0.117                     0.119                    0.035                     0.259                      0.266           
Adjusted R2                   0.009                     0.115                     0.116                    0.034                     0.257                      0.263           
Residual Std. Error  337,458.800 (df = 8296)   318,896.000 (df = 8278)   318,644.700 (df = 8274)  403,287.800 (df = 8296)   353,719.400 (df = 8278)    352,262.700 (df = 8274)  
F Statistic          25.841*** (df = 3; 8296) 52.321*** (df = 21; 8278) 44.701*** (df = 25; 8274) 99.801*** (df = 3; 8296) 137.868*** (df = 21; 8278) 119.673*** (df = 25; 8274)
================================================================================================================================================================================
Note:                                                                                                                                                *p<0.1; **p<0.05; ***p<0.01

Try a robustness test that removes outliers in the dependent variable, sale assessment.

Parallel trends charts

# Average sale assessment for each Treated group in each calendar year
average_data <- df %>%
  group_by(Treated, Year) %>%
  summarize(
    avg_sale_assessment = mean(sale_assessment, na.rm = TRUE),
    .groups = "drop"
  )


# Plot the yearly group means to eyeball the parallel-trends assumption:
# one line per Treated value.
ggplot(
  data = average_data,
  mapping = aes(
    x = Year,
    y = avg_sale_assessment,
    color = as.factor(Treated),
    group = Treated
  )
) +
  # `linewidth` replaces `size` for lines, which is deprecated since
  # ggplot2 3.4.0; `size` still applies to points.
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Parallel Trends: Sale Assessment Over Time",
    x = "Year",
    y = "Average Sale Assessment",
    color = "Treated"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Does treatment start in 2012 Q4? If so, plot by quarter instead of by year.

# Average sale assessment for each Treated group in each quarter
average_data <- df %>%
  group_by(Treated, Quarter) %>%
  summarize(
    avg_sale_assessment = mean(sale_assessment, na.rm = TRUE),
    .groups = "drop"
  )


# Plot the quarterly group means to eyeball the parallel-trends assumption:
# one line per Treated value. Quarter is a text label (e.g. "2010-Q3"), so the
# x axis is discrete and `group = Treated` is what connects the points.
ggplot(
  data = average_data,
  mapping = aes(
    x = Quarter,
    y = avg_sale_assessment,
    color = as.factor(Treated),
    group = Treated
  )
) +
  # `linewidth` replaces `size` for lines, which is deprecated since
  # ggplot2 3.4.0; `size` still applies to points.
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Parallel Trends: Sale Assessment Over Time",
    x = "Quarter",
    y = "Average Sale Assessment",
    color = "Treated"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    # Many quarter labels share the axis; rotate them so they stay legible.
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Event Study

Step 1: Prepare the Data

The key is to calculate relative time periods (time to treatment) for each observation in your data. This is critical for an event study.

Add Relative Time Variable

# Split each Quarter label (e.g. "2010-Q3") into its year and quarter parts
# once, up front. as.character() keeps this working if Quarter was read in as
# a factor; fixed = TRUE treats "-" as a literal separator.
quarter_parts <- strsplit(as.character(df[["Quarter"]]), "-", fixed = TRUE)

df2 <- df %>%
  mutate(
    # First piece of the label is the calendar year
    year = as.numeric(sapply(quarter_parts, `[`, 1)),
    # Second piece is "Q1".."Q4"; strip the "Q" to get the quarter number
    quarter = as.numeric(gsub("Q", "", sapply(quarter_parts, `[`, 2))),
    # Quarters elapsed since 2012-Q4 (negative before, 0 in 2012-Q4)
    relative_time = 4 * (year - 2012) + (quarter - 4)
  )

# A malformed label would otherwise become NA and be dropped silently by lm().
unparsed_quarter <- !is.na(df2[["Quarter"]]) & is.na(df2[["relative_time"]])
if (any(unparsed_quarter)) {
  stop(
    "Could not parse Quarter value(s): ",
    paste(unique(df2[["Quarter"]][unparsed_quarter]), collapse = ", "),
    call. = FALSE
  )
}

This creates relative_time as the number of quarters relative to 2012-Q4. Negative values are pre-treatment quarters, 0 is 2012-Q4 itself, and positive values are post-treatment quarters.

Explanation

strsplit(): Splits the Quarter column (e.g., "2010-Q3") into a list of ["2010", "Q3"].
sapply(..., `[`, 1): Extracts the first part of the split (year).
gsub("Q", "", ...): Removes the “Q” character from the second part of the split (e.g., "Q3" becomes "3").
as.numeric(): Converts the extracted strings to numeric values for calculations.

Step 2: Estimate the Event Study Model

Use the lm function to estimate dynamic treatment effects by including interaction terms for Treated and relative_time:

# Event-study regression: sale assessment on Treated interacted with a dummy
# for every relative quarter, plus municipality dummies. as.factor() uses its
# first level (the earliest relative quarter in the data) as the omitted
# reference period, so each Treated:relative_time coefficient is measured
# against that quarter.
event_study_model <- lm(
  formula = sale_assessment ~
    Treated * as.factor(relative_time) + municipality_name,
  data = df2
)

# Print the full coefficient table and fit statistics
summary(event_study_model)


Call:
lm(formula = sale_assessment ~ Treated * as.factor(relative_time) + 
    municipality_name, data = df2)

Residuals:
     Min       1Q   Median       3Q      Max 
-1178076  -116687   -35545    50272  5674171 

Coefficients: (1 not defined because of singularities)
                                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)                           155487.2    84829.3   1.833 0.066847 .  
Treated                               306416.5   122827.1   2.495 0.012626 *  
as.factor(relative_time)-8               476.7    42030.4   0.011 0.990951    
as.factor(relative_time)-7            -36961.4    44615.1  -0.828 0.407440    
as.factor(relative_time)-6            -15515.0    43404.8  -0.357 0.720765    
as.factor(relative_time)-5            -18239.2    41738.1  -0.437 0.662129    
as.factor(relative_time)-4            -11790.9    44136.2  -0.267 0.789362    
as.factor(relative_time)-3              2081.0    41728.4   0.050 0.960227    
as.factor(relative_time)-2            -17491.8    41280.6  -0.424 0.671774    
as.factor(relative_time)-1            -28479.4    39456.6  -0.722 0.470443    
as.factor(relative_time)0              -5229.4    41161.2  -0.127 0.898907    
as.factor(relative_time)1             -31965.9    40456.0  -0.790 0.429468    
as.factor(relative_time)2             -48769.9    39012.6  -1.250 0.211295    
as.factor(relative_time)3             -59951.7    38150.1  -1.571 0.116113    
as.factor(relative_time)4             -56643.0    39355.6  -1.439 0.150115    
as.factor(relative_time)5             -67179.1    41083.4  -1.635 0.102047    
as.factor(relative_time)6             -66937.4    38712.3  -1.729 0.083829 .  
as.factor(relative_time)7             -63240.6    37229.9  -1.699 0.089423 .  
as.factor(relative_time)8             -64224.5    38708.8  -1.659 0.097119 .  
municipality_nameAndover Township      96927.0    76417.7   1.268 0.204697    
municipality_nameAtlantic City        -52257.5    31871.1  -1.640 0.101116    
municipality_nameBranchville Borough  145999.9    90031.6   1.622 0.104916    
municipality_nameByram Township       130079.0    75528.9   1.722 0.085063 .  
municipality_nameCorbin City         -255186.0    55319.0  -4.613 4.03e-06 ***
municipality_nameFranklin Borough      54571.6    77976.0   0.700 0.484040    
municipality_nameFredon Township      224685.9    79329.0   2.832 0.004632 ** 
municipality_nameGreen Township       216212.2    78012.4   2.772 0.005592 ** 
municipality_nameHamburg Borough        2692.9    82705.3   0.033 0.974026    
municipality_nameHopatcong Borough    128212.0    74446.2   1.722 0.085069 .  
municipality_nameLinwood             -128850.7    20196.3  -6.380 1.87e-10 ***
municipality_nameLongport Borough     802247.0    21482.4  37.344  < 2e-16 ***
municipality_nameMargate City         123451.9    13968.5   8.838  < 2e-16 ***
municipality_nameNorthfield          -150938.8    19773.8  -7.633 2.54e-14 ***
municipality_nameOgdensburg Borough   115990.2    81305.0   1.427 0.153731    
municipality_nameSomers Point        -236170.9    17366.0 -13.600  < 2e-16 ***
municipality_nameSparta Township      168670.9    74219.8   2.273 0.023077 *  
municipality_nameStanhope Borough      70026.1    77483.0   0.904 0.366149    
municipality_nameVentnor City               NA         NA      NA       NA    
Treated:as.factor(relative_time)-8     64492.7    77403.5   0.833 0.404755    
Treated:as.factor(relative_time)-7     89981.7    80471.1   1.118 0.263520    
Treated:as.factor(relative_time)-6    -48895.6    77364.3  -0.632 0.527393    
Treated:as.factor(relative_time)-5      8763.3    76494.7   0.115 0.908796    
Treated:as.factor(relative_time)-4     51651.3    79065.1   0.653 0.513597    
Treated:as.factor(relative_time)-3    -32246.3    75862.0  -0.425 0.670800    
Treated:as.factor(relative_time)-2    -43664.4    74850.2  -0.583 0.559669    
Treated:as.factor(relative_time)-1     55433.2    73068.7   0.759 0.448086    
Treated:as.factor(relative_time)0     276557.2    74499.2   3.712 0.000207 ***
Treated:as.factor(relative_time)1      16656.0    75366.5   0.221 0.825098    
Treated:as.factor(relative_time)2      64345.6    71782.2   0.896 0.370065    
Treated:as.factor(relative_time)3      62075.2    70862.6   0.876 0.381059    
Treated:as.factor(relative_time)4      97182.7    72716.3   1.336 0.181435    
Treated:as.factor(relative_time)5      38931.0    76552.8   0.509 0.611081    
Treated:as.factor(relative_time)6      52282.8    71736.5   0.729 0.466134    
Treated:as.factor(relative_time)7     160722.7    69351.2   2.318 0.020500 *  
Treated:as.factor(relative_time)8      83523.1    70941.1   1.177 0.239087    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 351900 on 8246 degrees of freedom
Multiple R-squared:  0.2696,    Adjusted R-squared:  0.265 
F-statistic: 57.44 on 53 and 8246 DF,  p-value: < 2.2e-16

Step 3: Extract Coefficients, create CIs

Extract the coefficients for the interaction terms (Treated * relative_time) for visualization.

library(broom)

# Keep only the Treated x relative-quarter interaction rows from the tidy
# coefficient table of the event-study model.
interaction_rows <- dplyr::filter(
  tidy(event_study_model),
  grepl("Treated:as.factor", term)
)

# Recover the numeric relative quarter from each term label and attach an
# approximate 95% confidence interval (estimate +/- 1.96 standard errors).
event_coefficients <- mutate(
  interaction_rows,
  relative_time = as.numeric(
    gsub(".*as\\.factor\\(relative_time\\)([0-9-]+).*", "\\1", term)
  ),
  lower_ci = estimate - 1.96 * std.error,
  upper_ci = estimate + 1.96 * std.error
)

Step 4: Plot the Event Study Chart

Create a chart to visualize the dynamic treatment effects.

# Event-study chart: one point per relative quarter with its confidence
# interval. The dashed horizontal line marks a zero effect and the dashed red
# vertical line marks relative quarter 0 (the shock period).
event_plot <- ggplot(
  data = event_coefficients,
  mapping = aes(x = relative_time, y = estimate)
) +
  geom_point(color = "blue", size = 2) +
  geom_errorbar(
    mapping = aes(ymin = lower_ci, ymax = upper_ci),
    color = "blue",
    width = 0.2
  ) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed") +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
  labs(
    x = "Relative Time (Quarters)",
    y = "Estimated Effect on Sale Assessment",
    title = "Event Study: Impact of Shock on Sale Assessment"
  ) +
  theme_minimal()

event_plot