library(AER)
## Warning: package 'AER' was built under R version 4.3.3
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
data("Fatalities")
help("Fatalities")
head(Fatalities)
##   state year spirits unemp   income   emppop  beertax baptist  mormon drinkage
## 1    al 1982    1.37  14.4 10544.15 50.69204 1.539379 30.3557 0.32829    19.00
## 2    al 1983    1.36  13.7 10732.80 52.14703 1.788991 30.3336 0.34341    19.00
## 3    al 1984    1.32  11.1 11108.79 54.16809 1.714286 30.3115 0.35924    19.00
## 4    al 1985    1.28   8.9 11332.63 55.27114 1.652542 30.2895 0.37579    19.67
## 5    al 1986    1.23   9.8 11661.51 56.51450 1.609907 30.2674 0.39311    21.00
## 6    al 1987    1.18   7.8 11944.00 57.50988 1.560000 30.2453 0.41123    21.00
##       dry youngdrivers    miles breath jail service fatal nfatal sfatal
## 1 25.0063     0.211572 7233.887     no   no      no   839    146     99
## 2 22.9942     0.210768 7836.348     no   no      no   930    154     98
## 3 24.0426     0.211484 8262.990     no   no      no   932    165     94
## 4 23.6339     0.211140 8726.917     no   no      no   882    146     98
## 5 23.4647     0.213400 8952.854     no   no      no  1081    172    119
## 6 23.7924     0.215527 9166.302     no   no      no  1110    181    114
##   fatal1517 nfatal1517 fatal1820 nfatal1820 fatal2124 nfatal2124  afatal
## 1        53          9        99         34       120         32 309.438
## 2        71          8       108         26       124         35 341.834
## 3        49          7       103         25       118         34 304.872
## 4        66          9       100         23       114         45 276.742
## 5        82         10       120         23       119         29 360.716
## 6        94         11       127         31       138         30 368.421
##       pop  pop1517  pop1820  pop2124 milestot unempus emppopus         gsp
## 1 3942002 208999.6 221553.4 290000.1    28516     9.7     57.8 -0.02212476
## 2 3960008 202000.1 219125.5 290000.2    31032     9.6     57.9  0.04655825
## 3 3988992 197000.0 216724.1 288000.2    32961     7.5     59.5  0.06279784
## 4 4021008 194999.7 214349.0 284000.3    35091     7.2     60.1  0.02748997
## 5 4049994 203999.9 212000.0 263000.3    36259     7.0     60.7  0.03214295
## 6 4082999 204999.8 208998.5 258999.8    37426     6.2     61.5  0.04897637
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plm)
## 
## Attaching package: 'plm'
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead
FatalityRate <- Fatalities$fatal / (Fatalities$pop/10000)
lm1 <- lm(FatalityRate~beertax,data = Fatalities)
summary(lm1)
## 
## Call:
## lm(formula = FatalityRate ~ beertax, data = Fatalities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.09060 -0.37768 -0.09436  0.28548  2.27643 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.85331    0.04357  42.539  < 2e-16 ***
## beertax      0.36461    0.06217   5.865 1.08e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5437 on 334 degrees of freedom
## Multiple R-squared:  0.09336,    Adjusted R-squared:  0.09065 
## F-statistic: 34.39 on 1 and 334 DF,  p-value: 1.082e-08

Basic model: FatalityRate = 1.85331 + 0.36461*BeerTax The intercept is 1.85331 and is statistically significant at the 5% level of significance with a p-value of <2e-16. This would mean when the Beer Tax is 0, the Fatality rate would be 1.85331%. The coefficient for BeerTax is 0.36461 and is statistically significant at the 5% level of significance with a p-value of 1.08e-08. This would mean that a one dollar increase in beer tax would result in a 0.36461 percentage point increase in the fatality rate.

n_distinct(Fatalities$year)
## [1] 7
n_distinct(Fatalities$state)
## [1] 48
is.pbalanced(Fatalities, "year", "states")
## [1] TRUE

It is a balanced panel data.

lm2 <- lm(FatalityRate ~ beertax + factor(state), data = Fatalities)
summary(lm2)
## 
## Call:
## lm(formula = FatalityRate ~ beertax + factor(state), data = Fatalities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58696 -0.08284 -0.00127  0.07955  0.89780 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      3.47763    0.31336  11.098  < 2e-16 ***
## beertax         -0.65587    0.18785  -3.491 0.000556 ***
## factor(state)az -0.56773    0.26667  -2.129 0.034107 *  
## factor(state)ar -0.65495    0.21902  -2.990 0.003028 ** 
## factor(state)ca -1.50947    0.30435  -4.960 1.21e-06 ***
## factor(state)co -1.48428    0.28735  -5.165 4.50e-07 ***
## factor(state)ct -1.86226    0.28053  -6.638 1.58e-10 ***
## factor(state)de -1.30760    0.29395  -4.448 1.24e-05 ***
## factor(state)fl -0.26813    0.13933  -1.924 0.055284 .  
## factor(state)ga  0.52460    0.18395   2.852 0.004661 ** 
## factor(state)id -0.66902    0.25797  -2.593 0.009989 ** 
## factor(state)il -1.96162    0.29150  -6.730 9.23e-11 ***
## factor(state)in -1.46154    0.27254  -5.363 1.69e-07 ***
## factor(state)ia -1.54393    0.25344  -6.092 3.58e-09 ***
## factor(state)ks -1.22322    0.24544  -4.984 1.08e-06 ***
## factor(state)ky -1.21752    0.28717  -4.240 3.02e-05 ***
## factor(state)la -0.84712    0.18867  -4.490 1.03e-05 ***
## factor(state)me -1.10795    0.19112  -5.797 1.78e-08 ***
## factor(state)md -1.70644    0.28322  -6.025 5.17e-09 ***
## factor(state)ma -2.10975    0.27610  -7.641 3.24e-13 ***
## factor(state)mi -1.48453    0.23602  -6.290 1.18e-09 ***
## factor(state)mn -1.89721    0.26509  -7.157 6.92e-12 ***
## factor(state)ms -0.02908    0.14845  -0.196 0.844839    
## factor(state)mo -1.29626    0.26669  -4.861 1.93e-06 ***
## factor(state)mt -0.36039    0.26396  -1.365 0.173225    
## factor(state)ne -1.52218    0.24928  -6.106 3.30e-09 ***
## factor(state)nv -0.60077    0.28595  -2.101 0.036517 *  
## factor(state)nh -1.25445    0.20968  -5.983 6.53e-09 ***
## factor(state)nj -2.10575    0.30720  -6.855 4.37e-11 ***
## factor(state)nm  0.42637    0.25432   1.677 0.094724 .  
## factor(state)ny -2.18667    0.29890  -7.316 2.57e-12 ***
## factor(state)nc -0.29047    0.11984  -2.424 0.015979 *  
## factor(state)nd -1.62344    0.25381  -6.396 6.45e-10 ***
## factor(state)oh -1.67442    0.25381  -6.597 2.02e-10 ***
## factor(state)ok -0.54506    0.16912  -3.223 0.001415 ** 
## factor(state)or -1.16800    0.28572  -4.088 5.65e-05 ***
## factor(state)pa -1.76747    0.27610  -6.402 6.26e-10 ***
## factor(state)ri -2.26505    0.29376  -7.711 2.07e-13 ***
## factor(state)sc  0.55717    0.11000   5.065 7.30e-07 ***
## factor(state)sd -1.00372    0.20962  -4.788 2.70e-06 ***
## factor(state)tn -0.87566    0.26802  -3.267 0.001218 ** 
## factor(state)tx -0.91747    0.24556  -3.736 0.000225 ***
## factor(state)ut -1.16395    0.19642  -5.926 8.88e-09 ***
## factor(state)vt -0.96604    0.21113  -4.576 7.08e-06 ***
## factor(state)va -1.29018    0.20416  -6.320 9.99e-10 ***
## factor(state)wa -1.65952    0.28346  -5.854 1.31e-08 ***
## factor(state)wv -0.89675    0.24661  -3.636 0.000328 ***
## factor(state)wi -1.75927    0.29395  -5.985 6.44e-09 ***
## factor(state)wy -0.22850    0.31290  -0.730 0.465811    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1899 on 287 degrees of freedom
## Multiple R-squared:  0.905,  Adjusted R-squared:  0.8891 
## F-statistic: 56.97 on 48 and 287 DF,  p-value: < 2.2e-16

The intercept of the equation increases to 3.47763 from 1.85331 and remains statistically significant at the 5% level of significance with a p-value of <2e-16. This would mean when the beer tax is 0, the fatality rate is higher at 3.47763%. The beertax changes significantly from 0.36461 to -0.65587 and remains statistically significant at the 5% level of significance with a p-value of 0.000556. This would mean that a one dollar increase in beer tax would result in a 0.65587 percentage point decrease in the fatality rate. Controlling for time-invariant fixed effects eliminates confounding variable bias caused by state-specific confounders. One reason this could be the case is where a increase in beer tax would cause an increase in fatality rate would be that the substitution of beer for spirits and hard liquor. Spirits and hard liquor are more potent than than beer, and would increase fatality rate when consumed, especially if an individual drives after consuming it.

lm3 <- lm(FatalityRate ~ beertax + factor(year), data = Fatalities)
summary(lm3)
## 
## Call:
## lm(formula = FatalityRate ~ beertax + factor(year), data = Fatalities)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.06068 -0.38427 -0.09853  0.26781  2.23447 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.894848   0.085659  22.121  < 2e-16 ***
## beertax           0.366336   0.062600   5.852 1.18e-08 ***
## factor(year)1983 -0.082036   0.111673  -0.735    0.463    
## factor(year)1984 -0.071733   0.111673  -0.642    0.521    
## factor(year)1985 -0.110546   0.111676  -0.990    0.323    
## factor(year)1986 -0.016118   0.111682  -0.144    0.885    
## factor(year)1987 -0.015535   0.111695  -0.139    0.889    
## factor(year)1988 -0.001027   0.111718  -0.009    0.993    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5471 on 328 degrees of freedom
## Multiple R-squared:  0.09865,    Adjusted R-squared:  0.07941 
## F-statistic: 5.128 on 7 and 328 DF,  p-value: 1.499e-05

The intercept of the equation slightly increases to 1.894848 from 1.85331 and remains statistically significant at the 5% level of significance with a p-value of <2e-16. This would mean when the beer tax is 0, the fatality rate is slightly higher at 1.894848%. The coefficient beertax increases slightly from 0.36461 to 0.366336 and remains statistically significant at the 5% level of significance with a p-value of 1.18e-08. This would mean that a one dollar increase in beer tax would result in a 0.366336 percentage point increase in the fatality rate. The lack of change in intercept and coefficient values means that there are little confounding variables caused by year-specific confounders.

model4 <- plm(FatalityRate ~ beertax, data = Fatalities, index="state")
summary(model4)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = FatalityRate ~ beertax, data = Fatalities, index = "state")
## 
## Balanced Panel: n = 48, T = 7, N = 336
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -0.5869619 -0.0828376 -0.0012701  0.0795454  0.8977960 
## 
## Coefficients:
##         Estimate Std. Error t-value Pr(>|t|)    
## beertax -0.65587    0.18785 -3.4915 0.000556 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    10.785
## Residual Sum of Squares: 10.345
## R-Squared:      0.040745
## Adj. R-Squared: -0.11969
## F-statistic: 12.1904 on 1 and 287 DF, p-value: 0.00055597

The coefficient of beertax is -0.65587 and statistically significant at the 5% level of significance with a p-value of 0.000556. This exactly the same as the coefficient of beertax in model2. This is because both methods are controlling for entity-specific, time-invariant characteristics. Although the method of calculating the coefficient is different, it will arrive at the same outcome.

model5 <- plm(FatalityRate ~ beertax, data = Fatalities, index="year")
summary(model5)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = FatalityRate ~ beertax, data = Fatalities, index = "year")
## 
## Balanced Panel: n = 7, T = 48, N = 336
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -1.224374 -0.410186 -0.037268  0.370379  2.179013 
## 
## Coefficients:
##         Estimate Std. Error t-value Pr(>|t|)
## beertax 0.023113   0.064229  0.3599   0.7192
## 
## Total Sum of Squares:    103.39
## Residual Sum of Squares: 103.35
## R-Squared:      0.00039465
## Adj. R-Squared: -0.020938
## F-statistic: 0.129495 on 1 and 328 DF, p-value: 0.71919

The coefficient of beertax is 0.023113 and statistically insignificant at the 5% level of significance with a p-value of 0.7192. It is not the same when compared to the coefficient of beer tax from model3 of 0.366336. The difference between the two models arises because the lm() model accounts for year-specific effects using dummy variables (which help isolate the impact of beertax), while the plm() model without year fixed effects does not.

# Step 1: Calculate the mean of beertax and FatalityRate for each state
state_means <- Fatalities %>%
  group_by(state) %>%
  summarise(mean_beertax = mean(beertax, na.rm = TRUE),
            mean_fatality_rate = mean(FatalityRate, na.rm = TRUE))

# Step 2: Join the state-level means back to the original dataset and demean the variables
Fatalities_demeaned <- Fatalities %>%
  left_join(state_means, by = "state") %>%    # Join state-level means to the main dataset
  mutate(beertax_demeaned = beertax - mean_beertax,      # Subtract state-level mean beertax
         FatalityRate_demeaned = FatalityRate - mean_fatality_rate)  # Subtract state-level mean fatality rate

# View the dataset with demeaned variables
lm6 <- lm(FatalityRate_demeaned~beertax_demeaned, data = Fatalities_demeaned)
summary(lm6)
## 
## Call:
## lm(formula = FatalityRate_demeaned ~ beertax_demeaned, data = Fatalities_demeaned)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.21650 -0.40954 -0.07664  0.37976  2.08508 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -1.210e-16  3.109e-02   0.000    1.000
## beertax_demeaned -6.559e-01  5.639e-01  -1.163    0.246
## 
## Residual standard error: 0.5699 on 334 degrees of freedom
## Multiple R-squared:  0.004035,   Adjusted R-squared:  0.001053 
## F-statistic: 1.353 on 1 and 334 DF,  p-value: 0.2456