library(AER)
## Warning: package 'AER' was built under R version 4.3.3
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
data("Fatalities")
help("Fatalities")
head(Fatalities)
## state year spirits unemp income emppop beertax baptist mormon drinkage
## 1 al 1982 1.37 14.4 10544.15 50.69204 1.539379 30.3557 0.32829 19.00
## 2 al 1983 1.36 13.7 10732.80 52.14703 1.788991 30.3336 0.34341 19.00
## 3 al 1984 1.32 11.1 11108.79 54.16809 1.714286 30.3115 0.35924 19.00
## 4 al 1985 1.28 8.9 11332.63 55.27114 1.652542 30.2895 0.37579 19.67
## 5 al 1986 1.23 9.8 11661.51 56.51450 1.609907 30.2674 0.39311 21.00
## 6 al 1987 1.18 7.8 11944.00 57.50988 1.560000 30.2453 0.41123 21.00
## dry youngdrivers miles breath jail service fatal nfatal sfatal
## 1 25.0063 0.211572 7233.887 no no no 839 146 99
## 2 22.9942 0.210768 7836.348 no no no 930 154 98
## 3 24.0426 0.211484 8262.990 no no no 932 165 94
## 4 23.6339 0.211140 8726.917 no no no 882 146 98
## 5 23.4647 0.213400 8952.854 no no no 1081 172 119
## 6 23.7924 0.215527 9166.302 no no no 1110 181 114
## fatal1517 nfatal1517 fatal1820 nfatal1820 fatal2124 nfatal2124 afatal
## 1 53 9 99 34 120 32 309.438
## 2 71 8 108 26 124 35 341.834
## 3 49 7 103 25 118 34 304.872
## 4 66 9 100 23 114 45 276.742
## 5 82 10 120 23 119 29 360.716
## 6 94 11 127 31 138 30 368.421
## pop pop1517 pop1820 pop2124 milestot unempus emppopus gsp
## 1 3942002 208999.6 221553.4 290000.1 28516 9.7 57.8 -0.02212476
## 2 3960008 202000.1 219125.5 290000.2 31032 9.6 57.9 0.04655825
## 3 3988992 197000.0 216724.1 288000.2 32961 7.5 59.5 0.06279784
## 4 4021008 194999.7 214349.0 284000.3 35091 7.2 60.1 0.02748997
## 5 4049994 203999.9 212000.0 263000.3 36259 7.0 60.7 0.03214295
## 6 4082999 204999.8 208998.5 258999.8 37426 6.2 61.5 0.04897637
library("dplyr")
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plm)
##
## Attaching package: 'plm'
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
FatalityRate <- Fatalities$fatal / (Fatalities$pop/10000)
lm1 <- lm(FatalityRate~beertax,data = Fatalities)
summary(lm1)
##
## Call:
## lm(formula = FatalityRate ~ beertax, data = Fatalities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.09060 -0.37768 -0.09436 0.28548 2.27643
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.85331 0.04357 42.539 < 2e-16 ***
## beertax 0.36461 0.06217 5.865 1.08e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5437 on 334 degrees of freedom
## Multiple R-squared: 0.09336, Adjusted R-squared: 0.09065
## F-statistic: 34.39 on 1 and 334 DF, p-value: 1.082e-08
Basic model: FatalityRate = 1.85331 + 0.36461*BeerTax The intercept is 1.85331 and is statistically significant at the 5% level of significance with a p-value of <2e-16. This would mean when the Beer Tax is 0, the Fatality rate would be 1.85331%. The coefficient for BeerTax is 0.36461 and is statistically significant at the 5% level of significance with a p-value of 1.08e-08. This would mean that a one dollar increase in beer tax would result in a 0.36461 percentage point increase in the fatality rate.
n_distinct(Fatalities$year)
## [1] 7
n_distinct(Fatalities$state)
## [1] 48
is.pbalanced(Fatalities, "year", "states")
## [1] TRUE
It is a balanced panel data.
lm2 <- lm(FatalityRate ~ beertax + factor(state), data = Fatalities)
summary(lm2)
##
## Call:
## lm(formula = FatalityRate ~ beertax + factor(state), data = Fatalities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58696 -0.08284 -0.00127 0.07955 0.89780
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.47763 0.31336 11.098 < 2e-16 ***
## beertax -0.65587 0.18785 -3.491 0.000556 ***
## factor(state)az -0.56773 0.26667 -2.129 0.034107 *
## factor(state)ar -0.65495 0.21902 -2.990 0.003028 **
## factor(state)ca -1.50947 0.30435 -4.960 1.21e-06 ***
## factor(state)co -1.48428 0.28735 -5.165 4.50e-07 ***
## factor(state)ct -1.86226 0.28053 -6.638 1.58e-10 ***
## factor(state)de -1.30760 0.29395 -4.448 1.24e-05 ***
## factor(state)fl -0.26813 0.13933 -1.924 0.055284 .
## factor(state)ga 0.52460 0.18395 2.852 0.004661 **
## factor(state)id -0.66902 0.25797 -2.593 0.009989 **
## factor(state)il -1.96162 0.29150 -6.730 9.23e-11 ***
## factor(state)in -1.46154 0.27254 -5.363 1.69e-07 ***
## factor(state)ia -1.54393 0.25344 -6.092 3.58e-09 ***
## factor(state)ks -1.22322 0.24544 -4.984 1.08e-06 ***
## factor(state)ky -1.21752 0.28717 -4.240 3.02e-05 ***
## factor(state)la -0.84712 0.18867 -4.490 1.03e-05 ***
## factor(state)me -1.10795 0.19112 -5.797 1.78e-08 ***
## factor(state)md -1.70644 0.28322 -6.025 5.17e-09 ***
## factor(state)ma -2.10975 0.27610 -7.641 3.24e-13 ***
## factor(state)mi -1.48453 0.23602 -6.290 1.18e-09 ***
## factor(state)mn -1.89721 0.26509 -7.157 6.92e-12 ***
## factor(state)ms -0.02908 0.14845 -0.196 0.844839
## factor(state)mo -1.29626 0.26669 -4.861 1.93e-06 ***
## factor(state)mt -0.36039 0.26396 -1.365 0.173225
## factor(state)ne -1.52218 0.24928 -6.106 3.30e-09 ***
## factor(state)nv -0.60077 0.28595 -2.101 0.036517 *
## factor(state)nh -1.25445 0.20968 -5.983 6.53e-09 ***
## factor(state)nj -2.10575 0.30720 -6.855 4.37e-11 ***
## factor(state)nm 0.42637 0.25432 1.677 0.094724 .
## factor(state)ny -2.18667 0.29890 -7.316 2.57e-12 ***
## factor(state)nc -0.29047 0.11984 -2.424 0.015979 *
## factor(state)nd -1.62344 0.25381 -6.396 6.45e-10 ***
## factor(state)oh -1.67442 0.25381 -6.597 2.02e-10 ***
## factor(state)ok -0.54506 0.16912 -3.223 0.001415 **
## factor(state)or -1.16800 0.28572 -4.088 5.65e-05 ***
## factor(state)pa -1.76747 0.27610 -6.402 6.26e-10 ***
## factor(state)ri -2.26505 0.29376 -7.711 2.07e-13 ***
## factor(state)sc 0.55717 0.11000 5.065 7.30e-07 ***
## factor(state)sd -1.00372 0.20962 -4.788 2.70e-06 ***
## factor(state)tn -0.87566 0.26802 -3.267 0.001218 **
## factor(state)tx -0.91747 0.24556 -3.736 0.000225 ***
## factor(state)ut -1.16395 0.19642 -5.926 8.88e-09 ***
## factor(state)vt -0.96604 0.21113 -4.576 7.08e-06 ***
## factor(state)va -1.29018 0.20416 -6.320 9.99e-10 ***
## factor(state)wa -1.65952 0.28346 -5.854 1.31e-08 ***
## factor(state)wv -0.89675 0.24661 -3.636 0.000328 ***
## factor(state)wi -1.75927 0.29395 -5.985 6.44e-09 ***
## factor(state)wy -0.22850 0.31290 -0.730 0.465811
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1899 on 287 degrees of freedom
## Multiple R-squared: 0.905, Adjusted R-squared: 0.8891
## F-statistic: 56.97 on 48 and 287 DF, p-value: < 2.2e-16
The intercept of the equation increases to 3.47763 from 1.85331 and remains statistically significant at the 5% level of significance with a p-value of <2e-16. This would mean when the beer tax is 0, the fatality rate is higher at 3.47763%. The beertax changes significantly from 0.36461 to -0.65587 and remains statistically significant at the 5% level of significance with a p-value of 0.000556. This would mean that a one dollar increase in beer tax would result in a 0.65587 percentage point decrease in the fatality rate. Controlling for time-invariant fixed effects eliminates confounding variable bias caused by state-specific confounders. One reason this could be the case is where a increase in beer tax would cause an increase in fatality rate would be that the substitution of beer for spirits and hard liquor. Spirits and hard liquor are more potent than than beer, and would increase fatality rate when consumed, especially if an individual drives after consuming it.
lm3 <- lm(FatalityRate ~ beertax + factor(year), data = Fatalities)
summary(lm3)
##
## Call:
## lm(formula = FatalityRate ~ beertax + factor(year), data = Fatalities)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.06068 -0.38427 -0.09853 0.26781 2.23447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.894848 0.085659 22.121 < 2e-16 ***
## beertax 0.366336 0.062600 5.852 1.18e-08 ***
## factor(year)1983 -0.082036 0.111673 -0.735 0.463
## factor(year)1984 -0.071733 0.111673 -0.642 0.521
## factor(year)1985 -0.110546 0.111676 -0.990 0.323
## factor(year)1986 -0.016118 0.111682 -0.144 0.885
## factor(year)1987 -0.015535 0.111695 -0.139 0.889
## factor(year)1988 -0.001027 0.111718 -0.009 0.993
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5471 on 328 degrees of freedom
## Multiple R-squared: 0.09865, Adjusted R-squared: 0.07941
## F-statistic: 5.128 on 7 and 328 DF, p-value: 1.499e-05
The intercept of the equation slightly increases to 1.894848 from 1.85331 and remains statistically significant at the 5% level of significance with a p-value of <2e-16. This would mean when the beer tax is 0, the fatality rate is slightly higher at 1.894848%. The coefficient beertax increases slightly from 0.36461 to 0.366336 and remains statistically significant at the 5% level of significance with a p-value of 1.18e-08. This would mean that a one dollar increase in beer tax would result in a 0.366336 percentage point increase in the fatality rate. The lack of change in intercept and coefficient values means that there are little confounding variables caused by year-specific confounders.
model4 <- plm(FatalityRate ~ beertax, data = Fatalities, index="state")
summary(model4)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = FatalityRate ~ beertax, data = Fatalities, index = "state")
##
## Balanced Panel: n = 48, T = 7, N = 336
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.5869619 -0.0828376 -0.0012701 0.0795454 0.8977960
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## beertax -0.65587 0.18785 -3.4915 0.000556 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 10.785
## Residual Sum of Squares: 10.345
## R-Squared: 0.040745
## Adj. R-Squared: -0.11969
## F-statistic: 12.1904 on 1 and 287 DF, p-value: 0.00055597
The coefficient of beertax is -0.65587 and statistically significant at the 5% level of significance with a p-value of 0.000556. This exactly the same as the coefficient of beertax in model2. This is because both methods are controlling for entity-specific, time-invariant characteristics. Although the method of calculating the coefficient is different, it will arrive at the same outcome.
model5 <- plm(FatalityRate ~ beertax, data = Fatalities, index="year")
summary(model5)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = FatalityRate ~ beertax, data = Fatalities, index = "year")
##
## Balanced Panel: n = 7, T = 48, N = 336
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -1.224374 -0.410186 -0.037268 0.370379 2.179013
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## beertax 0.023113 0.064229 0.3599 0.7192
##
## Total Sum of Squares: 103.39
## Residual Sum of Squares: 103.35
## R-Squared: 0.00039465
## Adj. R-Squared: -0.020938
## F-statistic: 0.129495 on 1 and 328 DF, p-value: 0.71919
The coefficient of beertax is 0.023113 and statistically insignificant at the 5% level of significance with a p-value of 0.7192. It is not the same when compared to the coefficient of beer tax from model3 of 0.366336. The difference between the two models arises because the lm() model accounts for year-specific effects using dummy variables (which help isolate the impact of beertax), while the plm() model without year fixed effects does not.
# Step 1: Calculate the mean of beertax and FatalityRate for each state
state_means <- Fatalities %>%
group_by(state) %>%
summarise(mean_beertax = mean(beertax, na.rm = TRUE),
mean_fatality_rate = mean(FatalityRate, na.rm = TRUE))
# Step 2: Join the state-level means back to the original dataset and demean the variables
Fatalities_demeaned <- Fatalities %>%
left_join(state_means, by = "state") %>% # Join state-level means to the main dataset
mutate(beertax_demeaned = beertax - mean_beertax, # Subtract state-level mean beertax
FatalityRate_demeaned = FatalityRate - mean_fatality_rate) # Subtract state-level mean fatality rate
# View the dataset with demeaned variables
lm6 <- lm(FatalityRate_demeaned~beertax_demeaned, data = Fatalities_demeaned)
summary(lm6)
##
## Call:
## lm(formula = FatalityRate_demeaned ~ beertax_demeaned, data = Fatalities_demeaned)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.21650 -0.40954 -0.07664 0.37976 2.08508
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.210e-16 3.109e-02 0.000 1.000
## beertax_demeaned -6.559e-01 5.639e-01 -1.163 0.246
##
## Residual standard error: 0.5699 on 334 degrees of freedom
## Multiple R-squared: 0.004035, Adjusted R-squared: 0.001053
## F-statistic: 1.353 on 1 and 334 DF, p-value: 0.2456