library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(AER)
## Warning: package 'AER' was built under R version 4.3.3
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
# Read the movie data set and add the natural log of assaults as the outcome.
movies <- read_excel("GA6-movies.xlsx")
movies[["ln_assualts"]] <- log(movies[["assaults"]])

# Baseline OLS: log assaults on attend_v plus the year*, month*, h_* and w_*
# control columns. year10 and month12 are kept in the formula; lm() reports
# them as NA ("not defined because of singularities") in the summary.
lm1 <- lm(
  ln_assualts ~ attend_v +
    year1 + year2 + year3 + year4 + year5 +
    year6 + year7 + year8 + year9 + year10 +
    month1 + month2 + month3 + month4 + month5 + month6 +
    month7 + month8 + month9 + month10 + month11 + month12 +
    h_chris + h_newyr + h_easter + h_july4 + h_mem + h_labor +
    w_rain + w_snow +
    w_maxa + w_maxb + w_maxc +
    w_mina + w_minb + w_minc,
  data = movies
)
summary(lm1)
##
## Call:
## lm(formula = ln_assualts ~ attend_v + year1 + year2 + year3 +
## year4 + year5 + year6 + year7 + year8 + year9 + year10 +
## month1 + month2 + month3 + month4 + month5 + month6 + month7 +
## month8 + month9 + month10 + month11 + month12 + h_chris +
## h_newyr + h_easter + h_july4 + h_mem + h_labor + w_rain +
## w_snow + w_maxa + w_maxb + w_maxc + w_mina + w_minb + w_minc,
## data = movies)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31675 -0.02527 0.00034 0.02463 0.19730
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.9113127 0.0149406 596.448 < 2e-16 ***
## attend_v -0.0008530 0.0008919 -0.956 0.33936
## year1 -2.0611838 0.0089017 -231.551 < 2e-16 ***
## year2 -1.3653260 0.0088541 -154.203 < 2e-16 ***
## year3 -1.0528143 0.0087223 -120.703 < 2e-16 ***
## year4 -0.8449344 0.0090232 -93.640 < 2e-16 ***
## year5 -0.6795645 0.0092505 -73.462 < 2e-16 ***
## year6 -0.3806795 0.0088475 -43.027 < 2e-16 ***
## year7 -0.2333885 0.0087551 -26.657 < 2e-16 ***
## year8 -0.1795589 0.0090378 -19.867 < 2e-16 ***
## year9 -0.1241146 0.0086751 -14.307 < 2e-16 ***
## year10 NA NA NA NA
## month1 0.0286075 0.0100976 2.833 0.00480 **
## month2 0.0235822 0.0101790 2.317 0.02094 *
## month3 0.0466878 0.0102839 4.540 7.13e-06 ***
## month4 0.0394186 0.0124549 3.165 0.00165 **
## month5 0.0354012 0.0145478 2.433 0.01532 *
## month6 -0.0096040 0.0157464 -0.610 0.54221
## month7 -0.0204863 0.0177211 -1.156 0.24824
## month8 -0.0129689 0.0170167 -0.762 0.44636
## month9 0.0281646 0.0150528 1.871 0.06195 .
## month10 0.0338791 0.0127922 2.648 0.00835 **
## month11 -0.0225111 0.0105447 -2.135 0.03328 *
## month12 NA NA NA NA
## h_chris -0.1020299 0.0237133 -4.303 2.05e-05 ***
## h_newyr 0.2240654 0.0228015 9.827 < 2e-16 ***
## h_easter -0.0414692 0.0148326 -2.796 0.00538 **
## h_july4 0.0353795 0.0206916 1.710 0.08794 .
## h_mem -0.0144231 0.0145817 -0.989 0.32310
## h_labor 0.0261436 0.0145328 1.799 0.07266 .
## w_rain -0.0339778 0.0130830 -2.597 0.00969 **
## w_snow -0.0674832 0.0302649 -2.230 0.02623 *
## w_maxa 0.1091011 0.0137728 7.921 1.65e-14 ***
## w_maxb 0.1058065 0.0188005 5.628 3.11e-08 ***
## w_maxc 0.0273434 0.0709579 0.385 0.70015
## w_mina -0.3239640 0.0401277 -8.073 5.55e-15 ***
## w_minb -0.1691211 0.0275710 -6.134 1.79e-09 ***
## w_minc -0.1269039 0.0170766 -7.431 4.96e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04282 on 480 degrees of freedom
## Multiple R-squared: 0.9957, Adjusted R-squared: 0.9954
## F-statistic: 3177 on 35 and 480 DF, p-value: < 2.2e-16
The coefficient of attend_v is -0.000853 (3sf). It is not statistically significant at the 5% level of significance, as its p-value of 0.339 (3sf) is greater than 0.05. This means we cannot reject the null hypothesis that the coefficient is zero: there is no statistically significant evidence that a change in the attendance of strongly violent movies (in millions) is associated with a change in ln_assualts, and hence in the number of assaults, ceteris paribus.
Q1b) To evaluate whether pr_attend_v is a valid instrumental variable, we must analyse whether it is relevant and exogenous. For relevance, since pr_attend_v is based on historical attendance patterns, it is a strong predictor of future attendance for similar movies, hence it is relevant. For exogeneity, pr_attend_v should not be correlated with other determinants of the outcome variable, in this case assaults. Since pr_attend_v is based on historical data and not real-time data on that given weekend, it is unlikely to be correlated with unobserved factors that affect assaults on that weekend; it should affect assaults only through actual attendance. Hence it is plausibly exogenous. We therefore conclude that pr_attend_v is a valid instrumental variable.
# First stage: regress the endogenous regressor attend_v on the instrument
# pr_attend_v and the same year*, month*, h_* and w_* controls used in lm1.
lm2 <- lm(
  attend_v ~ pr_attend_v +
    year1 + year2 + year3 + year4 + year5 +
    year6 + year7 + year8 + year9 + year10 +
    month1 + month2 + month3 + month4 + month5 + month6 +
    month7 + month8 + month9 + month10 + month11 + month12 +
    h_chris + h_newyr + h_easter + h_july4 + h_mem + h_labor +
    w_rain + w_snow +
    w_maxa + w_maxb + w_maxc +
    w_mina + w_minb + w_minc,
  data = movies
)
summary(lm2)
##
## Call:
## lm(formula = attend_v ~ pr_attend_v + year1 + year2 + year3 +
## year4 + year5 + year6 + year7 + year8 + year9 + year10 +
## month1 + month2 + month3 + month4 + month5 + month6 + month7 +
## month8 + month9 + month10 + month11 + month12 + h_chris +
## h_newyr + h_easter + h_july4 + h_mem + h_labor + w_rain +
## w_snow + w_maxa + w_maxb + w_maxc + w_mina + w_minb + w_minc,
## data = movies)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3347 -0.2692 -0.0357 0.1963 3.8132
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.245537 0.205670 1.194 0.23313
## pr_attend_v 0.942324 0.012027 78.352 < 2e-16 ***
## year1 -0.324759 0.122595 -2.649 0.00834 **
## year2 -0.088763 0.121984 -0.728 0.46718
## year3 -0.008550 0.120213 -0.071 0.94333
## year4 -0.011520 0.124362 -0.093 0.92623
## year5 -0.133097 0.127475 -1.044 0.29696
## year6 -0.164209 0.121932 -1.347 0.17870
## year7 0.030623 0.120714 0.254 0.79985
## year8 -0.062035 0.124548 -0.498 0.61866
## year9 -0.011700 0.119555 -0.098 0.92208
## year10 NA NA NA NA
## month1 -0.116264 0.139201 -0.835 0.40401
## month2 0.138731 0.140173 0.990 0.32281
## month3 -0.005235 0.141765 -0.037 0.97056
## month4 -0.107027 0.171695 -0.623 0.53335
## month5 -0.114172 0.200481 -0.569 0.56929
## month6 -0.302956 0.217183 -1.395 0.16368
## month7 0.055711 0.244284 0.228 0.81970
## month8 0.237221 0.234339 1.012 0.31190
## month9 0.201379 0.207369 0.971 0.33198
## month10 -0.034039 0.176392 -0.193 0.84706
## month11 0.035045 0.145336 0.241 0.80956
## month12 NA NA NA NA
## h_chris -0.160787 0.326842 -0.492 0.62299
## h_newyr 0.336696 0.314257 1.071 0.28453
## h_easter 0.186345 0.204567 0.911 0.36279
## h_july4 -0.315365 0.285060 -1.106 0.26915
## h_mem -0.064963 0.200974 -0.323 0.74666
## h_labor -0.012914 0.200282 -0.064 0.94861
## w_rain -0.146378 0.180389 -0.811 0.41751
## w_snow 0.908390 0.416803 2.179 0.02979 *
## w_maxa -0.041727 0.189820 -0.220 0.82610
## w_maxb -0.036809 0.259099 -0.142 0.88709
## w_maxc -0.262417 0.977833 -0.268 0.78853
## w_mina -0.832017 0.552898 -1.505 0.13303
## w_minb -0.442310 0.379875 -1.164 0.24486
## w_minc 0.202497 0.235264 0.861 0.38982
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5901 on 480 degrees of freedom
## Multiple R-squared: 0.9375, Adjusted R-squared: 0.9329
## F-statistic: 205.7 on 35 and 480 DF, p-value: < 2.2e-16
# Second stage (manual 2SLS): replace attend_v with its first-stage fitted
# values and re-run the outcome regression with the same controls.
# attend_v_pred lives in the calling environment, not in `movies`; lm() finds
# it there because it is not a column of the data.
# NOTE(review): standard errors from this manual second stage do not account
# for the first-stage estimation, so treat the reported p-values with care.
attend_v_pred <- lm2[["fitted.values"]]
lm3 <- lm(
  ln_assualts ~ attend_v_pred +
    year1 + year2 + year3 + year4 + year5 +
    year6 + year7 + year8 + year9 + year10 +
    month1 + month2 + month3 + month4 + month5 + month6 +
    month7 + month8 + month9 + month10 + month11 + month12 +
    h_chris + h_newyr + h_easter + h_july4 + h_mem + h_labor +
    w_rain + w_snow +
    w_maxa + w_maxb + w_maxc +
    w_mina + w_minb + w_minc,
  data = movies
)
summary(lm3)
##
## Call:
## lm(formula = ln_assualts ~ attend_v_pred + year1 + year2 + year3 +
## year4 + year5 + year6 + year7 + year8 + year9 + year10 +
## month1 + month2 + month3 + month4 + month5 + month6 + month7 +
## month8 + month9 + month10 + month11 + month12 + h_chris +
## h_newyr + h_easter + h_july4 + h_mem + h_labor + w_rain +
## w_snow + w_maxa + w_maxb + w_maxc + w_mina + w_minb + w_minc,
## data = movies)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31661 -0.02496 0.00093 0.02441 0.19776
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.9117534 0.0149398 596.512 < 2e-16 ***
## attend_v_pred -0.0010973 0.0009256 -1.185 0.23641
## year1 -2.0612741 0.0088976 -231.666 < 2e-16 ***
## year2 -1.3654345 0.0088503 -154.282 < 2e-16 ***
## year3 -1.0527505 0.0087181 -120.754 < 2e-16 ***
## year4 -0.8450896 0.0090200 -93.691 < 2e-16 ***
## year5 -0.6795975 0.0092459 -73.503 < 2e-16 ***
## year6 -0.3806990 0.0088430 -43.051 < 2e-16 ***
## year7 -0.2335642 0.0087525 -26.685 < 2e-16 ***
## year8 -0.1799484 0.0090420 -19.901 < 2e-16 ***
## year9 -0.1241273 0.0086707 -14.316 < 2e-16 ***
## year10 NA NA NA NA
## month1 0.0286646 0.0100926 2.840 0.00470 **
## month2 0.0238306 0.0101770 2.342 0.01961 *
## month3 0.0469034 0.0102810 4.562 6.44e-06 ***
## month4 0.0395086 0.0124489 3.174 0.00160 **
## month5 0.0353633 0.0145404 2.432 0.01538 *
## month6 -0.0094627 0.0157390 -0.601 0.54797
## month7 -0.0198699 0.0177232 -1.121 0.26280
## month8 -0.0125905 0.0170124 -0.740 0.45961
## month9 0.0283347 0.0150461 1.883 0.06028 .
## month10 0.0341964 0.0127897 2.674 0.00776 **
## month11 -0.0222042 0.0105440 -2.106 0.03573 *
## month12 NA NA NA NA
## h_chris -0.1019405 0.0237014 -4.301 2.06e-05 ***
## h_newyr 0.2240793 0.0227899 9.832 < 2e-16 ***
## h_easter -0.0416665 0.0148264 -2.810 0.00515 **
## h_july4 0.0351880 0.0206820 1.701 0.08952 .
## h_mem -0.0143619 0.0145744 -0.985 0.32491
## h_labor 0.0261129 0.0145254 1.798 0.07285 .
## w_rain -0.0338525 0.0130770 -2.589 0.00993 **
## w_snow -0.0671544 0.0302514 -2.220 0.02689 *
## w_maxa 0.1091570 0.0137659 7.929 1.56e-14 ***
## w_maxb 0.1058051 0.0187910 5.631 3.06e-08 ***
## w_maxc 0.0266674 0.0709251 0.376 0.70709
## w_mina -0.3241934 0.0401079 -8.083 5.18e-15 ***
## w_minb -0.1693078 0.0275576 -6.144 1.69e-09 ***
## w_minc -0.1267221 0.0170689 -7.424 5.21e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04279 on 480 degrees of freedom
## Multiple R-squared: 0.9957, Adjusted R-squared: 0.9954
## F-statistic: 3180 on 35 and 480 DF, p-value: < 2.2e-16
The coefficient of attend_v_pred is -0.00110 (3sf) and is statistically insignificant at the 5% level of significance, as its p-value of 0.236 (3sf) is greater than 0.05. This means that changes in the attendance of violent movies are not significantly associated with the number of assaults. Compared to the model in (a), the coefficient decreased from -0.000853 to -0.00110. However, both coefficients are statistically insignificant at the 5% level of significance. We do not have statistically significant evidence that attending violent movies leads to changes in the number of assaults.
# Two-stage least squares via AER::ivreg().
# The formula has two right-hand-side parts separated by `|`:
#   * left of `|`  : the structural regressors, with the ORIGINAL endogenous
#                    variable attend_v (not its first-stage fitted values);
#   * right of `|` : the instruments, i.e. pr_attend_v for attend_v plus every
#                    exogenous control, each acting as its own instrument.
# Passing the fitted values attend_v_pred on both sides (as was done before)
# makes ivreg() treat them as exogenous and simply repeats the manual
# second-stage OLS, including its standard errors. Specifying the model this
# way lets ivreg() run both stages itself and compute the standard errors from
# the residuals based on the actual attend_v.
iv1 <- ivreg(
  ln_assualts ~ attend_v +
    year1 + year2 + year3 + year4 + year5 +
    year6 + year7 + year8 + year9 + year10 +
    month1 + month2 + month3 + month4 + month5 + month6 +
    month7 + month8 + month9 + month10 + month11 + month12 +
    h_chris + h_newyr + h_easter + h_july4 + h_mem + h_labor +
    w_rain + w_snow +
    w_maxa + w_maxb + w_maxc +
    w_mina + w_minb + w_minc |
    pr_attend_v +
    year1 + year2 + year3 + year4 + year5 +
    year6 + year7 + year8 + year9 + year10 +
    month1 + month2 + month3 + month4 + month5 + month6 +
    month7 + month8 + month9 + month10 + month11 + month12 +
    h_chris + h_newyr + h_easter + h_july4 + h_mem + h_labor +
    w_rain + w_snow +
    w_maxa + w_maxb + w_maxc +
    w_mina + w_minb + w_minc,
  data = movies
)
summary(iv1)
##
## Call:
## ivreg(formula = ln_assualts ~ attend_v_pred + year1 + year2 +
## year3 + year4 + year5 + year6 + year7 + year8 + year9 + year10 +
## month1 + month2 + month3 + month4 + month5 + month6 + month7 +
## month8 + month9 + month10 + month11 + month12 + h_chris +
## h_newyr + h_easter + h_july4 + h_mem + h_labor + w_rain +
## w_snow + w_maxa + w_maxb + w_maxc + w_mina + w_minb + w_minc |
## attend_v_pred + year1 + year2 + year3 + year4 + year5 + year6 +
## year7 + year8 + year9 + year10 + month1 + month2 + month3 +
## month4 + month5 + month6 + month7 + month8 + month9 +
## month10 + month11 + month12 + h_chris + h_newyr + h_easter +
## h_july4 + h_mem + h_labor + w_rain + w_snow + w_maxa +
## w_maxb + w_maxc + w_mina + w_minb + w_minc, data = movies)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3166147 -0.0249645 0.0009334 0.0244127 0.1977610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.9117534 0.0149398 596.512 < 2e-16 ***
## attend_v_pred -0.0010973 0.0009256 -1.185 0.23641
## year1 -2.0612741 0.0088976 -231.666 < 2e-16 ***
## year2 -1.3654345 0.0088503 -154.282 < 2e-16 ***
## year3 -1.0527505 0.0087181 -120.754 < 2e-16 ***
## year4 -0.8450896 0.0090200 -93.691 < 2e-16 ***
## year5 -0.6795975 0.0092459 -73.503 < 2e-16 ***
## year6 -0.3806990 0.0088430 -43.051 < 2e-16 ***
## year7 -0.2335642 0.0087525 -26.685 < 2e-16 ***
## year8 -0.1799484 0.0090420 -19.901 < 2e-16 ***
## year9 -0.1241273 0.0086707 -14.316 < 2e-16 ***
## month1 0.0286646 0.0100926 2.840 0.00470 **
## month2 0.0238306 0.0101770 2.342 0.01961 *
## month3 0.0469034 0.0102810 4.562 6.44e-06 ***
## month4 0.0395086 0.0124489 3.174 0.00160 **
## month5 0.0353633 0.0145404 2.432 0.01538 *
## month6 -0.0094627 0.0157390 -0.601 0.54797
## month7 -0.0198699 0.0177232 -1.121 0.26280
## month8 -0.0125905 0.0170124 -0.740 0.45961
## month9 0.0283347 0.0150461 1.883 0.06028 .
## month10 0.0341964 0.0127897 2.674 0.00776 **
## month11 -0.0222042 0.0105440 -2.106 0.03573 *
## h_chris -0.1019405 0.0237014 -4.301 2.06e-05 ***
## h_newyr 0.2240793 0.0227899 9.832 < 2e-16 ***
## h_easter -0.0416665 0.0148264 -2.810 0.00515 **
## h_july4 0.0351880 0.0206820 1.701 0.08952 .
## h_mem -0.0143619 0.0145744 -0.985 0.32491
## h_labor 0.0261129 0.0145254 1.798 0.07285 .
## w_rain -0.0338525 0.0130770 -2.589 0.00993 **
## w_snow -0.0671544 0.0302514 -2.220 0.02689 *
## w_maxa 0.1091570 0.0137659 7.929 1.56e-14 ***
## w_maxb 0.1058051 0.0187910 5.631 3.06e-08 ***
## w_maxc 0.0266674 0.0709251 0.376 0.70709
## w_mina -0.3241934 0.0401079 -8.083 5.18e-15 ***
## w_minb -0.1693078 0.0275576 -6.144 1.69e-09 ***
## w_minc -0.1267221 0.0170689 -7.424 5.21e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04279 on 480 degrees of freedom
## Multiple R-Squared: 0.9957, Adjusted R-squared: 0.9954
## Wald test: 3180 on 35 and 480 DF, p-value: < 2.2e-16
The coefficient of attend_v_pred is -0.00110 (3sf) and is statistically insignificant at the 5% level of significance, as its p-value of 0.236 (3sf) is greater than 0.05. Compared to the model in (a), the coefficient changes from -0.000853 to -0.00110, while both coefficients remain statistically insignificant at the 5% level of significance, as both p-values are greater than 0.05.
Compared to the model in (c), both the coefficient and the p-value remain the same, as the two models are the same and are calculated in the same way.