library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(AER)
## Warning: package 'AER' was built under R version 4.3.3
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
# Load the movies data and add the log of assaults as the outcome variable.
movies <- read_excel("GA6-movies.xlsx")
movies <- mutate(movies, ln_assualts = log(assaults))

# Q1a: baseline OLS of log assaults on attend_v plus the year*, month*,
# h_* and w_* control variables.
lm1 <- lm(
  ln_assualts ~ attend_v +
    year1 + year2 + year3 + year4 + year5 +
    year6 + year7 + year8 + year9 + year10 +
    month1 + month2 + month3 + month4 + month5 + month6 +
    month7 + month8 + month9 + month10 + month11 + month12 +
    h_chris + h_newyr + h_easter + h_july4 + h_mem + h_labor +
    w_rain + w_snow +
    w_maxa + w_maxb + w_maxc +
    w_mina + w_minb + w_minc,
  data = movies
)
summary(lm1)
## 
## Call:
## lm(formula = ln_assualts ~ attend_v + year1 + year2 + year3 + 
##     year4 + year5 + year6 + year7 + year8 + year9 + year10 + 
##     month1 + month2 + month3 + month4 + month5 + month6 + month7 + 
##     month8 + month9 + month10 + month11 + month12 + h_chris + 
##     h_newyr + h_easter + h_july4 + h_mem + h_labor + w_rain + 
##     w_snow + w_maxa + w_maxb + w_maxc + w_mina + w_minb + w_minc, 
##     data = movies)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31675 -0.02527  0.00034  0.02463  0.19730 
## 
## Coefficients: (2 not defined because of singularities)
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  8.9113127  0.0149406  596.448  < 2e-16 ***
## attend_v    -0.0008530  0.0008919   -0.956  0.33936    
## year1       -2.0611838  0.0089017 -231.551  < 2e-16 ***
## year2       -1.3653260  0.0088541 -154.203  < 2e-16 ***
## year3       -1.0528143  0.0087223 -120.703  < 2e-16 ***
## year4       -0.8449344  0.0090232  -93.640  < 2e-16 ***
## year5       -0.6795645  0.0092505  -73.462  < 2e-16 ***
## year6       -0.3806795  0.0088475  -43.027  < 2e-16 ***
## year7       -0.2333885  0.0087551  -26.657  < 2e-16 ***
## year8       -0.1795589  0.0090378  -19.867  < 2e-16 ***
## year9       -0.1241146  0.0086751  -14.307  < 2e-16 ***
## year10              NA         NA       NA       NA    
## month1       0.0286075  0.0100976    2.833  0.00480 ** 
## month2       0.0235822  0.0101790    2.317  0.02094 *  
## month3       0.0466878  0.0102839    4.540 7.13e-06 ***
## month4       0.0394186  0.0124549    3.165  0.00165 ** 
## month5       0.0354012  0.0145478    2.433  0.01532 *  
## month6      -0.0096040  0.0157464   -0.610  0.54221    
## month7      -0.0204863  0.0177211   -1.156  0.24824    
## month8      -0.0129689  0.0170167   -0.762  0.44636    
## month9       0.0281646  0.0150528    1.871  0.06195 .  
## month10      0.0338791  0.0127922    2.648  0.00835 ** 
## month11     -0.0225111  0.0105447   -2.135  0.03328 *  
## month12             NA         NA       NA       NA    
## h_chris     -0.1020299  0.0237133   -4.303 2.05e-05 ***
## h_newyr      0.2240654  0.0228015    9.827  < 2e-16 ***
## h_easter    -0.0414692  0.0148326   -2.796  0.00538 ** 
## h_july4      0.0353795  0.0206916    1.710  0.08794 .  
## h_mem       -0.0144231  0.0145817   -0.989  0.32310    
## h_labor      0.0261436  0.0145328    1.799  0.07266 .  
## w_rain      -0.0339778  0.0130830   -2.597  0.00969 ** 
## w_snow      -0.0674832  0.0302649   -2.230  0.02623 *  
## w_maxa       0.1091011  0.0137728    7.921 1.65e-14 ***
## w_maxb       0.1058065  0.0188005    5.628 3.11e-08 ***
## w_maxc       0.0273434  0.0709579    0.385  0.70015    
## w_mina      -0.3239640  0.0401277   -8.073 5.55e-15 ***
## w_minb      -0.1691211  0.0275710   -6.134 1.79e-09 ***
## w_minc      -0.1269039  0.0170766   -7.431 4.96e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04282 on 480 degrees of freedom
## Multiple R-squared:  0.9957, Adjusted R-squared:  0.9954 
## F-statistic:  3177 on 35 and 480 DF,  p-value: < 2.2e-16

The coefficient of attend_v is -0.000853 (3sf). It is not statistically significant at the 5% level of significance, as its p-value of 0.339 (3sf) is greater than 0.05. This means we cannot reject the null hypothesis that the coefficient is zero: holding the other regressors constant (ceteris paribus), we find no statistically significant association between the attendance of strongly violent movies (in millions) and the log of assaults (ln_assualts), and consequently the number of assaults.

Q1b) To evaluate whether pr_attend_v is a valid instrumental variable, we must analyse whether it is relevant and exogenous. For relevance, since pr_attend_v is based on historical attendance patterns, it is a strong predictor of attendance for similar movies, hence it is relevant. For exogeneity, pr_attend_v should not be correlated with other determinants of the outcome variable, in this case assaults. Since pr_attend_v is based on historical data and not real-time data on that given weekend, it is unlikely to be correlated with weekend-specific shocks that affect assaults. Hence it is plausibly exogenous. pr_attend_v is therefore a valid instrumental variable.

# First stage: regress the endogenous regressor attend_v on the instrument
# pr_attend_v and the same control variables used in the outcome equation.
lm2 <- lm(
  attend_v ~ pr_attend_v +
    year1 + year2 + year3 + year4 + year5 +
    year6 + year7 + year8 + year9 + year10 +
    month1 + month2 + month3 + month4 + month5 + month6 +
    month7 + month8 + month9 + month10 + month11 + month12 +
    h_chris + h_newyr + h_easter + h_july4 + h_mem + h_labor +
    w_rain + w_snow +
    w_maxa + w_maxb + w_maxc +
    w_mina + w_minb + w_minc,
  data = movies
)
summary(lm2)
## 
## Call:
## lm(formula = attend_v ~ pr_attend_v + year1 + year2 + year3 + 
##     year4 + year5 + year6 + year7 + year8 + year9 + year10 + 
##     month1 + month2 + month3 + month4 + month5 + month6 + month7 + 
##     month8 + month9 + month10 + month11 + month12 + h_chris + 
##     h_newyr + h_easter + h_july4 + h_mem + h_labor + w_rain + 
##     w_snow + w_maxa + w_maxb + w_maxc + w_mina + w_minb + w_minc, 
##     data = movies)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3347 -0.2692 -0.0357  0.1963  3.8132 
## 
## Coefficients: (2 not defined because of singularities)
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.245537   0.205670   1.194  0.23313    
## pr_attend_v  0.942324   0.012027  78.352  < 2e-16 ***
## year1       -0.324759   0.122595  -2.649  0.00834 ** 
## year2       -0.088763   0.121984  -0.728  0.46718    
## year3       -0.008550   0.120213  -0.071  0.94333    
## year4       -0.011520   0.124362  -0.093  0.92623    
## year5       -0.133097   0.127475  -1.044  0.29696    
## year6       -0.164209   0.121932  -1.347  0.17870    
## year7        0.030623   0.120714   0.254  0.79985    
## year8       -0.062035   0.124548  -0.498  0.61866    
## year9       -0.011700   0.119555  -0.098  0.92208    
## year10             NA         NA      NA       NA    
## month1      -0.116264   0.139201  -0.835  0.40401    
## month2       0.138731   0.140173   0.990  0.32281    
## month3      -0.005235   0.141765  -0.037  0.97056    
## month4      -0.107027   0.171695  -0.623  0.53335    
## month5      -0.114172   0.200481  -0.569  0.56929    
## month6      -0.302956   0.217183  -1.395  0.16368    
## month7       0.055711   0.244284   0.228  0.81970    
## month8       0.237221   0.234339   1.012  0.31190    
## month9       0.201379   0.207369   0.971  0.33198    
## month10     -0.034039   0.176392  -0.193  0.84706    
## month11      0.035045   0.145336   0.241  0.80956    
## month12            NA         NA      NA       NA    
## h_chris     -0.160787   0.326842  -0.492  0.62299    
## h_newyr      0.336696   0.314257   1.071  0.28453    
## h_easter     0.186345   0.204567   0.911  0.36279    
## h_july4     -0.315365   0.285060  -1.106  0.26915    
## h_mem       -0.064963   0.200974  -0.323  0.74666    
## h_labor     -0.012914   0.200282  -0.064  0.94861    
## w_rain      -0.146378   0.180389  -0.811  0.41751    
## w_snow       0.908390   0.416803   2.179  0.02979 *  
## w_maxa      -0.041727   0.189820  -0.220  0.82610    
## w_maxb      -0.036809   0.259099  -0.142  0.88709    
## w_maxc      -0.262417   0.977833  -0.268  0.78853    
## w_mina      -0.832017   0.552898  -1.505  0.13303    
## w_minb      -0.442310   0.379875  -1.164  0.24486    
## w_minc       0.202497   0.235264   0.861  0.38982    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5901 on 480 degrees of freedom
## Multiple R-squared:  0.9375, Adjusted R-squared:  0.9329 
## F-statistic: 205.7 on 35 and 480 DF,  p-value: < 2.2e-16
# Second stage (manual 2SLS): replace attend_v with its first-stage fitted
# values and re-run the outcome regression.
# Note: the standard errors reported by this lm() do not account for the
# first-stage estimation, so they are not the proper 2SLS standard errors.
attend_v_pred <- fitted(lm2)
lm3 <- lm(
  ln_assualts ~ attend_v_pred +
    year1 + year2 + year3 + year4 + year5 +
    year6 + year7 + year8 + year9 + year10 +
    month1 + month2 + month3 + month4 + month5 + month6 +
    month7 + month8 + month9 + month10 + month11 + month12 +
    h_chris + h_newyr + h_easter + h_july4 + h_mem + h_labor +
    w_rain + w_snow +
    w_maxa + w_maxb + w_maxc +
    w_mina + w_minb + w_minc,
  data = movies
)
summary(lm3)
## 
## Call:
## lm(formula = ln_assualts ~ attend_v_pred + year1 + year2 + year3 + 
##     year4 + year5 + year6 + year7 + year8 + year9 + year10 + 
##     month1 + month2 + month3 + month4 + month5 + month6 + month7 + 
##     month8 + month9 + month10 + month11 + month12 + h_chris + 
##     h_newyr + h_easter + h_july4 + h_mem + h_labor + w_rain + 
##     w_snow + w_maxa + w_maxb + w_maxc + w_mina + w_minb + w_minc, 
##     data = movies)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31661 -0.02496  0.00093  0.02441  0.19776 
## 
## Coefficients: (2 not defined because of singularities)
##                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)    8.9117534  0.0149398  596.512  < 2e-16 ***
## attend_v_pred -0.0010973  0.0009256   -1.185  0.23641    
## year1         -2.0612741  0.0088976 -231.666  < 2e-16 ***
## year2         -1.3654345  0.0088503 -154.282  < 2e-16 ***
## year3         -1.0527505  0.0087181 -120.754  < 2e-16 ***
## year4         -0.8450896  0.0090200  -93.691  < 2e-16 ***
## year5         -0.6795975  0.0092459  -73.503  < 2e-16 ***
## year6         -0.3806990  0.0088430  -43.051  < 2e-16 ***
## year7         -0.2335642  0.0087525  -26.685  < 2e-16 ***
## year8         -0.1799484  0.0090420  -19.901  < 2e-16 ***
## year9         -0.1241273  0.0086707  -14.316  < 2e-16 ***
## year10                NA         NA       NA       NA    
## month1         0.0286646  0.0100926    2.840  0.00470 ** 
## month2         0.0238306  0.0101770    2.342  0.01961 *  
## month3         0.0469034  0.0102810    4.562 6.44e-06 ***
## month4         0.0395086  0.0124489    3.174  0.00160 ** 
## month5         0.0353633  0.0145404    2.432  0.01538 *  
## month6        -0.0094627  0.0157390   -0.601  0.54797    
## month7        -0.0198699  0.0177232   -1.121  0.26280    
## month8        -0.0125905  0.0170124   -0.740  0.45961    
## month9         0.0283347  0.0150461    1.883  0.06028 .  
## month10        0.0341964  0.0127897    2.674  0.00776 ** 
## month11       -0.0222042  0.0105440   -2.106  0.03573 *  
## month12               NA         NA       NA       NA    
## h_chris       -0.1019405  0.0237014   -4.301 2.06e-05 ***
## h_newyr        0.2240793  0.0227899    9.832  < 2e-16 ***
## h_easter      -0.0416665  0.0148264   -2.810  0.00515 ** 
## h_july4        0.0351880  0.0206820    1.701  0.08952 .  
## h_mem         -0.0143619  0.0145744   -0.985  0.32491    
## h_labor        0.0261129  0.0145254    1.798  0.07285 .  
## w_rain        -0.0338525  0.0130770   -2.589  0.00993 ** 
## w_snow        -0.0671544  0.0302514   -2.220  0.02689 *  
## w_maxa         0.1091570  0.0137659    7.929 1.56e-14 ***
## w_maxb         0.1058051  0.0187910    5.631 3.06e-08 ***
## w_maxc         0.0266674  0.0709251    0.376  0.70709    
## w_mina        -0.3241934  0.0401079   -8.083 5.18e-15 ***
## w_minb        -0.1693078  0.0275576   -6.144 1.69e-09 ***
## w_minc        -0.1267221  0.0170689   -7.424 5.21e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04279 on 480 degrees of freedom
## Multiple R-squared:  0.9957, Adjusted R-squared:  0.9954 
## F-statistic:  3180 on 35 and 480 DF,  p-value: < 2.2e-16

The coefficient of attend_v_pred is -0.00110 (3sf) and is statistically insignificant at the 5% level of significance, as its p-value of 0.236 (3sf) is greater than 0.05. This means changes in the attendance of violent movies are not significantly associated with the number of assaults. Compared to the model in (a), the coefficient decreased from -0.000853 to -0.00110. However, both coefficients are statistically insignificant at the 5% level of significance. We do not have statistically significant evidence that attending violent movies leads to changes in the number of assaults.

# 2SLS via AER::ivreg(). The formula has two right-hand-side parts:
#   regressors | instruments
# The endogenous regressor attend_v appears only in the first part; the
# excluded instrument pr_attend_v appears only in the second part. Every
# exogenous control is listed in both parts because it instruments itself.
#
# The earlier version used the first-stage fitted values (attend_v_pred) as
# both the regressor and its own instrument, which just reproduces the manual
# second-stage OLS and its standard errors. Specifying attend_v and
# pr_attend_v directly lets ivreg() run both stages and report 2SLS standard
# errors.
# NOTE(review): the rendered output below this chunk was produced by the
# earlier call and must be regenerated by re-knitting.
iv1 <- ivreg(
  ln_assualts ~ attend_v +
    year1 + year2 + year3 + year4 + year5 +
    year6 + year7 + year8 + year9 + year10 +
    month1 + month2 + month3 + month4 + month5 + month6 +
    month7 + month8 + month9 + month10 + month11 + month12 +
    h_chris + h_newyr + h_easter + h_july4 + h_mem + h_labor +
    w_rain + w_snow +
    w_maxa + w_maxb + w_maxc +
    w_mina + w_minb + w_minc |
    pr_attend_v +
    year1 + year2 + year3 + year4 + year5 +
    year6 + year7 + year8 + year9 + year10 +
    month1 + month2 + month3 + month4 + month5 + month6 +
    month7 + month8 + month9 + month10 + month11 + month12 +
    h_chris + h_newyr + h_easter + h_july4 + h_mem + h_labor +
    w_rain + w_snow +
    w_maxa + w_maxb + w_maxc +
    w_mina + w_minb + w_minc,
  data = movies
)
summary(iv1)
## 
## Call:
## ivreg(formula = ln_assualts ~ attend_v_pred + year1 + year2 + 
##     year3 + year4 + year5 + year6 + year7 + year8 + year9 + year10 + 
##     month1 + month2 + month3 + month4 + month5 + month6 + month7 + 
##     month8 + month9 + month10 + month11 + month12 + h_chris + 
##     h_newyr + h_easter + h_july4 + h_mem + h_labor + w_rain + 
##     w_snow + w_maxa + w_maxb + w_maxc + w_mina + w_minb + w_minc | 
##     attend_v_pred + year1 + year2 + year3 + year4 + year5 + year6 + 
##         year7 + year8 + year9 + year10 + month1 + month2 + month3 + 
##         month4 + month5 + month6 + month7 + month8 + month9 + 
##         month10 + month11 + month12 + h_chris + h_newyr + h_easter + 
##         h_july4 + h_mem + h_labor + w_rain + w_snow + w_maxa + 
##         w_maxb + w_maxc + w_mina + w_minb + w_minc, data = movies)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.3166147 -0.0249645  0.0009334  0.0244127  0.1977610 
## 
## Coefficients:
##                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)    8.9117534  0.0149398  596.512  < 2e-16 ***
## attend_v_pred -0.0010973  0.0009256   -1.185  0.23641    
## year1         -2.0612741  0.0088976 -231.666  < 2e-16 ***
## year2         -1.3654345  0.0088503 -154.282  < 2e-16 ***
## year3         -1.0527505  0.0087181 -120.754  < 2e-16 ***
## year4         -0.8450896  0.0090200  -93.691  < 2e-16 ***
## year5         -0.6795975  0.0092459  -73.503  < 2e-16 ***
## year6         -0.3806990  0.0088430  -43.051  < 2e-16 ***
## year7         -0.2335642  0.0087525  -26.685  < 2e-16 ***
## year8         -0.1799484  0.0090420  -19.901  < 2e-16 ***
## year9         -0.1241273  0.0086707  -14.316  < 2e-16 ***
## month1         0.0286646  0.0100926    2.840  0.00470 ** 
## month2         0.0238306  0.0101770    2.342  0.01961 *  
## month3         0.0469034  0.0102810    4.562 6.44e-06 ***
## month4         0.0395086  0.0124489    3.174  0.00160 ** 
## month5         0.0353633  0.0145404    2.432  0.01538 *  
## month6        -0.0094627  0.0157390   -0.601  0.54797    
## month7        -0.0198699  0.0177232   -1.121  0.26280    
## month8        -0.0125905  0.0170124   -0.740  0.45961    
## month9         0.0283347  0.0150461    1.883  0.06028 .  
## month10        0.0341964  0.0127897    2.674  0.00776 ** 
## month11       -0.0222042  0.0105440   -2.106  0.03573 *  
## h_chris       -0.1019405  0.0237014   -4.301 2.06e-05 ***
## h_newyr        0.2240793  0.0227899    9.832  < 2e-16 ***
## h_easter      -0.0416665  0.0148264   -2.810  0.00515 ** 
## h_july4        0.0351880  0.0206820    1.701  0.08952 .  
## h_mem         -0.0143619  0.0145744   -0.985  0.32491    
## h_labor        0.0261129  0.0145254    1.798  0.07285 .  
## w_rain        -0.0338525  0.0130770   -2.589  0.00993 ** 
## w_snow        -0.0671544  0.0302514   -2.220  0.02689 *  
## w_maxa         0.1091570  0.0137659    7.929 1.56e-14 ***
## w_maxb         0.1058051  0.0187910    5.631 3.06e-08 ***
## w_maxc         0.0266674  0.0709251    0.376  0.70709    
## w_mina        -0.3241934  0.0401079   -8.083 5.18e-15 ***
## w_minb        -0.1693078  0.0275576   -6.144 1.69e-09 ***
## w_minc        -0.1267221  0.0170689   -7.424 5.21e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04279 on 480 degrees of freedom
## Multiple R-Squared: 0.9957,  Adjusted R-squared: 0.9954 
## Wald test:  3180 on 35 and 480 DF,  p-value: < 2.2e-16

The coefficient of attend_v_pred is -0.00110 (3sf) and is statistically insignificant at the 5% level of significance, as its p-value of 0.236 (3sf) is greater than 0.05. Compared to the model in (a), the coefficient changes from -0.000853 to -0.00110, while both coefficients remain statistically insignificant at the 5% level of significance, as both p-values are greater than 0.05.

Compared to the model in (c), both the coefficient and the p-value remain the same, as the two models are the same and are estimated in the same way.