1 data management

dta8 <- read.table("https://gattonweb.uky.edu/sheather/book/docs/datasets/boxoffice.txt", header = T)

str(dta8)

## 'data.frame':    32 obs. of  2 variables:
##  $ GrossBoxOffice: num  95.3 86.4 119.4 124.4 154.2 ...
##  $ year          : int  1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 ...

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

dta8 <- dta8 %>% 
  mutate(Year_75 = 1:32 ) %>%
  rename(Box_Office = GrossBoxOffice)

str(dta8)

## 'data.frame':    32 obs. of  3 variables:
##  $ Box_Office: num  95.3 86.4 119.4 124.4 154.2 ...
##  $ year      : int  1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 ...
##  $ Year_75   : int  1 2 3 4 5 6 7 8 9 10 ...

2 Model:

2.1 Box_office_t = β0 + β1 × Year_75t + εt,t = 1, …, 32,where εt ~ N(0, σ).

m4 <- lm(formula = Box_Office ~ Year_75, data = dta8)

summary(m4)

## 
## Call:
## lm(formula = Box_Office ~ Year_75, data = dta8)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -116.382  -79.197    6.083   62.260  121.697 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -55.931     28.034  -1.995   0.0552 .  
## Year_75       29.534      1.483  19.919   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 77.44 on 30 degrees of freedom
## Multiple R-squared:  0.9297, Adjusted R-squared:  0.9274 
## F-statistic: 396.8 on 1 and 30 DF,  p-value: < 2.2e-16

knitr::kable(summary(m4)$coefficients)

	Estimate	Std. Error	t value	Pr(>\|t\|)
(Intercept)	-55.93105	28.034364	-1.995089	0.055186
Year_75	29.53446	1.482698	19.919403	0.000000

library(nlme)

## 
## Attaching package: 'nlme'

## The following object is masked from 'package:dplyr':
## 
##     collapse

knitr::kable(as.data.frame(summary(m4g <- gls(Box_Office ~ Year_75, data = dta8))$tTable))

	Value	Std.Error	t-value	p-value
(Intercept)	-55.93105	28.034364	-1.995089	0.055186
Year_75	29.53446	1.482698	19.919403	0.000000

dta8m1 <- dta8 %>%
  mutate(res0 = resid(m4g, type = "normalized"))

lmtest::dwtest(res0 ~ year, data = dta8m1)

## 
##  Durbin-Watson test
## 
## data:  res0 ~ year
## DW = 0.24809, p-value = 2.344e-13
## alternative hypothesis: true autocorrelation is greater than 0

library(languageR)

acf(resid(m4g, type = "normalized"), main = "LM", ylim = c(-1, 1))

3 Model:

3.1 Box_office_t = β0 + β1 × Year_75t + εt, t = 1, …, 32,where εt = εt-1 + νt, and νt ~ N(0, σ).

m5 <- gls(Box_Office ~ Year_75, data = dta8, method = "ML",correlation=corAR1(form = ~ Year_75))

summary(m5)

## Generalized least squares fit by maximum likelihood
##   Model: Box_Office ~ Year_75 
##   Data: dta8 
##        AIC      BIC    logLik
##   330.3893 336.2522 -161.1947
## 
## Correlation Structure: AR(1)
##  Formula: ~Year_75 
##  Parameter estimate(s):
##       Phi 
## 0.8782065 
## 
## Coefficients:
##                 Value Std.Error  t-value p-value
## (Intercept)  4.514082  72.74393 0.062054  0.9509
## Year_75     27.075395   3.44766 7.853259  0.0000
## 
##  Correlation: 
##         (Intr)
## Year_75 -0.782
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.93420837 -1.38592020  0.01822281  0.33202594  1.54269773 
## 
## Residual standard error: 76.16492 
## Degrees of freedom: 32 total; 30 residual

library(ggplot2)

ggplot(dta8, aes(Year_75, Box_Office)) +
  geom_point(aes(Year_75, Box_Office), color = "blue") +
  stat_smooth(method = "lm", se = F, size = rel(1), lty = 2, color = "red") +
  labs(x = "Year since 1975", y = "Gross Box Office (in million $)") +
  theme(legend.position = "NONE")+
  theme_bw()

## `geom_smooth()` using formula 'y ~ x'

dta9 <- dta8 %>%
  mutate(res1 = resid(m5, type = "normalized"))

ggplot(dta9, aes(Year_75, res1)) +
  geom_point(aes(Year_75, res1), color = "blue") +
  geom_hline(yintercept = 0, size = rel(1), color = "red")+
  labs(x = "Year since 1975", y = "Standarized Residuals") +
  theme(legend.position = "NONE")+
  theme_bw()

lmtest::dwtest(res1 ~ year, data = dta9)

## 
##  Durbin-Watson test
## 
## data:  res1 ~ year
## DW = 2.2012, p-value = 0.6495
## alternative hypothesis: true autocorrelation is greater than 0

acf(resid(m5, type = "normalized"), main = "GLS", ylim = c(-1, 1))

In class Exercise 4 AFC: box office

Hank Lee

2020-09-25

1 data management

2 Model:

2.1 Box_office_t = β0 + β1 × Year_75t + εt,t = 1, …, 32,where εt ~ N(0, σ).

3 Model:

3.1 Box_office_t = β0 + β1 × Year_75t + εt, t = 1, …, 32,where εt = εt-1 + νt, and νt ~ N(0, σ).