dta8 <- read.table("https://gattonweb.uky.edu/sheather/book/docs/datasets/boxoffice.txt", header = T)
str(dta8)
## 'data.frame': 32 obs. of 2 variables:
## $ GrossBoxOffice: num 95.3 86.4 119.4 124.4 154.2 ...
## $ year : int 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 ...
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
dta8 <- dta8 %>%
mutate(Year_75 = 1:32 ) %>%
rename(Box_Office = GrossBoxOffice)
str(dta8)
## 'data.frame': 32 obs. of 3 variables:
## $ Box_Office: num 95.3 86.4 119.4 124.4 154.2 ...
## $ year : int 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 ...
## $ Year_75 : int 1 2 3 4 5 6 7 8 9 10 ...
m4 <- lm(formula = Box_Office ~ Year_75, data = dta8)
summary(m4)
##
## Call:
## lm(formula = Box_Office ~ Year_75, data = dta8)
##
## Residuals:
## Min 1Q Median 3Q Max
## -116.382 -79.197 6.083 62.260 121.697
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -55.931 28.034 -1.995 0.0552 .
## Year_75 29.534 1.483 19.919 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 77.44 on 30 degrees of freedom
## Multiple R-squared: 0.9297, Adjusted R-squared: 0.9274
## F-statistic: 396.8 on 1 and 30 DF, p-value: < 2.2e-16
knitr::kable(summary(m4)$coefficients)
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | -55.93105 | 28.034364 | -1.995089 | 0.055186 |
| Year_75 | 29.53446 | 1.482698 | 19.919403 | 0.000000 |
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
knitr::kable(as.data.frame(summary(m4g <- gls(Box_Office ~ Year_75, data = dta8))$tTable))
| Value | Std.Error | t-value | p-value | |
|---|---|---|---|---|
| (Intercept) | -55.93105 | 28.034364 | -1.995089 | 0.055186 |
| Year_75 | 29.53446 | 1.482698 | 19.919403 | 0.000000 |
dta8m1 <- dta8 %>%
mutate(res0 = resid(m4g, type = "normalized"))
lmtest::dwtest(res0 ~ year, data = dta8m1)
##
## Durbin-Watson test
##
## data: res0 ~ year
## DW = 0.24809, p-value = 2.344e-13
## alternative hypothesis: true autocorrelation is greater than 0
library(languageR)
acf(resid(m4g, type = "normalized"), main = "LM", ylim = c(-1, 1))
m5 <- gls(Box_Office ~ Year_75, data = dta8, method = "ML",correlation=corAR1(form = ~ Year_75))
summary(m5)
## Generalized least squares fit by maximum likelihood
## Model: Box_Office ~ Year_75
## Data: dta8
## AIC BIC logLik
## 330.3893 336.2522 -161.1947
##
## Correlation Structure: AR(1)
## Formula: ~Year_75
## Parameter estimate(s):
## Phi
## 0.8782065
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 4.514082 72.74393 0.062054 0.9509
## Year_75 27.075395 3.44766 7.853259 0.0000
##
## Correlation:
## (Intr)
## Year_75 -0.782
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.93420837 -1.38592020 0.01822281 0.33202594 1.54269773
##
## Residual standard error: 76.16492
## Degrees of freedom: 32 total; 30 residual
library(ggplot2)
ggplot(dta8, aes(Year_75, Box_Office)) +
geom_point(aes(Year_75, Box_Office), color = "blue") +
stat_smooth(method = "lm", se = F, size = rel(1), lty = 2, color = "red") +
labs(x = "Year since 1975", y = "Gross Box Office (in million $)") +
theme(legend.position = "NONE")+
theme_bw()
## `geom_smooth()` using formula 'y ~ x'
dta9 <- dta8 %>%
mutate(res1 = resid(m5, type = "normalized"))
ggplot(dta9, aes(Year_75, res1)) +
geom_point(aes(Year_75, res1), color = "blue") +
geom_hline(yintercept = 0, size = rel(1), color = "red")+
labs(x = "Year since 1975", y = "Standarized Residuals") +
theme(legend.position = "NONE")+
theme_bw()
lmtest::dwtest(res1 ~ year, data = dta9)
##
## Durbin-Watson test
##
## data: res1 ~ year
## DW = 2.2012, p-value = 0.6495
## alternative hypothesis: true autocorrelation is greater than 0
acf(resid(m5, type = "normalized"), main = "GLS", ylim = c(-1, 1))