Setup

library(pacman)
p_load(tidyverse, lmtest)
## also installing the dependency 'zoo'
## 
## The downloaded binary packages are in
##  /var/folders/30/xcfr30vj55q2bbs6dlm27fqw0000gn/T//Rtmp4AaN4l/downloaded_packages
## 
## lmtest installed
df <- read_csv("/Users/jenniputz/Downloads/004-data.csv")
## Parsed with column specification:
## cols(
##   t = col_double(),
##   date = col_date(format = ""),
##   year = col_double(),
##   month = col_double(),
##   price_electricity = col_double(),
##   price_coal = col_double(),
##   price_gas = col_double()
## )

Testing for Autocorrelation

reg <- lm(price_coal ~ lag(price_coal, 1) + lag(price_coal, 2) + lag(price_coal, 3) + 
            price_gas + lag(price_gas, 1) + lag(price_gas, 2) + lag(price_gas, 3), data = df)
summary(reg)
## 
## Call:
## lm(formula = price_coal ~ lag(price_coal, 1) + lag(price_coal, 
##     2) + lag(price_coal, 3) + price_gas + lag(price_gas, 1) + 
##     lag(price_gas, 2) + lag(price_gas, 3), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.43058 -0.25740  0.01472  0.24264  2.66523 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.61998    0.46831   1.324    0.188    
## lag(price_coal, 1)  1.02146    0.08414  12.141   <2e-16 ***
## lag(price_coal, 2) -0.16170    0.11945  -1.354    0.178    
## lag(price_coal, 3)  0.02962    0.08456   0.350    0.727    
## price_gas           0.11570    0.17135   0.675    0.501    
## lag(price_gas, 1)   0.02901    0.24703   0.117    0.907    
## lag(price_gas, 2)   0.17311    0.22156   0.781    0.436    
## lag(price_gas, 3)  -0.18463    0.14169  -1.303    0.195    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6509 on 127 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.8495, Adjusted R-squared:  0.8412 
## F-statistic: 102.4 on 7 and 127 DF,  p-value: < 2.2e-16

Next, get the residuals - use 3 NAs because we have 3rd order lags.

df$e <- c(NA, NA, NA, residuals(reg))

Next run the Breusch-Godfrey test. We will check for third-order autocorrelation.

bg_reg <- lm(e ~  lag(price_coal, 1) + lag(price_coal, 2) + lag(price_coal, 3) + 
            price_gas + lag(price_gas, 1) + lag(price_gas, 2) + lag(price_gas, 3) +
            lag(e, 1) + lag(e, 2) + lag(e, 3), data = df)

summary(bg_reg)
## 
## Call:
## lm(formula = e ~ lag(price_coal, 1) + lag(price_coal, 2) + lag(price_coal, 
##     3) + price_gas + lag(price_gas, 1) + lag(price_gas, 2) + 
##     lag(price_gas, 3) + lag(e, 1) + lag(e, 2) + lag(e, 3), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.32892 -0.28103  0.03465  0.28504  1.81211 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         0.20528    0.83722   0.245    0.807  
## lag(price_coal, 1) -0.39502    0.84092  -0.470    0.639  
## lag(price_coal, 2)  0.87240    1.23899   0.704    0.483  
## lag(price_coal, 3) -0.46207    0.64063  -0.721    0.472  
## price_gas           0.21111    0.16052   1.315    0.191  
## lag(price_gas, 1)  -0.14128    0.24922  -0.567    0.572  
## lag(price_gas, 2)   0.20778    0.25423   0.817    0.415  
## lag(price_gas, 3)  -0.35219    0.16139  -2.182    0.031 *
## lag(e, 1)           0.37563    0.86409   0.435    0.665  
## lag(e, 2)          -0.34113    0.69916  -0.488    0.626  
## lag(e, 3)           0.03102    0.14709   0.211    0.833  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5852 on 121 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.1008, Adjusted R-squared:  0.02649 
## F-statistic: 1.357 on 10 and 121 DF,  p-value: 0.2087

We dont want the lag(e, n) coefficients to be statistically significant because then there is a relationship between the lagged residuals and the residuals (this is autocorrelation!). Can check it with the waldtest function we used in lab.

p_load(lmtest)
#waldtest(bg_reg, c("lag(e, 1)", "lag(e, 2)", "lag(e, 3)"))

# NOTE: if the waldtest function doesn't work for you (like it didn't for me), try using this one from the survey package:
p_load(survey)
## also installing the dependencies 'minqa', 'mitools'
## 
## The downloaded binary packages are in
##  /var/folders/30/xcfr30vj55q2bbs6dlm27fqw0000gn/T//Rtmp4AaN4l/downloaded_packages
## 
## survey installed
regTermTest(bg_reg, c("lag(e, 1)", "lag(e, 2)", "lag(e, 3)"), null=NULL,df=NULL, method=c("Wald"))
## Wald test for lag(e, 1) lag(e, 2) lag(e, 3)
##  in lm(formula = e ~ lag(price_coal, 1) + lag(price_coal, 2) + lag(price_coal, 
##     3) + price_gas + lag(price_gas, 1) + lag(price_gas, 2) + 
##     lag(price_gas, 3) + lag(e, 1) + lag(e, 2) + lag(e, 3), data = df)
## F =  0.4910438  on  3  and  121  df: p= 0.68915
# you can find the p-value in the bottom right corner of this output

Remember: \(H_0: r_1 = r_2 = r_3 = 0\) so we reject the null hypotheis and conclude that at least one of the coefficients on the lagged residuals is not equal to zero.