Setup

library(pacman)
p_load(tidyverse, lmtest)

df <- read_csv("/Users/jenniputz/Downloads/004-data.csv")
## Parsed with column specification:
## cols(
##   t = col_double(),
##   date = col_date(format = ""),
##   year = col_double(),
##   month = col_double(),
##   price_electricity = col_double(),
##   price_coal = col_double(),
##   price_gas = col_double()
## )

Testing for Autocorrelation

reg <- lm(price_coal ~ lag(price_coal, 1) + lag(price_coal, 2) + lag(price_coal, 3) + 
            price_gas + lag(price_gas, 1) + lag(price_gas, 2) + lag(price_gas, 3), data = df)
summary(reg)
## 
## Call:
## lm(formula = price_coal ~ lag(price_coal, 1) + lag(price_coal, 
##     2) + lag(price_coal, 3) + price_gas + lag(price_gas, 1) + 
##     lag(price_gas, 2) + lag(price_gas, 3), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.43058 -0.25740  0.01472  0.24264  2.66523 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.61998    0.46831   1.324    0.188    
## lag(price_coal, 1)  1.02146    0.08414  12.141   <2e-16 ***
## lag(price_coal, 2) -0.16170    0.11945  -1.354    0.178    
## lag(price_coal, 3)  0.02962    0.08456   0.350    0.727    
## price_gas           0.11570    0.17135   0.675    0.501    
## lag(price_gas, 1)   0.02901    0.24703   0.117    0.907    
## lag(price_gas, 2)   0.17311    0.22156   0.781    0.436    
## lag(price_gas, 3)  -0.18463    0.14169  -1.303    0.195    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6509 on 127 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.8495, Adjusted R-squared:  0.8412 
## F-statistic: 102.4 on 7 and 127 DF,  p-value: < 2.2e-16

Next, get the residuals - use 3 NAs because we have 3rd order lags.

df$e <- c(NA, NA, NA, residuals(reg))

Next run the Breusch-Godfrey test. We will check for third-order autocorrelation.

bg_reg <- lm(e ~ price_coal + lag(price_coal, 1) + lag(price_coal, 2) + lag(price_coal, 3) + 
            price_gas + lag(price_gas, 1) + lag(price_gas, 2) + lag(price_gas, 3) +
            lag(e, 1) + lag(e, 2) + lag(e, 3), data = df)

summary(bg_reg)
## 
## Call:
## lm(formula = e ~ price_coal + lag(price_coal, 1) + lag(price_coal, 
##     2) + lag(price_coal, 3) + price_gas + lag(price_gas, 1) + 
##     lag(price_gas, 2) + lag(price_gas, 3) + lag(e, 1) + lag(e, 
##     2) + lag(e, 3), data = df)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.108e-14 -3.744e-16  4.470e-17  4.286e-16  4.630e-15 
## 
## Coefficients:
##                      Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)        -6.200e-01  1.908e-15 -3.249e+14  < 2e-16 ***
## price_coal          1.000e+00  2.064e-16  4.845e+15  < 2e-16 ***
## lag(price_coal, 1) -1.021e+00  1.914e-15 -5.338e+14  < 2e-16 ***
## lag(price_coal, 2)  1.617e-01  2.817e-15  5.741e+13  < 2e-16 ***
## lag(price_coal, 3) -2.962e-02  1.457e-15 -2.033e+13  < 2e-16 ***
## price_gas          -1.157e-01  3.706e-16 -3.122e+14  < 2e-16 ***
## lag(price_gas, 1)  -2.901e-02  5.663e-16 -5.122e+13  < 2e-16 ***
## lag(price_gas, 2)  -1.731e-01  5.825e-16 -2.972e+14  < 2e-16 ***
## lag(price_gas, 3)   1.846e-01  3.828e-16  4.823e+14  < 2e-16 ***
## lag(e, 1)          -3.933e-15  1.963e-15 -2.003e+00  0.04738 *  
## lag(e, 2)          -9.123e-16  1.589e-15 -5.740e-01  0.56690    
## lag(e, 3)          -9.956e-16  3.340e-16 -2.981e+00  0.00348 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.329e-15 on 120 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 2.373e+30 on 11 and 120 DF,  p-value: < 2.2e-16

We dont want the lag(e, n) coefficients to be statistically significant because then there is a relationship between the lagged residuals and the residuals (this is autocorrelation!). Can check it with the waldtest function we used in lab.

p_load(lmtest)
#waldtest(bg_reg, c("lag(e, 1)", "lag(e, 2)", "lag(e, 3)"))

# NOTE: if the waldtest function doesn't work for you (like it didn't for me), try using this one from the survey package:
p_load(survey)
regTermTest(bg_reg, c("lag(e, 1)", "lag(e, 2)", "lag(e, 3)"), null=NULL,df=NULL, method=c("Wald"))
## Wald test for lag(e, 1) lag(e, 2) lag(e, 3)
##  in lm(formula = e ~ price_coal + lag(price_coal, 1) + lag(price_coal, 
##     2) + lag(price_coal, 3) + price_gas + lag(price_gas, 1) + 
##     lag(price_gas, 2) + lag(price_gas, 3) + lag(e, 1) + lag(e, 
##     2) + lag(e, 3), data = df)
## F =  9.502205  on  3  and  120  df: p= 1.1123e-05
# you can find the p-value in the bottom right corner of this output

Remember: \(H_0: r_1 = r_2 = r_3 = 0\) so we reject the null hypotheis and conclude that at least one of the coefficients on the lagged residuals is not equal to zero.