library(pacman)
p_load(tidyverse, lmtest)
df <- read_csv("/Users/jenniputz/Downloads/004-data.csv")
## Parsed with column specification:
## cols(
## t = col_double(),
## date = col_date(format = ""),
## year = col_double(),
## month = col_double(),
## price_electricity = col_double(),
## price_coal = col_double(),
## price_gas = col_double()
## )
reg <- lm(price_coal ~ lag(price_coal, 1) + lag(price_coal, 2) + lag(price_coal, 3) +
price_gas + lag(price_gas, 1) + lag(price_gas, 2) + lag(price_gas, 3), data = df)
summary(reg)
##
## Call:
## lm(formula = price_coal ~ lag(price_coal, 1) + lag(price_coal,
## 2) + lag(price_coal, 3) + price_gas + lag(price_gas, 1) +
## lag(price_gas, 2) + lag(price_gas, 3), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.43058 -0.25740 0.01472 0.24264 2.66523
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.61998 0.46831 1.324 0.188
## lag(price_coal, 1) 1.02146 0.08414 12.141 <2e-16 ***
## lag(price_coal, 2) -0.16170 0.11945 -1.354 0.178
## lag(price_coal, 3) 0.02962 0.08456 0.350 0.727
## price_gas 0.11570 0.17135 0.675 0.501
## lag(price_gas, 1) 0.02901 0.24703 0.117 0.907
## lag(price_gas, 2) 0.17311 0.22156 0.781 0.436
## lag(price_gas, 3) -0.18463 0.14169 -1.303 0.195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6509 on 127 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8495, Adjusted R-squared: 0.8412
## F-statistic: 102.4 on 7 and 127 DF, p-value: < 2.2e-16
Next, get the residuals - use 3 NAs because we have 3rd order lags.
df$e <- c(NA, NA, NA, residuals(reg))
Next run the Breusch-Godfrey test. We will check for third-order autocorrelation.
bg_reg <- lm(e ~ price_coal + lag(price_coal, 1) + lag(price_coal, 2) + lag(price_coal, 3) +
price_gas + lag(price_gas, 1) + lag(price_gas, 2) + lag(price_gas, 3) +
lag(e, 1) + lag(e, 2) + lag(e, 3), data = df)
summary(bg_reg)
##
## Call:
## lm(formula = e ~ price_coal + lag(price_coal, 1) + lag(price_coal,
## 2) + lag(price_coal, 3) + price_gas + lag(price_gas, 1) +
## lag(price_gas, 2) + lag(price_gas, 3) + lag(e, 1) + lag(e,
## 2) + lag(e, 3), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.108e-14 -3.744e-16 4.470e-17 4.286e-16 4.630e-15
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.200e-01 1.908e-15 -3.249e+14 < 2e-16 ***
## price_coal 1.000e+00 2.064e-16 4.845e+15 < 2e-16 ***
## lag(price_coal, 1) -1.021e+00 1.914e-15 -5.338e+14 < 2e-16 ***
## lag(price_coal, 2) 1.617e-01 2.817e-15 5.741e+13 < 2e-16 ***
## lag(price_coal, 3) -2.962e-02 1.457e-15 -2.033e+13 < 2e-16 ***
## price_gas -1.157e-01 3.706e-16 -3.122e+14 < 2e-16 ***
## lag(price_gas, 1) -2.901e-02 5.663e-16 -5.122e+13 < 2e-16 ***
## lag(price_gas, 2) -1.731e-01 5.825e-16 -2.972e+14 < 2e-16 ***
## lag(price_gas, 3) 1.846e-01 3.828e-16 4.823e+14 < 2e-16 ***
## lag(e, 1) -3.933e-15 1.963e-15 -2.003e+00 0.04738 *
## lag(e, 2) -9.123e-16 1.589e-15 -5.740e-01 0.56690
## lag(e, 3) -9.956e-16 3.340e-16 -2.981e+00 0.00348 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.329e-15 on 120 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 2.373e+30 on 11 and 120 DF, p-value: < 2.2e-16
We dont want the lag(e, n) coefficients to be statistically significant because then there is a relationship between the lagged residuals and the residuals (this is autocorrelation!). Can check it with the waldtest
function we used in lab.
p_load(lmtest)
#waldtest(bg_reg, c("lag(e, 1)", "lag(e, 2)", "lag(e, 3)"))
# NOTE: if the waldtest function doesn't work for you (like it didn't for me), try using this one from the survey package:
p_load(survey)
regTermTest(bg_reg, c("lag(e, 1)", "lag(e, 2)", "lag(e, 3)"), null=NULL,df=NULL, method=c("Wald"))
## Wald test for lag(e, 1) lag(e, 2) lag(e, 3)
## in lm(formula = e ~ price_coal + lag(price_coal, 1) + lag(price_coal,
## 2) + lag(price_coal, 3) + price_gas + lag(price_gas, 1) +
## lag(price_gas, 2) + lag(price_gas, 3) + lag(e, 1) + lag(e,
## 2) + lag(e, 3), data = df)
## F = 9.502205 on 3 and 120 df: p= 1.1123e-05
# you can find the p-value in the bottom right corner of this output
Remember: \(H_0: r_1 = r_2 = r_3 = 0\) so we reject the null hypotheis and conclude that at least one of the coefficients on the lagged residuals is not equal to zero.