library(pacman)
p_load(tidyverse, lmtest)
## also installing the dependency 'zoo'
##
## The downloaded binary packages are in
## /var/folders/30/xcfr30vj55q2bbs6dlm27fqw0000gn/T//Rtmp4AaN4l/downloaded_packages
##
## lmtest installed
df <- read_csv("/Users/jenniputz/Downloads/004-data.csv")
## Parsed with column specification:
## cols(
## t = col_double(),
## date = col_date(format = ""),
## year = col_double(),
## month = col_double(),
## price_electricity = col_double(),
## price_coal = col_double(),
## price_gas = col_double()
## )
reg <- lm(price_coal ~ lag(price_coal, 1) + lag(price_coal, 2) + lag(price_coal, 3) +
price_gas + lag(price_gas, 1) + lag(price_gas, 2) + lag(price_gas, 3), data = df)
summary(reg)
##
## Call:
## lm(formula = price_coal ~ lag(price_coal, 1) + lag(price_coal,
## 2) + lag(price_coal, 3) + price_gas + lag(price_gas, 1) +
## lag(price_gas, 2) + lag(price_gas, 3), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.43058 -0.25740 0.01472 0.24264 2.66523
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.61998 0.46831 1.324 0.188
## lag(price_coal, 1) 1.02146 0.08414 12.141 <2e-16 ***
## lag(price_coal, 2) -0.16170 0.11945 -1.354 0.178
## lag(price_coal, 3) 0.02962 0.08456 0.350 0.727
## price_gas 0.11570 0.17135 0.675 0.501
## lag(price_gas, 1) 0.02901 0.24703 0.117 0.907
## lag(price_gas, 2) 0.17311 0.22156 0.781 0.436
## lag(price_gas, 3) -0.18463 0.14169 -1.303 0.195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6509 on 127 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8495, Adjusted R-squared: 0.8412
## F-statistic: 102.4 on 7 and 127 DF, p-value: < 2.2e-16
Next, get the residuals - use 3 NAs because we have 3rd order lags.
df$e <- c(NA, NA, NA, residuals(reg))
Next run the Breusch-Godfrey test. We will check for third-order autocorrelation.
bg_reg <- lm(e ~ lag(price_coal, 1) + lag(price_coal, 2) + lag(price_coal, 3) +
price_gas + lag(price_gas, 1) + lag(price_gas, 2) + lag(price_gas, 3) +
lag(e, 1) + lag(e, 2) + lag(e, 3), data = df)
summary(bg_reg)
##
## Call:
## lm(formula = e ~ lag(price_coal, 1) + lag(price_coal, 2) + lag(price_coal,
## 3) + price_gas + lag(price_gas, 1) + lag(price_gas, 2) +
## lag(price_gas, 3) + lag(e, 1) + lag(e, 2) + lag(e, 3), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.32892 -0.28103 0.03465 0.28504 1.81211
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.20528 0.83722 0.245 0.807
## lag(price_coal, 1) -0.39502 0.84092 -0.470 0.639
## lag(price_coal, 2) 0.87240 1.23899 0.704 0.483
## lag(price_coal, 3) -0.46207 0.64063 -0.721 0.472
## price_gas 0.21111 0.16052 1.315 0.191
## lag(price_gas, 1) -0.14128 0.24922 -0.567 0.572
## lag(price_gas, 2) 0.20778 0.25423 0.817 0.415
## lag(price_gas, 3) -0.35219 0.16139 -2.182 0.031 *
## lag(e, 1) 0.37563 0.86409 0.435 0.665
## lag(e, 2) -0.34113 0.69916 -0.488 0.626
## lag(e, 3) 0.03102 0.14709 0.211 0.833
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5852 on 121 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1008, Adjusted R-squared: 0.02649
## F-statistic: 1.357 on 10 and 121 DF, p-value: 0.2087
We dont want the lag(e, n) coefficients to be statistically significant because then there is a relationship between the lagged residuals and the residuals (this is autocorrelation!). Can check it with the waldtest
function we used in lab.
p_load(lmtest)
#waldtest(bg_reg, c("lag(e, 1)", "lag(e, 2)", "lag(e, 3)"))
# NOTE: if the waldtest function doesn't work for you (like it didn't for me), try using this one from the survey package:
p_load(survey)
## also installing the dependencies 'minqa', 'mitools'
##
## The downloaded binary packages are in
## /var/folders/30/xcfr30vj55q2bbs6dlm27fqw0000gn/T//Rtmp4AaN4l/downloaded_packages
##
## survey installed
regTermTest(bg_reg, c("lag(e, 1)", "lag(e, 2)", "lag(e, 3)"), null=NULL,df=NULL, method=c("Wald"))
## Wald test for lag(e, 1) lag(e, 2) lag(e, 3)
## in lm(formula = e ~ lag(price_coal, 1) + lag(price_coal, 2) + lag(price_coal,
## 3) + price_gas + lag(price_gas, 1) + lag(price_gas, 2) +
## lag(price_gas, 3) + lag(e, 1) + lag(e, 2) + lag(e, 3), data = df)
## F = 0.4910438 on 3 and 121 df: p= 0.68915
# you can find the p-value in the bottom right corner of this output
Remember: \(H_0: r_1 = r_2 = r_3 = 0\) so we reject the null hypotheis and conclude that at least one of the coefficients on the lagged residuals is not equal to zero.