The linear regression model is one of the most common methods for identifying and quantifying the relationship between a dependent variable and a single (univariate linear regression) or multiple (multivariate linear regression) independent variables. This mode has a wide range of applications, from causal inference to predictive analysis and, in particular, time series forecasting. The focus of this chapter is on methods and approaches for forecasting time series data with linear regression. That includes methods for decomposing and forecasting the series components (for example, the trend and seasonal patterns), handling special events (such as outliers and holidays), and using external variables as regressors.
library(TSstudio)
## Warning: package 'TSstudio' was built under R version 4.3.3
data("USgas")
ts_plot(USgas, title = "US Monthly Natural Gas consumption", Ytitle = "Billion Cubic Feet", Xtitle = "Year")
ts_info(USgas)
## The USgas series is a ts object with 1 variable and 238 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2019 10
## The USgas series is a ts object with 1 variable and 238 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2019 10
ts_decompose(USgas)
USgas_df <- ts_to_prophet(USgas)
head(USgas_df)
## ds y
## 1 2000-01-01 2510.5
## 2 2000-02-01 2330.7
## 3 2000-03-01 2050.6
## 4 2000-04-01 1783.3
## 5 2000-05-01 1632.9
## 6 2000-06-01 1513.1
## ds y
## 1 2000-01-01 2510.5
## 2 2000-02-01 2330.7
## 3 2000-03-01 2050.6
## 4 2000-04-01 1783.3
## 5 2000-05-01 1632.9
## 6 2000-06-01 1513.1
USgas_df$trend <- 1:nrow(USgas_df)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.3.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
USgas_df$seasonal <- factor(month(USgas_df$ds, label = T), ordered = FALSE)
head(USgas_df)
## ds y trend seasonal
## 1 2000-01-01 2510.5 1 ene
## 2 2000-02-01 2330.7 2 feb
## 3 2000-03-01 2050.6 3 mar
## 4 2000-04-01 1783.3 4 abr
## 5 2000-05-01 1632.9 5 may
## 6 2000-06-01 1513.1 6 jun
## ds y trend seasonal
## 1 2000-01-01 2510.5 1 ene
## 2 2000-02-01 2330.7 2 feb
## 3 2000-03-01 2050.6 3 mar
## 4 2000-04-01 1783.3 4 abr
## 5 2000-05-01 1632.9 5 may
## 6 2000-06-01 1513.1 6 jun
h <- 12
train <- USgas_df[1:(nrow(USgas_df) - h), ]
test <- USgas_df[(nrow(USgas_df) - h + 1):nrow(USgas_df), ]
md_trend <- lm (y ~ trend, data = train)
summary(md_trend)
##
## Call:
## lm(formula = y ~ trend, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -547.2 -307.4 -153.2 333.1 1052.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1751.0074 52.6435 33.26 < 2e-16 ***
## trend 2.4489 0.4021 6.09 4.86e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 394.4 on 224 degrees of freedom
## Multiple R-squared: 0.1421, Adjusted R-squared: 0.1382
## F-statistic: 37.09 on 1 and 224 DF, p-value: 4.861e-09
##
## Call:
## lm(formula = y ~ trend, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -547.2 -307.4 -153.2 333.1 1052.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1751.0074 52.6435 33.26 < 2e-16 ***
## trend 2.4489 0.4021 6.09 4.86e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 394.4 on 224 degrees of freedom
## Multiple R-squared: 0.1421, Adjusted R-squared: 0.1382
## F-statistic: 37.09 on 1 and 224 DF, p-value: 4.861e-09
train$yhat <- predict(md_trend, newdata = train)
test$yhat <- predict(md_trend, newdata = test)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
plot_lm <- function(data, train, test, title = NULL) {
p <- plot_ly(data = data,
x = ~ ds,
y = ~ y,
type = "scatter",
mode = "line",
name = "Actual") %>%
add_lines(x = ~ train$ds,
y = ~ train$yhat,
line = list(color = "red"),
name = "Fitted") %>%
add_lines(x = ~ test$ds,
y = ~ test$yhat,
line = list(color = "green", dash = "dot", width = 3),
name = "Forecasted") %>%
layout(title = title,
xaxis = list(title = "Year"),
yaxis = list("Billion Cubic Feet"),
legend = list(x = 0.05, y = 0.95))
return(p)
}
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Trend Component of the Series")
mape_trend <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_trend
## [1] 0.1644088 0.1299951
md_seasonal <- lm (y ~ seasonal, data = train)
summary(md_seasonal)
##
## Call:
## lm(formula = y ~ seasonal, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -608.98 -162.34 -50.77 148.40 566.89
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2774.28 49.75 55.759 < 2e-16 ***
## seasonalfeb -297.92 70.36 -4.234 3.41e-05 ***
## seasonalmar -479.10 70.36 -6.809 9.77e-11 ***
## seasonalabr -905.28 70.36 -12.866 < 2e-16 ***
## seasonalmay -1088.42 70.36 -15.468 < 2e-16 ***
## seasonaljun -1105.49 70.36 -15.711 < 2e-16 ***
## seasonaljul -939.35 70.36 -13.350 < 2e-16 ***
## seasonalago -914.12 70.36 -12.991 < 2e-16 ***
## seasonalsept -1114.74 70.36 -15.843 < 2e-16 ***
## seasonaloct -1022.21 70.36 -14.527 < 2e-16 ***
## seasonalnov -797.53 71.33 -11.180 < 2e-16 ***
## seasonaldic -256.67 71.33 -3.598 0.000398 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 216.9 on 214 degrees of freedom
## Multiple R-squared: 0.7521, Adjusted R-squared: 0.7394
## F-statistic: 59.04 on 11 and 214 DF, p-value: < 2.2e-16
train$yhat <- predict(md_seasonal, newdata = train)
test$yhat <- predict(md_seasonal, newdata = test)
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Seasonal Component of the Series")
mape_seasonal <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_seasonal
## [1] 0.08628973 0.21502100
md1 <- lm (y ~ seasonal + trend, data = train)
summary(md1)
##
## Call:
## lm(formula = y ~ seasonal + trend, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -514.73 -77.17 -17.70 85.80 336.95
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2488.8994 32.6011 76.344 < 2e-16 ***
## seasonalfeb -300.5392 41.4864 -7.244 7.84e-12 ***
## seasonalmar -484.3363 41.4870 -11.674 < 2e-16 ***
## seasonalabr -913.1334 41.4880 -22.010 < 2e-16 ***
## seasonalmay -1098.8884 41.4895 -26.486 < 2e-16 ***
## seasonaljun -1118.5855 41.4913 -26.960 < 2e-16 ***
## seasonaljul -955.0563 41.4936 -23.017 < 2e-16 ***
## seasonalago -932.4482 41.4962 -22.471 < 2e-16 ***
## seasonalsept -1135.6874 41.4993 -27.366 < 2e-16 ***
## seasonaloct -1045.7687 41.5028 -25.198 < 2e-16 ***
## seasonalnov -808.0016 42.0617 -19.210 < 2e-16 ***
## seasonaldic -269.7642 42.0635 -6.413 9.05e-10 ***
## trend 2.6182 0.1305 20.065 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 127.9 on 213 degrees of freedom
## Multiple R-squared: 0.9142, Adjusted R-squared: 0.9094
## F-statistic: 189.2 on 12 and 213 DF, p-value: < 2.2e-16
train$yhat <- predict(md1, newdata = train)
test$yhat <- predict(md1, newdata = test)
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Trend and Seasonal Components of the Series")
mape_md1 <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_md1
## [1] 0.04774945 0.09143290
md2 <- lm(y ~ seasonal + trend + I(trend^2), data = train)
summary(md2)
##
## Call:
## lm(formula = y ~ seasonal + trend + I(trend^2), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -468.47 -54.66 -2.21 63.11 294.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.635e+03 3.224e+01 81.738 < 2e-16 ***
## seasonalfeb -3.004e+02 3.540e+01 -8.487 3.69e-15 ***
## seasonalmar -4.841e+02 3.540e+01 -13.676 < 2e-16 ***
## seasonalabr -9.128e+02 3.540e+01 -25.787 < 2e-16 ***
## seasonalmay -1.099e+03 3.540e+01 -31.033 < 2e-16 ***
## seasonaljun -1.118e+03 3.540e+01 -31.588 < 2e-16 ***
## seasonaljul -9.547e+02 3.540e+01 -26.968 < 2e-16 ***
## seasonalago -9.322e+02 3.541e+01 -26.329 < 2e-16 ***
## seasonalsept -1.136e+03 3.541e+01 -32.070 < 2e-16 ***
## seasonaloct -1.046e+03 3.541e+01 -29.532 < 2e-16 ***
## seasonalnov -8.001e+02 3.590e+01 -22.286 < 2e-16 ***
## seasonaldic -2.618e+02 3.590e+01 -7.293 5.95e-12 ***
## trend -1.270e+00 4.472e-01 -2.840 0.00494 **
## I(trend^2) 1.713e-02 1.908e-03 8.977 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 109.1 on 212 degrees of freedom
## Multiple R-squared: 0.9379, Adjusted R-squared: 0.9341
## F-statistic: 246.1 on 13 and 212 DF, p-value: < 2.2e-16
train$yhat <- predict(md2, newdata = train)
test$yhat <- predict(md2, newdata = test)
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Trend (Polynomial) and Seasonal Components of the Series")
mape_md2 <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_md2
## [1] 0.03688770 0.04212618
USgas_split <- ts_split(USgas, sample.out = h)
train.ts <- USgas_split$train
test.ts <- USgas_split$test
library(forecast)
## Warning: package 'forecast' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
md3 <- tslm(train.ts ~ season + trend + I(trend^2))
summary(md3)
##
## Call:
## tslm(formula = train.ts ~ season + trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -468.47 -54.66 -2.21 63.11 294.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.635e+03 3.224e+01 81.738 < 2e-16 ***
## season2 -3.004e+02 3.540e+01 -8.487 3.69e-15 ***
## season3 -4.841e+02 3.540e+01 -13.676 < 2e-16 ***
## season4 -9.128e+02 3.540e+01 -25.787 < 2e-16 ***
## season5 -1.099e+03 3.540e+01 -31.033 < 2e-16 ***
## season6 -1.118e+03 3.540e+01 -31.588 < 2e-16 ***
## season7 -9.547e+02 3.540e+01 -26.968 < 2e-16 ***
## season8 -9.322e+02 3.541e+01 -26.329 < 2e-16 ***
## season9 -1.136e+03 3.541e+01 -32.070 < 2e-16 ***
## season10 -1.046e+03 3.541e+01 -29.532 < 2e-16 ***
## season11 -8.001e+02 3.590e+01 -22.286 < 2e-16 ***
## season12 -2.618e+02 3.590e+01 -7.293 5.95e-12 ***
## trend -1.270e+00 4.472e-01 -2.840 0.00494 **
## I(trend^2) 1.713e-02 1.908e-03 8.977 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 109.1 on 212 degrees of freedom
## Multiple R-squared: 0.9379, Adjusted R-squared: 0.9341
## F-statistic: 246.1 on 13 and 212 DF, p-value: < 2.2e-16
r <- which(USgas_df$ds == as.Date("2014-01-01"))
USgas_df$s_break <- ifelse(year(USgas_df$ds) >= 2010, 1, 0)
USgas_df$s_break[r] <- 1
md3 <- tslm(USgas ~ season + trend + I(trend^2) + s_break, data = USgas_df)
summary(md3)
##
## Call:
## tslm(formula = USgas ~ season + trend + I(trend^2) + s_break,
## data = USgas_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -469.25 -50.68 -2.66 63.63 275.89
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.661e+03 3.200e+01 83.164 < 2e-16 ***
## season2 -3.054e+02 3.448e+01 -8.858 2.61e-16 ***
## season3 -4.849e+02 3.448e+01 -14.062 < 2e-16 ***
## season4 -9.272e+02 3.449e+01 -26.885 < 2e-16 ***
## season5 -1.108e+03 3.449e+01 -32.114 < 2e-16 ***
## season6 -1.127e+03 3.450e+01 -32.660 < 2e-16 ***
## season7 -9.568e+02 3.450e+01 -27.730 < 2e-16 ***
## season8 -9.340e+02 3.451e+01 -27.061 < 2e-16 ***
## season9 -1.138e+03 3.452e+01 -32.972 < 2e-16 ***
## season10 -1.040e+03 3.453e+01 -30.122 < 2e-16 ***
## season11 -7.896e+02 3.497e+01 -22.577 < 2e-16 ***
## season12 -2.649e+02 3.498e+01 -7.571 9.72e-13 ***
## trend -1.928e+00 4.479e-01 -4.304 2.51e-05 ***
## I(trend^2) 1.862e-02 1.676e-03 11.113 < 2e-16 ***
## s_break 6.060e+01 2.836e+01 2.137 0.0337 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 109 on 223 degrees of freedom
## Multiple R-squared: 0.9423, Adjusted R-squared: 0.9387
## F-statistic: 260.3 on 14 and 223 DF, p-value: < 2.2e-16
library(UKgrid)
## Warning: package 'UKgrid' was built under R version 4.3.3
UKdaily <- extract_grid(type = "data.frame",
columns = "ND",
aggregate = "daily")
head(UKdaily)
## TIMESTAMP ND
## 1 2005-04-01 1920069
## 2 2005-04-02 1674699
## 3 2005-04-03 1631352
## 4 2005-04-04 1916693
## 5 2005-04-05 1952082
## 6 2005-04-06 1964584
## TIMESTAMP ND
## 1 2005-04-01 1920069
## 2 2005-04-02 1674699
## 3 2005-04-03 1631352
## 4 2005-04-04 1916693
## 5 2005-04-05 1952082
## 6 2005-04-06 1964584
library(TSstudio)
ts_plot(UKdaily,
title = "The UK National Demand for Electricity",
Ytitle = "MW",
Xtitle = "Year")
ts_heatmap(UKdaily[which(year(UKdaily$TIMESTAMP) >= 2016),],
title = "UK the Daily National Grid Demand Heatmap")
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
UKdaily <- UKdaily %>%
mutate(wday = wday(TIMESTAMP, label = TRUE),
month = month(TIMESTAMP, label = TRUE),
lag365 = dplyr::lag(ND, 365)) %>%
filter(!is.na(lag365)) %>%
arrange(TIMESTAMP)
## 'data.frame': 4939 obs. of 5 variables:
## $ TIMESTAMP: Date, format: "2006-04-01" "2006-04-02" ...
## $ ND : int 1718405 1691341 1960325 2023886 2026204 2008422 1981175 1770440 1749715 2012865 ...
## $ wday : Ord.factor w/ 7 levels "dom\\."<"lun\\."<..: 7 1 2 3 4 5 6 7 1 2 ...
## $ month : Ord.factor w/ 12 levels "ene"<"feb"<"mar"<..: 4 4 4 4 4 4 4 4 4 4 ...
## $ lag365 : int 1920069 1674699 1631352 1916693 1952082 1964584 1990895 2003982 1811436 1684720 ...
str(UKdaily)
## 'data.frame': 4939 obs. of 5 variables:
## $ TIMESTAMP: Date, format: "2006-04-01" "2006-04-02" ...
## $ ND : int 1718405 1691341 1960325 2023886 2026204 2008422 1981175 1770440 1749715 2012865 ...
## $ wday : Ord.factor w/ 7 levels "dom\\."<"lun\\."<..: 7 1 2 3 4 5 6 7 1 2 ...
## $ month : Ord.factor w/ 12 levels "ene"<"feb"<"mar"<..: 4 4 4 4 4 4 4 4 4 4 ...
## $ lag365 : int 1920069 1674699 1631352 1916693 1952082 1964584 1990895 2003982 1811436 1684720 ...
start_date <- min(UKdaily$TIMESTAMP)
start <- c(year(start_date), yday(start_date))
UK_ts <- ts(UKdaily$ND,
start = start,
frequency = 365)
ts_cor(UK_ts, lag.max = 365 * 4)
h <- 365
UKpartitions <- ts_split(UK_ts, sample.out = h)
train_ts <- UKpartitions$train
test_ts <- UKpartitions$test
train_df <- UKdaily[1:(nrow(UKdaily) - h), ]
test_df <- UKdaily[(nrow(UKdaily) - h + 1):nrow(UKdaily), ]
md_tslm1 <- tslm(train_ts ~ season + trend)
fc_tslm1 <- forecast(md_tslm1, h = h)
test_forecast(actual = UK_ts,
forecast.obj = fc_tslm1,
test = test_ts)
accuracy(fc_tslm1, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -4.781286e-12 121551.4 100439.64 -0.5721337 6.294111 0.8418677
## Test set -1.740215e+04 123156.6 96785.27 -1.8735336 7.160573 0.8112374
## ACF1 Theil's U
## Training set 0.5277884 NA
## Test set 0.5106681 1.071899
md_tslm2 <- tslm(train_ts ~ season + trend + wday, data = train_df)
fc_tslm2 <- forecast(md_tslm2, h = h, newdata = test_df)
test_forecast(actual = UK_ts,
forecast.obj = fc_tslm2,
test = test_ts)
accuracy(fc_tslm2, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 8.172823e-12 70245.98 52146.79 -0.1738708 3.167605 0.4370853
## Test set -1.764563e+04 80711.71 65373.21 -1.3715505 4.682071 0.5479470
## ACF1 Theil's U
## Training set 0.7513664 NA
## Test set 0.6075598 0.68445
## ME RMSE MAE MPE MAPE MASE
## Training set 8.172823e-12 70245.98 52146.79 -0.1738708 3.167605 0.4370853
## Test set -1.764563e+04 80711.71 65373.21 -1.3715505 4.682071 0.5479470
## ACF1 Theil's U
## Training set 0.7513664 NA
## Test set 0.6075598 0.68445
md_tslm3 <- tslm(train_ts ~ season + trend + wday + month + lag365, data = train_df)
fc_tslm3 <- forecast(md_tslm3, h = h, newdata = test_df)
test_forecast(actual = UK_ts,
forecast.obj = fc_tslm3,
test = test_ts)
accuracy(fc_tslm3, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -9.836939e-13 69904.92 52006.75 -0.1717563 3.163385 0.4359116
## Test set -1.754061e+04 81783.55 65957.66 -1.3613252 4.722083 0.5528457
## ACF1 Theil's U
## Training set 0.7500146 NA
## Test set 0.6094666 0.6925767
summary(md_tslm3)$coefficients %>% tail(1)
## Estimate Std. Error t value Pr(>|t|)
## lag365 -0.06038328 0.01545495 -3.907051 9.490321e-05
anova(md_tslm3)
## Analysis of Variance Table
##
## Response: train_ts
## Df Sum Sq Mean Sq F value Pr(>F)
## season 364 1.4279e+14 3.9227e+11 73.5348 < 2.2e-16 ***
## trend 1 7.2634e+13 7.2634e+13 13615.7078 < 2.2e-16 ***
## wday 6 4.5009e+13 7.5016e+12 1406.2214 < 2.2e-16 ***
## month 11 1.3721e+11 1.2473e+10 2.3382 0.007266 **
## lag365 1 8.1432e+10 8.1432e+10 15.2650 9.49e-05 ***
## Residuals 4190 2.2352e+13 5.3345e+09
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Variance Table
##
## Response: train_ts
## Df Sum Sq Mean Sq F value Pr(>F)
## season 364 1.4279e+14 3.9227e+11 73.5348 < 2.2e-16 ***
## trend 1 7.2634e+13 7.2634e+13 13615.7078 < 2.2e-16 ***
## wday 6 4.5009e+13 7.5016e+12 1406.2214 < 2.2e-16 ***
## month 11 1.3721e+11 1.2473e+10 2.3382 0.007266 **
## lag365 1 8.1432e+10 8.1432e+10 15.2650 9.49e-05 ***
## Residuals 4190 2.2352e+13 5.3345e+09
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final_md <- tslm(UK_ts ~ season + trend + wday + month + lag365,
data = UKdaily)
checkresiduals(final_md)
##
## Breusch-Godfrey test for serial correlation of order up to 730
##
## data: Residuals from Linear regression model
## LM test = 3301.1, df = 730, p-value < 2.2e-16
##
## Breusch-Godfrey test for serial correlation of order up to 730
##
## data: Residuals from Linear regression model
## LM test = 3301.1, df = 730, p-value < 2.2e-16
UK_fc_df <- data.frame(date = seq.Date(from = max(UKdaily$TIMESTAMP) + days(1),
by = "day",
length.out = h))
UK_fc_df$wday <- factor(lubridate::wday(UK_fc_df$date, label = TRUE), ordered = FALSE)
UK_fc_df$month <- factor(month(UK_fc_df$date, label = TRUE), ordered = FALSE)
UK_fc_df$lag365 <- tail(UKdaily$ND, h)
UKgrid_fc <- forecast(final_md, h = h, newdata = UK_fc_df)
plot_forecast(UKgrid_fc,
title = "The UK National Demand for Electricity Forecast",
Ytitle = "MW",
Xtitle = "Year")