Tercer informe- Análisis estadístico

Sección 9 - Forecasting with Linear Regression

The linear regression model is one of the most common methods for identifying and quantifying the relationship between a dependent variable and a single (univariate linear regression) or multiple (multivariate linear regression) independent variables. This mode has a wide range of applications, from causal inference to predictive analysis and, in particular, time series forecasting. The focus of this chapter is on methods and approaches for forecasting time series data with linear regression. That includes methods for decomposing and forecasting the series components (for example, the trend and seasonal patterns), handling special events (such as outliers and holidays), and using external variables as regressors.

library(TSstudio)

## Warning: package 'TSstudio' was built under R version 4.3.3

data("USgas")

ts_plot(USgas, title = "US Monthly Natural Gas consumption", Ytitle = "Billion Cubic Feet", Xtitle = "Year")

ts_info(USgas)

##  The USgas series is a ts object with 1 variable and 238 observations
##  Frequency: 12 
##  Start time: 2000 1 
##  End time: 2019 10

##  The USgas series is a ts object with 1 variable and 238 observations
##  Frequency: 12 
##  Start time: 2000 1 
##  End time: 2019 10

ts_decompose(USgas)

USgas_df <- ts_to_prophet(USgas)

head(USgas_df)

##           ds      y
## 1 2000-01-01 2510.5
## 2 2000-02-01 2330.7
## 3 2000-03-01 2050.6
## 4 2000-04-01 1783.3
## 5 2000-05-01 1632.9
## 6 2000-06-01 1513.1

##           ds      y
## 1 2000-01-01 2510.5
## 2 2000-02-01 2330.7
## 3 2000-03-01 2050.6
## 4 2000-04-01 1783.3
## 5 2000-05-01 1632.9
## 6 2000-06-01 1513.1

USgas_df$trend <- 1:nrow(USgas_df)

library(lubridate)

## Warning: package 'lubridate' was built under R version 4.3.3

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

USgas_df$seasonal <- factor(month(USgas_df$ds, label = T), ordered = FALSE)
head(USgas_df)

##           ds      y trend seasonal
## 1 2000-01-01 2510.5     1      ene
## 2 2000-02-01 2330.7     2      feb
## 3 2000-03-01 2050.6     3      mar
## 4 2000-04-01 1783.3     4      abr
## 5 2000-05-01 1632.9     5      may
## 6 2000-06-01 1513.1     6      jun

##           ds      y trend seasonal
## 1 2000-01-01 2510.5     1      ene
## 2 2000-02-01 2330.7     2      feb
## 3 2000-03-01 2050.6     3      mar
## 4 2000-04-01 1783.3     4      abr
## 5 2000-05-01 1632.9     5      may
## 6 2000-06-01 1513.1     6      jun

h <- 12 
train <- USgas_df[1:(nrow(USgas_df) - h), ]
test <- USgas_df[(nrow(USgas_df) - h + 1):nrow(USgas_df), ]

MODELING THE SERIES TREND AND SEASONAL COMPONENTS

md_trend <- lm (y ~ trend, data = train)
summary(md_trend)

## 
## Call:
## lm(formula = y ~ trend, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -547.2 -307.4 -153.2  333.1 1052.6 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1751.0074    52.6435   33.26  < 2e-16 ***
## trend          2.4489     0.4021    6.09 4.86e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 394.4 on 224 degrees of freedom
## Multiple R-squared:  0.1421, Adjusted R-squared:  0.1382 
## F-statistic: 37.09 on 1 and 224 DF,  p-value: 4.861e-09

## 
## Call:
## lm(formula = y ~ trend, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -547.2 -307.4 -153.2  333.1 1052.6 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1751.0074    52.6435   33.26  < 2e-16 ***
## trend          2.4489     0.4021    6.09 4.86e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 394.4 on 224 degrees of freedom
## Multiple R-squared:  0.1421, Adjusted R-squared:  0.1382 
## F-statistic: 37.09 on 1 and 224 DF,  p-value: 4.861e-09

train$yhat <- predict(md_trend, newdata = train)
test$yhat <- predict(md_trend, newdata = test)

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.3.3

library(plotly)

## Warning: package 'plotly' was built under R version 4.3.3

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

plot_lm <- function(data, train, test, title = NULL) {
  p <- plot_ly(data = data,
               x = ~ ds,
               y = ~ y,
               type = "scatter",
               mode = "line",
               name = "Actual") %>%
    add_lines(x = ~ train$ds,
              y = ~ train$yhat,
              line = list(color = "red"),
              name = "Fitted") %>%
    add_lines(x = ~ test$ds,
              y = ~ test$yhat,
              line = list(color = "green", dash = "dot", width = 3),
              name = "Forecasted") %>%
    layout(title = title,
           xaxis = list(title = "Year"),
           yaxis = list("Billion Cubic Feet"),
           legend = list(x = 0.05, y = 0.95))
  return(p)
}

plot_lm(data = USgas_df,
        train = train,
        test = test,
        title = "Predicting the Trend Component of the Series")

mape_trend <- c(mean(abs(train$y - train$yhat) / train$y),
                mean(abs(test$y - test$yhat) / test$y))
mape_trend

## [1] 0.1644088 0.1299951

md_seasonal <- lm (y ~ seasonal, data = train)
summary(md_seasonal)

## 
## Call:
## lm(formula = y ~ seasonal, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -608.98 -162.34  -50.77  148.40  566.89 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2774.28      49.75  55.759  < 2e-16 ***
## seasonalfeb   -297.92      70.36  -4.234 3.41e-05 ***
## seasonalmar   -479.10      70.36  -6.809 9.77e-11 ***
## seasonalabr   -905.28      70.36 -12.866  < 2e-16 ***
## seasonalmay  -1088.42      70.36 -15.468  < 2e-16 ***
## seasonaljun  -1105.49      70.36 -15.711  < 2e-16 ***
## seasonaljul   -939.35      70.36 -13.350  < 2e-16 ***
## seasonalago   -914.12      70.36 -12.991  < 2e-16 ***
## seasonalsept -1114.74      70.36 -15.843  < 2e-16 ***
## seasonaloct  -1022.21      70.36 -14.527  < 2e-16 ***
## seasonalnov   -797.53      71.33 -11.180  < 2e-16 ***
## seasonaldic   -256.67      71.33  -3.598 0.000398 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 216.9 on 214 degrees of freedom
## Multiple R-squared:  0.7521, Adjusted R-squared:  0.7394 
## F-statistic: 59.04 on 11 and 214 DF,  p-value: < 2.2e-16

train$yhat <- predict(md_seasonal, newdata = train)
test$yhat <- predict(md_seasonal, newdata = test)

plot_lm(data = USgas_df,
        train = train,
        test = test,
        title = "Predicting the Seasonal Component of the Series")

mape_seasonal <- c(mean(abs(train$y - train$yhat) / train$y),
                   mean(abs(test$y - test$yhat) / test$y))
mape_seasonal

## [1] 0.08628973 0.21502100

md1 <- lm (y ~ seasonal + trend, data = train)
summary(md1)

## 
## Call:
## lm(formula = y ~ seasonal + trend, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -514.73  -77.17  -17.70   85.80  336.95 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2488.8994    32.6011  76.344  < 2e-16 ***
## seasonalfeb   -300.5392    41.4864  -7.244 7.84e-12 ***
## seasonalmar   -484.3363    41.4870 -11.674  < 2e-16 ***
## seasonalabr   -913.1334    41.4880 -22.010  < 2e-16 ***
## seasonalmay  -1098.8884    41.4895 -26.486  < 2e-16 ***
## seasonaljun  -1118.5855    41.4913 -26.960  < 2e-16 ***
## seasonaljul   -955.0563    41.4936 -23.017  < 2e-16 ***
## seasonalago   -932.4482    41.4962 -22.471  < 2e-16 ***
## seasonalsept -1135.6874    41.4993 -27.366  < 2e-16 ***
## seasonaloct  -1045.7687    41.5028 -25.198  < 2e-16 ***
## seasonalnov   -808.0016    42.0617 -19.210  < 2e-16 ***
## seasonaldic   -269.7642    42.0635  -6.413 9.05e-10 ***
## trend            2.6182     0.1305  20.065  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 127.9 on 213 degrees of freedom
## Multiple R-squared:  0.9142, Adjusted R-squared:  0.9094 
## F-statistic: 189.2 on 12 and 213 DF,  p-value: < 2.2e-16

train$yhat <- predict(md1, newdata = train)
test$yhat <- predict(md1, newdata = test)

plot_lm(data = USgas_df,
        train = train,
        test = test,
        title = "Predicting the Trend and Seasonal Components of the Series")

mape_md1 <- c(mean(abs(train$y - train$yhat) / train$y),
              mean(abs(test$y - test$yhat) / test$y))
mape_md1

## [1] 0.04774945 0.09143290

md2 <- lm(y ~ seasonal + trend + I(trend^2), data = train)
summary(md2)

## 
## Call:
## lm(formula = y ~ seasonal + trend + I(trend^2), data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -468.47  -54.66   -2.21   63.11  294.32 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.635e+03  3.224e+01  81.738  < 2e-16 ***
## seasonalfeb  -3.004e+02  3.540e+01  -8.487 3.69e-15 ***
## seasonalmar  -4.841e+02  3.540e+01 -13.676  < 2e-16 ***
## seasonalabr  -9.128e+02  3.540e+01 -25.787  < 2e-16 ***
## seasonalmay  -1.099e+03  3.540e+01 -31.033  < 2e-16 ***
## seasonaljun  -1.118e+03  3.540e+01 -31.588  < 2e-16 ***
## seasonaljul  -9.547e+02  3.540e+01 -26.968  < 2e-16 ***
## seasonalago  -9.322e+02  3.541e+01 -26.329  < 2e-16 ***
## seasonalsept -1.136e+03  3.541e+01 -32.070  < 2e-16 ***
## seasonaloct  -1.046e+03  3.541e+01 -29.532  < 2e-16 ***
## seasonalnov  -8.001e+02  3.590e+01 -22.286  < 2e-16 ***
## seasonaldic  -2.618e+02  3.590e+01  -7.293 5.95e-12 ***
## trend        -1.270e+00  4.472e-01  -2.840  0.00494 ** 
## I(trend^2)    1.713e-02  1.908e-03   8.977  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 109.1 on 212 degrees of freedom
## Multiple R-squared:  0.9379, Adjusted R-squared:  0.9341 
## F-statistic: 246.1 on 13 and 212 DF,  p-value: < 2.2e-16

train$yhat <- predict(md2, newdata = train)
test$yhat <- predict(md2, newdata = test)

plot_lm(data = USgas_df,
        train = train,
        test = test,
        title = "Predicting the Trend (Polynomial) and Seasonal Components of the Series")

mape_md2 <- c(mean(abs(train$y - train$yhat) / train$y),
              mean(abs(test$y - test$yhat) / test$y))
mape_md2

## [1] 0.03688770 0.04212618

THE TSLM FUNCTION

USgas_split <- ts_split(USgas, sample.out = h)
train.ts <- USgas_split$train
test.ts <- USgas_split$test

library(forecast)

## Warning: package 'forecast' was built under R version 4.3.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

md3 <- tslm(train.ts ~ season + trend + I(trend^2))
summary(md3)

## 
## Call:
## tslm(formula = train.ts ~ season + trend + I(trend^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -468.47  -54.66   -2.21   63.11  294.32 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.635e+03  3.224e+01  81.738  < 2e-16 ***
## season2     -3.004e+02  3.540e+01  -8.487 3.69e-15 ***
## season3     -4.841e+02  3.540e+01 -13.676  < 2e-16 ***
## season4     -9.128e+02  3.540e+01 -25.787  < 2e-16 ***
## season5     -1.099e+03  3.540e+01 -31.033  < 2e-16 ***
## season6     -1.118e+03  3.540e+01 -31.588  < 2e-16 ***
## season7     -9.547e+02  3.540e+01 -26.968  < 2e-16 ***
## season8     -9.322e+02  3.541e+01 -26.329  < 2e-16 ***
## season9     -1.136e+03  3.541e+01 -32.070  < 2e-16 ***
## season10    -1.046e+03  3.541e+01 -29.532  < 2e-16 ***
## season11    -8.001e+02  3.590e+01 -22.286  < 2e-16 ***
## season12    -2.618e+02  3.590e+01  -7.293 5.95e-12 ***
## trend       -1.270e+00  4.472e-01  -2.840  0.00494 ** 
## I(trend^2)   1.713e-02  1.908e-03   8.977  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 109.1 on 212 degrees of freedom
## Multiple R-squared:  0.9379, Adjusted R-squared:  0.9341 
## F-statistic: 246.1 on 13 and 212 DF,  p-value: < 2.2e-16

MODELING SINGLE EVENTS AND NON-SEASONAL EVENTS

r <- which(USgas_df$ds == as.Date("2014-01-01"))
USgas_df$s_break <- ifelse(year(USgas_df$ds) >= 2010, 1, 0)
USgas_df$s_break[r] <- 1

md3 <- tslm(USgas ~ season + trend + I(trend^2) + s_break, data = USgas_df)

summary(md3)

## 
## Call:
## tslm(formula = USgas ~ season + trend + I(trend^2) + s_break, 
##     data = USgas_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -469.25  -50.68   -2.66   63.63  275.89 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.661e+03  3.200e+01  83.164  < 2e-16 ***
## season2     -3.054e+02  3.448e+01  -8.858 2.61e-16 ***
## season3     -4.849e+02  3.448e+01 -14.062  < 2e-16 ***
## season4     -9.272e+02  3.449e+01 -26.885  < 2e-16 ***
## season5     -1.108e+03  3.449e+01 -32.114  < 2e-16 ***
## season6     -1.127e+03  3.450e+01 -32.660  < 2e-16 ***
## season7     -9.568e+02  3.450e+01 -27.730  < 2e-16 ***
## season8     -9.340e+02  3.451e+01 -27.061  < 2e-16 ***
## season9     -1.138e+03  3.452e+01 -32.972  < 2e-16 ***
## season10    -1.040e+03  3.453e+01 -30.122  < 2e-16 ***
## season11    -7.896e+02  3.497e+01 -22.577  < 2e-16 ***
## season12    -2.649e+02  3.498e+01  -7.571 9.72e-13 ***
## trend       -1.928e+00  4.479e-01  -4.304 2.51e-05 ***
## I(trend^2)   1.862e-02  1.676e-03  11.113  < 2e-16 ***
## s_break      6.060e+01  2.836e+01   2.137   0.0337 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 109 on 223 degrees of freedom
## Multiple R-squared:  0.9423, Adjusted R-squared:  0.9387 
## F-statistic: 260.3 on 14 and 223 DF,  p-value: < 2.2e-16

THE UKGRID SERIES

library(UKgrid)

## Warning: package 'UKgrid' was built under R version 4.3.3

UKdaily <- extract_grid(type = "data.frame",
                        columns = "ND",
                        aggregate = "daily")
head(UKdaily)

##    TIMESTAMP      ND
## 1 2005-04-01 1920069
## 2 2005-04-02 1674699
## 3 2005-04-03 1631352
## 4 2005-04-04 1916693
## 5 2005-04-05 1952082
## 6 2005-04-06 1964584

##    TIMESTAMP      ND
## 1 2005-04-01 1920069
## 2 2005-04-02 1674699
## 3 2005-04-03 1631352
## 4 2005-04-04 1916693
## 5 2005-04-05 1952082
## 6 2005-04-06 1964584

library(TSstudio)
ts_plot(UKdaily,
        title = "The UK National Demand for Electricity",
        Ytitle = "MW",
        Xtitle = "Year")

ts_heatmap(UKdaily[which(year(UKdaily$TIMESTAMP) >= 2016),],
           title = "UK the Daily National Grid Demand Heatmap")

PREPROCESSING AND FEATURE ENGINEERING OF THE UKDAILY SERIES

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.3.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

UKdaily <- UKdaily %>%
  mutate(wday = wday(TIMESTAMP, label = TRUE),
         month = month(TIMESTAMP, label = TRUE),
         lag365 = dplyr::lag(ND, 365)) %>%
  filter(!is.na(lag365)) %>%
  arrange(TIMESTAMP)

## 'data.frame':    4939 obs. of  5 variables:
##  $ TIMESTAMP: Date, format: "2006-04-01" "2006-04-02" ...
##  $ ND       : int  1718405 1691341 1960325 2023886 2026204 2008422 1981175 1770440 1749715 2012865 ...
##  $ wday     : Ord.factor w/ 7 levels "dom\\."<"lun\\."<..: 7 1 2 3 4 5 6 7 1 2 ...
##  $ month    : Ord.factor w/ 12 levels "ene"<"feb"<"mar"<..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ lag365   : int  1920069 1674699 1631352 1916693 1952082 1964584 1990895 2003982 1811436 1684720 ...

str(UKdaily)

## 'data.frame':    4939 obs. of  5 variables:
##  $ TIMESTAMP: Date, format: "2006-04-01" "2006-04-02" ...
##  $ ND       : int  1718405 1691341 1960325 2023886 2026204 2008422 1981175 1770440 1749715 2012865 ...
##  $ wday     : Ord.factor w/ 7 levels "dom\\."<"lun\\."<..: 7 1 2 3 4 5 6 7 1 2 ...
##  $ month    : Ord.factor w/ 12 levels "ene"<"feb"<"mar"<..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ lag365   : int  1920069 1674699 1631352 1916693 1952082 1964584 1990895 2003982 1811436 1684720 ...

start_date <- min(UKdaily$TIMESTAMP)
start <- c(year(start_date), yday(start_date))

UK_ts <- ts(UKdaily$ND,
            start = start,
            frequency = 365)
ts_cor(UK_ts, lag.max = 365 * 4)

h <- 365

UKpartitions <- ts_split(UK_ts, sample.out = h)

train_ts <- UKpartitions$train
test_ts <- UKpartitions$test

train_df <- UKdaily[1:(nrow(UKdaily) - h), ]
test_df <- UKdaily[(nrow(UKdaily) - h + 1):nrow(UKdaily), ]

TRAINING AND TESTING THE FORECASTING MODEL

md_tslm1 <- tslm(train_ts ~ season + trend)
fc_tslm1 <- forecast(md_tslm1, h = h)

test_forecast(actual = UK_ts,
              forecast.obj = fc_tslm1,
              test = test_ts)

accuracy(fc_tslm1, test_ts)

##                         ME     RMSE       MAE        MPE     MAPE      MASE
## Training set -4.781286e-12 121551.4 100439.64 -0.5721337 6.294111 0.8418677
## Test set     -1.740215e+04 123156.6  96785.27 -1.8735336 7.160573 0.8112374
##                   ACF1 Theil's U
## Training set 0.5277884        NA
## Test set     0.5106681  1.071899

md_tslm2 <- tslm(train_ts ~ season + trend + wday, data = train_df)
fc_tslm2 <- forecast(md_tslm2, h = h, newdata = test_df)
test_forecast(actual = UK_ts,
              forecast.obj = fc_tslm2,
              test = test_ts)

accuracy(fc_tslm2, test_ts)

##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  8.172823e-12 70245.98 52146.79 -0.1738708 3.167605 0.4370853
## Test set     -1.764563e+04 80711.71 65373.21 -1.3715505 4.682071 0.5479470
##                   ACF1 Theil's U
## Training set 0.7513664        NA
## Test set     0.6075598   0.68445

##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  8.172823e-12 70245.98 52146.79 -0.1738708 3.167605 0.4370853
## Test set     -1.764563e+04 80711.71 65373.21 -1.3715505 4.682071 0.5479470
##                   ACF1 Theil's U
## Training set 0.7513664        NA
## Test set     0.6075598   0.68445

md_tslm3 <- tslm(train_ts ~ season + trend + wday + month + lag365, data = train_df)
fc_tslm3 <- forecast(md_tslm3, h = h, newdata = test_df)
test_forecast(actual = UK_ts,
              forecast.obj = fc_tslm3,
              test = test_ts)

accuracy(fc_tslm3, test_ts)

##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -9.836939e-13 69904.92 52006.75 -0.1717563 3.163385 0.4359116
## Test set     -1.754061e+04 81783.55 65957.66 -1.3613252 4.722083 0.5528457
##                   ACF1 Theil's U
## Training set 0.7500146        NA
## Test set     0.6094666 0.6925767

summary(md_tslm3)$coefficients %>% tail(1)

##           Estimate Std. Error   t value     Pr(>|t|)
## lag365 -0.06038328 0.01545495 -3.907051 9.490321e-05

anova(md_tslm3)

## Analysis of Variance Table
## 
## Response: train_ts
##             Df     Sum Sq    Mean Sq    F value    Pr(>F)    
## season     364 1.4279e+14 3.9227e+11    73.5348 < 2.2e-16 ***
## trend        1 7.2634e+13 7.2634e+13 13615.7078 < 2.2e-16 ***
## wday         6 4.5009e+13 7.5016e+12  1406.2214 < 2.2e-16 ***
## month       11 1.3721e+11 1.2473e+10     2.3382  0.007266 ** 
## lag365       1 8.1432e+10 8.1432e+10    15.2650  9.49e-05 ***
## Residuals 4190 2.2352e+13 5.3345e+09                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Analysis of Variance Table
## 
## Response: train_ts
##             Df     Sum Sq    Mean Sq    F value    Pr(>F)    
## season     364 1.4279e+14 3.9227e+11    73.5348 < 2.2e-16 ***
## trend        1 7.2634e+13 7.2634e+13 13615.7078 < 2.2e-16 ***
## wday         6 4.5009e+13 7.5016e+12  1406.2214 < 2.2e-16 ***
## month       11 1.3721e+11 1.2473e+10     2.3382  0.007266 ** 
## lag365       1 8.1432e+10 8.1432e+10    15.2650  9.49e-05 ***
## Residuals 4190 2.2352e+13 5.3345e+09                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

final_md <- tslm(UK_ts ~ season + trend + wday + month + lag365, 
                 data = UKdaily)

checkresiduals(final_md)

## 
##  Breusch-Godfrey test for serial correlation of order up to 730
## 
## data:  Residuals from Linear regression model
## LM test = 3301.1, df = 730, p-value < 2.2e-16

## 
##  Breusch-Godfrey test for serial correlation of order up to 730
## 
## data:  Residuals from Linear regression model
## LM test = 3301.1, df = 730, p-value < 2.2e-16

UK_fc_df <- data.frame(date = seq.Date(from = max(UKdaily$TIMESTAMP) + days(1), 
                                       by = "day", 
                                       length.out = h))

UK_fc_df$wday <- factor(lubridate::wday(UK_fc_df$date, label = TRUE), ordered = FALSE)

UK_fc_df$month <- factor(month(UK_fc_df$date, label = TRUE), ordered = FALSE)

UK_fc_df$lag365 <- tail(UKdaily$ND, h)

UKgrid_fc <- forecast(final_md, h = h, newdata = UK_fc_df)

plot_forecast(UKgrid_fc,
              title = "The UK National Demand for Electricity Forecast",
              Ytitle = "MW",
              Xtitle = "Year")