Informe 3

Training approaches

library(TSstudio)

## Warning: package 'TSstudio' was built under R version 4.3.3

data(USgas)
ts_info(USgas)

##  The USgas series is a ts object with 1 variable and 238 observations
##  Frequency: 12 
##  Start time: 2000 1 
##  End time: 2019 10

train<- window(USgas,
               start=time(USgas) [1],
               end=time(USgas) [length(USgas)-12])
test <- window(USgas,
               start=time(USgas)[length(USgas) -12 + 1],
               end=time(USgas)[length(USgas)])

ts_info(test)

##  The test series is a ts object with 1 variable and 12 observations
##  Frequency: 12 
##  Start time: 2018 11 
##  End time: 2019 10

USgas_partitions <- ts_split(USgas,sample.out=12)
train <- USgas_partitions$train
test <- USgas_partitions$test

ts_info(train)

##  The train series is a ts object with 1 variable and 226 observations
##  Frequency: 12 
##  Start time: 2000 1 
##  End time: 2018 10

ts_info(test)

##  The test series is a ts object with 1 variable and 12 observations
##  Frequency: 12 
##  Start time: 2018 11 
##  End time: 2019 10

Forecasting with backtesting

library(forecast)

## Warning: package 'forecast' was built under R version 4.3.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

md <- auto.arima(train)
checkresiduals(md)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,1)(2,1,1)[12]
## Q* = 24.95, df = 18, p-value = 0.1263
## 
## Model df: 6.   Total lags used: 24

fc <- forecast(md,h=12)
accuracy(fc,test)

##                     ME      RMSE      MAE       MPE     MAPE      MASE
## Training set  5.844136  97.81626 73.42657 0.1170672 3.522348 0.6376860
## Test set     37.847885 103.22848 81.46603 1.3107987 3.261643 0.7075062
##                      ACF1 Theil's U
## Training set -0.004183172        NA
## Test set     -0.046708926 0.3404092

test_forecast(actual=USgas,
              forecast.obj=fc,
              test=test)

Forecast benchmark

library(forecast)
naive_model <- naive(train, h=12)
test_forecast(actual=USgas,
              forecast.obj=naive_model,
              test=test)

accuracy(naive_model,test)

##                      ME     RMSE      MAE        MPE     MAPE     MASE
## Training set  -1.028444 285.6607 228.5084 -0.9218463 10.97123 1.984522
## Test set     301.891667 499.6914 379.1417  9.6798015 13.28187 3.292723
##                   ACF1 Theil's U
## Training set 0.3761105        NA
## Test set     0.7002486  1.499679

snaive_model <- snaive(train, h=12)
test_forecast(actual=USgas,
              forecast.obj=snaive_model,
              test=test)

accuracy(snaive_model,test)

##                    ME     RMSE      MAE      MPE     MAPE     MASE       ACF1
## Training set 33.99953 148.7049 115.1453 1.379869 5.494048 1.000000  0.4859501
## Test set     96.45000 164.6967 135.8833 3.612060 5.220458 1.180103 -0.2120929
##              Theil's U
## Training set        NA
## Test set     0.4289964

Finalizing the forecast

md_final <- auto.arima(USgas)
fc_final <- forecast(md_final, h=12)

plot_forecast(fc_final,
              title="The US Natural Gas Consumption Forecast",
              Xtitle="Year",
              Ytitle="Billion Cubic Feet")

Handling forecast uncertainty

fc_final12 <- forecast(md_final,
                       h=60,
                       level=c(80,90))
plot_forecast(fc_final12,
              title="The US Natural Gas Consumption Forecast",
              Xtitle="Year",
              Ytitle="Billion Cubic Feet")

fc_final13 <- forecast_sim(model=md_final,
                           h=60,
                           n=500)

library(plotly)

## Warning: package 'plotly' was built under R version 4.3.3

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.3.3

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

fc_final13$plot %>%
  layout(title="US Natural Gas Consumption - Forecasting Simulation",
         yaxis=list(title="Billion Cubic Feet"),
         xaxis=list(title="Year"))

Capitulo 9 Forecasting with linear regression

library(TSstudio)
data(USgas)
ts_plot(USgas,
        title="US Monthly Natural Gas consumption",
        Ytitle="Billion Cubic Feet",
        Xtitle="Year")

ts_info(USgas)

##  The USgas series is a ts object with 1 variable and 238 observations
##  Frequency: 12 
##  Start time: 2000 1 
##  End time: 2019 10

ts_decompose(USgas)

USgas_df <-ts_to_prophet(USgas)

head(USgas_df)

##           ds      y
## 1 2000-01-01 2510.5
## 2 2000-02-01 2330.7
## 3 2000-03-01 2050.6
## 4 2000-04-01 1783.3
## 5 2000-05-01 1632.9
## 6 2000-06-01 1513.1

USgas_df$trend <- 1:nrow(USgas_df)

library(lubridate)

## Warning: package 'lubridate' was built under R version 4.3.3

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

USgas_df$seasonal <- factor(month(USgas_df$ds, label=T),ordered=FALSE)

head(USgas_df)

##           ds      y trend seasonal
## 1 2000-01-01 2510.5     1      ene
## 2 2000-02-01 2330.7     2      feb
## 3 2000-03-01 2050.6     3      mar
## 4 2000-04-01 1783.3     4      abr
## 5 2000-05-01 1632.9     5      may
## 6 2000-06-01 1513.1     6      jun

h <- 12 
train <- USgas_df[1:(nrow(USgas_df)-h),]
test <- USgas_df[(nrow(USgas_df)-h+1):nrow(USgas_df),]

Modeling the series trend and seasonal components

md_trend <- lm(y ~ trend, data = train)
summary (md_trend)

## 
## Call:
## lm(formula = y ~ trend, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -547.2 -307.4 -153.2  333.1 1052.6 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1751.0074    52.6435   33.26  < 2e-16 ***
## trend          2.4489     0.4021    6.09 4.86e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 394.4 on 224 degrees of freedom
## Multiple R-squared:  0.1421, Adjusted R-squared:  0.1382 
## F-statistic: 37.09 on 1 and 224 DF,  p-value: 4.861e-09

train$yhat <- predict(md_trend, newdata = train)

# Predictions on the testing partition
test$yhat <- predict(md_trend, newdata = test)

library(plotly)
plot_lm <- function(data, train, test, title = NULL) {
  p <- plot_ly(data = data, x = ~ds, y = ~y,
               type = "scatter", mode = "line", name = "Actual") %>%
    add_lines(x = ~train$ds, y = ~train$yhat,
              line = list(color = "red"), name = "Fitted") %>%
    add_lines(x = ~test$ds, y = ~test$yhat,
              line = list(color = "green", dash = "dot", width = 3),
              name = "Forecasted") %>%
    layout(title = title,
           xaxis = list(title = "Year"),
           yaxis = list(title = "Billion Cubic Feet"),
           legend = list(x = 0.05, y = 0.95))
  
  return(p)
}
plot_lm(data = USgas_df,
        train = train,
        test = test,
        title = "Predicting the Trend Component of the Series")

mape_train <- mean(abs(train$y - train$yhat) / train$y) * 100

# Calculate MAPE for the testing set
mape_test <- mean(abs(test$y - test$yhat) / test$y) * 100

# Combine the results into a vector
mape_trend <- c(mape_train, mape_test)

# Display MAPE for both training and testing sets
mape_trend

## [1] 16.44088 12.99951

md_seasonal <- lm(y ~ seasonal, data = train)
summary (md_seasonal)

## 
## Call:
## lm(formula = y ~ seasonal, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -608.98 -162.34  -50.77  148.40  566.89 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2774.28      49.75  55.759  < 2e-16 ***
## seasonalfeb   -297.92      70.36  -4.234 3.41e-05 ***
## seasonalmar   -479.10      70.36  -6.809 9.77e-11 ***
## seasonalabr   -905.28      70.36 -12.866  < 2e-16 ***
## seasonalmay  -1088.42      70.36 -15.468  < 2e-16 ***
## seasonaljun  -1105.49      70.36 -15.711  < 2e-16 ***
## seasonaljul   -939.35      70.36 -13.350  < 2e-16 ***
## seasonalago   -914.12      70.36 -12.991  < 2e-16 ***
## seasonalsept -1114.74      70.36 -15.843  < 2e-16 ***
## seasonaloct  -1022.21      70.36 -14.527  < 2e-16 ***
## seasonalnov   -797.53      71.33 -11.180  < 2e-16 ***
## seasonaldic   -256.67      71.33  -3.598 0.000398 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 216.9 on 214 degrees of freedom
## Multiple R-squared:  0.7521, Adjusted R-squared:  0.7394 
## F-statistic: 59.04 on 11 and 214 DF,  p-value: < 2.2e-16

train$yhat <- predict(md_seasonal, newdata = train)

# Predictions on the testing partition
test$yhat <- predict(md_seasonal, newdata = test)

plot_lm(data = USgas_df,
        train = train,
        test = test,
        title = "Predicting the Seasonal Component of the Series")

mape_train_seasonal <- mean(abs(train$y - train$yhat) / train$y) * 100

# Calculate MAPE for the testing set
mape_test_seasonal <- mean(abs(test$y - test$yhat) / test$y) * 100

# Combine the results into a vector
mape_seasonal <- c(mape_train_seasonal, mape_test_seasonal)

# Display MAPE for both training and testing sets
mape_seasonal

## [1]  8.628973 21.502100

md1 <- lm(y ~ seasonal + trend, data = train)

summary (md1)

## 
## Call:
## lm(formula = y ~ seasonal + trend, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -514.73  -77.17  -17.70   85.80  336.95 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2488.8994    32.6011  76.344  < 2e-16 ***
## seasonalfeb   -300.5392    41.4864  -7.244 7.84e-12 ***
## seasonalmar   -484.3363    41.4870 -11.674  < 2e-16 ***
## seasonalabr   -913.1334    41.4880 -22.010  < 2e-16 ***
## seasonalmay  -1098.8884    41.4895 -26.486  < 2e-16 ***
## seasonaljun  -1118.5855    41.4913 -26.960  < 2e-16 ***
## seasonaljul   -955.0563    41.4936 -23.017  < 2e-16 ***
## seasonalago   -932.4482    41.4962 -22.471  < 2e-16 ***
## seasonalsept -1135.6874    41.4993 -27.366  < 2e-16 ***
## seasonaloct  -1045.7687    41.5028 -25.198  < 2e-16 ***
## seasonalnov   -808.0016    42.0617 -19.210  < 2e-16 ***
## seasonaldic   -269.7642    42.0635  -6.413 9.05e-10 ***
## trend            2.6182     0.1305  20.065  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 127.9 on 213 degrees of freedom
## Multiple R-squared:  0.9142, Adjusted R-squared:  0.9094 
## F-statistic: 189.2 on 12 and 213 DF,  p-value: < 2.2e-16

train$yhat <- predict(md1, newdata = train)

# Make predictions on the testing partition
test$yhat <- predict(md1, newdata = test)

# Plot the combined trend and seasonal components
plot_lm(data = USgas_df,
        train = train,
        test = test,
        title = "Predicting the Trend and Seasonal Components of the Series")

mape_train_md1 <- mean(abs(train$y - train$yhat) / train$y) * 100

# Calculate MAPE for the testing set
mape_test_md1 <- mean(abs(test$y - test$yhat) / test$y) * 100

# Combine the results into a vector
mape_md1 <- c(mape_train_md1, mape_test_md1)

# Display MAPE for both training and testing sets
mape_md1

## [1] 4.774945 9.143290

md2 <- lm(y ~ seasonal + trend + I(trend^2), data = train)
summary (md2)

## 
## Call:
## lm(formula = y ~ seasonal + trend + I(trend^2), data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -468.47  -54.66   -2.21   63.11  294.32 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.635e+03  3.224e+01  81.738  < 2e-16 ***
## seasonalfeb  -3.004e+02  3.540e+01  -8.487 3.69e-15 ***
## seasonalmar  -4.841e+02  3.540e+01 -13.676  < 2e-16 ***
## seasonalabr  -9.128e+02  3.540e+01 -25.787  < 2e-16 ***
## seasonalmay  -1.099e+03  3.540e+01 -31.033  < 2e-16 ***
## seasonaljun  -1.118e+03  3.540e+01 -31.588  < 2e-16 ***
## seasonaljul  -9.547e+02  3.540e+01 -26.968  < 2e-16 ***
## seasonalago  -9.322e+02  3.541e+01 -26.329  < 2e-16 ***
## seasonalsept -1.136e+03  3.541e+01 -32.070  < 2e-16 ***
## seasonaloct  -1.046e+03  3.541e+01 -29.532  < 2e-16 ***
## seasonalnov  -8.001e+02  3.590e+01 -22.286  < 2e-16 ***
## seasonaldic  -2.618e+02  3.590e+01  -7.293 5.95e-12 ***
## trend        -1.270e+00  4.472e-01  -2.840  0.00494 ** 
## I(trend^2)    1.713e-02  1.908e-03   8.977  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 109.1 on 212 degrees of freedom
## Multiple R-squared:  0.9379, Adjusted R-squared:  0.9341 
## F-statistic: 246.1 on 13 and 212 DF,  p-value: < 2.2e-16

# Make predictions on the training partition
train$yhat <- predict(md2, newdata = train)

# Make predictions on the testing partition
test$yhat <- predict(md2, newdata = test)

# Plot the trend (polynomial) and seasonal components
plot_lm(data = USgas_df,
        train = train,
        test = test,
        title = "Predicting the Trend (Polynomial) and Seasonal Components of the Series")

mape_train_md2 <- mean(abs(train$y - train$yhat) / train$y) * 100

# Calculate MAPE for the testing set
mape_test_md2 <- mean(abs(test$y - test$yhat) / test$y) * 100

# Combine the results into a vector
mape_md2 <- c(mape_train_md2, mape_test_md2)

# Display MAPE for both training and testing sets
mape_md2

## [1] 3.688770 4.212618

The tslm function

USgas_split <- ts_split(USgas, sample.out = h)

# Access the training and testing partitions
train.ts <- USgas_split$train
test.ts <- USgas_split$test

library(forecast)
md3 <- tslm(train.ts ~ season + trend + I(trend^2))
summary (md3)

## 
## Call:
## tslm(formula = train.ts ~ season + trend + I(trend^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -468.47  -54.66   -2.21   63.11  294.32 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.635e+03  3.224e+01  81.738  < 2e-16 ***
## season2     -3.004e+02  3.540e+01  -8.487 3.69e-15 ***
## season3     -4.841e+02  3.540e+01 -13.676  < 2e-16 ***
## season4     -9.128e+02  3.540e+01 -25.787  < 2e-16 ***
## season5     -1.099e+03  3.540e+01 -31.033  < 2e-16 ***
## season6     -1.118e+03  3.540e+01 -31.588  < 2e-16 ***
## season7     -9.547e+02  3.540e+01 -26.968  < 2e-16 ***
## season8     -9.322e+02  3.541e+01 -26.329  < 2e-16 ***
## season9     -1.136e+03  3.541e+01 -32.070  < 2e-16 ***
## season10    -1.046e+03  3.541e+01 -29.532  < 2e-16 ***
## season11    -8.001e+02  3.590e+01 -22.286  < 2e-16 ***
## season12    -2.618e+02  3.590e+01  -7.293 5.95e-12 ***
## trend       -1.270e+00  4.472e-01  -2.840  0.00494 ** 
## I(trend^2)   1.713e-02  1.908e-03   8.977  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 109.1 on 212 degrees of freedom
## Multiple R-squared:  0.9379, Adjusted R-squared:  0.9341 
## F-statistic: 246.1 on 13 and 212 DF,  p-value: < 2.2e-16

r <- which(USgas_df$ds == as.Date("2014-01-01"))

USgas_df$s_break <- ifelse(year(USgas_df$ds) >= 2010, 1, 0)

USgas_df$s_break[r] <- 1

md3 <- tslm(USgas ~ season + trend + I(trend^2) + s_break, data = USgas_df)
summary (md3)

## 
## Call:
## tslm(formula = USgas ~ season + trend + I(trend^2) + s_break, 
##     data = USgas_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -469.25  -50.68   -2.66   63.63  275.89 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.661e+03  3.200e+01  83.164  < 2e-16 ***
## season2     -3.054e+02  3.448e+01  -8.858 2.61e-16 ***
## season3     -4.849e+02  3.448e+01 -14.062  < 2e-16 ***
## season4     -9.272e+02  3.449e+01 -26.885  < 2e-16 ***
## season5     -1.108e+03  3.449e+01 -32.114  < 2e-16 ***
## season6     -1.127e+03  3.450e+01 -32.660  < 2e-16 ***
## season7     -9.568e+02  3.450e+01 -27.730  < 2e-16 ***
## season8     -9.340e+02  3.451e+01 -27.061  < 2e-16 ***
## season9     -1.138e+03  3.452e+01 -32.972  < 2e-16 ***
## season10    -1.040e+03  3.453e+01 -30.122  < 2e-16 ***
## season11    -7.896e+02  3.497e+01 -22.577  < 2e-16 ***
## season12    -2.649e+02  3.498e+01  -7.571 9.72e-13 ***
## trend       -1.928e+00  4.479e-01  -4.304 2.51e-05 ***
## I(trend^2)   1.862e-02  1.676e-03  11.113  < 2e-16 ***
## s_break      6.060e+01  2.836e+01   2.137   0.0337 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 109 on 223 degrees of freedom
## Multiple R-squared:  0.9423, Adjusted R-squared:  0.9387 
## F-statistic: 260.3 on 14 and 223 DF,  p-value: < 2.2e-16

Forecasting a series with multiseasonality components-a case study

library(UKgrid)

## Warning: package 'UKgrid' was built under R version 4.3.3

UKdaily <- extract_grid(type = "data.frame", columns = "ND", aggregate = "daily")
head (UKdaily)

##    TIMESTAMP      ND
## 1 2005-04-01 1920069
## 2 2005-04-02 1674699
## 3 2005-04-03 1631352
## 4 2005-04-04 1916693
## 5 2005-04-05 1952082
## 6 2005-04-06 1964584

library(TSstudio)

# Plot the time series
ts_plot(UKdaily,
        title = "The UK National Demand for Electricity",
        Ytitle = "MW",
        Xtitle = "Year")

ts_heatmap(UKdaily[which(year(UKdaily$TIMESTAMP) >= 2016),],title = "UK the Daily National Grid Demand Heatmap")

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.3.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

UKdaily <- UKdaily %>%
  mutate(wday = wday(TIMESTAMP, label = TRUE),
         month = month(TIMESTAMP, label = TRUE),
         lag365 = dplyr::lag(ND, 365)) %>%
  filter(!is.na(lag365)) %>%
  arrange(TIMESTAMP)

str (UKdaily)

## 'data.frame':    4939 obs. of  5 variables:
##  $ TIMESTAMP: Date, format: "2006-04-01" "2006-04-02" ...
##  $ ND       : int  1718405 1691341 1960325 2023886 2026204 2008422 1981175 1770440 1749715 2012865 ...
##  $ wday     : Ord.factor w/ 7 levels "dom\\."<"lun\\."<..: 7 1 2 3 4 5 6 7 1 2 ...
##  $ month    : Ord.factor w/ 12 levels "ene"<"feb"<"mar"<..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ lag365   : int  1920069 1674699 1631352 1916693 1952082 1964584 1990895 2003982 1811436 1684720 ...

start_date <- min(UKdaily$TIMESTAMP)

# Extract the year and day of the year from the start date
start <- c(year(start_date), yday(start_date))

UK_ts <- ts(UKdaily$ND,
            start = start,
            frequency = 365)

library(TSstudio)
acf(UK_ts, lag.max = 365 * 4)

h <- 365
# Split the time series data into training and testing partitions
UKpartitions <- ts_split(UK_ts, sample.out = h)

# Access the training and testing partitions
train_ts <- UKpartitions$train
test_ts <- UKpartitions$test

train_df <- UKdaily[1:(nrow(UKdaily) - h), ]

# Create the testing data frame
test_df <- UKdaily[(nrow(UKdaily) - h + 1):nrow(UKdaily), ]

Entrenamiento y prueba del modelo de pronostico

md_tslm1 <- tslm(train_ts ~ season + trend)

fc_tslm1 <- forecast(md_tslm1, h = h)

test_forecast(actual = UK_ts,
              forecast.obj = fc_tslm1,
              test = test_ts)

accuracy (fc_tslm1, test_ts)

##                         ME     RMSE       MAE        MPE     MAPE      MASE
## Training set -4.781286e-12 121551.4 100439.64 -0.5721337 6.294111 0.8418677
## Test set     -1.740215e+04 123156.6  96785.27 -1.8735336 7.160573 0.8112374
##                   ACF1 Theil's U
## Training set 0.5277884        NA
## Test set     0.5106681  1.071899

md_tslm2 <- tslm(train_ts ~ season + trend + wday + month, data = train_df)

accuracy(fc_tslm1, test_ts)

##                         ME     RMSE       MAE        MPE     MAPE      MASE
## Training set -4.781286e-12 121551.4 100439.64 -0.5721337 6.294111 0.8418677
## Test set     -1.740215e+04 123156.6  96785.27 -1.8735336 7.160573 0.8112374
##                   ACF1 Theil's U
## Training set 0.5277884        NA
## Test set     0.5106681  1.071899

md_tslm3 <- tslm(train_ts ~ season + trend + wday + month + lag365, data = train_df)

# Generate forecasts
fc_tslm3 <- forecast(md_tslm3, h = h, newdata = test_df)

test_forecast(actual = UK_ts,
              forecast.obj = fc_tslm3,
              test = test_ts)

accuracy (fc_tslm3, test_ts)

##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -9.836939e-13 69904.92 52006.75 -0.1717563 3.163385 0.4359116
## Test set     -1.754061e+04 81783.55 65957.66 -1.3613252 4.722083 0.5528457
##                   ACF1 Theil's U
## Training set 0.7500146        NA
## Test set     0.6094666 0.6925767

summary (md_tslm3)$coefficients %>% tail (1)

##           Estimate Std. Error   t value     Pr(>|t|)
## lag365 -0.06038328 0.01545495 -3.907051 9.490321e-05

anova (md_tslm3)

## Analysis of Variance Table
## 
## Response: train_ts
##             Df     Sum Sq    Mean Sq    F value    Pr(>F)    
## season     364 1.4279e+14 3.9227e+11    73.5348 < 2.2e-16 ***
## trend        1 7.2634e+13 7.2634e+13 13615.7078 < 2.2e-16 ***
## wday         6 4.5009e+13 7.5016e+12  1406.2214 < 2.2e-16 ***
## month       11 1.3721e+11 1.2473e+10     2.3382  0.007266 ** 
## lag365       1 8.1432e+10 8.1432e+10    15.2650  9.49e-05 ***
## Residuals 4190 2.2352e+13 5.3345e+09                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

final_md <- tslm(UK_ts ~ season + trend + wday + month + lag365, data = UKdaily)

checkresiduals (final_md)

## 
##  Breusch-Godfrey test for serial correlation of order up to 730
## 
## data:  Residuals from Linear regression model
## LM test = 3301.1, df = 730, p-value < 2.2e-16

# Crear un marco de datos para almacenar las fechas de los pronósticos
UK_fc_df <- data.frame(date = seq.Date(from = max(UKdaily$TIMESTAMP) + 1,
                                       by = "day",
                                       length.out = h))
# Agregar columna para representar el día de la semana
UK_fc_df$wday <- factor(wday(UK_fc_df$date, label = TRUE), ordered = FALSE)

# Agregar columna para representar el mes
UK_fc_df$month <- factor(month(UK_fc_df$date, label = TRUE), ordered = FALSE)

# Agregar columna para el valor rezagado de 365 días
UK_fc_df$lag365 <- tail(UKdaily$ND, h)

UKgrid_fc <- forecast(final_md, h = h, newdata = UK_fc_df)

# Trazar los pronósticos
plot_forecast(UKgrid_fc,
              title = "The UK National Demand for Electricity Forecast",
              Ytitle = "MW",
              Xtitle = "Year")

Informe 3

Daniel Celis

2024-05-01