Capitulo 8
# Chapter 8 Code
# -------- Code Chank 1 --------
library(TSstudio)
## Warning: package 'TSstudio' was built under R version 4.2.3
data(USgas)
ts_info(USgas)
## The USgas series is a ts object with 1 variable and 238 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2019 10
# Using the series time index to set the start and end point of each partiton
train <- window(USgas,
start = time(USgas)[1],
end = time(USgas)[length(USgas) - 12])
test <- window(USgas,
start = time(USgas)[length(USgas) - 12 + 1],
end = time(USgas)[length(USgas)])
ts_info(train)
## The train series is a ts object with 1 variable and 226 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2018 10
ts_info(test)
## The test series is a ts object with 1 variable and 12 observations
## Frequency: 12
## Start time: 2018 11
## End time: 2019 10
# -------- Code Chank 2 --------
# The sample.out argument set the size of the testing partition
# (and therefore the training partition)
USgas_partitions <- ts_split(USgas, sample.out = 12)
train <- USgas_partitions$train
test <- USgas_partitions$test
ts_info(train)
## The train series is a ts object with 1 variable and 226 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2018 10
ts_info(test)
## The test series is a ts object with 1 variable and 12 observations
## Frequency: 12
## Start time: 2018 11
## End time: 2019 10
# -------- Code Chank 3 --------
library(forecast)
## Warning: package 'forecast' was built under R version 4.2.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
md <- auto.arima(train)
# -------- Code Chank 4 --------
checkresiduals(md)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,1)(2,1,1)[12]
## Q* = 24.95, df = 18, p-value = 0.1263
##
## Model df: 6. Total lags used: 24
# -------- Code Chank 5 --------
fc <- forecast(md, h = 12)
# -------- Code Chank 6 --------
accuracy(fc, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 5.844136 97.81626 73.42657 0.1170672 3.522348 0.6376860
## Test set 37.847885 103.22848 81.46603 1.3107987 3.261643 0.7075062
## ACF1 Theil's U
## Training set -0.004183172 NA
## Test set -0.046708926 0.3404092
# -------- Code Chank 7 --------
test_forecast(actual = USgas,
forecast.obj = fc,
test = test)
# -------- Code Chank 8 --------
library(forecast)
naive_model <- naive(train, h = 12)
test_forecast(actual = USgas,
forecast.obj = naive_model,
test = test)
accuracy(naive_model, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.028444 285.6607 228.5084 -0.9218463 10.97123 1.984522
## Test set 301.891667 499.6914 379.1417 9.6798015 13.28187 3.292723
## ACF1 Theil's U
## Training set 0.3761105 NA
## Test set 0.7002486 1.499679
# -------- Code Chank 9 --------
snaive_model <- snaive(train, h = 12)
test_forecast(actual = USgas,
forecast.obj = snaive_model,
test = test)
accuracy(snaive_model, test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 33.99953 148.7049 115.1453 1.379869 5.494048 1.000000 0.4859501
## Test set 96.45000 164.6967 135.8833 3.612060 5.220458 1.180103 -0.2120929
## Theil's U
## Training set NA
## Test set 0.4289964
# -------- Code Chank 10 --------
md_final <- auto.arima(USgas)
fc_final <- forecast(md_final, h = 12)
# -------- Code Chank 11 --------
plot_forecast(fc_final,
title = "The US Natural Gas Consumption Forecast",
Xtitle = "Year",
Ytitle = "Billion Cubic Feet")
# -------- Code Chank 12 --------
fc_final2 <- forecast(md_final,
h = 60,
level = c(80, 90))
plot_forecast(fc_final2,
title = "The US Natural Gas Consumption Forecast",
Xtitle = "Year",
Ytitle = "Billion Cubic Feet")
# -------- Code Chank 13 --------
fc_final3 <- forecast_sim(model = md_final,
h = 60,
n = 500)
# -------- Code Chank 14 --------
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
fc_final3$plot %>%
layout(title = "US Natural Gas Consumption - Forecasting Simulation",
yaxis = list(title = "Billion Cubic Feet"),
xaxis = list(title = "Year"))
# -------- Code Chank 15 --------
set.seed(1234)
#USgas_forecast$summary_plot
Capitulo 9
# Chapter 9 Code
# -------- Code 1 --------
library(TSstudio)
data("USgas")
ts_plot(USgas,
title = "US Monthly Natural Gas consumption",
Ytitle = "Billion Cubic Feet",
Xtitle = "Year")
# -------- Code 2 --------
ts_info(USgas)
## The USgas series is a ts object with 1 variable and 238 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2019 10
# -------- Code Chank 3 --------
ts_decompose(USgas)
# -------- Code Chank 4 --------
USgas_df <- ts_to_prophet(USgas)
# -------- Code Chank 5 --------
head(USgas_df)
## ds y
## 1 2000-01-01 2510.5
## 2 2000-02-01 2330.7
## 3 2000-03-01 2050.6
## 4 2000-04-01 1783.3
## 5 2000-05-01 1632.9
## 6 2000-06-01 1513.1
# -------- Code Chank 6 --------
USgas_df$trend <- 1:nrow(USgas_df)
# -------- Code Chank 7 --------
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
USgas_df$seasonal <- month(USgas_df$ds, label = T)
# -------- Code Chank 8 --------
#head(plot_error(md))
# -------- Code Chank 9 --------
h <- 12 # setting a testing partition length
train <- USgas_df[1:(nrow(USgas_df) - h), ]
test <- USgas_df[(nrow(USgas_df) - h + 1):nrow(USgas_df), ]
# -------- Code Chank 10 --------
md_trend <- lm(y ~ trend, data = train)
summary(md_trend)
##
## Call:
## lm(formula = y ~ trend, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -547.2 -307.4 -153.2 333.1 1052.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1751.0074 52.6435 33.26 < 2e-16 ***
## trend 2.4489 0.4021 6.09 4.86e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 394.4 on 224 degrees of freedom
## Multiple R-squared: 0.1421, Adjusted R-squared: 0.1382
## F-statistic: 37.09 on 1 and 224 DF, p-value: 4.861e-09
# -------- Code Chank 11 --------
train$yhat <- predict(md_trend, newdata = train)
test$yhat <- predict(md_trend, newdata = test)
# -------- Code Chank 12 --------
library(plotly)
library(ggplot2)
plot_lm <- function(data, train, test, title = NULL){
p <- plot_ly(data = data,
x = ~ ds,
y = ~ y,
type = "scatter",
mode = "line",
name = "Actual") %>%
add_lines(x = ~ train$ds,
y = ~ train$yhat,
line = list(color = "red"),
name = "Fitted") %>%
add_lines(x = ~ test$ds,
y = ~ test$yhat,
line = list(color = "green", dash = "dot", width = 3),
name = "Forecasted") %>%
layout(title = title,
xaxis = list(title = ""),
yaxis = list(title = "Billion Cubic Feet"),
legend = list(x = 0.05, y = 0.95))
return(p)
}
# -------- Code Chank 13 --------
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Trend Component of the Series")
# -------- Code Chank 14 --------
mape_trend <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_trend
## [1] 0.1644088 0.1299951
# -------- Code Chank 15 --------
md_seasonal <- lm(y ~ seasonal, data = train)
summary(md_seasonal)
##
## Call:
## lm(formula = y ~ seasonal, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -608.98 -162.34 -50.77 148.40 566.89
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2030.88 14.43 140.747 < 2e-16 ***
## seasonal.L -480.00 50.24 -9.554 < 2e-16 ***
## seasonal.Q 1103.83 50.17 22.000 < 2e-16 ***
## seasonal.C 72.42 50.05 1.447 0.149389
## seasonal^4 174.60 50.07 3.487 0.000592 ***
## seasonal^5 288.01 50.13 5.745 3.13e-08 ***
## seasonal^6 -44.67 50.09 -0.892 0.373460
## seasonal^7 -187.91 49.96 -3.762 0.000218 ***
## seasonal^8 84.95 49.84 1.705 0.089706 .
## seasonal^9 46.16 49.78 0.927 0.354828
## seasonal^10 77.55 49.76 1.559 0.120587
## seasonal^11 -11.09 49.75 -0.223 0.823856
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 216.9 on 214 degrees of freedom
## Multiple R-squared: 0.7521, Adjusted R-squared: 0.7394
## F-statistic: 59.04 on 11 and 214 DF, p-value: < 2.2e-16
# -------- Code Chank 16 --------
train$yhat <- predict(md_seasonal, newdata = train)
test$yhat <- predict(md_seasonal, newdata = test)
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Seasonal Component of the Series")
# -------- Code Chank 17 --------
mape_seasonal <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_seasonal
## [1] 0.08628973 0.21502100
# -------- Code Chank 18 --------
md1 <- lm(y ~ seasonal + trend, data = train)
summary(md1)
##
## Call:
## lm(formula = y ~ seasonal + trend, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -514.73 -77.17 -17.70 85.80 336.95
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1733.7153 17.0794 101.509 < 2e-16 ***
## seasonal.L -498.1709 29.6354 -16.810 < 2e-16 ***
## seasonal.Q 1115.2951 29.5872 37.695 < 2e-16 ***
## seasonal.C 78.9835 29.5103 2.676 0.00802 **
## seasonal^4 175.6505 29.5196 5.950 1.09e-08 ***
## seasonal^5 285.0192 29.5568 9.643 < 2e-16 ***
## seasonal^6 -49.3611 29.5319 -1.671 0.09610 .
## seasonal^7 -192.3050 29.4540 -6.529 4.77e-10 ***
## seasonal^8 81.8787 29.3835 2.787 0.00581 **
## seasonal^9 44.4849 29.3480 1.516 0.13106
## seasonal^10 76.8636 29.3372 2.620 0.00943 **
## seasonal^11 -11.2755 29.3353 -0.384 0.70109
## trend 2.6182 0.1305 20.065 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 127.9 on 213 degrees of freedom
## Multiple R-squared: 0.9142, Adjusted R-squared: 0.9094
## F-statistic: 189.2 on 12 and 213 DF, p-value: < 2.2e-16
# -------- Code Chank 19 --------
train$yhat <- predict(md1, newdata = train)
test$yhat <- predict(md1, newdata = test)
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Seasonal Component of the Series")
# -------- Code Chank 20 --------
mape_md1 <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_md1
## [1] 0.04774945 0.09143290
# -------- Code Chank 21 --------
md2 <- lm(y ~ seasonal + trend + I(trend^2), data = train)
summary(md2)
##
## Call:
## lm(formula = y ~ seasonal + trend + I(trend^2), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -468.47 -54.66 -2.21 63.11 294.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.882e+03 2.199e+01 85.568 < 2e-16 ***
## seasonal.L -4.917e+02 2.530e+01 -19.438 < 2e-16 ***
## seasonal.Q 1.121e+03 2.525e+01 44.381 < 2e-16 ***
## seasonal.C 8.247e+01 2.518e+01 3.275 0.00123 **
## seasonal^4 1.763e+02 2.519e+01 6.999 3.35e-11 ***
## seasonal^5 2.835e+02 2.522e+01 11.243 < 2e-16 ***
## seasonal^6 -5.175e+01 2.520e+01 -2.054 0.04123 *
## seasonal^7 -1.946e+02 2.513e+01 -7.741 3.97e-13 ***
## seasonal^8 8.030e+01 2.507e+01 3.203 0.00157 **
## seasonal^9 4.362e+01 2.504e+01 1.742 0.08293 .
## seasonal^10 7.651e+01 2.503e+01 3.057 0.00253 **
## seasonal^11 -1.137e+01 2.503e+01 -0.454 0.65005
## trend -1.270e+00 4.472e-01 -2.840 0.00494 **
## I(trend^2) 1.713e-02 1.908e-03 8.977 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 109.1 on 212 degrees of freedom
## Multiple R-squared: 0.9379, Adjusted R-squared: 0.9341
## F-statistic: 246.1 on 13 and 212 DF, p-value: < 2.2e-16
# -------- Code Chank 22 --------
train$yhat <- predict(md2, newdata = train)
test$yhat <- predict(md2, newdata = test)
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Seasonal Component of the Series")
mape_md2 <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_md2
## [1] 0.03688770 0.04212618
# -------- Code Chank 23 --------
USgas_split <- ts_split(USgas, sample.out = h)
train.ts <- USgas_split$train
test.ts <- USgas_split$test
# -------- Code Chank 24 --------
library(forecast)
md3 <- tslm(train.ts ~ season + trend + I(trend^2))
summary(md3)
##
## Call:
## tslm(formula = train.ts ~ season + trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -468.47 -54.66 -2.21 63.11 294.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.635e+03 3.224e+01 81.738 < 2e-16 ***
## season2 -3.004e+02 3.540e+01 -8.487 3.69e-15 ***
## season3 -4.841e+02 3.540e+01 -13.676 < 2e-16 ***
## season4 -9.128e+02 3.540e+01 -25.787 < 2e-16 ***
## season5 -1.099e+03 3.540e+01 -31.033 < 2e-16 ***
## season6 -1.118e+03 3.540e+01 -31.588 < 2e-16 ***
## season7 -9.547e+02 3.540e+01 -26.968 < 2e-16 ***
## season8 -9.322e+02 3.541e+01 -26.329 < 2e-16 ***
## season9 -1.136e+03 3.541e+01 -32.070 < 2e-16 ***
## season10 -1.046e+03 3.541e+01 -29.532 < 2e-16 ***
## season11 -8.001e+02 3.590e+01 -22.286 < 2e-16 ***
## season12 -2.618e+02 3.590e+01 -7.293 5.95e-12 ***
## trend -1.270e+00 4.472e-01 -2.840 0.00494 **
## I(trend^2) 1.713e-02 1.908e-03 8.977 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 109.1 on 212 degrees of freedom
## Multiple R-squared: 0.9379, Adjusted R-squared: 0.9341
## F-statistic: 246.1 on 13 and 212 DF, p-value: < 2.2e-16
# -------- Code Chank 25 --------
r <- which(USgas_df$ds == as.Date("2014-01-01"))
USgas_df$s_break <- ifelse(year(USgas_df$ds) >= 2010, 1, 0)
USgas_df$s_break[r] <- 1
md3 <- tslm(USgas ~ season + trend + I(trend^2) + s_break, data = USgas_df)
summary(md3)
##
## Call:
## tslm(formula = USgas ~ season + trend + I(trend^2) + s_break,
## data = USgas_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -469.25 -50.68 -2.66 63.63 275.89
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.661e+03 3.200e+01 83.164 < 2e-16 ***
## season2 -3.054e+02 3.448e+01 -8.858 2.61e-16 ***
## season3 -4.849e+02 3.448e+01 -14.062 < 2e-16 ***
## season4 -9.272e+02 3.449e+01 -26.885 < 2e-16 ***
## season5 -1.108e+03 3.449e+01 -32.114 < 2e-16 ***
## season6 -1.127e+03 3.450e+01 -32.660 < 2e-16 ***
## season7 -9.568e+02 3.450e+01 -27.730 < 2e-16 ***
## season8 -9.340e+02 3.451e+01 -27.061 < 2e-16 ***
## season9 -1.138e+03 3.452e+01 -32.972 < 2e-16 ***
## season10 -1.040e+03 3.453e+01 -30.122 < 2e-16 ***
## season11 -7.896e+02 3.497e+01 -22.577 < 2e-16 ***
## season12 -2.649e+02 3.498e+01 -7.571 9.72e-13 ***
## trend -1.928e+00 4.479e-01 -4.304 2.51e-05 ***
## I(trend^2) 1.862e-02 1.676e-03 11.113 < 2e-16 ***
## s_break 6.060e+01 2.836e+01 2.137 0.0337 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 109 on 223 degrees of freedom
## Multiple R-squared: 0.9423, Adjusted R-squared: 0.9387
## F-statistic: 260.3 on 14 and 223 DF, p-value: < 2.2e-16
library(UKgrid)
## Warning: package 'UKgrid' was built under R version 4.2.3
UKdaily <- extract_grid(type = "data.frame",
columns = "ND",
aggregate = "daily")
head(UKdaily)
## TIMESTAMP ND
## 1 2005-04-01 1920069
## 2 2005-04-02 1674699
## 3 2005-04-03 1631352
## 4 2005-04-04 1916693
## 5 2005-04-05 1952082
## 6 2005-04-06 1964584
ts_plot(UKdaily,
title = "The UK National Demand for Electricity",
Ytitle = "MW",
Xtitle = "Year")
# -------- Code Chank 26 --------
ts_heatmap(UKdaily[which(year(UKdaily$TIMESTAMP) >= 2016),],
title = "UK the Daily National Grid Demand Heatmap")
# -------- Code Chank 27 --------
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
UKdaily <- UKdaily %>%
mutate(wday = wday(TIMESTAMP, label = TRUE),
month = month(TIMESTAMP, label = TRUE),
lag365 = dplyr::lag(ND, 365)) %>%
filter(!is.na(lag365)) %>%
arrange(TIMESTAMP)
str(UKdaily)
## 'data.frame': 4939 obs. of 5 variables:
## $ TIMESTAMP: Date, format: "2006-04-01" "2006-04-02" ...
## $ ND : int 1718405 1691341 1960325 2023886 2026204 2008422 1981175 1770440 1749715 2012865 ...
## $ wday : Ord.factor w/ 7 levels "dom\\."<"lun\\."<..: 7 1 2 3 4 5 6 7 1 2 ...
## $ month : Ord.factor w/ 12 levels "ene"<"feb"<"mar"<..: 4 4 4 4 4 4 4 4 4 4 ...
## $ lag365 : int 1920069 1674699 1631352 1916693 1952082 1964584 1990895 2003982 1811436 1684720 ...
# -------- Code Chank 28 --------
start_date <- min(UKdaily$TIMESTAMP)
UK_ts <- ts(UKdaily$ND,
start = c(year(start_date), yday(start_date)),
frequency = 365)
#ts_acf(UK_ts, lag.max = 365 * 4)
# -------- Code Chank 29 --------
h <- 365
UKpartitions <- ts_split(UK_ts, sample.out = h)
train_ts <- UKpartitions$train
test_ts <- UKpartitions$test
train_df <- UKdaily[1:(nrow(UKdaily) - h), ]
test_df <- UKdaily[(nrow(UKdaily) - h + 1):nrow(UKdaily), ]
# -------- Code Chank 30 --------
md_tslm1 <- tslm(train_ts ~ season + trend)
fc_tslm1 <- forecast(md_tslm1, h = h)
test_forecast(actual = UK_ts,
forecast.obj = fc_tslm1,
test = test_ts)
accuracy(fc_tslm1, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -9.030002e-12 121551.4 100439.64 -0.5721337 6.294111 0.8418677
## Test set -1.740215e+04 123156.6 96785.27 -1.8735336 7.160573 0.8112374
## ACF1 Theil's U
## Training set 0.5277884 NA
## Test set 0.5106681 1.071899
# -------- Code Chank 31 --------
md_tslm2 <- tslm(train_ts ~ season + trend + wday, data = train_df)
fc_tslm2 <- forecast(md_tslm2, h = h, newdata = test_df)
test_forecast(actual = UK_ts,
forecast.obj = fc_tslm2,
test = test_ts)
accuracy(fc_tslm2, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -9.812170e-13 70245.98 52146.79 -0.1738708 3.167605 0.4370853
## Test set -1.764563e+04 80711.71 65373.21 -1.3715505 4.682071 0.5479470
## ACF1 Theil's U
## Training set 0.7513664 NA
## Test set 0.6075598 0.68445
# -------- Code Chank 32 --------
md_tslm3 <- tslm(train_ts ~ season + trend + wday + month + lag365, data = train_df)
fc_tslm3 <- forecast(md_tslm3, h = h, newdata = test_df)
test_forecast(actual = UK_ts,
forecast.obj = fc_tslm3,
test = test_ts)