library(TSstudio)
data("USgas")
ts_plot(USgas,
title = "US Monthly Natural Gas consumption",
Ytitle = "Billion Cubic Feet", Xtitle = "Year")
ts_info(USgas)
## The USgas series is a ts object with 1 variable and 235 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2019 7
ts_decompose(USgas)
# transform the series from a ts object to a data.frame object. ”
# The function transforms the ts object into two columns of data.frame, where the two columns represent the time and numeric components of the series
USgas_df <- ts_to_prophet(USgas)
head(USgas)
## [1] 2510.5 2330.7 2050.6 1783.3 1632.9 1513.1
head(USgas_df)
# 1)“The first feature we will create is the series trend. ”
USgas_df$trend <- 1:nrow(USgas_df)
# 2)“The second feature we want to create is the seasonal component.”
# use the month function from the lubridate package to extract the month of the year from the ds date variable
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
USgas_df$seasonal <- month(USgas_df$ds, label = T)
head(USgas_df)
h <- 12
train <- USgas_df[1:(nrow(USgas_df) - h), ]
test <- USgas_df[(nrow(USgas_df) - h + 1):nrow(USgas_df), ]
md_trend <- lm(y ~ trend, data = train)
summary(md_trend)
##
## Call:
## lm(formula = y ~ trend, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -547.5 -307.6 -156.5 346.0 1038.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1750.3156 53.2454 32.873 < 2e-16 ***
## trend 2.4576 0.4122 5.963 9.74e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 396.2 on 221 degrees of freedom
## Multiple R-squared: 0.1386, Adjusted R-squared: 0.1347
## F-statistic: 35.55 on 1 and 221 DF, p-value: 9.739e-09
train$yhat <- predict(md_trend, newdata = train)
test$yhat <- predict(md_trend, newdata = test)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
plot_lm <- function(data, train, test, title = NULL){
p <- plot_ly(data = data,
x = ~ ds,
y = ~ y,
type = "scatter",
mode = "line",
name = "Actual") %>%
add_lines(x = ~ train$ds,
y = ~ train$yhat,
line = list(color = "red"),
name = "Fitted") %>%
add_lines(x = ~ test$ds,
y = ~ test$yhat,
line = list(color = "green", dash = "dot", width = 3),
name = "Forecasted") %>%
layout(title = title,
xaxis = list(title = ""),
yaxis = list(title = "Billion Cubic Feet"),
legend = list(x = 0.05, y = 0.95))
return(p)
}
# -------- Code Chank 13 --------
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Trend Component of the Series")
# -------- Code Chank 14 --------
mape_trend <- c(mean(abs(train$y - train$yhat) / train$y), mean(abs(test$y - test$yhat) / test$y))
mape_trend
## [1] 0.1661594 0.1240414
md_seasonal <- lm(y ~ seasonal, data = train)
summary(md_seasonal)
##
## Call:
## lm(formula = y ~ seasonal, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -606.98 -152.27 -52.98 141.70 549.23
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2023.62 14.00 144.494 < 2e-16 ***
## seasonal.L -496.21 48.62 -10.206 < 2e-16 ***
## seasonal.Q 1112.76 48.49 22.948 < 2e-16 ***
## seasonal.C 96.77 48.61 1.991 0.047815 *
## seasonal^4 185.39 48.50 3.823 0.000174 ***
## seasonal^5 278.16 48.59 5.725 3.53e-08 ***
## seasonal^6 -61.14 48.51 -1.260 0.208911
## seasonal^7 -195.16 48.55 -4.020 8.12e-05 ***
## seasonal^8 85.22 48.52 1.756 0.080516 .
## seasonal^9 44.72 48.48 0.922 0.357355
## seasonal^10 71.65 48.55 1.476 0.141551
## seasonal^11 -17.28 48.22 -0.358 0.720457
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 209.1 on 211 degrees of freedom
## Multiple R-squared: 0.771, Adjusted R-squared: 0.7591
## F-statistic: 64.59 on 11 and 211 DF, p-value: < 2.2e-16
train$yhat <- predict(md_seasonal, newdata = train)
test$yhat <- predict(md_seasonal, newdata = test)
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Seasonal Component of the Series")
# -------- Code Chank 17 --------
mape_seasonal <- c(mean(abs(train$y - train$yhat) / train$y), mean(abs(test$y - test$yhat) / test$y))
mape_seasonal
## [1] 0.08340338 0.20408252
md1 <- lm(y ~ seasonal + trend, data = train)
summary(md1)
##
## Call:
## lm(formula = y ~ seasonal + trend, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -515.91 -72.13 -13.97 85.37 328.98
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1740.2757 16.8832 103.077 < 2e-16 ***
## seasonal.L -504.2525 29.2110 -17.262 < 2e-16 ***
## seasonal.Q 1117.6084 29.1299 38.366 < 2e-16 ***
## seasonal.C 89.3614 29.2045 3.060 0.00250 **
## seasonal^4 180.6430 29.1331 6.201 2.94e-09 ***
## seasonal^5 281.5304 29.1901 9.645 < 2e-16 ***
## seasonal^6 -56.6069 29.1400 -1.943 0.05340 .
## seasonal^7 -196.1621 29.1676 -6.725 1.63e-10 ***
## seasonal^8 81.0555 29.1509 2.781 0.00592 **
## seasonal^9 43.7237 29.1241 1.501 0.13478
## seasonal^10 75.1242 29.1685 2.576 0.01070 *
## seasonal^11 -13.4849 28.9703 -0.465 0.64207
## trend 2.5299 0.1307 19.357 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 125.6 on 210 degrees of freedom
## Multiple R-squared: 0.9178, Adjusted R-squared: 0.9131
## F-statistic: 195.3 on 12 and 210 DF, p-value: < 2.2e-16
# -------- Code Chank 19 --------
train$yhat <- predict(md1, newdata = train)
test$yhat <- predict(md1, newdata = test)
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Seasonal Component of the Series")
# -------- Code Chank 20 --------
mape_md1 <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_md1
## [1] 0.04652981 0.08404506
md2 <- lm(y ~ seasonal + trend + I(trend^2), data = train)
summary(md2)
##
## Call:
## lm(formula = y ~ seasonal + trend + I(trend^2), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -470.11 -54.52 -3.69 61.13 297.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1877.37494 22.20507 84.547 < 2e-16 ***
## seasonal.L -493.60713 25.47215 -19.378 < 2e-16 ***
## seasonal.Q 1119.88762 25.37023 44.142 < 2e-16 ***
## seasonal.C 85.88620 25.43713 3.376 0.000876 ***
## seasonal^4 178.25462 25.37315 7.025 2.96e-11 ***
## seasonal^5 283.10061 25.42187 11.136 < 2e-16 ***
## seasonal^6 -54.39236 25.37889 -2.143 0.033252 *
## seasonal^7 -196.60962 25.40160 -7.740 4.19e-13 ***
## seasonal^8 79.04275 25.38814 3.113 0.002109 **
## seasonal^9 43.22206 25.36376 1.704 0.089851 .
## seasonal^10 76.79860 25.40316 3.023 0.002814 **
## seasonal^11 -11.64491 25.23068 -0.462 0.644893
## trend -1.12357 0.45779 -2.454 0.014932 *
## I(trend^2) 0.01631 0.00198 8.239 1.89e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 109.4 on 209 degrees of freedom
## Multiple R-squared: 0.9379, Adjusted R-squared: 0.9341
## F-statistic: 242.9 on 13 and 209 DF, p-value: < 2.2e-16
train$yhat <- predict(md2, newdata = train)
test$yhat <- predict(md2, newdata = test)
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Seasonal Component of the Series")
mape_md2 <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_md2
## [1] 0.03696217 0.03962603
USgas_split <- ts_split(USgas, sample.out = 12)
train.ts <- USgas_split$train
test.ts <- USgas_split$test
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
md3 <- tslm(train.ts ~ season + trend + I(trend^2))
summary(md3)
##
## Call:
## tslm(formula = train.ts ~ season + trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -470.11 -54.52 -3.69 61.13 297.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.631e+03 3.237e+01 81.258 < 2e-16 ***
## season2 -2.993e+02 3.549e+01 -8.434 5.49e-15 ***
## season3 -4.828e+02 3.549e+01 -13.604 < 2e-16 ***
## season4 -9.115e+02 3.549e+01 -25.686 < 2e-16 ***
## season5 -1.096e+03 3.549e+01 -30.895 < 2e-16 ***
## season6 -1.116e+03 3.549e+01 -31.458 < 2e-16 ***
## season7 -9.529e+02 3.549e+01 -26.849 < 2e-16 ***
## season8 -9.316e+02 3.599e+01 -25.888 < 2e-16 ***
## season9 -1.138e+03 3.599e+01 -31.614 < 2e-16 ***
## season10 -1.049e+03 3.599e+01 -29.161 < 2e-16 ***
## season11 -7.986e+02 3.599e+01 -22.191 < 2e-16 ***
## season12 -2.610e+02 3.599e+01 -7.253 7.81e-12 ***
## trend -1.124e+00 4.578e-01 -2.454 0.0149 *
## I(trend^2) 1.631e-02 1.980e-03 8.239 1.89e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 109.4 on 209 degrees of freedom
## Multiple R-squared: 0.9379, Adjusted R-squared: 0.9341
## F-statistic: 242.9 on 13 and 209 DF, p-value: < 2.2e-16
r <- which(USgas_df$ds == as.Date("2014-01-01"))
USgas_df$s_break <- ifelse(year(USgas_df$ds) >= 2010, 1, 0)
USgas_df$s_break[r] <- 1
md3 <- tslm(USgas ~ season + trend + I(trend^2) + s_break, data = USgas_df)
summary(md3)
##
## Call:
## tslm(formula = USgas ~ season + trend + I(trend^2) + s_break,
## data = USgas_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -469.46 -51.75 -5.44 66.41 277.89
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.656e+03 3.183e+01 83.458 < 2e-16 ***
## season2 -3.043e+02 3.428e+01 -8.876 2.46e-16 ***
## season3 -4.834e+02 3.428e+01 -14.101 < 2e-16 ***
## season4 -9.256e+02 3.428e+01 -27.001 < 2e-16 ***
## season5 -1.105e+03 3.429e+01 -32.230 < 2e-16 ***
## season6 -1.124e+03 3.429e+01 -32.789 < 2e-16 ***
## season7 -9.534e+02 3.430e+01 -27.796 < 2e-16 ***
## season8 -9.341e+02 3.475e+01 -26.883 < 2e-16 ***
## season9 -1.137e+03 3.475e+01 -32.727 < 2e-16 ***
## season10 -1.047e+03 3.476e+01 -30.135 < 2e-16 ***
## season11 -7.881e+02 3.477e+01 -22.670 < 2e-16 ***
## season12 -2.646e+02 3.478e+01 -7.608 8.06e-13 ***
## trend -1.758e+00 4.498e-01 -3.908 0.000124 ***
## I(trend^2) 1.742e-02 1.722e-03 10.112 < 2e-16 ***
## s_break 6.945e+01 2.837e+01 2.448 0.015129 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 108.4 on 220 degrees of freedom
## Multiple R-squared: 0.9429, Adjusted R-squared: 0.9393
## F-statistic: 259.4 on 14 and 220 DF, p-value: < 2.2e-16
library(UKgrid)
UKdaily <- extract_grid(type = "data.frame", columns = "ND", aggregate = "daily")
head(UKdaily)
ts_plot(UKdaily,
title = "The UK National Demand for Electricity",
Ytitle = "MW",
Xtitle = "Year")
# -------- Code Chank 26 --------
ts_heatmap(UKdaily[which(year(UKdaily$TIMESTAMP) >= 2016),],
title = "UK the Daily National Grid Demand Heatmap")
# -------- Code Chank 27 --------
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
UKdaily <- UKdaily %>%
mutate(wday = wday(TIMESTAMP, label = TRUE),
month = month(TIMESTAMP, label = TRUE),
lag365 = dplyr::lag(ND, 365)) %>%
filter(!is.na(lag365)) %>%
arrange(TIMESTAMP)
str(UKdaily)
## 'data.frame': 4939 obs. of 5 variables:
## $ TIMESTAMP: Date, format: "2006-04-01" "2006-04-02" ...
## $ ND : int 1718405 1691341 1960325 2023886 2026204 2008422 1981175 1770440 1749715 2012865 ...
## $ wday : Ord.factor w/ 7 levels "Sun"<"Mon"<"Tue"<..: 7 1 2 3 4 5 6 7 1 2 ...
## $ month : Ord.factor w/ 12 levels "Jan"<"Feb"<"Mar"<..: 4 4 4 4 4 4 4 4 4 4 ...
## $ lag365 : int 1920069 1674699 1631352 1916693 1952082 1964584 1990895 2003982 1811436 1684720 ...
# -------- Code Chank 28 --------
start_date <- min(UKdaily$TIMESTAMP)
head(UKdaily)
UK_ts <- ts(UKdaily$ND,
start = c(year(start_date), yday(start_date)),
frequency = 365)
head(UK_ts)
## Time Series:
## Start = c(2006, 91)
## End = c(2006, 96)
## Frequency = 365
## [1] 1718405 1691341 1960325 2023886 2026204 2008422
ts_acf(UK_ts, lag.max = 365 * 4)
## Warning: The 'ts_acf' function is deprecated, please use 'ts_cor' instead
h <- 365
UKpartitions <- ts_split(UK_ts, sample.out = h)
train_ts <- UKpartitions$train
test_ts <- UKpartitions$test
train_df <- UKdaily[1:(nrow(UKdaily) - h), ]
test_df <- UKdaily[(nrow(UKdaily) - h + 1):nrow(UKdaily), ]
md_tslm1 <- tslm(train_ts ~ season + trend)
fc_tslm1 <- forecast(md_tslm1, h = h)
test_forecast(actual = UK_ts,
forecast.obj = fc_tslm1,
test = test_ts)
accuracy(fc_tslm1, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -9.030002e-12 121551.4 100439.64 -0.5721337 6.294111 0.8418677
## Test set -1.740215e+04 123156.6 96785.27 -1.8735336 7.160573 0.8112374
## ACF1 Theil's U
## Training set 0.5277884 NA
## Test set 0.5106681 1.071899
md_tslm2 <- tslm(train_ts ~ season + trend + wday, data = train_df)
fc_tslm2 <- forecast(md_tslm2, h = h, newdata = test_df)
test_forecast(actual = UK_ts,
forecast.obj = fc_tslm2,
test = test_ts)
accuracy(fc_tslm2, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -9.812170e-13 70245.98 52146.79 -0.1738708 3.167605 0.4370853
## Test set -1.764563e+04 80711.71 65373.21 -1.3715505 4.682071 0.5479470
## ACF1 Theil's U
## Training set 0.7513664 NA
## Test set 0.6075598 0.68445
md_tslm3 <- tslm(train_ts ~ season + trend + wday + month + lag365, data = train_df)
fc_tslm3 <- forecast(md_tslm3, h = h, newdata = test_df)
test_forecast(actual = UK_ts,
forecast.obj = fc_tslm3,
test = test_ts)
accuracy(fc_tslm3, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.175109e-11 69904.92 52006.75 -0.1717563 3.163385 0.4359116
## Test set -1.754061e+04 81783.55 65957.66 -1.3613252 4.722083 0.5528457
## ACF1 Theil's U
## Training set 0.7500146 NA
## Test set 0.6094666 0.6925767
summary(md_tslm3)$coefficients %>% tail(1)
## Estimate Std. Error t value Pr(>|t|)
## lag365 -0.06038328 0.01545495 -3.907051 9.490321e-05
anova(md_tslm3)
final_md <- tslm(UK_ts ~ season + trend + wday + month + lag365, data = UKdaily)
checkresiduals(final_md)

##
## Breusch-Godfrey test for serial correlation of order up to 730
##
## data: Residuals from Linear regression model
## LM test = 3301.1, df = 730, p-value < 2.2e-16
UK_fc_df <- data.frame(date = seq.Date(from = max(UKdaily$TIMESTAMP) + days(1),
by = "day", length.out = h))
UK_fc_df$wday <- factor(lubridate::wday(UK_fc_df$date, label = TRUE), ordered = FALSE)
UK_fc_df$month <- factor(month(UK_fc_df$date, label = TRUE), ordered = FALSE)
UK_fc_df$lag365 <- tail(UKdaily$ND, h)
head(UK_fc_df)
UKgrid_fc <- forecast(final_md, h = h, newdata = UK_fc_df)
plot_forecast(UKgrid_fc,
title = "The UK National Demand for Electricity Forecast",
Ytitle = "MW", Xtitle = "Year")