library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(h2o)
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit https://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
##
## cor, sd, var
## The following objects are masked from 'package:base':
##
## &&, %*%, %in%, ||, apply, as.factor, as.numeric, colnames,
## colnames<-, ifelse, is.character, is.factor, is.numeric, log,
## log10, log1p, log2, round, signif, trunc
library(TSstudio)
library(tidyr)
library(Quandl)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggplot2)
library(maps)
library(ggmap)
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
## (status 2 uses the sf package in place of rgdal)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(sp)
library(stats)
library(graphics)
library(rnaturalearth)
## Support for Spatial objects (`sp`) will be deprecated in {rnaturalearth} and will be removed in a future release of the package. Please use `sf` objects with {rnaturalearth}. For example: `ne_download(returnclass = 'sf')`
library(listviewer)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggmap':
##
## wind
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:xts':
##
## first, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(carat)
library (sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE
#Seccion 10
library(TSstudio)
data("Coffee_Prices")
ts_info(Coffee_Prices)
## The Coffee_Prices series is a mts object with 2 variables and 701 observations
## Frequency: 12
## Start time: 1960 1
## End time: 2018 5
robusta <- Coffee_Prices[,1]
library(plotly)
ts_plot(robusta,
title = "The Robusta Coffee Monthly Prices",
Ytitle = "Price in USD",
Xtitle = "Year")
library(tidyr)
sma_forecast <- function(df, h, m, w = NULL){
# Error handling
if(h > nrow(df)){
stop("The length of the forecast horizon must be shorter than the length of the series")}
if(m > nrow(df)){
stop("The length of the rolling window must be shorter than the length of the series")}
if(!is.null(w)){
if(length(w) != m){
stop("The weight argument is not aligned with the length of the rolling window")
} else if(sum(w) !=1){
stop("The sum of the average weight is different than 1")
}
}
# Setting the average weigths
if(is.null(w)){
w <- rep(1/m, m)
}
# Setting the data frame
#-----------------------
# Changing the Date object column name
names(df)[1] <- "date"
# Setting the training and testing partition
# according to the forecast horizon
df$type <- c(rep("train", nrow(df) - h),
rep("test", h))
# Spreading the table by the partition type
df1 <- df %>% spread(key = type, value = y)
# Create the target variable
df1$yhat <- df1$train
# Simple moving average function
for(i in (nrow(df1) - h + 1):nrow(df1)){
r <- (i-m):(i-1)
df1$yhat[i] <- sum(df1$yhat[r] * w)
}
# dropping from the yhat variable the actual values
# that were used for the rolling window
df1$yhat <- ifelse(is.na(df1$test), NA, df1$yhat)
df1$y <- ifelse(is.na(df1$test), df1$train, df1$test)
return(df1)
}
robusta_df <- ts_to_prophet(robusta)
robusta_fc_m1 <- sma_forecast(robusta_df, h = 24, m = 1)
robusta_fc_m6 <- sma_forecast(robusta_df, h = 24, m = 6)
robusta_fc_m12 <- sma_forecast(robusta_df, h = 24, m = 12)
robusta_fc_m24 <- sma_forecast(robusta_df, h = 24, m = 24)
robusta_fc_m36 <- sma_forecast(robusta_df, h = 24, m = 36)
library(plotly)
plot_ly(data = robusta_df[650:nrow(robusta_df),], x = ~ ds, y = ~ y,
type = "scatter", mode = "lines",
name = "Actual") %>%
add_lines(x = robusta_fc_m1$date, y = robusta_fc_m1$yhat,
name = "SMA - 1", line = list(dash = "dash")) %>%
add_lines(x = robusta_fc_m6$date, y = robusta_fc_m6$yhat,
name = "SMA - 6", line = list(dash = "dash")) %>%
add_lines(x = robusta_fc_m12$date, y = robusta_fc_m12$yhat,
name = "SMA - 12", line = list(dash = "dash")) %>%
add_lines(x = robusta_fc_m24$date, y = robusta_fc_m24$yhat,
name = "SMA - 24", line = list(dash = "dash")) %>%
add_lines(x = robusta_fc_m36$date, y = robusta_fc_m36$yhat,
name = "SMA - 36", line = list(dash = "dash")) %>%
layout(title = "Forecasting the Robusta Coffee Monthly Prices",
xaxis = list(title = ""),
yaxis = list(title = "USD per Kg."))
data(USgas)
USgas_df <- ts_to_prophet(USgas)
USgas_fc_m12a <- sma_forecast(USgas_df,
h = 24,
m = 12,
w = c(1, rep(0,11)))
USgas_fc_m12b <- sma_forecast(USgas_df,
h = 24,
m = 12,
w = c(0.8, rep(0,10), 0.2))
plot_ly(data = USgas_df[190:nrow(USgas_df),], x = ~ ds, y = ~ y,
type = "scatter", mode = "lines",
name = "Actual") %>%
add_lines(x = USgas_fc_m12a$date, y = USgas_fc_m12a$yhat,
name = "WMA - Seasonal Lag", line = list(dash = "dash")) %>%
add_lines(x = USgas_fc_m12b$date, y = USgas_fc_m12b$yhat,
name = "WMA - 12 (0.2/0.8)", line = list(dash = "dash")) %>%
layout(title = "Forecasting the Monthly Consumption of Natural Gas in the US",
xaxis = list(title = ""),
yaxis = list(title = "Billion Cubic Feet"))
alpha_df <- data.frame(index = seq(from = 1, to = 15, by = 1),
power = seq(from = 14, to = 0, by = -1))
alpha_df$alpha_0.01 <- 0.01 * (1 - 0.01) ^ alpha_df$power
alpha_df$alpha_0.2 <- 0.2 * (1 - 0.2) ^ alpha_df$power
alpha_df$alpha_0.4 <- 0.4 * (1 - 0.4) ^ alpha_df$power
alpha_df$alpha_0.6 <- 0.6 * (1 - 0.6) ^ alpha_df$power
alpha_df$alpha_0.8 <- 0.8 * (1 - 0.8) ^ alpha_df$power
alpha_df$alpha_1 <- 1 * (1 - 1) ^ alpha_df$power
plot_ly(data = alpha_df) %>%
add_lines(x = ~ index, y = ~ alpha_0.01, name = "alpha = 0.01") %>%
add_lines(x = ~ index, y = ~ alpha_0.2, name = "alpha = 0.2") %>%
add_lines(x = ~ index, y = ~ alpha_0.4, name = "alpha = 0.4") %>%
add_lines(x = ~ index, y = ~ alpha_0.6, name = "alpha = 0.6") %>%
add_lines(x = ~ index, y = ~ alpha_0.8, name = "alpha = 0.8") %>%
add_lines(x = ~ index, y = ~ alpha_1, name = "alpha = 1") %>%
layout(title = "Decay Rate of the SES Weights",
xaxis = list(title = "Index"),
yaxis = list(title = "Weight"))
robusta_par <- ts_split(robusta, sample.out = 12)
train <- robusta_par$train
test <- robusta_par$test
library(forecast)
fc_ses <- ses(train, h = 12, initial = "optimal")
fc_ses$model
## Simple exponential smoothing
##
## Call:
## ses(y = train, h = 12, initial = "optimal")
##
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l = 0.6957
##
## sigma: 0.161
##
## AIC AICc BIC
## 1989.646 1989.681 2003.252
test_forecast(actual = robusta,
forecast.obj = fc_ses,
test = test) %>%
layout(title = "Robusta Coffee Prices Forecast vs. Actual",
xaxis = list(range = c(2010, max(time(robusta)))),
yaxis = list(range = c(1, 3)))
plot_forecast(fc_ses) %>%
add_lines(x = time(test) + deltat(test),
y = as.numeric(test),
name = "Testing Partition") %>%
layout(title = "Robusta Coffee Prices Forecast vs. Actual",
xaxis = list(range = c(2010, max(time(robusta)) +
deltat(robusta))),
yaxis = list(range = c(0, 4)))
robusta_par1 <- ts_split(robusta, sample.out = 24)
train1 <- robusta_par1$train
test1 <- ts_split(robusta_par1$test, sample.out = 12)$train
robusta_par2 <- ts_split(robusta, sample.out = 12)
train2 <- robusta_par2$train
valid <- robusta_par2$test
alpha <- seq(from = 0, to = 1, by = 0.01)
alpha[1] <- 0.001
library(dplyr)
ses_grid <- lapply(alpha, function(i){
md1 <- md_accuracy1 <- md2 <- md_accuracy2 <- results <- NULL
md1 <- ses(train1, h = 12, alpha = i, initial = "simple")
md_accuracy1 <- accuracy(md1, test1)
md2 <- ses(train2, h = 12, alpha = i, initial = "simple")
md_accuracy2 <- accuracy(md2, valid)
resutls <- data.frame(alpha = i,
train = md_accuracy1[9],
test = md_accuracy1[10],
valid = md_accuracy2[10])
}) %>% bind_rows()
plot_ly(data = ses_grid, x = ~ alpha, y = ~ train,
line = list(color = 'rgb(205, 12, 24)'),
type = "scatter",
mode = "lines",
name = "Training") %>%
add_lines(x = ~ alpha, y = ~ test, line = list(color = "rgb(22, 96, 167)", dash = "dash"), name= "Testing") %>%
add_lines(x = ~ alpha, y = ~ valid, line = list(color = "green", dash = "dot"), name = "Validation") %>%
layout(title = "SES Model Grid Search Results",
yaxis = list(title = "MAPE (%)"))
library(Quandl)
gdp <- Quandl("FRED/GDP", start_date = "2010-01-01", type = "ts")
ts_info(gdp)
## The gdp series is a ts object with 1 variable and 48 observations
## Frequency: 4
## Start time: 2010 1
## End time: 2021 4
ts_plot(gdp,
title = "Gross Domestic Product",
Ytitle = "Billions of Dollars",
Xtitle = "Source: U.S. Bureau of Economic Analysis / fred.stlouisfed.org")
gdp_par <- ts_split(gdp, sample.out = 8)
train <- gdp_par$train
test <- gdp_par$test
fc_holt <- holt(train, h = 8, initial = "optimal")
fc_holt$model
## Holt's method
##
## Call:
## holt(y = train, h = 8, initial = "optimal")
##
## Smoothing parameters:
## alpha = 0.9985
## beta = 0.0941
##
## Initial states:
## l = 14603.0487
## b = 161.9411
##
## sigma: 80.9869
##
## AIC AICc BIC
## 504.8838 506.6485 513.3282
accuracy(fc_holt, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 10.99164 76.83093 61.7626 0.05241543 0.3460477 0.08790283
## Test set -665.87355 1141.19097 854.8069 -3.29699460 4.0874667 1.21659297
## ACF1 Theil's U
## Training set 0.0192117 NA
## Test set 0.2975704 1.08055
fc_holt_exp <- holt(train,
h = 8,
beta = 0.75 ,
initial = "optimal",
exponential = TRUE)
fc_holt_exp$model
## Holt's method with exponential trend
##
## Call:
## holt(y = train, h = 8, initial = "optimal", exponential = TRUE,
##
## Call:
## beta = 0.75)
##
## Smoothing parameters:
## alpha = 0.781
## beta = 0.75
##
## Initial states:
## l = 14572.2291
## b = 1.0142
##
## sigma: 0.0053
##
## AIC AICc BIC
## 515.9291 517.0720 522.6847
accuracy(fc_holt_exp, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -2.792636 90.20753 74.14323 -0.01756593 0.4165413 0.1055234
## Test set -703.014806 1151.21262 858.97402 -3.46212317 4.1130726 1.2225238
## ACF1 Theil's U
## Training set -0.1541725 NA
## Test set 0.2834645 1.090637
data(USgas)
decompose(USgas) %>% plot()
USgas_par <- ts_split(USgas, 12)
train <- USgas_par$train
test <- USgas_par$test
md_hw <- HoltWinters(train)
md_hw
## Holt-Winters exponential smoothing with trend and additive seasonal component.
##
## Call:
## HoltWinters(x = train)
##
## Smoothing parameters:
## alpha: 0.371213
## beta : 0
## gamma: 0.4422456
##
## Coefficients:
## [,1]
## a 2491.9930104
## b -0.1287005
## s1 32.0972651
## s2 597.1088003
## s3 834.9628439
## s4 353.8593860
## s5 318.1927058
## s6 -173.0721496
## s7 -346.6229990
## s8 -329.7169608
## s9 -112.1664217
## s10 -140.3186476
## s11 -324.5343787
## s12 -243.9334551
fc_hw <- forecast(md_hw, h = 12)
accuracy(fc_hw, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 7.828361 115.2062 87.67420 0.2991952 4.248131 0.7614222
## Test set 51.013877 115.1555 98.06531 1.7994297 3.766099 0.8516656
## ACF1 Theil's U
## Training set 0.21911103 NA
## Test set -0.01991923 0.3652142
test_forecast(actual = USgas,
forecast.obj = fc_hw,
test = test)