Informe #3, seccion 10

library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(h2o)

## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit https://docs.h2o.ai
## 
## ----------------------------------------------------------------------

## 
## Attaching package: 'h2o'

## The following objects are masked from 'package:stats':
## 
##     cor, sd, var

## The following objects are masked from 'package:base':
## 
##     &&, %*%, %in%, ||, apply, as.factor, as.numeric, colnames,
##     colnames<-, ifelse, is.character, is.factor, is.numeric, log,
##     log10, log1p, log2, round, signif, trunc

library(TSstudio)
library(tidyr)
library(Quandl)

## Loading required package: xts

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(ggplot2)
library(maps)
library(ggmap)

## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
##      (status 2 uses the sf package in place of rgdal)

## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.

library(sp)
library(stats)
library(graphics)
library(rnaturalearth)

## Support for Spatial objects (`sp`) will be deprecated in {rnaturalearth} and will be removed in a future release of the package. Please use `sf` objects with {rnaturalearth}. For example: `ne_download(returnclass = 'sf')`

library(listviewer)
library(plotly)

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggmap':
## 
##     wind
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

library(dplyr)

## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:xts':
## 
##     first, last
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(carat)
library (sf)

## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE

#Seccion 10

library(TSstudio)

data("Coffee_Prices")

ts_info(Coffee_Prices)

##  The Coffee_Prices series is a mts object with 2 variables and 701 observations
##  Frequency: 12 
##  Start time: 1960 1 
##  End time: 2018 5

robusta <- Coffee_Prices[,1]

library(plotly)


ts_plot(robusta,
        title = "The Robusta Coffee Monthly Prices",
        Ytitle = "Price in USD",
        Xtitle = "Year")

library(tidyr)

sma_forecast <- function(df, h, m, w = NULL){
  
  # Error handling
  if(h > nrow(df)){
    stop("The length of the forecast horizon must be shorter than the length of the series")}
  
  if(m > nrow(df)){
    stop("The length of the rolling window must be shorter than the length of the series")}
  if(!is.null(w)){
    if(length(w) != m){
      stop("The weight argument is not aligned with the length of the rolling window")
    } else if(sum(w) !=1){
      stop("The sum of the average weight is different than 1")
    }
  }
  
  # Setting the average weigths
  if(is.null(w)){
    w <- rep(1/m, m)
  }
  
  # Setting the data frame 
  #-----------------------
  # Changing the Date object column name
  names(df)[1] <- "date" 
  # Setting the training and testing partition 
  # according to the forecast horizon
  df$type <- c(rep("train", nrow(df) - h), 
               rep("test", h)) 
  
  # Spreading the table by the partition type
  df1 <- df %>% spread(key = type, value = y)
  
  # Create the target variable
  df1$yhat <- df1$train
  
  
  # Simple moving average function
  for(i in (nrow(df1) - h + 1):nrow(df1)){
    r <- (i-m):(i-1) 
    df1$yhat[i] <- sum(df1$yhat[r] * w) 
  } 
  
  # dropping from the yhat variable the actual values
  # that were used for the rolling window
  df1$yhat <- ifelse(is.na(df1$test), NA, df1$yhat)
  
  df1$y <- ifelse(is.na(df1$test), df1$train, df1$test)
  
  return(df1)
}

robusta_df <- ts_to_prophet(robusta)

robusta_fc_m1 <-  sma_forecast(robusta_df, h = 24, m = 1)
robusta_fc_m6 <-  sma_forecast(robusta_df, h = 24, m = 6)
robusta_fc_m12 <- sma_forecast(robusta_df, h = 24, m = 12)
robusta_fc_m24 <- sma_forecast(robusta_df, h = 24, m = 24)
robusta_fc_m36 <- sma_forecast(robusta_df, h = 24, m = 36)

library(plotly)

plot_ly(data = robusta_df[650:nrow(robusta_df),], x = ~ ds, y = ~ y,
        type = "scatter", mode = "lines", 
        name = "Actual") %>%
  add_lines(x = robusta_fc_m1$date, y = robusta_fc_m1$yhat, 
            name = "SMA - 1", line = list(dash = "dash")) %>%
  add_lines(x = robusta_fc_m6$date, y = robusta_fc_m6$yhat, 
            name = "SMA - 6", line = list(dash = "dash")) %>%
  add_lines(x = robusta_fc_m12$date, y = robusta_fc_m12$yhat, 
            name = "SMA - 12", line = list(dash = "dash")) %>%
  add_lines(x = robusta_fc_m24$date, y = robusta_fc_m24$yhat, 
            name = "SMA - 24", line = list(dash = "dash")) %>%
  add_lines(x = robusta_fc_m36$date, y = robusta_fc_m36$yhat, 
            name = "SMA - 36", line = list(dash = "dash")) %>%
  layout(title = "Forecasting the Robusta Coffee Monthly Prices",
         xaxis = list(title = ""),
         yaxis = list(title = "USD per Kg."))

data(USgas)

USgas_df <- ts_to_prophet(USgas)

USgas_fc_m12a <- sma_forecast(USgas_df,
                              h = 24,
                              m = 12,
                              w = c(1, rep(0,11)))
USgas_fc_m12b <- sma_forecast(USgas_df,
                              h = 24,
                              m = 12,
                              w = c(0.8, rep(0,10), 0.2))

plot_ly(data = USgas_df[190:nrow(USgas_df),], x = ~ ds, y = ~ y,
        type = "scatter", mode = "lines", 
        name = "Actual") %>%
  add_lines(x = USgas_fc_m12a$date, y = USgas_fc_m12a$yhat, 
            name = "WMA - Seasonal Lag", line = list(dash = "dash")) %>%
  add_lines(x = USgas_fc_m12b$date, y = USgas_fc_m12b$yhat, 
            name = "WMA - 12 (0.2/0.8)", line = list(dash = "dash")) %>%
  layout(title = "Forecasting the Monthly Consumption of Natural Gas in the US",
         xaxis = list(title = ""),
         yaxis = list(title = "Billion Cubic Feet"))

alpha_df <- data.frame(index = seq(from = 1, to = 15, by = 1),
                       power = seq(from = 14, to = 0, by = -1))

alpha_df$alpha_0.01 <- 0.01 * (1 - 0.01) ^ alpha_df$power
alpha_df$alpha_0.2 <- 0.2 * (1 - 0.2) ^ alpha_df$power
alpha_df$alpha_0.4 <- 0.4 * (1 - 0.4) ^ alpha_df$power
alpha_df$alpha_0.6 <- 0.6 * (1 - 0.6) ^ alpha_df$power
alpha_df$alpha_0.8 <- 0.8 * (1 - 0.8) ^ alpha_df$power
alpha_df$alpha_1 <- 1 * (1 - 1) ^ alpha_df$power

plot_ly(data = alpha_df) %>%
  add_lines(x = ~ index, y = ~ alpha_0.01, name = "alpha = 0.01") %>%
  add_lines(x = ~ index, y = ~ alpha_0.2, name = "alpha = 0.2") %>%
  add_lines(x = ~ index, y = ~ alpha_0.4, name = "alpha = 0.4") %>%
  add_lines(x = ~ index, y = ~ alpha_0.6, name = "alpha = 0.6") %>%
  add_lines(x = ~ index, y = ~ alpha_0.8, name = "alpha = 0.8") %>%
  add_lines(x = ~ index, y = ~ alpha_1, name = "alpha = 1") %>%
  layout(title = "Decay Rate of the SES Weights",
         xaxis = list(title = "Index"),
         yaxis = list(title = "Weight"))

robusta_par <- ts_split(robusta, sample.out = 12)
train <- robusta_par$train
test <- robusta_par$test

library(forecast)
fc_ses <- ses(train, h = 12, initial = "optimal")

fc_ses$model

## Simple exponential smoothing 
## 
## Call:
##  ses(y = train, h = 12, initial = "optimal") 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l = 0.6957 
## 
##   sigma:  0.161
## 
##      AIC     AICc      BIC 
## 1989.646 1989.681 2003.252

test_forecast(actual = robusta,
              forecast.obj = fc_ses,
              test = test) %>%
  layout(title = "Robusta Coffee Prices Forecast vs. Actual",
         xaxis = list(range = c(2010, max(time(robusta)))),
         yaxis = list(range = c(1, 3)))

plot_forecast(fc_ses) %>%
  add_lines(x = time(test) + deltat(test),
            y = as.numeric(test),
            name = "Testing Partition") %>%
  layout(title = "Robusta Coffee Prices Forecast vs. Actual",
         xaxis = list(range = c(2010, max(time(robusta)) +
                                  deltat(robusta))),
         yaxis = list(range = c(0, 4)))

robusta_par1 <- ts_split(robusta, sample.out = 24)

train1 <- robusta_par1$train 
test1 <- ts_split(robusta_par1$test, sample.out = 12)$train

robusta_par2 <- ts_split(robusta, sample.out = 12)

train2 <- robusta_par2$train
valid <- robusta_par2$test

alpha <- seq(from = 0, to = 1, by = 0.01)

alpha[1] <- 0.001

library(dplyr)

ses_grid <- lapply(alpha, function(i){
  md1 <- md_accuracy1 <- md2 <- md_accuracy2 <- results <-  NULL
  md1 <- ses(train1, h = 12, alpha = i, initial = "simple")
  md_accuracy1 <- accuracy(md1, test1)
  
  md2 <- ses(train2, h = 12, alpha = i, initial = "simple")
  md_accuracy2 <- accuracy(md2, valid)
  
  resutls <- data.frame(alpha = i, 
                        train = md_accuracy1[9], 
                        test = md_accuracy1[10], 
                        valid = md_accuracy2[10])
  
}) %>% bind_rows()

plot_ly(data = ses_grid, x = ~ alpha, y = ~ train, 
        line = list(color = 'rgb(205, 12, 24)'),
        type = "scatter", 
        mode = "lines", 
        name = "Training") %>%
  add_lines(x = ~ alpha, y = ~ test, line = list(color = "rgb(22, 96, 167)", dash = "dash"), name=  "Testing") %>%
  add_lines(x = ~ alpha, y = ~ valid, line = list(color = "green", dash = "dot"), name = "Validation") %>%
  layout(title = "SES Model Grid Search Results",
         yaxis = list(title = "MAPE (%)"))

library(Quandl)

gdp <- Quandl("FRED/GDP", start_date = "2010-01-01", type = "ts")

ts_info(gdp)

##  The gdp series is a ts object with 1 variable and 48 observations
##  Frequency: 4 
##  Start time: 2010 1 
##  End time: 2021 4

ts_plot(gdp, 
        title = "Gross Domestic Product",
        Ytitle = "Billions of Dollars",
        Xtitle = "Source: U.S. Bureau of Economic Analysis / fred.stlouisfed.org")

gdp_par <- ts_split(gdp, sample.out = 8)

train <- gdp_par$train
test <- gdp_par$test

fc_holt <- holt(train,  h = 8, initial = "optimal") 

fc_holt$model

## Holt's method 
## 
## Call:
##  holt(y = train, h = 8, initial = "optimal") 
## 
##   Smoothing parameters:
##     alpha = 0.9985 
##     beta  = 0.0941 
## 
##   Initial states:
##     l = 14603.0487 
##     b = 161.9411 
## 
##   sigma:  80.9869
## 
##      AIC     AICc      BIC 
## 504.8838 506.6485 513.3282

accuracy(fc_holt, test)

##                      ME       RMSE      MAE         MPE      MAPE       MASE
## Training set   10.99164   76.83093  61.7626  0.05241543 0.3460477 0.08790283
## Test set     -665.87355 1141.19097 854.8069 -3.29699460 4.0874667 1.21659297
##                   ACF1 Theil's U
## Training set 0.0192117        NA
## Test set     0.2975704   1.08055

fc_holt_exp <- holt(train,
                    h = 8,
                    beta = 0.75 ,
                    initial = "optimal",
                    exponential = TRUE) 

fc_holt_exp$model

## Holt's method with exponential trend 
## 
## Call:
##  holt(y = train, h = 8, initial = "optimal", exponential = TRUE,  
## 
##  Call:
##      beta = 0.75) 
## 
##   Smoothing parameters:
##     alpha = 0.781 
##     beta  = 0.75 
## 
##   Initial states:
##     l = 14572.2291 
##     b = 1.0142 
## 
##   sigma:  0.0053
## 
##      AIC     AICc      BIC 
## 515.9291 517.0720 522.6847

accuracy(fc_holt_exp, test)

##                       ME       RMSE       MAE         MPE      MAPE      MASE
## Training set   -2.792636   90.20753  74.14323 -0.01756593 0.4165413 0.1055234
## Test set     -703.014806 1151.21262 858.97402 -3.46212317 4.1130726 1.2225238
##                    ACF1 Theil's U
## Training set -0.1541725        NA
## Test set      0.2834645  1.090637

data(USgas)

decompose(USgas) %>% plot()

USgas_par <- ts_split(USgas, 12)

train <- USgas_par$train
test <- USgas_par$test

md_hw <- HoltWinters(train)

md_hw

## Holt-Winters exponential smoothing with trend and additive seasonal component.
## 
## Call:
## HoltWinters(x = train)
## 
## Smoothing parameters:
##  alpha: 0.371213
##  beta : 0
##  gamma: 0.4422456
## 
## Coefficients:
##             [,1]
## a   2491.9930104
## b     -0.1287005
## s1    32.0972651
## s2   597.1088003
## s3   834.9628439
## s4   353.8593860
## s5   318.1927058
## s6  -173.0721496
## s7  -346.6229990
## s8  -329.7169608
## s9  -112.1664217
## s10 -140.3186476
## s11 -324.5343787
## s12 -243.9334551

fc_hw <- forecast(md_hw, h = 12)

accuracy(fc_hw, test)

##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  7.828361 115.2062 87.67420 0.2991952 4.248131 0.7614222
## Test set     51.013877 115.1555 98.06531 1.7994297 3.766099 0.8516656
##                     ACF1 Theil's U
## Training set  0.21911103        NA
## Test set     -0.01991923 0.3652142

test_forecast(actual = USgas, 
              forecast.obj = fc_hw, 
              test = test)

Informe #3, seccion 10

Josue Altafulla

30/10/2023