Load packages and data

library(tidyverse)
library(tidyquant)
library(dplyr)
library(ggplot2)
library(fpp3)
library(feasts)
library(readxl)

# Data:

Bacon_Price <- read_excel("C:/Users/14047/Downloads/Bacon Price.xls", 
                          range = "A11:B284")
View(Bacon_Price)

Forecasting Steps

Data Preparation

# I fixed my data on the website and in excel to show Dates and average 
# prices of Bacon from Jan 2000 - Sep 2022. Now, I am making it into a tsibble.

data <- Bacon_Price %>% 
  mutate(Month = yearmonth(Date)) %>% 
  as_tsibble(index = Month) %>% 
  select(Month, Price)
data
## # A tsibble: 273 x 2 [1M]
##       Month Price
##       <mth> <dbl>
##  1 2000 Jan  2.75
##  2 2000 Feb  2.87
##  3 2000 Mar  2.93
##  4 2000 Apr  2.95
##  5 2000 May  3.01
##  6 2000 Jun  3.13
##  7 2000 Jul  3.17
##  8 2000 Aug  3.2 
##  9 2000 Sep  3.21
## 10 2000 Oct  3.07
## # … with 263 more rows
## # ℹ Use `print(n = ...)` to see more rows

Data Visualisation

# I am graphing the data to visualize any trends, seasonality, and/or 
# cyclic behavior.

ggplot(Bacon_Price, aes(x = Date, y = Price)) +
  geom_line()

# There is definitely an increasing trend with some seasonality maybe. Maybe some
# seasonality. I am going to employ STL decomposition to see the components
# separately.

data %>% 
  model(STL(Price ~ season() + trend(), robust = TRUE)) %>% 
  components() %>% 
  autoplot()

# There is definitely an increasing trend with a seasonal pattern
# that doesn't necassarily have a constant variance. 

Define a Model

# I am going to use some of the smoothing techniques we used
# in class. The simple exponential smoothing method is suitable
# for forecasting data with no clear trend or seasonal pattern,
# which is applicable for the Bacon data we are using. I think 
# the method that is going to be best in this case is Holt's linear 
# trend method since it's obvious there is a trend, but the seasonality
# isn't clear., but there is some type of seasonal pattern so we will see,
# I will also compare the damped method to see which method is best overall 
# out of the three together. The seasonality isn't constant overtime; however, 
# I will do Holt-Winter's additive method. Holt-Winters' multiplicative
# method is for when there exists different proportions of the seasonality 
# at different levels so I feel as if this method will be accurate for the data.
# I also think Multiplicative Holt-Winter's method is going to be very accurate. 
# Finally, I am going to employ Holt-Winters' damped method just to see the results.
# The one that probably is going to be best overall though is the auto ETS method. 

Training the Model

# I am first fitting my model with corss-validation to the entire data set to 
# see the accuracy of the residuals. 

data %>% 
  stretch_tsibble(.init = 10) %>% 
  model(
    SES = ETS(Price ~ error("A") + trend("N") + season("N")),
    Holt = ETS(Price ~ error("A") + trend("A") + season("N")),
    Damped = ETS(Price ~ error("A") + trend("Ad") + season("N")),
  ) %>% 
  forecast(h = 4) %>% 
  accuracy(data)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 4 observations are missing between 2022 Oct and 2023 Jan
## # A tibble: 3 × 10
##   .model .type      ME  RMSE   MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>   <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Damped Test  0.0344  0.239 0.166  0.619   3.46 0.482 0.481 0.755
## 2 Holt   Test  0.00617 0.232 0.167 -0.0486  3.53 0.484 0.468 0.759
## 3 SES    Test  0.0417  0.230 0.161  0.747   3.36 0.468 0.463 0.748
# According to this measure, since the RMSE and MAE are the lowest
# for the SES method, it is most adequate. 

# I am now creating a training and test set to conduct accuracy 
# performances across each method. 


train <- data %>% 
  filter(year(Month) <= 2015)

test <- data %>% 
  filter(year(Month) > 2015)

recent <- data %>% 
  filter(year(Month) >= 2000)
      

# A fit over my training set data using cross-validation and check 
# the accuracy to choose which model is best

fit_train <- train %>% 
  stretch_tsibble(.init = 12) %>% 
  model(
    SES = ETS(Price ~ error("A") + trend("N") + season("N")),
    Holt = ETS(Price ~ error("A") + trend("A") + season("N")),
    Damped = ETS(Price ~ error("A") + trend("Ad") + season("N")),
  )

# Now forecasting the training set

fc_train <- fit_train %>% 
  forecast(h = 81)

# Now checking the accuracy according to the residuals

accuracy(fit_train)
## # A tibble: 543 × 11
##      .id .model .type          ME   RMSE    MAE    MPE  MAPE    MASE   RMSSE
##    <int> <chr>  <chr>       <dbl>  <dbl>  <dbl>  <dbl> <dbl>   <dbl>   <dbl>
##  1     1 SES    Training  0.0232  0.0708 0.0533  0.775  1.76 NaN     NaN    
##  2     1 Holt   Training -0.0219  0.0627 0.0455 -0.712  1.49 NaN     NaN    
##  3     1 Damped Training -0.00554 0.0557 0.0356 -0.182  1.17 NaN     NaN    
##  4     2 SES    Training  0.0188  0.0686 0.0519  0.628  1.71   0.213   0.281
##  5     2 Holt   Training -0.0193  0.0602 0.0426 -0.626  1.39   0.174   0.247
##  6     2 Damped Training -0.0123  0.0536 0.0325 -0.404  1.07   0.133   0.220
##  7     3 SES    Training  0.0225  0.0688 0.0532  0.749  1.76   0.243   0.312
##  8     3 Holt   Training -0.0114  0.0641 0.0459 -0.365  1.50   0.210   0.291
##  9     3 Damped Training -0.00517 0.0555 0.0387 -0.177  1.27   0.177   0.252
## 10     4 SES    Training  0.0273  0.0708 0.0559  0.897  1.84   0.251   0.316
## # … with 533 more rows, and 1 more variable: ACF1 <dbl>
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
# Now checking the accuracy of the prediction errors according 
# to the test set.

accuracy(fc_train, recent)
## # A tibble: 3 × 10
##   .model .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Damped Test  0.522   0.837 0.620 10.5    12.5  2.19  2.04 0.955
## 2 Holt   Test  0.00667 0.864 0.591 -0.817  12.6  2.09  2.11 0.975
## 3 SES    Test  0.535   0.833 0.611 10.7    12.4  2.16  2.03 0.952
# The lowest RMSE, which matters most, remains to be for the SES 
# method. So it's seeming as if this method remains to be best so far.



# PLOTTING TRAINING SET FORECAST



# Now plotting the forecast of the training sets with the test 
# set to graphically visualize all three methods.

# The SES method:

# The fit

fit02 <- train %>% 
  model(SES = ETS(Price ~ error("A") + trend("N") + season("N")))

# The forecast
  
fc02 <- fit02 %>% 
  forecast(h = 81)

# I am now graphing the fit on the training set and the forecast 
# to the original data. 

fc02 %>% 
  autoplot(recent) +
  geom_line(aes(y = .fitted), color = "red", data = augment(fit02))

# Even though the fit looks nice, since there is an increasing trend,
# it doesn't seem appropriate. However, I am going to rely on the RMSE.

# Holt's linear method:

fit03 <- train %>% 
  model(Holt = ETS(Price ~ error("A") + trend("A") + season("N")))

fc03 <- fit03 %>% 
  forecast(h = 81)

fc03 %>% 
  autoplot(recent) +
  geom_line(aes(y = .fitted), color = "red", data = augment(fit03))

# The training set seems to fit the data and test set pretty nicely! However,
# it had a higher RMSE than the SES method.

# Holt's Damped:

fit04 <- train %>% 
  model(Damped = ETS(Price ~ error("A") + trend("Ad") + season("N")))

fc04 <- fit04 %>% 
  forecast(h = 81)

fc04 %>% 
  autoplot(recent) +
  geom_line(aes(y = .fitted), color = "red", data = augment(fit04))

# This is very similar to the SES method, but is damped at the very beginning
# of the forecast.
 




# ADDITIVE AND MULTIPLICATIVE



# I am going to create models for Holt-Winter's additive and 
# multiplicative methods. I feel as if these methods will be accurate 
# since the data seems to have a seasonal pattern. The additive method
# is preferred when the seasonal variations are roughly constant through
# the series, while the multiplicative method is preferred when the 
# seasonal variations are changing proportional to the level of the series.
# Thus, I think multiplicative is going to be a good model.

# Fitting the methods to the data using cross validation and checking the overall 
# accuracy of the time series. 

data %>% 
  stretch_tsibble(.init = 10) %>% 
  model(additive = ETS(Price ~ error("A") + trend("A") + season("A")),
        multiplicative = ETS(Price ~ error("M") + trend("A") + season("M"))) %>% 
  forecast(h = 4) %>% 
  accuracy(data)
## Warning: 8 errors (2 unique) encountered for additive
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: 8 errors (2 unique) encountered for multiplicative
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 4 observations are missing between 2022 Oct and 2023 Jan
## # A tibble: 2 × 10
##   .model         .type      ME  RMSE   MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>          <chr>   <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive       Test  0.00473 0.229 0.167 -0.0757  3.53 0.485 0.462 0.758
## 2 multiplicative Test  0.00488 0.235 0.175 -0.0677  3.71 0.508 0.474 0.763
# Now, according to the entire data set, the method with the lowest RMSE overall
# is Holt-Winters' additive method. This is surprising. I guess the data has 
# more of a constant seasonality than I thought.
  

# Now, I am fitting each method to the training set with cross-validation.

fit1 <- train %>% 
  stretch_tsibble(.init = 10) %>% 
  model(
    additive = ETS(Price ~ error("A") + trend("A") + season("A")),
    multiplicative = ETS(Price ~ error("M") + trend("A") + season("M"))
  )
## Warning: 8 errors (2 unique) encountered for additive
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: 8 errors (2 unique) encountered for multiplicative
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
# Now forecasting the training set

fc1 <- fit1 %>% 
  forecast(h = 81)

# Now, checking the accuracy of the forecast 

accuracy(fc1, recent)
## # A tibble: 2 × 10
##   .model         .type       ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>          <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive       Test  -0.00320  1.16 0.692 -0.302  14.2  2.45  2.83 0.979
## 2 multiplicative Test   0.0718   1.30 0.770  1.17   15.7  2.72  3.17 0.978
# The RMSE is not lower for the training and test set compared to the 
# SES method. To help make my decision, I am going to compare the graphs 
# of the forecast.

fit13 <- train %>% 
  model(additive = ETS(Price ~ error("A") + trend("A") + season("A")))

fc13 <- fit13 %>% 
  forecast(h = 81) %>% 
  autoplot(data, level = NULL)
fc13

# The fit of the forecast seems like it could be more accurate than the
# SES method! Maybe this will be the chosen one. 

# Even though Holt-Winters' additive method could be the best method chosen,
# there still exists the auto ETS method to compare:

data %>% 
  model(auto = ETS(Price)) %>% 
  accuracy()
## # A tibble: 1 × 10
##   .model .type          ME  RMSE    MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>       <dbl> <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto   Training 0.000399 0.125 0.0914 -0.0627  1.99 0.265 0.252 0.240
# Expected, the RMSE is the lowest for the auto ETS method. 

# Now, I am fitting the auto ETS method to my training and 
# test set.

fit15 <- train %>% 
  model(auto = ETS(Price))
fit15
## # A mable: 1 x 1
##           auto
##        <model>
## 1 <ETS(M,A,A)>
# Now, the forecast:
  
fc15 <- fit15 %>% 
  forecast(h = 81)

# And so that accuracy of the prediction errors are:

accuracy(fc15, recent)
## # A tibble: 1 × 10
##   .model .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto   Test  -0.651 0.804 0.677 -11.7  12.1  2.40  1.96 0.929
# The RMSE is the lowest for the test set out of all methods! 
# I am going to graph the fitted values and forecast now. 

fc15 %>% 
  autoplot(recent) +
  geom_line(aes(y = .fitted), color = "red", data = augment(fit15))

# The point forecast seems to not have enough seasonality, but the 
# overall forecast seems to fit the test data set pretty nicely. The 
# prediction interval looks very nice but slightly skewed. I honestly 
# think that Holt-Winters' additive method looks best, but I will trust
# the RMSE. Therefore, the chosen method for my forecast is the auto 
# ETS method. 

Checking the Model Performace

# I have already checked the accuracy of my ETS method. 

# I am going to check the residuals of my ETS model.

fit17 <- data %>% 
  model(ETS(Price))


gg_tsresiduals(fit17)

# From the graphs, my variance seems to be pretty constant, however, there is some 
# inconsistency towards the end with a few outliers throughout the data.
# This means that my prediction interval will be hard to calculate. The residual are
# centered around the mean of 0, and for the most part, the distribution is symmetric.
# This is good for my prediction interval and point forecast. There does seems to be 
# some autocorrelation within the data. I will check the significance using the 
# ljung-bow test below. 

# Since I have seasonal data, I am going to use lag = 2m = 2(5) = 10 and dof = 0.

fit16 <- augment(fit17)

fit16 %>% 
  features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 × 3
##   .model     lb_stat lb_pvalue
##   <chr>        <dbl>     <dbl>
## 1 ETS(Price)    23.1    0.0104
# Since my p value < 0.05, it is significant. And so, my residuals 
# are distinguishable from a white noise series. This causes complications
# for my forecast. For now, I cannot do anything but accept it. I am guessing
# that autocorrelation is naturally occurring within my model, and it would 
# cause major complications to fix it. 

# When I produce the box_cox for my data, I get 

data %>% 
  features(Price, features = guerrero)
## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1          -0.572
# There is not clear transformation that I could use that would 
# balance the variance of my seasonality. 

Produce the Forecast

# I am now going to forecast the next 4 months of Average Bacon Price using
# the auto ETS method. 

fitty <- data %>% 
  model(auto = ETS(Price))

fcc <- fitty %>% 
  forecast(h = 4) %>% 
  autoplot(data) +
  labs(title = "Forecasting 4 Months Using ETS Method", y = "Average Price of Bacon")
fcc