library(tidyquant)
library(tidyverse)
library(fpp3)
library(moments)
library(tsibble)
library(tsibbledata)
library(ggfortify)
library(ggplot2)
library(readxl)
library(dplyr)
library(fabletools)
library(fable)
library(slider)
library(zoo)
library(latex2exp)
library(seasonal)
# Load the weekly gas price workbook.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file location exists.
gas_prices <- read_excel(
  "/Users/Peter Cook/Documents/Economics and Finance/Business Forecasting/Forecasting Data/gas_2010.xlsx"
)

# Re-express Week as a yearweek index and convert the table to a tsibble.
gas_prices_tsibble <- as_tsibble(
  mutate(gas_prices, Week = yearweek(Week)),
  index = Week
)

# Plot the raw weekly price series. The plotted variable is named explicitly
# so autoplot() does not have to guess it (and emit a "Plot variable not
# specified" message), and so the plot stays correct if more columns are
# added to the data later.
gas_prices_tsibble %>%
  autoplot(Price) +
  labs(title = "Weekly Gas Prices ($USD)")

# Plot the log of the weekly price to see whether a log transformation
# changes the shape of the series.
autoplot(gas_prices_tsibble, log(Price)) +
  labs(title = "Weekly Gas Prices Logged ($USD)")

#Logging the data does not appear to have much of an impact.

# Fit a robust STL decomposition (trend window of 5, periodic seasonal
# component) and plot its components.
model(
  gas_prices_tsibble,
  STL(
    Price ~ trend(window = 5) + season(window = "periodic"),
    robust = TRUE
  )
) %>%
  components() %>%
  autoplot() +
  labs(title = "STL Decomposition of Weekly Gas Prices ($USD)")

#Based on the remainder, the decomposition suggests that the data likely isn't white noise. There is also clear seasonality and a significant upward trend beginning in 2020, likely due to COVID lockdowns and further exaggerated by the Russian invasion of Ukraine.

# Estimate the Box-Cox parameter with the Guerrero feature, then plot the
# transformed series with the chosen lambda shown in the title.
lambda <- gas_prices_tsibble %>%
  features(Price, features = guerrero) %>%
  pull(lambda_guerrero)

gas_prices_tsibble %>%
  autoplot(box_cox(Price, lambda)) +
  labs(
    y = "",
    # TeX() treats `$` as a math-mode delimiter, so the currency is written
    # as "USD" here; a literal "$USD" left the `$\\lambda$` pair unbalanced.
    title = latex2exp::TeX(paste0(
      "Transformed Weekly Gas Prices (USD) with $\\lambda$ = ",
      round(lambda, 2)
    ))
  )

#The Box-Cox transformation did not appear to have much of an impact on the data.

# Benchmark: MEAN model fitted to the full series, forecast two weeks ahead
# and drawn on top of the observed data.
model(gas_prices_tsibble, MEAN(Price)) %>%
  forecast(h = 2) %>%
  autoplot(gas_prices_tsibble) +
  labs(title = "Mean Forecast of Gas Prices ($USD) in 2 Weeks")

#Tried forecasting prices using the MEAN method. I’m going to immediately throw out this forecast as horribly unreliable. The prediction interval is way too huge.

# Benchmark: NAIVE model fitted to the full series, forecast two weeks ahead
# and drawn on top of the observed data.
autoplot(
  forecast(model(gas_prices_tsibble, NAIVE(Price)), h = 2),
  gas_prices_tsibble
) +
  labs(title = "NAIVE Forecast of Gas Prices ($USD) in 2 Weeks")

#Tried running a NAIVE forecast of gas prices in 2 weeks. This looks a lot better than the MEAN method. I would say the NAIVE method is most accurate because of the way gas prices tend to behave. They (normally) don’t rise or fall by much more than a few cents at a time, so basing the forecast on the last observation makes the most sense to me.

# Benchmark: random walk with drift fitted to the full series, forecast two
# weeks ahead and drawn on top of the observed data.
forecast(
  model(gas_prices_tsibble, RW(Price ~ drift())),
  h = 2
) %>%
  autoplot(gas_prices_tsibble) +
  labs(title = "DRIFT Forecast of Gas Prices ($USD) in 2 Weeks")

#Tried forecasting gas prices using the drift method. While I think this forecast is better than the MEAN method, I would guess NAIVE is better for this data.

# Fit the three benchmark models (mean, naive, random walk with drift) to
# the full series in a single mable.
gas_prices_models <- model(
  gas_prices_tsibble,
  MEAN(Price),
  NAIVE(Price),
  RW(Price ~ drift())
)

# Training-set accuracy measures for each fitted model.
gas_prices_models %>%
  accuracy()
## # A tibble: 3 × 10
##   .model          .type       ME   RMSE    MAE     MPE  MAPE   MASE  RMSSE  ACF1
##   <chr>           <chr>    <dbl>  <dbl>  <dbl>   <dbl> <dbl>  <dbl>  <dbl> <dbl>
## 1 MEAN(Price)     Trai… 5.26e-17 0.616  0.522  -4.17   17.9  1.10   1.00   0.994
## 2 NAIVE(Price)    Trai… 1.79e- 3 0.0557 0.0395  0.0385  1.32 0.0834 0.0905 0.589
## 3 RW(Price ~ dri… Trai… 1.01e-16 0.0557 0.0395 -0.0234  1.32 0.0835 0.0905 0.589
# Split point for the hold-out evaluation: the first 70% of the rows
# (rounded) form the training window.
n <- nrow(gas_prices_tsibble)
cutoff <- round(0.7 * n)

# seq_len() keeps the row index well-defined for any row count, whereas
# 1:cutoff would count backwards as c(1, 0) if cutoff were 0.
gas_train <- gas_prices_tsibble %>%
  slice(seq_len(cutoff))

# Refit the three benchmark models on the training window only.
gas_train_models <- gas_train %>%
  model(
    MEAN(Price),
    NAIVE(Price),
    RW(Price ~ drift())
  )

# Training-set accuracy of the refitted models.
accuracy(gas_train_models)
## # A tibble: 3 × 10
##   .model         .type        ME   RMSE    MAE     MPE  MAPE   MASE  RMSSE  ACF1
##   <chr>          <chr>     <dbl>  <dbl>  <dbl>   <dbl> <dbl>  <dbl>  <dbl> <dbl>
## 1 MEAN(Price)    Trai… -1.20e-16 0.555  0.492  -3.56   17.0  1.25   1.10   0.995
## 2 NAIVE(Price)   Trai… -3.74e- 4 0.0499 0.0379 -0.0288  1.28 0.0963 0.0987 0.552
## 3 RW(Price ~ dr… Trai… -1.24e-16 0.0499 0.0379 -0.0161  1.28 0.0962 0.0987 0.552
# Forecast two weeks beyond the end of the training window with every model.
forecast_models <- gas_train_models %>%
  forecast(h = 2)

# Score the forecasts against the observed values in the full series.
# NOTE(review): because h = 2, only the two weeks right after the training
# window are scored, not the whole held-out 30% of the data.
accuracy(forecast_models, gas_prices_tsibble)
## # A tibble: 3 × 10
##   .model              .type      ME   RMSE    MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>               <chr>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price)         Test  -0.544  0.545  0.544  -21.9  21.9  1.38  1.08   -0.5
## 2 NAIVE(Price)        Test  -0.0585 0.0638 0.0585  -2.36  2.36 0.149 0.126  -0.5
## 3 RW(Price ~ drift()) Test  -0.0579 0.0632 0.0579  -2.34  2.34 0.147 0.125  -0.5

#Based on the hold-out test (models trained on the first 70% of the data and scored on the following 2 weeks), the drift model is the most accurate, being just a tiny bit more accurate than the NAIVE model based on its lower RMSE (the difference is so tiny that it hardly matters). As stated above, the hold-out test also reveals that the MEAN method is wildly inaccurate in comparison.

# Final forecast: random walk with drift on the full series, two weeks
# ahead, reported with an 80% prediction interval.
hilo(
  forecast(
    model(gas_prices_tsibble, RW(Price ~ drift())),
    h = 2
  ),
  level = 80
)
## # A tsibble: 2 x 5 [1W]
## # Key:       .model [1]
##   .model                  Week          Price .mean                  `80%`
##   <chr>                 <week>         <dist> <dbl>                 <hilo>
## 1 RW(Price ~ drift()) 2022 W41 N(3.9, 0.0031)  3.91 [3.839329, 3.982253]80
## 2 RW(Price ~ drift()) 2022 W42 N(3.9, 0.0062)  3.91 [3.811444, 4.013720]80