library(readxl)
library(tidyquant)
library(fpp3)
library(moments)
library(tsibble)
library(tsibbledata)
library(ggplot2)
library(dplyr)
library(seasonal)
library(seasonalview)
library(fabletools)
library(fable)
library(ggfortify)
library(quantmod)
gas_data <- read_excel("C:/Users/harle/OneDrive/Desktop/Class Files/Fall 2022/Forecasting/Gasoline 2010 YTD.xlsx", 
                                col_types = c("date", "numeric"))

gas_tsibble <- gas_data %>%
  mutate(Date = yearweek(Date)) %>%
  tsibble(index = Date)


ggplot(gas_tsibble, mapping = aes(x = Date, y = Price)) +
  labs(title = "Gas Prices 2010 YTD", x = "Week") +
  geom_line()

#Decomposition
gas_dcmp <- gas_tsibble %>%
  model(stl = STL(Price))
components(gas_dcmp) %>%
  autoplot()

#From the decomposition, I can see a clear upward trend beginning around the end of 2020 with clear seasonality. It is concerning, however, that the remainder does not seem toindicate white noise.
#I tried transforming the data with a Box-Cox and by logging it, but none had a notable impact on the data.
gg_tsresiduals(gas_dcmp)

#From the gg_tsresiduals, I can see that the residuals are autocorrelated and are not distinguishable from white noise. 
#Forecasts
gas_models <- gas_tsibble %>%
  model(MEAN(Price),
        NAIVE(Price),
        RW(Price ~ drift()))

accuracy(gas_models)
## # A tibble: 3 × 10
##   .model          .type       ME   RMSE    MAE     MPE  MAPE   MASE  RMSSE  ACF1
##   <chr>           <chr>    <dbl>  <dbl>  <dbl>   <dbl> <dbl>  <dbl>  <dbl> <dbl>
## 1 MEAN(Price)     Trai… 5.26e-17 0.616  0.522  -4.17   17.9  1.10   1.00   0.994
## 2 NAIVE(Price)    Trai… 1.79e- 3 0.0557 0.0395  0.0385  1.32 0.0834 0.0905 0.589
## 3 RW(Price ~ dri… Trai… 1.01e-16 0.0557 0.0395 -0.0234  1.32 0.0835 0.0905 0.589
#Based on these results, it does not seem as if the NAIVE and RW methods are differentiable regarding accuracy. Both are equally valid, with the MEAN method being completely out. 

n<-nrow(gas_tsibble)
cutoff<-round(0.7*n)
train_gas <- gas_tsibble%>%
  slice(1:cutoff)

gas_models_train <- train_gas %>%
  model(MEAN(Price),
        NAIVE(Price),
        RW(Price ~ drift()))
accuracy(gas_models_train)
## # A tibble: 3 × 10
##   .model         .type        ME   RMSE    MAE     MPE  MAPE   MASE  RMSSE  ACF1
##   <chr>          <chr>     <dbl>  <dbl>  <dbl>   <dbl> <dbl>  <dbl>  <dbl> <dbl>
## 1 MEAN(Price)    Trai… -1.20e-16 0.555  0.492  -3.56   17.0  1.25   1.10   0.995
## 2 NAIVE(Price)   Trai… -3.74e- 4 0.0499 0.0379 -0.0288  1.28 0.0963 0.0987 0.552
## 3 RW(Price ~ dr… Trai… -1.24e-16 0.0499 0.0379 -0.0161  1.28 0.0962 0.0987 0.552
#Using training data, it does not seem as if there was a change between the first forecast and the training data forecast. The NAIVE and RW methods are still undifferentiable from each other regarding accuracy.

forecast_guess <- forecast(gas_models_train, h = 2)

forecast_guess %>%
  accuracy(gas_tsibble)
## # A tibble: 3 × 10
##   .model              .type      ME   RMSE    MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>               <chr>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price)         Test  -0.544  0.545  0.544  -21.9  21.9  1.38  1.08   -0.5
## 2 NAIVE(Price)        Test  -0.0585 0.0638 0.0585  -2.36  2.36 0.149 0.126  -0.5
## 3 RW(Price ~ drift()) Test  -0.0579 0.0632 0.0579  -2.34  2.34 0.147 0.125  -0.5
gas_tsibble %>%
  model(RW(Price ~ drift()))%>%
  forecast(h = 2) %>%
  hilo(level = 80)
## # A tsibble: 2 x 5 [1W]
## # Key:       .model [1]
##   .model                  Date          Price .mean                  `80%`
##   <chr>                 <week>         <dist> <dbl>                 <hilo>
## 1 RW(Price ~ drift()) 2022 W41 N(3.9, 0.0031)  3.91 [3.839329, 3.982253]80
## 2 RW(Price ~ drift()) 2022 W42 N(3.9, 0.0062)  3.91 [3.811444, 4.013720]80
#I decided to use the RW method for my forecast since it and the NAIVE method were equally accurate. The RW method should capture the average change seen in the historic data.
#My point forecast is $3.91/gallon for both of the next two weeks, with 80% prediction intervals of [3.839329, 3.982253] for W41 and [3.811444, 4.013720] for W42.