library(readxl)
library(tidyquant)
library(fpp3)
library(moments)
library(tsibble)
library(tsibbledata)
library(ggplot2)
library(dplyr)
library(seasonal)
library(seasonalview)
library(fabletools)
library(fable)
library(ggfortify)
library(quantmod)
gas_data <- read_excel("C:/Users/harle/OneDrive/Desktop/Class Files/Fall 2022/Forecasting/Gasoline 2010 YTD.xlsx",
col_types = c("date", "numeric"))
gas_tsibble <- gas_data %>%
mutate(Date = yearweek(Date)) %>%
tsibble(index = Date)
ggplot(gas_tsibble, mapping = aes(x = Date, y = Price)) +
labs(title = "Gas Prices 2010 YTD", x = "Week") +
geom_line()

#Decomposition
gas_dcmp <- gas_tsibble %>%
model(stl = STL(Price))
components(gas_dcmp) %>%
autoplot()

#From the decomposition, I can see a clear upward trend beginning around the end of 2020 with clear seasonality. It is concerning, however, that the remainder does not seem toindicate white noise.
#I tried transforming the data with a Box-Cox and by logging it, but none had a notable impact on the data.
gg_tsresiduals(gas_dcmp)

#From the gg_tsresiduals, I can see that the residuals are autocorrelated and are not distinguishable from white noise.
#Forecasts
gas_models <- gas_tsibble %>%
model(MEAN(Price),
NAIVE(Price),
RW(Price ~ drift()))
accuracy(gas_models)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price) Trai… 5.26e-17 0.616 0.522 -4.17 17.9 1.10 1.00 0.994
## 2 NAIVE(Price) Trai… 1.79e- 3 0.0557 0.0395 0.0385 1.32 0.0834 0.0905 0.589
## 3 RW(Price ~ dri… Trai… 1.01e-16 0.0557 0.0395 -0.0234 1.32 0.0835 0.0905 0.589
#Based on these results, it does not seem as if the NAIVE and RW methods are differentiable regarding accuracy. Both are equally valid, with the MEAN method being completely out.
n<-nrow(gas_tsibble)
cutoff<-round(0.7*n)
train_gas <- gas_tsibble%>%
slice(1:cutoff)
gas_models_train <- train_gas %>%
model(MEAN(Price),
NAIVE(Price),
RW(Price ~ drift()))
accuracy(gas_models_train)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price) Trai… -1.20e-16 0.555 0.492 -3.56 17.0 1.25 1.10 0.995
## 2 NAIVE(Price) Trai… -3.74e- 4 0.0499 0.0379 -0.0288 1.28 0.0963 0.0987 0.552
## 3 RW(Price ~ dr… Trai… -1.24e-16 0.0499 0.0379 -0.0161 1.28 0.0962 0.0987 0.552
#Using training data, it does not seem as if there was a change between the first forecast and the training data forecast. The NAIVE and RW methods are still undifferentiable from each other regarding accuracy.
forecast_guess <- forecast(gas_models_train, h = 2)
forecast_guess %>%
accuracy(gas_tsibble)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price) Test -0.544 0.545 0.544 -21.9 21.9 1.38 1.08 -0.5
## 2 NAIVE(Price) Test -0.0585 0.0638 0.0585 -2.36 2.36 0.149 0.126 -0.5
## 3 RW(Price ~ drift()) Test -0.0579 0.0632 0.0579 -2.34 2.34 0.147 0.125 -0.5
gas_tsibble %>%
model(RW(Price ~ drift()))%>%
forecast(h = 2) %>%
hilo(level = 80)
## # A tsibble: 2 x 5 [1W]
## # Key: .model [1]
## .model Date Price .mean `80%`
## <chr> <week> <dist> <dbl> <hilo>
## 1 RW(Price ~ drift()) 2022 W41 N(3.9, 0.0031) 3.91 [3.839329, 3.982253]80
## 2 RW(Price ~ drift()) 2022 W42 N(3.9, 0.0062) 3.91 [3.811444, 4.013720]80
#I decided to use the RW method for my forecast since it and the NAIVE method were equally accurate. The RW method should capture the average change seen in the historic data.
#My point forecast is $3.91/gallon for both of the next two weeks, with 80% prediction intervals of [3.839329, 3.982253] for W41 and [3.811444, 4.013720] for W42.