library(tidyquant)
library(tidyverse)
library(fpp3)
library(moments)
library(tsibble)
library(tsibbledata)
library(ggfortify)
library(ggplot2)
library(readxl)
library(dplyr)
library(fabletools)
library(fable)
library(slider)
library(zoo)
library(latex2exp)
library(seasonal)
# Load the weekly gas price spreadsheet from its local path.
gas_prices <- read_excel(
  "/Users/Peter Cook/Documents/Economics and Finance/Business Forecasting/Forecasting Data/gas_2010.xlsx"
)

# Re-type the Week column as a yearweek and use it as the tsibble index.
gas_prices_tsibble <- as_tsibble(
  mutate(gas_prices, Week = yearweek(Week)),
  index = Week
)
# Time plot of the raw series. No variable is supplied, so autoplot()
# chooses the column to draw on its own.
autoplot(gas_prices_tsibble) +
  labs(title = "Weekly Gas Prices ($USD)")
## Plot variable not specified, automatically selected `.vars = Price`
# Same time plot, with Price log-transformed inside the plotting call.
autoplot(gas_prices_tsibble, log(Price)) +
  labs(title = "Weekly Gas Prices Logged ($USD)")
# Logging the data does not appear to have much of an impact.
# Fit an STL decomposition of Price (trend window of 5, periodic seasonal
# window, robust = TRUE), then plot the extracted components.
model(
  gas_prices_tsibble,
  STL(
    Price ~ trend(window = 5) + season(window = "periodic"),
    robust = TRUE
  )
) %>%
  components() %>%
  autoplot() +
  labs(title = "STL Decomposition of Weekly Gas Prices ($USD)")
# Based on the remainder, the decomposition suggests that the data likely isn’t white noise. There is also clear seasonality and a significant upward trend beginning in 2020, likely due to COVID lockdowns and further amplified by the Russian invasion of Ukraine.
# Estimate the Box-Cox parameter for Price with the Guerrero feature and keep
# it as a plain numeric value.
lambda <- gas_prices_tsibble %>%
  features(Price, features = guerrero) %>%
  pull(lambda_guerrero)

# Plot the Box-Cox transformed series.
# The title is built with base plotmath (bquote) instead of latex2exp::TeX():
# the literal "$" in "($USD)" was being read as a TeX math-mode delimiter,
# which left the string with three unbalanced "$" and put \lambda outside
# math mode. In plotmath the "$" stays literal; the bare symbol `lambda`
# renders as the Greek letter, while .(...) splices in the rounded estimate.
gas_prices_tsibble %>%
  autoplot(box_cox(Price, lambda)) +
  labs(
    y = "",
    title = bquote(
      "Transformed Weekly Gas Prices ($USD) with" ~
        lambda == .(round(lambda, 2))
    )
  )
# The Box-Cox transformation did not appear to have much of an impact on the data.
# Benchmark 1: MEAN model on the full series, forecast 2 periods ahead and
# draw the forecast on top of the historical data.
model(gas_prices_tsibble, MEAN(Price)) %>%
  forecast(h = 2) %>%
  autoplot(gas_prices_tsibble) +
  labs(title = "Mean Forecast of Gas Prices ($USD) in 2 Weeks")
# Tried forecasting prices using the MEAN method. I’m going to immediately throw out this forecast as horribly unreliable: the prediction interval is far too wide.
# Benchmark 2: NAIVE model on the full series, forecast 2 periods ahead and
# draw the forecast on top of the historical data.
model(gas_prices_tsibble, NAIVE(Price)) %>%
  forecast(h = 2) %>%
  autoplot(gas_prices_tsibble) +
  labs(title = "NAIVE Forecast of Gas Prices ($USD) in 2 Weeks")
#Tried running a NAIVE forecast of gas prices in 2 weeks. This looks a lot better than the MEAN method. I would say the NAIVE method is most accurate because of the way gas prices tend to behave. They (normally) don’t rise or fall by much more than a few cents at a time, so basing the forecast on the last observation makes the most sense to me.
# Benchmark 3: random walk with drift on the full series, forecast 2 periods
# ahead and draw the forecast on top of the historical data.
model(gas_prices_tsibble, RW(Price ~ drift())) %>%
  forecast(h = 2) %>%
  autoplot(gas_prices_tsibble) +
  labs(title = "DRIFT Forecast of Gas Prices ($USD) in 2 Weeks")
#Tried forecasting gas prices using the drift method. While I think this forecast is better than the MEAN method, I would guess NAIVE is better for this data.
# Fit all three benchmark models to the full series in a single mable, then
# report their accuracy measures on the data they were fitted to.
gas_prices_models <- model(
  gas_prices_tsibble,
  MEAN(Price),
  NAIVE(Price),
  RW(Price ~ drift())
)
accuracy(gas_prices_models)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price) Trai… 5.26e-17 0.616 0.522 -4.17 17.9 1.10 1.00 0.994
## 2 NAIVE(Price) Trai… 1.79e- 3 0.0557 0.0395 0.0385 1.32 0.0834 0.0905 0.589
## 3 RW(Price ~ dri… Trai… 1.01e-16 0.0557 0.0395 -0.0234 1.32 0.0835 0.0905 0.589
# Hold out the tail of the series: the first 70% of rows (rounded) form the
# training set.
n <- nrow(gas_prices_tsibble)
cutoff <- round(0.7 * n)

# seq_len() instead of 1:cutoff, so a cutoff of 0 selects no rows rather than
# expanding to c(1, 0).
gas_train <- gas_prices_tsibble %>%
  slice(seq_len(cutoff))

# Refit the three benchmark models on the training rows only and report
# their accuracy measures on that training data.
gas_train_models <- gas_train %>%
  model(
    MEAN(Price),
    NAIVE(Price),
    RW(Price ~ drift())
  )
accuracy(gas_train_models)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price) Trai… -1.20e-16 0.555 0.492 -3.56 17.0 1.25 1.10 0.995
## 2 NAIVE(Price) Trai… -3.74e- 4 0.0499 0.0379 -0.0288 1.28 0.0963 0.0987 0.552
## 3 RW(Price ~ dr… Trai… -1.24e-16 0.0499 0.0379 -0.0161 1.28 0.0962 0.0987 0.552
# Forecast 2 periods past the end of the training set with each model, then
# score those forecasts against the full series.
forecast_models <- gas_train_models %>%
  forecast(h = 2)
accuracy(forecast_models, gas_prices_tsibble)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price) Test -0.544 0.545 0.544 -21.9 21.9 1.38 1.08 -0.5
## 2 NAIVE(Price) Test -0.0585 0.0638 0.0585 -2.36 2.36 0.149 0.126 -0.5
## 3 RW(Price ~ drift()) Test -0.0579 0.0632 0.0579 -2.34 2.34 0.147 0.125 -0.5
# Based on the test-set accuracy (a single 70/30 train/test split, with each model scored on its 2-week-ahead forecasts rather than a full cross-validation), the drift model is the most accurate, being just a tiny bit more accurate than the NAIVE model based on the lower RMSE (0.0632 vs. 0.0638, a difference so tiny that it hardly matters). As stated above, the test-set results confirm that the MEAN method is wildly inaccurate in comparison.
# Final forecast: refit the drift model on the full series, forecast 2
# periods ahead and attach the 80% prediction interval.
model(gas_prices_tsibble, RW(Price ~ drift())) %>%
  forecast(h = 2) %>%
  hilo(level = 80)
## # A tsibble: 2 x 5 [1W]
## # Key: .model [1]
## .model Week Price .mean `80%`
## <chr> <week> <dist> <dbl> <hilo>
## 1 RW(Price ~ drift()) 2022 W41 N(3.9, 0.0031) 3.91 [3.839329, 3.982253]80
## 2 RW(Price ~ drift()) 2022 W42 N(3.9, 0.0062) 3.91 [3.811444, 4.013720]80