Forecast #3

Load libraries and dataset

library(readxl)
library(readr)
library(fpp3)

## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──

## ✔ tibble      3.1.8     ✔ tsibble     1.1.2
## ✔ dplyr       1.0.9     ✔ tsibbledata 0.4.0
## ✔ tidyr       1.2.0     ✔ feasts      0.2.2
## ✔ lubridate   1.8.0     ✔ fable       0.3.1
## ✔ ggplot2     3.3.6

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(tidyverse)

## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──

## ✔ purrr   0.3.4     ✔ forcats 0.5.1
## ✔ stringr 1.4.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date()        masks base::date()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ tsibble::intersect()     masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval()      masks lubridate::interval()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ tsibble::setdiff()       masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union()         masks lubridate::union(), base::union()

library(seasonal)

## 
## Attaching package: 'seasonal'
## 
## The following object is masked from 'package:tibble':
## 
##     view

gas_p <- readr::read_csv("C:\\Users\\PythonAcct\\Documents\\R Scripts\\Forecast Exercises\\Forecast 3\\Weekly_U.S._All_Grades_All_Formulations_Retail_Gasoline_Prices.csv")

## Rows: 1539 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Week of
## dbl (1): Weekly U.S. All Grades All Formulations Retail Gasoline Prices Doll...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Make the tsibble

gastp <- gas_p %>% 
  mutate(Week=yearweek(`Week of`)) %>% 
  as_tsibble(index=Week) %>% 
  mutate(Price=`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`) %>% 
  select(Week,Price) %>% 
  filter(year(Week)>=2010)
gastp

## # A tsibble: 665 x 2 [1W]
##        Week Price
##      <week> <dbl>
##  1 2010 W01  2.72
##  2 2010 W02  2.80
##  3 2010 W03  2.79
##  4 2010 W04  2.76
##  5 2010 W05  2.72
##  6 2010 W06  2.71
##  7 2010 W07  2.66
##  8 2010 W08  2.71
##  9 2010 W09  2.76
## 10 2010 W10  2.80
## # … with 655 more rows
## # ℹ Use `print(n = ...)` to see more rows

Plot of raw data

gastp %>% 
  autoplot(Price)+
  labs(title="Gas Price")

Applying a box-cox transformation to the data

gastp %>% 
  features(Price, features = guerrero)

## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1          -0.342

gastp %>% 
  autoplot(box_cox(Price,-0.342))+
  labs(title="Transformed Gas Prices")

STL decomposition of the data

gastp %>% 
  model(STL(Price ~ trend(window=16) + season(window="periodic"), robust = TRUE)) %>%
  components() %>% autoplot() +
  labs(title = "STL decomposition: Gas")

A large drop happens around 2014 and 2015, which can be attributed to an increase in American production, OPEC refusing to cut production, and possibly Iran’s opening up to the world. There is a major drop in 2020, which can likely be attributed to COVID-19, where demand for gas dropped dramatically. Following this, there is a huge spike towards the end of the data. Pinpointing a reason for this is difficult, because economists tend to disagree on exactly what is happening. It could be the drastic increase in consumer demand, inflationary governmental actions, or supply chain issues.

This is a modified tsibble to make viewing the forecasts easier

gas_view <- gastp %>% 
  filter(year(Week)>=2021)

gas_seas <- gastp %>% 
  model(SNAIVE(Price))
gas_mean <- gastp %>% 
  model(MEAN(Price))
gas_drift <- gastp %>% 
  model(RW(Price~drift()))

seasf <- gas_seas %>% 
  forecast(h=2)
meanf <- gas_mean %>% 
  forecast(h=2)
driftf <- gas_drift %>% 
  forecast(h=2)

seasf

## # A fable: 2 x 4 [1W]
## # Key:     .model [1]
##   .model            Week        Price .mean
##   <chr>           <week>       <dist> <dbl>
## 1 SNAIVE(Price) 2022 W40 N(3.3, 0.38)  3.28
## 2 SNAIVE(Price) 2022 W41 N(3.4, 0.38)  3.36

meanf

## # A fable: 2 x 4 [1W]
## # Key:     .model [1]
##   .model          Week      Price .mean
##   <chr>         <week>     <dist> <dbl>
## 1 MEAN(Price) 2022 W40 N(3, 0.38)  3.01
## 2 MEAN(Price) 2022 W41 N(3, 0.38)  3.01

driftf

## # A fable: 2 x 4 [1W]
## # Key:     .model [1]
##   .model                  Week          Price .mean
##   <chr>                 <week>         <dist> <dbl>
## 1 RW(Price ~ drift()) 2022 W40 N(3.8, 0.0031)  3.83
## 2 RW(Price ~ drift()) 2022 W41 N(3.8, 0.0062)  3.84

seasf %>% 
  autoplot(gas_view)

meanf %>% 
  autoplot(gas_view)

driftf %>% 
  autoplot(gas_view)

gas_seas %>% 
  accuracy()

## # A tibble: 1 × 10
##   .model        .type        ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>         <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Price) Training 0.0980 0.615 0.473 0.717  16.0     1     1 0.993

gas_mean %>% 
  accuracy()

## # A tibble: 1 × 10
##   .model      .type           ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>       <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price) Training -2.16e-16 0.615 0.521 -4.17  17.9  1.10  1.00 0.994

gas_drift %>% 
  accuracy()

## # A tibble: 1 × 10
##   .model          .type       ME   RMSE    MAE     MPE  MAPE   MASE  RMSSE  ACF1
##   <chr>           <chr>    <dbl>  <dbl>  <dbl>   <dbl> <dbl>  <dbl>  <dbl> <dbl>
## 1 RW(Price ~ dri… Trai… 6.82e-17 0.0556 0.0395 -0.0224  1.32 0.0834 0.0904 0.589

The drift model seems the most accurate to me. I doubt gas prices will drop so drastically in the next two weeks. The mean and SNAIVE models both predict this to happen. The accuracy function reports the drift function as the most accurate by a bit.

gas_tr <- gastp %>%
  stretch_tsibble(.init = 5, .step = 1) %>% 
  relocate(Week, .id)
gas_tr %>%
  model(SNAIVE(Price)) %>%
  forecast(h = 1) %>%
  accuracy(gastp)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2022 W40

## # A tibble: 1 × 10
##   .model        .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>         <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Price) Test  0.0980 0.615 0.473 0.717  16.0     1     1 0.993

gas_tr %>%
  model(MEAN(Price)) %>%
  forecast(h = 1) %>%
  accuracy(gastp)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2022 W40

## # A tibble: 1 × 10
##   .model      .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>       <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price) Test  -0.107 0.618 0.492 -7.65  17.6  1.04  1.01 0.994

gas_tr %>%
  model(RW(Price ~ drift())) %>%
  forecast(h = 1) %>%
  accuracy(gastp)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2022 W40

## # A tibble: 1 × 10
##   .model          .type       ME   RMSE    MAE     MPE  MAPE   MASE  RMSSE  ACF1
##   <chr>           <chr>    <dbl>  <dbl>  <dbl>   <dbl> <dbl>  <dbl>  <dbl> <dbl>
## 1 RW(Price ~ dri… Test  -7.08e-4 0.0558 0.0396 -0.0270  1.32 0.0837 0.0907 0.591

The cross validation method confirms that the drift method is the most accurate.

driftf

## # A fable: 2 x 4 [1W]
## # Key:     .model [1]
##   .model                  Week          Price .mean
##   <chr>                 <week>         <dist> <dbl>
## 1 RW(Price ~ drift()) 2022 W40 N(3.8, 0.0031)  3.83
## 2 RW(Price ~ drift()) 2022 W41 N(3.8, 0.0062)  3.84

hilo(driftf)

## # A tsibble: 2 x 6 [1W]
## # Key:       .model [1]
##   .model                  Week          Price .mean                  `80%`
##   <chr>                 <week>         <dist> <dbl>                 <hilo>
## 1 RW(Price ~ drift()) 2022 W40 N(3.8, 0.0031)  3.83 [3.762314, 3.905041]80
## 2 RW(Price ~ drift()) 2022 W41 N(3.8, 0.0062)  3.84 [3.734356, 3.936355]80
## # … with 1 more variable: `95%` <hilo>
## # ℹ Use `colnames()` to see all variable names