Forecast #3

#Installing packages

library(fpp3)

## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──

## ✔ tibble      3.1.8     ✔ tsibble     1.1.2
## ✔ dplyr       1.0.9     ✔ tsibbledata 0.4.0
## ✔ tidyr       1.2.0     ✔ feasts      0.2.2
## ✔ lubridate   1.8.0     ✔ fable       0.3.1
## ✔ ggplot2     3.3.6

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(readxl)
library(ggplot2)
library(tsibble)
library(lubridate)
library(tidyverse)

## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──

## ✔ readr   2.1.2     ✔ stringr 1.4.1
## ✔ purrr   0.3.4     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date()        masks base::date()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ tsibble::intersect()     masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval()      masks lubridate::interval()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ tsibble::setdiff()       masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union()         masks lubridate::union(), base::union()

library(dplyr)
library(seasonal)

## 
## Attaching package: 'seasonal'
## 
## The following object is masked from 'package:tibble':
## 
##     view

library(zoo)

## 
## Attaching package: 'zoo'
## 
## The following object is masked from 'package:tsibble':
## 
##     index
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(tidyr)
library(tibble)
library(ggfortify)
library(moments)
library(fable)
library(brew)
library(sos)

## 
## Attaching package: 'sos'
## 
## The following object is masked from 'package:tidyr':
## 
##     matches
## 
## The following object is masked from 'package:dplyr':
## 
##     matches
## 
## The following object is masked from 'package:utils':
## 
##     ?

library(slider)
library(x13binary)
library(USgas)

Gas Prices Data and Plot

The data looks very similar to an equity price graph. There aren’t many significant seasonal or cyclical trends here which make the price movements seems random. There are four instances of extreme price movements: the drops in 2015, 2016, and 2020, and the spike between 2021 and 2022. In 2015 through 2016 crude oil prices fall due to over production. This supply increase decreased the cost of gas in these two years to record lows. The Covid pandemic in early 2020 reduced demand for gasoline which reduced the price. The Russian-Ukrainian war has reduced supply and increased political tension. This has dramaticall increase the cost of gas.

#import data
gasprice <- read_excel("C:/Users/natha/Desktop/Forecasting in R/Weekly_U.S._All_Grades_All_Formulations_Retail_Gasoline_Prices2010-2022.xlsx")

head(gasprice)

## # A tibble: 6 × 2
##   Date                Price
##   <dttm>              <dbl>
## 1 2022-10-03 00:00:00  3.91
## 2 2022-09-26 00:00:00  3.83
## 3 2022-09-19 00:00:00  3.77
## 4 2022-09-12 00:00:00  3.80
## 5 2022-09-05 00:00:00  3.86
## 6 2022-08-29 00:00:00  3.94

gas <- gasprice%>%
  mutate(Week = yearweek(Date)) %>%
  as_tsibble(index = Week)

autoplot(gas, Price) + labs(title = "Weekly US Prices", y = "Price (USD)", x = "Week")

Decompsition

The decompositions offer little to better explain the price movement in the graph. The STL and both classical decompositions show a potential cyclical trend as the trend graphs show an upward, downward, downward, upward pattern. More data would be needed to better show a pattern. 2x41 moving average was done in an attempt to smooth the curve further than the 2x4-MA.

#decomp STL 

gasstl <- gas %>%
  model(stl = STL(Price ~ season(window = "periodic"), robust = TRUE)) %>%
  components() %>% 
  autoplot() +
  labs(title = "STL Decomposition")
gasstl

#additive decomp
gasadd <- gas %>%
  model(classical_decomposition(Price, type = "additive")) %>%
  components() %>%
  autoplot() +
  labs(title = "Additive Decomposition")
gasadd

## Warning: Removed 26 row(s) containing missing values (geom_path).

#multipicative decomp
gasmulti <- gas %>%
  model(classical_decomposition(Price, type = "multiplicative")) %>%
  components() %>%
  autoplot() +
  labs(title = "Multiplicative Decomposition")
gasmulti

## Warning: Removed 26 row(s) containing missing values (geom_path).

#2x4 moving average
gas_ma1 <- gas %>%
  mutate(
    `4-MA` = slider::slide_dbl(Price, mean,
                               .before = 2, .after = 1 , .complete = TRUE),
    `2x4-MA` = slider::slide_dbl(`4-MA`, mean,
                                 .before = 1, .after = 0, .complete = TRUE)
  )
autoplot(gas_ma1, Price) + geom_line(aes(y = `2x4-MA`), colour = "#D55E00") +
  labs(y = "Price (USD)",
       title = "2x4 MA US Gas Prices") +
  guides(colour = guide_legend(title = "series"))

## Warning: Removed 4 row(s) containing missing values (geom_path).

gas_ma1

## # A tsibble: 666 x 5 [1W]
##    Date                Price     Week `4-MA` `2x4-MA`
##    <dttm>              <dbl>   <week>  <dbl>    <dbl>
##  1 2010-01-04 00:00:00  2.72 2010 W01  NA       NA   
##  2 2010-01-11 00:00:00  2.80 2010 W02  NA       NA   
##  3 2010-01-18 00:00:00  2.79 2010 W03   2.77    NA   
##  4 2010-01-25 00:00:00  2.76 2010 W04   2.77     2.77
##  5 2010-02-01 00:00:00  2.72 2010 W05   2.74     2.76
##  6 2010-02-08 00:00:00  2.71 2010 W06   2.71     2.73
##  7 2010-02-15 00:00:00  2.66 2010 W07   2.70     2.71
##  8 2010-02-22 00:00:00  2.71 2010 W08   2.71     2.70
##  9 2010-03-01 00:00:00  2.76 2010 W09   2.73     2.72
## 10 2010-03-08 00:00:00  2.80 2010 W10   2.78     2.76
## # … with 656 more rows

#2x41 moving average
gas_ma <- gas %>%
  mutate(
    `41-MA` = slider::slide_dbl(Price, mean,
                               .before = 20, .after = 20 , .complete = TRUE),
    `2x41-MA` = slider::slide_dbl(`41-MA`, mean,
                                 .before = 1, .after = 0, .complete = TRUE)
  )
autoplot(gas_ma, Price) + geom_line(aes(y = `2x41-MA`), colour = "#D55E00") +
  labs(y = "Price (USD)",
       title = "2x41 MA US Gas Prices") +
  guides(colour = guide_legend(title = "series"))

## Warning: Removed 41 row(s) containing missing values (geom_path).

Forecast

I used the naive, drift, and mean methods for forecasting the next two weeks of gas prices. Since the data doesn’t follow a seasonal trend, I did not forecast it using seasonal naive.

#setting training and test data
gas_train <- gas %>%
  filter(year(Week) < 2019)

gas_test <- gas %>%
  filter(year(Week) > 2018)
head(gas_train)

## # A tsibble: 6 x 3 [1W]
##   Date                Price     Week
##   <dttm>              <dbl>   <week>
## 1 2010-01-04 00:00:00  2.72 2010 W01
## 2 2010-01-11 00:00:00  2.80 2010 W02
## 3 2010-01-18 00:00:00  2.79 2010 W03
## 4 2010-01-25 00:00:00  2.76 2010 W04
## 5 2010-02-01 00:00:00  2.72 2010 W05
## 6 2010-02-08 00:00:00  2.71 2010 W06

head(gas_test)

## # A tsibble: 6 x 3 [1W]
##   Date                Price     Week
##   <dttm>              <dbl>   <week>
## 1 2018-12-31 00:00:00  2.36 2019 W01
## 2 2019-01-07 00:00:00  2.33 2019 W02
## 3 2019-01-14 00:00:00  2.34 2019 W03
## 4 2019-01-21 00:00:00  2.34 2019 W04
## 5 2019-01-28 00:00:00  2.34 2019 W05
## 6 2019-02-04 00:00:00  2.34 2019 W06

recentdata <- gas %>%
  filter(year(Week) >= 2022)

#Forcast using naive method

gas_train_n <- gas_train %>%
  model(NAIVE(Price)) 
gas_test_n <- gas_test %>%
  model(NAIVE(Price)) 

gas_test_n %>%
  forecast(h = "2 weeks") %>%
  autoplot(recentdata) +
  labs(title = "Naive", y = "Price (USD)", x = "Week")

#Drift Method

gas_train_d <- gas_train %>%
  model(RW(Price ~ drift())) 
gas_test_d <- gas_test %>%
  model(RW(Price ~ drift())) 

gas_test_d %>%
  forecast(h = "2 weeks") %>%
  autoplot(recentdata)+
  labs(title = "Drift Method", y = "Price (USD)", x = "Week")

#Mean Method

gas_train_m <- gas_train %>%
  model(MEAN(Price)) 
gas_test_m <- gas_test %>%
  model(MEAN(Price)) 

gas_test_m %>%
  forecast(h = "2 weeks") %>%
  autoplot(recentdata) +
  labs(title = "Mean Method", y = "Price (USD)", x = "Week")

#All together over test period

gasfc <- gas_train %>%
  model(
    Mean = MEAN(Price),
    Naive = NAIVE(Price),
    Drift = RW(Price ~ drift())) 

gasfc %>% 
  forecast(h = 196) %>% 
  autoplot(gas, level = NULL) +
  labs(title = "Testing US Gas Price Forecast", y = "Price (USD)", x = "Week") +
  guides(color = guide_legend(title = "Forecast"))

Accuracy test and interpretation

None of these forecast are acceptable in this stage because they all have autocorralation in their residual. The mean method produced the least accurate because every lag is correlated, the residual are not normally distributed, and the plot of the residuals is not white noise. The naive and drift method have identical ACF test, however, the drift method produced residuals with a distribution closer to normal. Out of the three, the drift method is the best.

#testing accuracy of all three methods
accuracy(gasfc)

## # A tibble: 3 × 10
##   .model .type           ME   RMSE    MAE     MPE  MAPE   MASE  RMSSE  ACF1
##   <chr>  <chr>        <dbl>  <dbl>  <dbl>   <dbl> <dbl>  <dbl>  <dbl> <dbl>
## 1 Mean   Training -2.37e-17 0.555  0.492  -3.57   17.0  1.26   1.10   0.994
## 2 Naive  Training -6.52e- 4 0.0499 0.0379 -0.0401  1.28 0.0968 0.0990 0.555
## 3 Drift  Training -6.93e-17 0.0498 0.0379 -0.0178  1.28 0.0968 0.0990 0.555

#residuals, ACF, and distrubutions of each three methods
gg_tsresiduals(gas_train_m) +
  labs(title = "Mean Residuals")

gg_tsresiduals(gas_train_d) +
  labs(title = "Drift Residuals")

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_bin).

gg_tsresiduals(gas_train_n) +
  labs(title = "Naive Residuals")

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_bin).

Time-Series Cross-Validation

The RMSE of the drift method is lower by .0003 which indicates that the drift method is more accurate than the naive. This is not what we saw from the residual test.

gas_cross <- gas %>%
  stretch_tsibble(.init = 5, .step = 1)

#drift 
gas_cross_d <- gas_cross %>%
  model(RW(Price ~ drift())) %>%
  forecast(h = "2 weeks") %>%
  accuracy(gas)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 2 observations are missing between 2022 W41 and 2022 W42

gas_cross_d

## # A tibble: 1 × 10
##   .model            .type       ME   RMSE    MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>             <chr>    <dbl>  <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 RW(Price ~ drift… Test  -9.38e-4 0.0809 0.0560 -0.0469  1.86 0.118 0.132 0.760

#mean
gas_cross_mean <- gas_cross %>%
  model(MEAN(Price)) %>%
  forecast(h = "2 weeks") %>%
  accuracy(gas)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 2 observations are missing between 2022 W41 and 2022 W42

gas_cross_mean

## # A tibble: 1 × 10
##   .model      .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>       <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price) Test  -0.105 0.620 0.494 -7.60  17.7  1.04  1.01 0.997

#naive
gas_cross_n <- gas_cross %>%
  model(NAIVE(Price)) %>%
  forecast(h = "2 weeks") %>%
  accuracy(gas)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 2 observations are missing between 2022 W41 and 2022 W42

gas_cross_n

## # A tibble: 1 × 10
##   .model       .type      ME   RMSE    MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>        <chr>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NAIVE(Price) Test  0.00266 0.0806 0.0558 0.0476  1.85 0.118 0.131 0.761

Point Forecast and PI

The point forecast is 3.909 with an 80% PI of [3.808, 4.01]

naivegasfc <- gas%>%
  model(NAIVE(Price)) %>%
  forecast(h = "2 weeks") %>%
  hilo()
naivegasfc

## # A tsibble: 2 x 6 [1W]
## # Key:       .model [1]
##   .model           Week          Price .mean                  `80%`
##   <chr>          <week>         <dist> <dbl>                 <hilo>
## 1 NAIVE(Price) 2022 W41 N(3.9, 0.0031)  3.91 [3.837609, 3.980391]80
## 2 NAIVE(Price) 2022 W42 N(3.9, 0.0062)  3.91 [3.808038, 4.009962]80
## # … with 1 more variable: `95%` <hilo>