library(readxl)
library(readr)
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.2
## ✔ dplyr 1.0.9 ✔ tsibbledata 0.4.0
## ✔ tidyr 1.2.0 ✔ feasts 0.2.2
## ✔ lubridate 1.8.0 ✔ fable 0.3.1
## ✔ ggplot2 3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ purrr 0.3.4 ✔ forcats 0.5.1
## ✔ stringr 1.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union() masks lubridate::union(), base::union()
library(seasonal)
##
## Attaching package: 'seasonal'
##
## The following object is masked from 'package:tibble':
##
## view
gas_p <- readr::read_csv("C:\\Users\\PythonAcct\\Documents\\R Scripts\\Forecast Exercises\\Forecast 3\\Weekly_U.S._All_Grades_All_Formulations_Retail_Gasoline_Prices.csv")
## Rows: 1539 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Week of
## dbl (1): Weekly U.S. All Grades All Formulations Retail Gasoline Prices Doll...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
gastp <- gas_p %>%
mutate(Week=yearweek(`Week of`)) %>%
as_tsibble(index=Week) %>%
mutate(Price=`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`) %>%
select(Week,Price) %>%
filter(year(Week)>=2010)
gastp
## # A tsibble: 665 x 2 [1W]
## Week Price
## <week> <dbl>
## 1 2010 W01 2.72
## 2 2010 W02 2.80
## 3 2010 W03 2.79
## 4 2010 W04 2.76
## 5 2010 W05 2.72
## 6 2010 W06 2.71
## 7 2010 W07 2.66
## 8 2010 W08 2.71
## 9 2010 W09 2.76
## 10 2010 W10 2.80
## # … with 655 more rows
## # ℹ Use `print(n = ...)` to see more rows
gastp %>%
autoplot(Price)+
labs(title="Gas Price")
gastp %>%
features(Price, features = guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 -0.342
gastp %>%
autoplot(box_cox(Price,-0.342))+
labs(title="Transformed Gas Prices")
gastp %>%
model(STL(Price ~ trend(window=16) + season(window="periodic"), robust = TRUE)) %>%
components() %>% autoplot() +
labs(title = "STL decomposition: Gas")
A large drop happens around 2014 and 2015, which can be attributed to an increase in American production, OPEC refusing to cut production, and possibly Iran’s opening up to the world. There is a major drop in 2020, which can likely be attributed to COVID-19, where demand for gas dropped dramatically. Following this, there is a huge spike towards the end of the data. Pinpointing a reason for this is difficult, because economists tend to disagree on exactly what is happening. It could be the drastic increase in consumer demand, inflationary governmental actions, or supply chain issues.
gas_view <- gastp %>%
filter(year(Week)>=2021)
gas_seas <- gastp %>%
model(SNAIVE(Price))
gas_mean <- gastp %>%
model(MEAN(Price))
gas_drift <- gastp %>%
model(RW(Price~drift()))
seasf <- gas_seas %>%
forecast(h=2)
meanf <- gas_mean %>%
forecast(h=2)
driftf <- gas_drift %>%
forecast(h=2)
seasf
## # A fable: 2 x 4 [1W]
## # Key: .model [1]
## .model Week Price .mean
## <chr> <week> <dist> <dbl>
## 1 SNAIVE(Price) 2022 W40 N(3.3, 0.38) 3.28
## 2 SNAIVE(Price) 2022 W41 N(3.4, 0.38) 3.36
meanf
## # A fable: 2 x 4 [1W]
## # Key: .model [1]
## .model Week Price .mean
## <chr> <week> <dist> <dbl>
## 1 MEAN(Price) 2022 W40 N(3, 0.38) 3.01
## 2 MEAN(Price) 2022 W41 N(3, 0.38) 3.01
driftf
## # A fable: 2 x 4 [1W]
## # Key: .model [1]
## .model Week Price .mean
## <chr> <week> <dist> <dbl>
## 1 RW(Price ~ drift()) 2022 W40 N(3.8, 0.0031) 3.83
## 2 RW(Price ~ drift()) 2022 W41 N(3.8, 0.0062) 3.84
seasf %>%
autoplot(gas_view)
meanf %>%
autoplot(gas_view)
driftf %>%
autoplot(gas_view)
gas_seas %>%
accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Price) Training 0.0980 0.615 0.473 0.717 16.0 1 1 0.993
gas_mean %>%
accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price) Training -2.16e-16 0.615 0.521 -4.17 17.9 1.10 1.00 0.994
gas_drift %>%
accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 RW(Price ~ dri… Trai… 6.82e-17 0.0556 0.0395 -0.0224 1.32 0.0834 0.0904 0.589
The drift model seems the most accurate to me. I doubt gas prices will drop so drastically in the next two weeks. The mean and SNAIVE models both predict this to happen. The accuracy function reports the drift function as the most accurate by a bit.
gas_tr <- gastp %>%
stretch_tsibble(.init = 5, .step = 1) %>%
relocate(Week, .id)
gas_tr %>%
model(SNAIVE(Price)) %>%
forecast(h = 1) %>%
accuracy(gastp)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022 W40
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Price) Test 0.0980 0.615 0.473 0.717 16.0 1 1 0.993
gas_tr %>%
model(MEAN(Price)) %>%
forecast(h = 1) %>%
accuracy(gastp)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022 W40
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price) Test -0.107 0.618 0.492 -7.65 17.6 1.04 1.01 0.994
gas_tr %>%
model(RW(Price ~ drift())) %>%
forecast(h = 1) %>%
accuracy(gastp)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022 W40
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 RW(Price ~ dri… Test -7.08e-4 0.0558 0.0396 -0.0270 1.32 0.0837 0.0907 0.591
The cross validation method confirms that the drift method is the most accurate.
driftf
## # A fable: 2 x 4 [1W]
## # Key: .model [1]
## .model Week Price .mean
## <chr> <week> <dist> <dbl>
## 1 RW(Price ~ drift()) 2022 W40 N(3.8, 0.0031) 3.83
## 2 RW(Price ~ drift()) 2022 W41 N(3.8, 0.0062) 3.84
hilo(driftf)
## # A tsibble: 2 x 6 [1W]
## # Key: .model [1]
## .model Week Price .mean `80%`
## <chr> <week> <dist> <dbl> <hilo>
## 1 RW(Price ~ drift()) 2022 W40 N(3.8, 0.0031) 3.83 [3.762314, 3.905041]80
## 2 RW(Price ~ drift()) 2022 W41 N(3.8, 0.0062) 3.84 [3.734356, 3.936355]80
## # … with 1 more variable: `95%` <hilo>
## # ℹ Use `colnames()` to see all variable names