Libraries

require(fpp3)
## Loading required package: fpp3
## Warning: package 'fpp3' was built under R version 4.3.1
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.3
## ✔ dplyr       1.1.1     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.3     ✔ fable       0.3.3
## ✔ ggplot2     3.4.4     ✔ fabletools  0.3.4
## Warning: package 'lubridate' was built under R version 4.3.1
## Warning: package 'ggplot2' was built under R version 4.3.1
## Warning: package 'tsibble' was built under R version 4.3.1
## Warning: package 'tsibbledata' was built under R version 4.3.1
## Warning: package 'feasts' was built under R version 4.3.1
## Warning: package 'fabletools' was built under R version 4.3.1
## Warning: package 'fable' was built under R version 4.3.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
require(ggplot2)
require(tidyverse)
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.1
## Warning: package 'stringr' was built under R version 4.3.1
## Warning: package 'forcats' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.4
## ✔ purrr   1.0.1     ✔ stringr 1.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
require(magrittr)
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
require(fable)
require(fabletools)


mydata=read.csv('C:/Users/lfult/Documents/_Courses/Boston College/Predictive analytics/crude.csv')
mydata$Year=seq(as.Date("2018/10/1"), as.Date("2023/9/1"), "months")

Time Series

myts=mydata%>%
  mutate(YearMonth = yearmonth(as.character(Year))) %>%
  as_tsibble(index = YearMonth)
myts%>%autoplot(Crude_Price)

train=myts[1:48,]
test=myts[49:60,]

Models

fit=train |>model(trend_model = TSLM(Crude_Price ~ trend()), 
                 mean_model=MEAN(Crude_Price), 
                 drift_model=RW(Crude_Price~drift()),
                 naive_model=NAIVE(Crude_Price),
                 snaive_model=SNAIVE(Crude_Price), 
                 ets_model=ETS(Crude_Price), 
                 arima_model=ARIMA(Crude_Price))

report(fit%>%dplyr::select(trend_model))
## Series: Crude_Price 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.17632 -0.29958 -0.02533  0.30430  1.38744 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.093180   0.165387   6.610 3.51e-08 ***
## trend()     0.035586   0.005876   6.056 2.39e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.564 on 46 degrees of freedom
## Multiple R-squared: 0.4436,  Adjusted R-squared: 0.4315
## F-statistic: 36.68 on 1 and 46 DF, p-value: 2.3865e-07
report(fit%>%dplyr::select(mean_model))
## Series: Crude_Price 
## Model: MEAN 
## 
## Mean: 1.965 
## sigma^2: 0.5595
report(fit%>%dplyr::select(drift_model))
## Series: Crude_Price 
## Model: RW w/ drift 
## 
## Drift: 0.0131 (se: 0.037)
## sigma^2: 0.0644
report(fit%>%dplyr::select(naive_model))
## Series: Crude_Price 
## Model: NAIVE 
## 
## sigma^2: 0.0644
report(fit%>%dplyr::select(snaive_model))
## Series: Crude_Price 
## Model: SNAIVE 
## 
## sigma^2: 0.7556
report(fit%>%dplyr::select(ets_model))
## Series: Crude_Price 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##      l[0]
##  1.968701
## 
##   sigma^2:  0.0646
## 
##      AIC     AICc      BIC 
## 58.29835 58.84381 63.91196
report(fit%>%dplyr::select(arima_model))
## Series: Crude_Price 
## Model: ARIMA(0,1,1) 
## 
## Coefficients:
##          ma1
##       0.5121
## s.e.  0.1453
## 
## sigma^2 estimated as 0.05183:  log likelihood=3.22
## AIC=-2.43   AICc=-2.16   BIC=1.27
fit|>forecast(h=12)
## # A fable: 84 x 4 [1M]
## # Key:     .model [7]
##    .model      YearMonth  Crude_Price .mean
##    <chr>           <mth>       <dist> <dbl>
##  1 trend_model  2022 Oct N(2.8, 0.35)  2.84
##  2 trend_model  2022 Nov N(2.9, 0.35)  2.87
##  3 trend_model  2022 Dec N(2.9, 0.35)  2.91
##  4 trend_model  2023 Jan N(2.9, 0.35)  2.94
##  5 trend_model  2023 Feb   N(3, 0.35)  2.98
##  6 trend_model  2023 Mar   N(3, 0.35)  3.01
##  7 trend_model  2023 Apr N(3.1, 0.36)  3.05
##  8 trend_model  2023 May N(3.1, 0.36)  3.09
##  9 trend_model  2023 Jun N(3.1, 0.36)  3.12
## 10 trend_model  2023 Jul N(3.2, 0.36)  3.16
## # ℹ 74 more rows

Plots

fit|>forecast(h=12)|>autoplot(train)+geom_line(aes(y=.fitted),data=augment(fit))

myf=fit%>%forecast(h=12)

Ensemble

f1= myf[myf$.model=='trend_model',]$.mean
f2= myf[myf$.model=='mean_model',]$.mean
f3= myf[myf$.model=='drift_model',]$.mean
f4= myf[myf$.model=='naive_model',]$.mean
f5= myf[myf$.model=='snaive_model',]$.mean
f6= myf[myf$.model=='ets_model',]$.mean
f7= myf[myf$.model=='arima_model',]$.mean
ens=(f6+f7)/2
plot(myts$Crude_Price~myts$Year, type='l')
lines(myts$Year[49:60], f1, col='blue')
lines(myts$Year[49:60], f2, col='red')
lines(myts$Year[49:60], f3, col='green')
lines(myts$Year[49:60], f4, col='orange')
lines(myts$Year[49:60], f5, col='purple')
lines(myts$Year[49:60], f6, col='gray')
lines(myts$Year[49:60], f7, col='brown')
lines(myts$Year[49:60], ens, col='yellow')

Accuracy Metrics

tmp=fabletools::accuracy(myf, test)
res=test$Crude_Price-ens
forMASE=myf[myf$.model=='snaive_model',]$.mean
tmp%>%add_row(.model='ensemble', .type='Test', ME=ME(res), RMSE=RMSE(res), MAE=MAE(res), MPE=MPE(res, test$Crude_Price), MAPE=MAPE(res, test$Crude_Price), MASE=MAE(res)/MAE(forMASE), RMSSE=RMSE(res)/RMSE(forMASE), ACF1=ACF1(res))
## # A tibble: 8 × 10
##   .model       .type      ME  RMSE   MAE    MPE  MAPE     MASE    RMSSE  ACF1
##   <chr>        <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl>    <dbl>    <dbl> <dbl>
## 1 arima_model  Test   0.202  0.272 0.223   7.07  7.96 NaN      NaN      0.244
## 2 drift_model  Test  -0.0325 0.185 0.159  -1.65  5.96 NaN      NaN      0.202
## 3 ets_model    Test   0.0524 0.189 0.158   1.49  5.79 NaN      NaN      0.244
## 4 mean_model   Test   0.729  0.752 0.729  26.7  26.7  NaN      NaN      0.244
## 5 naive_model  Test   0.0524 0.189 0.158   1.50  5.79 NaN      NaN      0.244
## 6 snaive_model Test  -0.282  0.677 0.538 -11.1  20.1  NaN      NaN      0.660
## 7 trend_model  Test  -0.338  0.396 0.368 -13.0  14.0  NaN      NaN      0.269
## 8 ensemble     Test   0.127  0.222 0.175   4.28  6.28   0.0589   0.0732 0.244