2018-2022 Data obtained from U.S. Census Bureau.

Load Liabilities

library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.3
## ✔ dplyr       1.1.2     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.2     ✔ fable       0.3.3
## ✔ ggplot2     3.4.2     ✔ fabletools  0.3.4
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

Get data

mydata <- read.csv('/Users/adrianjones/Documents/Forecasting/Week6/Assign6/Vehicles.csv')

library(zoo)
## 
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
## 
##     index
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
mydata$Period <- as.yearmon(mydata$Period, format = "%b-%Y")
mydata$Value <- gsub(",", "", mydata$Value)
mydata$Value <- as.numeric(mydata$Value)

Time series + train/test

myts <- mydata %>%
  mutate(Period = yearmonth(Period)) %>%
  as_tsibble(index = Period)

ggplot(myts, aes(x=Period, y=Value)) +
  geom_line(aes(y=Value)) +
  ggtitle("New orders of vehicles and parts, 2018-2022")

# establish train and test sets + knot
train <- myts[1:48,]
test <- myts[49:nrow(myts),]

Time Series Plots

myts%>%gg_season(Value)

myts%>%gg_subseries(Value)

myts%>%gg_lag(Value)

myts%>%ACF(Value)%>%autoplot()

Transformation

lambda <- myts |> features(Value, features = guerrero) |> pull(lambda_guerrero)
myts |> autoplot(box_cox(Value, lambda)) +
  labs(y = "",title = latex2exp::TeX(paste0(
    "Transformed New Orders of Vehicles and Parts (US) $\\lambda$ = ", round(lambda,2))))

# Decomposition
comp=myts%>%model(stl=STL(Value))%>%components()
comp|>as_tsibble() |>autoplot(Value)+  geom_line(aes(y=trend), colour = "red")+
  geom_line(aes(y=season_adjust), colour = "blue")

comp%>%autoplot()

Models (This takes a bit to run)

# ETS, ARIMA, Neural, + ensemble
m1 <- train |> model(ETS_Model = ETS(Value))
m2 <- train |> model(Arima_Model = ARIMA(Value))
m3 <- train |> model(NNETAR(box_cox(Value, .6)~trend(knots = yearmonth("2020 Apr"))+season()))
m4 <- train |> model(Ensemble = (ETS(Value)+ARIMA(Value)+(NNETAR(box_cox(Value, .6)~trend(knots = yearmonth("2020 Apr"))+season())/3)))

Forecast reports

for (i in 1:4){report(eval(parse(text=paste0('m',i))))}
## Series: Value 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9994048 
## 
##   Initial states:
##     l[0]
##  53972.4
## 
##   sigma^2:  31092211
## 
##      AIC     AICc      BIC 
## 1017.893 1018.439 1023.507 
## Series: Value 
## Model: ARIMA(0,0,2) w/ mean 
## 
## Coefficients:
##          ma1     ma2   constant
##       1.4356  0.8333  52717.041
## s.e.  0.1215  0.1510   1735.715
## 
## sigma^2 estimated as 1.5e+07:  log likelihood=-464.79
## AIC=937.58   AICc=938.51   BIC=945.07
## Series: Value 
## Model: NNAR(3,1,9)[12] 
## Transformation: box_cox(Value, 0.6) 
## 
## Average of 20 networks, each of which is
## a 17-9-1 network with 172 weights
## options were - linear output units 
## 
## sigma^2 estimated as 0.0006738
## Series: Value 
## Model: COMBINATION 
## Combination: Value + Value
## 
## ==========================
## 
## Series: Value 
## Model: COMBINATION 
## Combination: Value + Value
## 
## ==========================
## 
## Series: Value 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9994048 
## 
##   Initial states:
##     l[0]
##  53972.4
## 
##   sigma^2:  31092211
## 
##      AIC     AICc      BIC 
## 1017.893 1018.439 1023.507 
## 
## Series: Value 
## Model: ARIMA(0,0,2) w/ mean 
## 
## Coefficients:
##          ma1     ma2   constant
##       1.4356  0.8333  52717.041
## s.e.  0.1215  0.1510   1735.715
## 
## sigma^2 estimated as 1.5e+07:  log likelihood=-464.79
## AIC=937.58   AICc=938.51   BIC=945.07
## 
## 
## Series: Value 
## Model: COMBINATION 
## Transformation: box_cox(Value, 0.6) 
## Combination: Value * 0.333333333333333
## 
## ======================================
## 
## Series: Value 
## Model: NNAR(3,1,9)[12] 
## Transformation: box_cox(Value, 0.6) 
## 
## Average of 20 networks, each of which is
## a 17-9-1 network with 172 weights
## options were - linear output units 
## 
## sigma^2 estimated as 0.0006917

Plots

myplot <- function(model) {
  model |> forecast(test) |> autoplot(myts) +
    geom_line(aes(y = .fitted, col = .model), data = augment(model)) +
    ggtitle(names(model))
}

for (i in 1:4){print(myplot(eval(parse(text=paste0('m',i)))))}

## Warning: Removed 12 rows containing missing values (`geom_line()`).

## Warning: Removed 12 rows containing missing values (`geom_line()`).

Accuracy Metrics

tmp3=c(rep(0,10))
mymat=matrix(rep(0, 4*nrow(train)), ncol=4)

for (i in 1:4){
  tmp=eval(parse(text=paste0('m',i)))%>%forecast(test)
  tmp2=tmp%>%accuracy(test,measures = list(point_accuracy_measures))
  tmp3=rbind(tmp2, tmp3)
}

tmp3=tmp3[tmp3$RMSE>0,]
tmp3$MASE=tmp3$RMSSE=tmp3$.type=NULL
tmp3%>%arrange(RMSE)
## # A tibble: 4 × 7
##   .model                                   ME   RMSE    MAE     MPE   MAPE  ACF1
##   <chr>                                 <dbl>  <dbl>  <dbl>   <dbl>  <dbl> <dbl>
## 1 "ETS_Model"                          4.05e3 4.48e3 4.06e3  6.76e0 6.78e0 0.665
## 2 "Arima_Model"                        5.84e3 6.47e3 5.86e3  9.76e0 9.80e0 0.621
## 3 "NNETAR(box_cox(Value, 0.6) ~ tren…  1.15e4 1.41e4 1.15e4  1.94e1 1.94e1 0.570
## 4 "Ensemble"                          -4.63e6 4.76e6 4.63e6 -7.86e3 7.86e3 0.660