For this assignment, I used monthly data on the Consumer Price Index for All Urban Consumers: New Vehicles in U.S. City Average from FRED. I trained ARIMA, ETS, Neural Network, and ensemble models using data from 2011-2015 and tested them on data from 2016.

Load Data

setwd("~/Documents/MSAE/Predictive Analytics")
CPIdata <- read.csv("CPIdata.csv")

Creating Tsibble

CPIdata <- CPIdata %>% 
  mutate(Month = yearmonth(Month)) %>% 
  as_tsibble(index = Month)
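
As a quick sanity check, the tsibble index should have a one-month interval with no implicit gaps; a minimal sketch using tsibble's helpers:

interval(CPIdata)  # should report a 1-month interval
has_gaps(CPIdata)  # .gaps should be FALSE (no missing months)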

Training Set & Test Set

training <- CPIdata %>% 
  filter(year(Month) < 2016 )

test <- CPIdata %>% 
  filter(year(Month) == 2016)

Time-Series Plot

training %>% autoplot(CPI) + 
  labs(title = "Monthly CPI data") +
  xlab("Month") +
  ylab("Consumer Price Index") 

Seasonal Plot

The CPI tends to be slightly higher in the spring months than in the fall, but there are no strong seasonal fluctuations over the course of the year.

training %>% gg_season(CPI) + 
  labs(title = "Seasonality of CPI: New Vehicles in U.S City Average") +
  xlab("Month") +
  ylab("Consumer Price Index") 

Neural Net Models

neuralnet_model <- training %>% model(NNETAR(CPI))
report(neuralnet_model)
## Series: CPI 
## Model: NNAR(1,1,2)[12] 
## 
## Average of 20 networks, each of which is
## a 2-2-1 network with 9 weights
## options were - linear output units 
## 
## sigma^2 estimated as 0.06872
neuralnet_fcst <- neuralnet_model %>% forecast(test, times=100)

neuralnet_model %>% gg_tsresiduals(lag_max = 12) + labs(title = "Neural Net Model")

neuralnet_fcst %>% autoplot(test) + 
  labs(title = "Neural Net model (2 nodes)") +
  xlab("Month") +
  ylab("Consumer Price Index") 

nn_accuracy <- accuracy(neuralnet_fcst, test)
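
Because NNETAR has no closed-form forecast distribution, fable builds its prediction intervals from simulated future sample paths (the times = 100 argument above sets how many paths are simulated). A minimal sketch for pulling those intervals out of the forecast object:

# Extract 80% and 95% simulation-based prediction intervals
neuralnet_fcst %>% hilo(level = c(80, 95))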


neuralnet_model2 <- training %>% model(NNETAR(CPI, n_nodes = 50))
report(neuralnet_model2)
## Series: CPI 
## Model: NNAR(1,1,50)[12] 
## 
## Average of 20 networks, each of which is
## a 2-50-1 network with 201 weights
## options were - linear output units 
## 
## sigma^2 estimated as 0.05282
neuralnet_fcst2 <- neuralnet_model2 %>% forecast(test, times=100)
nn_accuracy2 <- accuracy(neuralnet_fcst2, test)

neuralnet_fcst2 %>% autoplot(test) + 
  labs(title = "Neural Net model (50 nodes)") +
  xlab("Month") +
  ylab("Consumer Price Index") 

ARIMA

ARIMA_model <- training %>% 
   model(ARIMA(CPI))


ARIMA_fcst <- ARIMA_model %>% forecast(test)

ARIMA_plot <- ARIMA_fcst %>% autoplot(test) + 
  labs(title = "ARIMA model") +
  xlab("Month") +
  ylab("Consumer Price Index") 

ARIMA_accuracy <- accuracy(ARIMA_fcst, test)

report(ARIMA_model)
## Series: CPI 
## Model: ARIMA(1,1,0)(1,1,0)[12] 
## 
## Coefficients:
##          ar1     sar1
##       0.2935  -0.4529
## s.e.  0.1494   0.1485
## 
## sigma^2 estimated as 0.131:  log likelihood=-18.33
## AIC=42.66   AICc=43.22   BIC=48.21
ARIMA_model %>% gg_tsresiduals(lag_max = 12) + labs(title = "ARIMA model")

ARIMA_plot
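
To complement the residual plots, a Ljung-Box test on the innovation residuals checks whether any autocorrelation remains; a minimal sketch using the ljung_box feature from feasts:

# A small p-value would suggest the ARIMA residuals are not white noise
ARIMA_model %>% 
  augment() %>% 
  features(.innov, ljung_box, lag = 12)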

ETS Model

ETS_model <- training %>% 
  model(ETS(CPI))

ETS_fcst <- ETS_model %>% forecast(test)

ETS_plot <- ETS_fcst %>% autoplot(test) + 
  labs(title = "ETS model")+
  xlab("Month") +
  ylab("Consumer Price Index") 
ETS_accuracy <- accuracy(ETS_fcst, test)


report(ETS_model)
## Series: CPI 
## Model: ETS(A,Ad,A) 
##   Smoothing parameters:
##     alpha = 0.9990792 
##     beta  = 0.008088877 
##     gamma = 0.0001993413 
##     phi   = 0.9429169 
## 
##   Initial states:
##      l[0]      b[0]       s[0]      s[-1]      s[-2]      s[-3]      s[-4]
##  138.8518 0.5607148 -0.3583554 -0.3535446 -0.4886348 -0.5093881 -0.3833188
##       s[-5]     s[-6]     s[-7]     s[-8]    s[-9]    s[-10]     s[-11]
##  0.06774395 0.5289274 0.6036705 0.5601461 0.353354 0.2559085 -0.2765088
## 
##   sigma^2:  0.076
## 
##      AIC     AICc      BIC 
## 107.0460 123.7289 144.7442
ETS_model %>% gg_tsresiduals(lag_max = 12) + labs(title="ETS Model")

ETS_plot 
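
Since the selected model is ETS(A,Ad,A), it can also be useful to plot the estimated level, damped trend, and seasonal components to see their relative sizes; a sketch using components() from fabletools:

ETS_model %>% 
  components() %>% 
  autoplot() +
  labs(title = "ETS(A,Ad,A) components")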

Ensemble Model

ensemble_model <- training %>%  model((ETS(CPI) + ARIMA(CPI) + NNETAR(CPI))/3)

ensemble_fcst <- ensemble_model %>% forecast(test, times = 100)


Ensemble_plot <- ensemble_fcst %>% autoplot(test) + 
  labs(title = "Ensemble model")+
  xlab("Month") +
  ylab("Consumer Price Index") 

ensemble_accuracy <- accuracy(ensemble_fcst, test)

report(ensemble_model)
## Series: CPI 
## Model: COMBINATION 
## Combination: CPI * 0.333333333333333
## 
## ====================================
## 
## Series: CPI 
## Model: COMBINATION 
## Combination: CPI + CPI
## 
## ======================
## 
## Series: CPI 
## Model: COMBINATION 
## Combination: CPI + CPI
## 
## ======================
## 
## Series: CPI 
## Model: ETS(A,Ad,A) 
##   Smoothing parameters:
##     alpha = 0.9990792 
##     beta  = 0.008088877 
##     gamma = 0.0001993413 
##     phi   = 0.9429169 
## 
##   Initial states:
##      l[0]      b[0]       s[0]      s[-1]      s[-2]      s[-3]      s[-4]
##  138.8518 0.5607148 -0.3583554 -0.3535446 -0.4886348 -0.5093881 -0.3833188
##       s[-5]     s[-6]     s[-7]     s[-8]    s[-9]    s[-10]     s[-11]
##  0.06774395 0.5289274 0.6036705 0.5601461 0.353354 0.2559085 -0.2765088
## 
##   sigma^2:  0.076
## 
##      AIC     AICc      BIC 
## 107.0460 123.7289 144.7442 
## 
## Series: CPI 
## Model: ARIMA(1,1,0)(1,1,0)[12] 
## 
## Coefficients:
##          ar1     sar1
##       0.2935  -0.4529
## s.e.  0.1494   0.1485
## 
## sigma^2 estimated as 0.131:  log likelihood=-18.33
## AIC=42.66   AICc=43.22   BIC=48.21
## 
## 
## Series: CPI 
## Model: NNAR(1,1,2)[12] 
## 
## Average of 20 networks, each of which is
## a 2-2-1 network with 9 weights
## options were - linear output units 
## 
## sigma^2 estimated as 0.06882
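
The ensemble forecast plot created above can then be displayed, matching the ARIMA and ETS sections:

Ensemble_plot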

Plots

fit <- training %>% 
  model('ARIMA' = ARIMA(CPI),
        'ETS' = ETS(CPI),
        "Neural Network (2 nodes)" = NNETAR(CPI),
        "Neural Network (50 nodes)" = NNETAR(CPI, n_nodes = 50))
       

fcst <- fit %>% forecast(test, times = 100)

fcst %>% 
  autoplot(test, level = NULL) +
  autolayer(CPIdata) +
  labs(title = "Model Forecasts of CPI data") +
  ylab("Consumer Price Index") 

Accuracy Metrics and Discussion

accuracy_metrics <- bind_rows(ETS_accuracy, 
                              ARIMA_accuracy,
                              nn_accuracy,
                              nn_accuracy2,
                              ensemble_accuracy)
accuracy_metrics <- accuracy_metrics %>% arrange(RMSE)
accuracy_metrics
## # A tibble: 5 × 10
##   .model             .type       ME  RMSE   MAE      MPE  MAPE  MASE RMSSE  ACF1
## * <chr>              <chr>    <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(CPI)         Test   0.00233 0.386 0.318  8.91e-4 0.216   NaN   NaN 0.695
## 2 ETS(CPI)           Test   0.0975  0.409 0.336  6.55e-2 0.228   NaN   NaN 0.687
## 3 (ETS(CPI) + ARIMA… Test  -0.698   1.01  0.921 -4.75e-1 0.626   NaN   NaN 0.744
## 4 NNETAR(CPI, n_nod… Test  -1.45    1.68  1.51  -9.82e-1 1.02    NaN   NaN 0.612
## 5 NNETAR(CPI)        Test  -1.93    2.32  2.06  -1.31e+0 1.40    NaN   NaN 0.736

I fit best-fit ETS and ARIMA models with the fable package, and both performed substantially better than the neural network models, with ARIMA producing the lowest error and bias across all accuracy metrics. I then used fable to select a best-fit neural network, which produced an NNAR(1,1,2)[12] model averaging 20 networks, and compared it to a version in which I manually increased the number of hidden nodes to 50. Although the model with additional nodes produced a more accurate forecast, both neural network models over-forecast the 12 months of the test set. The ensemble, which averages all three model types, landed between the two groups, with its accuracy pulled down by the over-forecasting neural network component.
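
One caveat on the accuracy table: MASE and RMSSE appear as NaN because accuracy() was given only the 2016 test window, so it has no training observations from which to compute the scaling factor. A minimal sketch of the fix, passing the full series instead (shown for the ARIMA forecasts; the same applies to the other models):

# Supplying the full data lets accuracy() use the pre-2016 observations
# to compute the MASE/RMSSE scaling denominators
accuracy(ARIMA_fcst, CPIdata)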