For this assignment, I used monthly data on Consumer Price Index for All Urban Consumers: New Vehicles in U.S. Cities from FRED. I trained ARIMA, ETS, Neural Network, and an ensemble model using data from 2011-2015 and tested it on data from 2016.
setwd("~/Documents/MSAE/Predictive Analytics")
CPIdata <- read.csv("CPIdata.csv")
CPIdata <- CPIdata %>%
mutate(Month = yearmonth(Month)) %>%
tsibble(index = Month)
training <- CPIdata %>%
filter(year(Month) < 2016 )
test <- CPIdata %>%
filter(year(Month) == 2016)
training %>% autoplot() +
labs(title = "Monthly CPI data") +
xlab("Month") +
ylab("Consumer Price Index")
The CPI tends to be a little higher in the spring over the fall months, but there are not strong seasonal fluctuations throughout the year.
training %>% gg_season(CPI) +
labs(title = "Seasonality of CPI: New Vehicles in U.S City Average") +
xlab("Month") +
ylab("Consumer Price Index")
neuralnet_model <- training %>% model(NNETAR(CPI))
report(neuralnet_model)
## Series: CPI
## Model: NNAR(1,1,2)[12]
##
## Average of 20 networks, each of which is
## a 2-2-1 network with 9 weights
## options were - linear output units
##
## sigma^2 estimated as 0.06872
neuralnet_fcst <- neuralnet_model %>% forecast(test, times=100)
neuralnet_model %>% gg_tsresiduals(lag_max = 12) + labs(title = "Neural Net Model")
neuralnet_fcst %>% autoplot(test) +
labs(title = "Neural Net model (2 nodes)") +
xlab("Month") +
ylab("Consumer Price Index")
nn_accuracy <- accuracy(neuralnet_fcst, test)
neuralnet_model2 <- training %>% model(NNETAR(CPI, n_nodes = 50))
report(neuralnet_model2)
## Series: CPI
## Model: NNAR(1,1,50)[12]
##
## Average of 20 networks, each of which is
## a 2-50-1 network with 201 weights
## options were - linear output units
##
## sigma^2 estimated as 0.05282
neuralnet_fcst2 <- neuralnet_model2 %>% forecast(test, times=100)
nn_accuracy2 <- accuracy(neuralnet_fcst2, test)
neuralnet_fcst2 %>% autoplot(test) +
labs(title = "Neural Net model (50 nodes)") +
xlab("Month") +
ylab("Consumer Price Index")
ARIMA_model <- training %>%
model(ARIMA(CPI))
ARIMA_fcst <- ARIMA_model %>% forecast(test)
ARIMA_plot <- ARIMA_model %>% forecast(test) %>% autoplot(test) +
labs(title = "ARIMA model") +
xlab("Month") +
ylab("Consumer Price Index")
ARIMA_accuracy <- accuracy(ARIMA_fcst, test)
report(ARIMA_model)
## Series: CPI
## Model: ARIMA(1,1,0)(1,1,0)[12]
##
## Coefficients:
## ar1 sar1
## 0.2935 -0.4529
## s.e. 0.1494 0.1485
##
## sigma^2 estimated as 0.131: log likelihood=-18.33
## AIC=42.66 AICc=43.22 BIC=48.21
ARIMA_model %>% gg_tsresiduals(lag_max = 12) + labs(title = "ARIMA model")
ARIMA_plot
ETS_model <- training %>%
model(ETS(CPI))
ETS_fcst <- ETS_model %>% forecast(test)
ETS_plot <- ETS_model %>% forecast(test) %>% autoplot(test) +
labs(title = "ETS model")+
xlab("Month") +
ylab("Consumer Price Index")
ETS_accuracy <- accuracy(ETS_fcst, test)
report(ETS_model)
## Series: CPI
## Model: ETS(A,Ad,A)
## Smoothing parameters:
## alpha = 0.9990792
## beta = 0.008088877
## gamma = 0.0001993413
## phi = 0.9429169
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4]
## 138.8518 0.5607148 -0.3583554 -0.3535446 -0.4886348 -0.5093881 -0.3833188
## s[-5] s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.06774395 0.5289274 0.6036705 0.5601461 0.353354 0.2559085 -0.2765088
##
## sigma^2: 0.076
##
## AIC AICc BIC
## 107.0460 123.7289 144.7442
ETS_model %>% gg_tsresiduals(lag_max = 12) + labs(title="ETS Model")
ETS_plot
ensemble_model <- training %>% model((ETS(CPI) + ARIMA(CPI) + NNETAR(CPI))/3)
ensemble_fcst <- ensemble_model %>% forecast(test, time = 100)
Ensemple_plot <- ensemble_model %>% forecast(test, time = 100) %>% autoplot(test) +
labs(title = "Ensemble model")+
xlab("Month") +
ylab("Consumer Price Index")
ensemble_accuracy <- accuracy(ensemble_fcst, test)
report(ensemble_model)
## Series: CPI
## Model: COMBINATION
## Combination: CPI * 0.333333333333333
##
## ====================================
##
## Series: CPI
## Model: COMBINATION
## Combination: CPI + CPI
##
## ======================
##
## Series: CPI
## Model: COMBINATION
## Combination: CPI + CPI
##
## ======================
##
## Series: CPI
## Model: ETS(A,Ad,A)
## Smoothing parameters:
## alpha = 0.9990792
## beta = 0.008088877
## gamma = 0.0001993413
## phi = 0.9429169
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4]
## 138.8518 0.5607148 -0.3583554 -0.3535446 -0.4886348 -0.5093881 -0.3833188
## s[-5] s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.06774395 0.5289274 0.6036705 0.5601461 0.353354 0.2559085 -0.2765088
##
## sigma^2: 0.076
##
## AIC AICc BIC
## 107.0460 123.7289 144.7442
##
## Series: CPI
## Model: ARIMA(1,1,0)(1,1,0)[12]
##
## Coefficients:
## ar1 sar1
## 0.2935 -0.4529
## s.e. 0.1494 0.1485
##
## sigma^2 estimated as 0.131: log likelihood=-18.33
## AIC=42.66 AICc=43.22 BIC=48.21
##
##
## Series: CPI
## Model: NNAR(1,1,2)[12]
##
## Average of 20 networks, each of which is
## a 2-2-1 network with 9 weights
## options were - linear output units
##
## sigma^2 estimated as 0.06882
fit <- training %>%
model('ARIMA' = ARIMA(CPI),
'ETS' = ETS(CPI),
"Neural Network (2 nodes)" = NNETAR(CPI),
"Neural Network (50 nodes)" = NNETAR(CPI, n_nodes = 50))
fcst <- fit %>% forecast(test, time =100)
fcst %>%
autoplot(test, level = NULL) +
autolayer(CPIdata) +
labs(title = "Model Forecasts of CPI data") +
ylab("Consumer Price Index")
accuracy_metrics = rbind("ETS" = ETS_accuracy,
"ARIMA" = ARIMA_accuracy,
"Neural Network" = nn_accuracy,
"Neural Network2" = nn_accuracy2,
"Ensemble" = ensemble_accuracy
)
accuracy_metrics <- accuracy_metrics %>% arrange(RMSE)
accuracy_metrics
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## * <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(CPI) Test 0.00233 0.386 0.318 8.91e-4 0.216 NaN NaN 0.695
## 2 ETS(CPI) Test 0.0975 0.409 0.336 6.55e-2 0.228 NaN NaN 0.687
## 3 (ETS(CPI) + ARIMA… Test -0.698 1.01 0.921 -4.75e-1 0.626 NaN NaN 0.744
## 4 NNETAR(CPI, n_nod… Test -1.45 1.68 1.51 -9.82e-1 1.02 NaN NaN 0.612
## 5 NNETAR(CPI) Test -1.93 2.32 2.06 -1.31e+0 1.40 NaN NaN 0.736
I used best fit ETS and ARIMA models using the fable package and they both performed significantly better than the Neural Network models, with ARIMA producing the lowest variance and bias across all accuracy metrics. I then used the fable package to create a best fit neural network model, which produced a NNAR(1,1,2)[12] model, averaging 20 networks. I compared this to a model which I manually increased the number of nodes to 50. Although the model with additional nodes produced a more accurate forecast, both neural network models over forecast the 12 months of the test set.