Time Series Homework

Demetri Chokshi-Fox


library(readr)
library(forecast)
library(seasonal)
library(fpp2)
library(data.table)

Data is civilian unemployment rate not seasonally adjusted from St. Louis FRED

UNRATENSA_1_ <- read_csv("UNRATENSA (1).csv")
## Parsed with column specification:
## cols(
##   DATE = col_date(format = ""),
##   UNRATENSA = col_double()
## )
unempusa = ts(UNRATENSA_1_[,2], start=c(2008, 7), end=c(2019, 7), frequency=12)

Create training and test data

train = window(unempusa, end=c(2018, 7))
test = window(unempusa, start=c(2018, 7))
autoplot(unempusa)

### STL

stl = mstl(train)
autoplot(stl)

### clear seasonality, upwards trend until mid 2010 then downward trend.

stlfc = forecast(stl, h=12)
autoplot(stlfc) + autolayer(test)

stlacc = accuracy(stlfc, test)
stlacc
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set -0.03098243 0.1508853 0.1211365 -0.4348986 1.819758 0.1409166
## Test set      0.12736847 0.1644506 0.1346907  3.3099326 3.497683 0.1566840
##                  ACF1 Theil's U
## Training set 0.127274        NA
## Test set     0.525586 0.4986203

ETS

ets = ets(train)
etsfc = forecast(ets, h=12)
autoplot(etsfc) + autolayer(test)

etsacc = accuracy(etsfc, test)
etsacc
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set -0.02583196 0.1622114 0.1279553 -0.3483897 1.893679 0.1488487
## Test set      0.15964841 0.1911193 0.1641834  4.1843535 4.300635 0.1909924
##                   ACF1 Theil's U
## Training set 0.1782493        NA
## Test set     0.4565990 0.5847944

ARIMA

arima = auto.arima(train, seasonal=T)
## Warning in auto.arima(train, seasonal = T): Having 3 or more differencing
## operations is not recommended. Please consider reducing the total number of
## differences.
arimafc = forecast(arima, h=12)
autoplot(arimafc) + autolayer(test)

#ARIMA PIs grow with the length of forecast window
arimaacc = accuracy(arimafc, test)
arimaacc
##                      ME      RMSE       MAE        MPE      MAPE      MASE
## Training set 0.02960277 0.1683211 0.1359180  0.4428351  2.123404 0.1581116
## Test set     0.47315670 0.5253638 0.4731567 12.5806168 12.580617 0.5504171
##                     ACF1 Theil's U
## Training set -0.07717167        NA
## Test set      0.62205499  1.613715

Neural Nets

neuralnet = nnetar(train)
neuralnetfc = forecast(neuralnet, h=12)
autoplot(neuralnetfc) + autolayer(test)

neuralacc = accuracy(neuralnetfc, test)
neuralacc
##                         ME      RMSE       MAE        MPE     MAPE
## Training set -0.0006089933 0.2513527 0.1871644 -0.1723724 2.885508
## Test set     -0.1910771136 0.3262316 0.2764585 -5.6667909 7.655926
##                   MASE      ACF1 Theil's U
## Training set 0.2177260 0.2450840        NA
## Test set     0.3216006 0.3962918  0.993188

Model Accuracy Evaluation

accuracytable = data.frame(matrix(ncol = 8, nrow = 4))
colnames(accuracytable) = c("ME", "RMSE", "MAE", "MPE", "MAPE", "MASE", "ACF1", "Theil's U")
rownames(accuracytable) = c("ETS Test Set", "STL Test Set", "ARIMA Test Set", "Neural Nets Test Set")
accuracytable[1,] = etsacc[2,]
accuracytable[2,] = stlacc[2,]
accuracytable[3,] = arimaacc[2,]
accuracytable[4,] = neuralacc[2,]
accuracytable
##                              ME      RMSE       MAE       MPE      MAPE
## ETS Test Set          0.1596484 0.1911193 0.1641834  4.184354  4.300635
## STL Test Set          0.1273685 0.1644506 0.1346907  3.309933  3.497683
## ARIMA Test Set        0.4731567 0.5253638 0.4731567 12.580617 12.580617
## Neural Nets Test Set -0.1910771 0.3262316 0.2764585 -5.666791  7.655926
##                           MASE      ACF1 Theil's U
## ETS Test Set         0.1909924 0.4565990 0.5847944
## STL Test Set         0.1566840 0.5255860 0.4986203
## ARIMA Test Set       0.5504171 0.6220550 1.6137145
## Neural Nets Test Set 0.3216006 0.3962918 0.9931880
autoplot(unempusa)+
  autolayer(etsfc, series="ETS", PI=FALSE) +
  autolayer(stlfc, series="STL",PI=FALSE) +
  autolayer(arimafc, series="ARIMA", PI=FALSE) +
  autolayer(neuralnetfc, series="NEURAL NETS", PI=FALSE) +
  xlab("Year") +
  ylab("Unemployment") +
  ggtitle("Civilian Unemployment in the USA")+
  guides(colour=guide_legend(title="Forecast"))

STL model has the lowest MAE, MAPE, and RMSE, making it the most accurate model out of the four.