Homework 3: Time Series Data

#For this assignment, I decided to take a look at CPI over the last 30 years based on monthly CPI values where 100 is represented by the years 1980-1984. 

CPIAUCSL$observation_date = as.Date(CPIAUCSL$observation_date, "%Y-%m-%d")

## Warning in as.POSIXlt.POSIXct(x, tz = tz): unknown timezone '%Y-%m-%d'

ts_cpi_changes = ts(data = CPIAUCSL$CPIAUCSL, start = 1991, end = 2021, frequency = 12)

autoplot(ts_cpi_changes)

cpi_hw = hw(ts_cpi_changes,
                     seasonal = "multiplicative")
hw_damp_cpi <- hw(ts_cpi_changes,
                      seasonal = "multiplicative",
                      damped = TRUE)
autoplot(cpi_hw)

autoplot(hw_damp_cpi)

ts_cpi_train = window(ts_cpi_changes,
                          end = c(2006))
ts_cpi_test = window(ts_cpi_changes,
                         start = 1991)
# Holt-Winters' damped method.
damp_hw_cpi = hw(ts_cpi_train,
                            h = 180,
                            seasonal = "multiplicative",
                            damped = TRUE)
dam_hw_cpi_plot = autoplot(damp_hw_cpi)
dam_hw_cpi_plot

damp_check_cpi = accuracy(damp_hw_cpi, ts_cpi_test)

# try Holt-Winters' method.
hw_cpi = hw(ts_cpi_train,
                           h = 180,
                           seasonal = "multiplicative")
hw_cpi_plot = autoplot(hw_cpi)
hw_cpi_plot

hw_check_cpi = accuracy(hw_cpi, ts_cpi_test)

damp_check_cpi

##                       ME       RMSE        MAE        MPE      MAPE      MASE
## Training set  0.05726379  0.3925191  0.2662088 0.03294098 0.1526409 0.0608786
## Test set     23.98546177 28.1580287 23.9858390 9.80065621 9.8008430 5.4852595
##                   ACF1 Theil's U
## Training set 0.2070482        NA
## Test set     0.9721291  31.80726

hw_check_cpi

##                       ME      RMSE       MAE          MPE      MAPE       MASE
## Training set -0.01365886 0.5317786 0.3737332 -0.008831955 0.2157153 0.08546808
## Test set      8.52105684 9.7908169 8.5552362  3.509109100 3.5259759 1.95647486
##                   ACF1 Theil's U
## Training set 0.5741122        NA
## Test set     0.9338561  11.14658

stl_cpi_train = ts_cpi_train %>%
  stlm(
    s.window = 12,
    robust = TRUE,
    method = "ets",
    lambda = BoxCox.lambda(ts_cpi_train)
  ) %>%
  forecast(
    h = 180,
    lambda = BoxCox.lambda(ts_cpi_train)
    )

## Warning in InvBoxCox(fcast$mean, lambda, biasadj, fcast): biasadj information
## not found, defaulting to FALSE.

stl_cpi_plot = autoplot(stl_cpi_train)
stl_cpi_plot

accuracy(stl_cpi_train, ts_cpi_test)

##                         ME       RMSE        MAE           MPE       MAPE
## Training set  5.659981e-04  0.3830648  0.2510037   0.002650424  0.1433799
## Test set     -2.711643e+01 35.7643265 27.7342462 -10.896917589 11.1853786
##                    MASE      ACF1 Theil's U
## Training set 0.05740139 0.2232740        NA
## Test set     6.34247308 0.9875923  40.01661

arima_cpi =  forecast(auto.arima(ts_cpi_train), h=180)
arima_cpi_plot = autoplot(arima_cpi)
arima_cpi_plot

accuracy(arima_cpi)

##                       ME      RMSE       MAE          MPE      MAPE       MASE
## Training set 0.001161479 0.3392318 0.2210139 -0.001256395 0.1270708 0.05054311
##                     ACF1
## Training set 0.005858209

checkresiduals(damp_hw_cpi)

## 
##  Ljung-Box test
## 
## data:  Residuals from Damped Holt-Winters' multiplicative method
## Q* = 115.37, df = 7, p-value < 2.2e-16
## 
## Model df: 17.   Total lags used: 24

accuracy(damp_hw_cpi)

##                      ME      RMSE       MAE        MPE      MAPE      MASE
## Training set 0.05726379 0.3925191 0.2662088 0.03294098 0.1526409 0.0608786
##                   ACF1
## Training set 0.2070482

checkresiduals(hw_cpi)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 151.91, df = 8, p-value < 2.2e-16
## 
## Model df: 16.   Total lags used: 24

accuracy(hw_cpi)

##                       ME      RMSE       MAE          MPE      MAPE       MASE
## Training set -0.01365886 0.5317786 0.3737332 -0.008831955 0.2157153 0.08546808
##                   ACF1
## Training set 0.5741122

checkresiduals(stl_cpi_train)

## Warning in checkresiduals(stl_cpi_train): The fitted degrees of freedom is based
## on the model used for the seasonally adjusted data.

## 
##  Ljung-Box test
## 
## data:  Residuals from STL +  ETS(M,A,N)
## Q* = 102.39, df = 20, p-value = 4.703e-13
## 
## Model df: 4.   Total lags used: 24

accuracy(stl_cpi_train)

##                        ME      RMSE       MAE         MPE      MAPE       MASE
## Training set 0.0005659981 0.3830648 0.2510037 0.002650424 0.1433799 0.05740139
##                  ACF1
## Training set 0.223274

checkresiduals(arima_cpi)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,4)(0,0,1)[12] with drift
## Q* = 30.588, df = 17, p-value = 0.02241
## 
## Model df: 7.   Total lags used: 24

accuracy(arima_cpi)

##                       ME      RMSE       MAE          MPE      MAPE       MASE
## Training set 0.001161479 0.3392318 0.2210139 -0.001256395 0.1270708 0.05054311
##                     ACF1
## Training set 0.005858209

#After looking at the residuals and the ME, RSME, and ACF of each of the created models, I narrowed it down to 2 contenders: The ARIMA model and the STL model

arima_layer = autoplot(ts_cpi_changes) + 
  autolayer(arima_cpi)
arima_layer

stl_layer = autoplot(ts_cpi_changes) + 
  autolayer(stl_cpi_train)
stl_layer

#After looking at the comparisons of each model against previous data, the ARIMA model clearly follows the historical CPI much more closely than the STL model, this is the one to use for producing CPI forecasts going forward.

Homework 3: Time Series Data

Dan McSwain

12/5/2021