Set-up
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.1.3
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.8 v tsibble 1.1.3
## v dplyr 1.0.7 v tsibbledata 0.4.1
## v tidyr 1.1.4 v feasts 0.3.0
## v lubridate 1.8.0 v fable 0.3.2
## v ggplot2 3.3.5
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tsibble' was built under R version 4.1.3
## Warning: package 'tsibbledata' was built under R version 4.1.3
## Warning: package 'feasts' was built under R version 4.1.3
## Warning: package 'fabletools' was built under R version 4.1.3
## Warning: package 'fable' was built under R version 4.1.3
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
# Paths
projpath <- "C:/Users/dancu/Documents/Fall2022_ADEC743002"
rawdata <- file.path(projpath, "RawData")
Data: US Retail Employment with Consumer Sentiment
# Read in consumer sentiment index from St. Louis FRED
cons<-read.csv(file.path(rawdata, "UMCSENT.csv"))
names(cons)[names(cons) == "UMCSENT"] <- "cs"
cons$cs<-as.numeric(cons$cs)
## Warning: NAs introduced by coercion
consts <- cons %>%
mutate(Month = yearmonth(DATE)) %>%
select(-DATE) %>%
as_tsibble(index = Month)
# Start at 1978 where consistent consumer sentiment begins
consts<-consts %>% filter(year(Month)>=1978)
us_retail<-us_employment %>% filter(Title=="Retail Trade") %>% filter(year(Month)>=1978)
# Merge data sets, create training set
us_retail_cs<-merge(us_retail, consts)
us_retail_cs<-us_retail_cs %>% as_tsibble(index = Month)
us_retail_cs
## # A tsibble: 501 x 5 [1M]
## Month Series_ID Title Employed cs
## <mth> <chr> <chr> <dbl> <dbl>
## 1 1978 Jan CEU4200000001 Retail Trade 9545 83.7
## 2 1978 Feb CEU4200000001 Retail Trade 9426. 84.3
## 3 1978 Mar CEU4200000001 Retail Trade 9521. 78.8
## 4 1978 Apr CEU4200000001 Retail Trade 9696. 81.6
## 5 1978 May CEU4200000001 Retail Trade 9827. 82.9
## 6 1978 Jun CEU4200000001 Retail Trade 9922. 80
## 7 1978 Jul CEU4200000001 Retail Trade 9919. 82.4
## 8 1978 Aug CEU4200000001 Retail Trade 9950. 78.4
## 9 1978 Sep CEU4200000001 Retail Trade 10019. 80.4
## 10 1978 Oct CEU4200000001 Retail Trade 10048. 79.3
## # ... with 491 more rows
autoplot(us_retail_cs, .vars=Employed)

train<-us_retail_cs[1:477,]
Consumer sentiment x variable with ARIMA errors
fit <- train %>%
model(ARIMA(Employed ~ cs))
report(fit)
## Series: Employed
## Model: LM w/ ARIMA(2,0,1)(0,1,2)[12] errors
##
## Coefficients:
## ar1 ar2 ma1 sma1 sma2 cs intercept
## 1.9136 -0.9177 -0.7782 -0.4028 -0.1242 0.6185 152.7061
## s.e. 0.0356 0.0352 0.0571 0.0489 0.0477 0.4073 47.6107
##
## sigma^2 estimated as 1631: log likelihood=-2379.18
## AIC=4774.36 AICc=4774.67 BIC=4807.49
us_future <- new_data(train, 24) %>%
mutate(cs = mean(train[466:477,]$cs))
fc1<-forecast(fit, new_data = us_future)
fc1 %>%
autoplot(us_retail_cs) +
labs(y = "retail employed")

augment(fit) %>% features(.innov, ljung_box, lag=24)
## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(Employed ~ cs) 33.5 0.0941
# passes, but not by a lot
fc1
## # A fable: 24 x 5 [1M]
## # Key: .model [1]
## .model Month Employed .mean cs
## <chr> <mth> <dist> <dbl> <dbl>
## 1 ARIMA(Employed ~ cs) 2017 Oct N(15822, 1631) 15822. 95.4
## 2 ARIMA(Employed ~ cs) 2017 Nov N(16188, 3733) 16188. 95.4
## 3 ARIMA(Employed ~ cs) 2017 Dec N(16314, 6301) 16314. 95.4
## 4 ARIMA(Employed ~ cs) 2018 Jan N(15734, 9316) 15734. 95.4
## 5 ARIMA(Employed ~ cs) 2018 Feb N(15521, 12746) 15521. 95.4
## 6 ARIMA(Employed ~ cs) 2018 Mar N(15548, 16549) 15548. 95.4
## 7 ARIMA(Employed ~ cs) 2018 Apr N(15610, 20681) 15610. 95.4
## 8 ARIMA(Employed ~ cs) 2018 May N(15695, 25090) 15695. 95.4
## 9 ARIMA(Employed ~ cs) 2018 Jun N(15782, 29725) 15782. 95.4
## 10 ARIMA(Employed ~ cs) 2018 Jul N(15791, 34533) 15791. 95.4
## # ... with 14 more rows
accuracy(fc1, us_retail_cs)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(Employed ~ cs) Test 9.20 66.1 54.2 0.0565 0.342 0.214 0.217 0.589
Try same ARIMA parameters in a model without cs predictor
train %>% gg_tsdisplay(difference(Employed), plot_type='partial')
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

# ACF plot does suggest seasonal MA(q=2)
# a case can be made for AR(p=2), because PACF lag 2 has big spike
fit2 <- train %>%
model(ARIMA(Employed ~ pdq(2,0,1)+PDQ(0,1,2)))
report(fit2)
## Series: Employed
## Model: ARIMA(2,0,1)(0,1,2)[12] w/ drift
##
## Coefficients:
## ar1 ar2 ma1 sma1 sma2 constant
## 1.9160 -0.920 -0.7829 -0.4093 -0.119 0.6154
## s.e. 0.0344 0.034 0.0553 0.0485 0.047 0.1911
##
## sigma^2 estimated as 1635: log likelihood=-2380.32
## AIC=4774.65 AICc=4774.89 BIC=4803.64
fc2<-forecast(fit2, new_data = us_future)
fc2 %>%
autoplot(us_retail_cs) +
labs(y = "retail employed")

augment(fit2) %>% features(.innov, ljung_box, lag=24)
## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(Employed ~ pdq(2, 0, 1) + PDQ(0, 1, 2)) 36.0 0.0544
# close call
accuracy(fc2, us_retail_cs)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(Employed ~ pdq(2~ Test 7.29 65.3 53.7 0.0445 0.339 0.212 0.215 0.588
# The ARIMA without the external predictor is actually a little better
Try lagged consumer sentiment by 1 month with ARIMA errors
train2 <- train %>% mutate(Employed = c(NA, Employed[2:477]))
fit3 <- train2 %>%
model(ARIMA(Employed ~ pdq(d = 0) + cs + lag(cs)))
report(fit3)
## Series: Employed
## Model: LM w/ ARIMA(4,0,0)(2,1,0)[12] errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 sar1 sar2 cs lag(cs)
## 1.1827 -0.0373 -0.0259 -0.1291 -0.3448 -0.2199 0.8176 0.2752
## s.e. 0.0464 0.0721 0.0726 0.0463 0.0468 0.0459 0.3995 0.3988
##
## sigma^2 estimated as 1729: log likelihood=-2387.23
## AIC=4792.45 AICc=4792.85 BIC=4829.73
fc3<-forecast(fit3, new_data = us_future)
fc3 %>%
autoplot(us_retail_cs) +
labs(y = "retail employed")

fc3
## # A fable: 24 x 5 [1M]
## # Key: .model [1]
## .model Month Employed .mean cs
## <chr> <mth> <dist> <dbl> <dbl>
## 1 ARIMA(Employed ~ pdq(d = 0) + cs + lag~ 2017 Oct N(15829, 1729) 15829. 95.4
## 2 ARIMA(Employed ~ pdq(d = 0) + cs + lag~ 2017 Nov N(16193, 4146) 16193. 95.4
## 3 ARIMA(Employed ~ pdq(d = 0) + cs + lag~ 2017 Dec N(16319, 7350) 16319. 95.4
## 4 ARIMA(Employed ~ pdq(d = 0) + cs + lag~ 2018 Jan N(15746, 11450) 15746. 95.4
## 5 ARIMA(Employed ~ pdq(d = 0) + cs + lag~ 2018 Feb N(15540, 15935) 15540. 95.4
## 6 ARIMA(Employed ~ pdq(d = 0) + cs + lag~ 2018 Mar N(15569, 20695) 15569. 95.4
## 7 ARIMA(Employed ~ pdq(d = 0) + cs + lag~ 2018 Apr N(15630, 25613) 15630. 95.4
## 8 ARIMA(Employed ~ pdq(d = 0) + cs + lag~ 2018 May N(15717, 30565) 15717. 95.4
## 9 ARIMA(Employed ~ pdq(d = 0) + cs + lag~ 2018 Jun N(15804, 35488) 15804. 95.4
## 10 ARIMA(Employed ~ pdq(d = 0) + cs + lag~ 2018 Jul N(15813, 40336) 15813. 95.4
## # ... with 14 more rows
accuracy(fc3, us_retail_cs)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(Employed ~ pdq(d~ Test 8.44 48.9 41.1 0.0519 0.259 0.162 0.161 0.386
gg_tsresiduals(fit3)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

augment(fit3) %>% features(.innov, ljung_box, lag=24)
## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(Employed ~ pdq(d = 0) + cs + lag(cs)) 39.9 0.0218
# actually doesn't pass Ljung Box test
Try auto ARIMA
fit4 <- train %>%
model(ARIMA(Employed))
report(fit4)
## Series: Employed
## Model: ARIMA(1,1,1)(0,1,2)[12]
##
## Coefficients:
## ar1 ma1 sma1 sma2
## 0.8817 -0.7156 -0.4125 -0.1373
## s.e. 0.0490 0.0710 0.0476 0.0465
##
## sigma^2 estimated as 1664: log likelihood=-2378.54
## AIC=4767.08 AICc=4767.21 BIC=4787.78
fc4<-forecast(fit4, new_data = us_future)
fc4 %>%
autoplot(us_retail_cs) +
labs(y = "retail employed")

accuracy(fc4, us_retail_cs)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(Employed) Test 50.9 65.7 54.0 0.321 0.341 0.213 0.216 0.187
augment(fit4) %>% features(.innov, ljung_box, lag=24)
## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(Employed) 33.8 0.0891
# barely passes
Review some accuracy measures
c(ARIMAx = accuracy(fc1, us_retail_cs)["RMSE"], ARIMA = accuracy(fc2, us_retail_cs)["RMSE"], ARIMAxlag = accuracy(fc3, us_retail_cs)["RMSE"], ARIMAauto = accuracy(fc4, us_retail_cs)["RMSE"])
## $ARIMAx.RMSE
## [1] 66.11605
##
## $ARIMA.RMSE
## [1] 65.26866
##
## $ARIMAxlag.RMSE
## [1] 48.8649
##
## $ARIMAauto.RMSE
## [1] 65.73839
c(ARIMAx = accuracy(fc1, us_retail_cs)["MASE"], ARIMA = accuracy(fc2, us_retail_cs)["MASE"], ARIMAxlag = accuracy(fc3, us_retail_cs)["MASE"], ARIMAauto = accuracy(fc4, us_retail_cs)["MASE"])
## $ARIMAx.MASE
## [1] 0.2139277
##
## $ARIMA.MASE
## [1] 0.2120622
##
## $ARIMAxlag.MASE
## [1] 0.1621576
##
## $ARIMAauto.MASE
## [1] 0.2133636
model including lagged predictor is a close winner by these metrics