Forecasting Work

library(fpp3)

## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──

## ✔ tibble      3.1.8     ✔ tsibble     1.1.2
## ✔ dplyr       1.0.9     ✔ tsibbledata 0.4.0
## ✔ tidyr       1.2.0     ✔ feasts      0.2.2
## ✔ lubridate   1.8.0     ✔ fable       0.3.1
## ✔ ggplot2     3.3.6

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(readxl)

I load the dataset here.

gd <- readxl::read_excel("C:\\Users\\PythonAcct\\Downloads\\GARQGSP.xls")
gdp <- gd %>% 
  mutate(Quarter=yearquarter(Date)) %>% 
  as_tsibble(index=Quarter)
gdp %>% 
  autoplot(RGDP)+labs(title="Real GDP of Georgia in millions of chained 2012 dollars",x="Quarter",y="RGDP (Millions)")

This is quarterly real GDP data. It might be seasonally adjusted. I have no idea.

un <- readxl::read_excel("C:\\Users\\PythonAcct\\Downloads\\GAURN (2).xls")
unr <- un %>% 
  mutate(Month=yearmonth(Month)) %>% 
  as_tsibble(index=Month)
unr %>% 
  autoplot(URN)+labs(title="Unemployment Rate in Georgia",x="Month",y="Unemployment Rate (%)")

This is monthly unemployment rate data. It is NOT seasonally adjusted.

GDP Section

gdp %>% 
  autoplot(RGDP)+labs(title="Real GDP of Georgia in millions of chained 2012 dollars",x="Quarter",y="RGDP (Millions)")

There is obviously a trend in this data. It is seasonally adjusted already. The only data I could find that isn’t seasonally adjusted is yearly data.

First, I will compare the four benchmark forecasts and a TSLM model.

gdp_train <- gdp %>% 
  filter(year(Quarter)<2020)
gdp_test <- gdp %>% 
  filter(year(Quarter)>2019)
gdpfit <- gdp_train %>% 
  model(
    Mean = MEAN(RGDP),
    Naive = NAIVE(RGDP),
    Seasonal_naive = SNAIVE(RGDP),
    Drift = RW(RGDP ~ drift()),
    tslm1=TSLM(RGDP ~ trend())
  )
accuracy(gdpfit)

## # A tibble: 5 × 10
##   .model         .type          ME   RMSE    MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>          <chr>       <dbl>  <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mean           Traini…  1.36e-11 39355. 33572. -0.651  6.94  2.85  2.87  0.946
## 2 Naive          Traini…  2.02e+ 3  4595.  3633.  0.399  0.763 0.308 0.335 0.261
## 3 Seasonal_naive Traini…  7.87e+ 3 13726. 11784.  1.53   2.44  1     1     0.907
## 4 Drift          Traini… -1.18e-11  4129.  2860. -0.0303 0.611 0.243 0.301 0.261
## 5 tslm1          Traini… -3.88e-12 21350. 19724. -0.191  4.23  1.67  1.56  0.949

As far as training set accuracy goes, the drift blows the others out of the water.

gdpfc <- gdpfit %>% 
  forecast(h=10)
accuracy(gdpfc,gdp_test)

## # A tibble: 5 × 10
##   .model         .type      ME   RMSE    MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>          <chr>   <dbl>  <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift          Test  -10310. 22398. 15412. -2.00    2.87   NaN   NaN 0.491
## 2 Mean           Test   89833. 93150. 89833. 15.8    15.8    NaN   NaN 0.618
## 3 Naive          Test     787. 24649. 20456. -0.0604  3.69   NaN   NaN 0.618
## 4 Seasonal_naive Test    6255. 26084. 22303.  0.911   3.99   NaN   NaN 0.577
## 5 tslm1          Test   23017. 30575. 28305.  3.93    4.98   NaN   NaN 0.500

The drift model also outperforms the others on predicting the test set.

gdp_cv <- gdp %>%
  stretch_tsibble(.init = 5, .step = 1) %>% 
  relocate(Quarter, .id)
gdp_cv %>%
  model(Mean = MEAN(RGDP),
    Naive = NAIVE(RGDP),
    Seasonal_naive = SNAIVE(RGDP),
    Drift = RW(RGDP ~ drift()),
    tslm1=TSLM(RGDP ~ trend()+season())) %>%
  forecast(h = 1) %>%
  accuracy(gdp)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2022 Q4

## # A tibble: 5 × 10
##   .model         .type     ME   RMSE    MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>          <chr>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Drift          Test   1181.  8741.  4711. 0.216 0.945 0.334 0.482 -0.132
## 2 Mean           Test  34460. 52577. 39323. 6.29  7.42  2.79  2.90   0.946
## 3 Naive          Test   2142.  8894.  5231. 0.399 1.04  0.371 0.490 -0.114
## 4 Seasonal_naive Test   8457. 18257. 14208. 1.58  2.81  1.01  1.01   0.681
## 5 tslm1          Test  16348. 24805. 21141. 3.09  4.16  1.50  1.37   0.872

Cross-validation confirms that the drift model is the best one.

gdpdrift <- gdp %>% 
  model(Drift = RW(RGDP ~ drift()))
gg_tsresiduals(gdpdrift)

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_bin).

Now I will compare the drift model to some of the more advanced modeling methods.

My prediction for the ETS is additive trend, possibly multiplicative errors, and no seasonality. I’ll still compare it to a few others.

gdpetsfit <- gdp_train %>% 
  model(
  dampedmult=ETS(RGDP~error("M")+trend("Ad")+season("N")),
  regmult=ETS(RGDP~error("M")+trend("A")+season("N")),
  dampedadd=ETS(RGDP~error("A")+trend("Ad")+season("N")),
  regadd=ETS(RGDP~error("A")+trend("A")+season("N"))
  )
report(gdpetsfit)

## Warning in report.mdl_df(gdpetsfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.

## # A tibble: 4 × 9
##   .model      sigma2 log_lik   AIC  AICc   BIC       MSE      AMSE        MAE
##   <chr>        <dbl>   <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>      <dbl>
## 1 dampedmult 8.09e-5   -621. 1255. 1256. 1267. 15645423. 35806540.    0.00636
## 2 regmult    7.75e-5   -621. 1251. 1252. 1262. 15279895. 36770386.    0.00621
## 3 dampedadd  1.72e+7   -620. 1252. 1254. 1265. 15739703. 35774390. 2959.     
## 4 regadd     1.72e+7   -621. 1251. 1252. 1262. 16028270. 36223950. 2949.

The AAN ETS was the best.

accuracy(gdpetsfit)

## # A tibble: 4 × 10
##   .model     .type       ME  RMSE   MAE      MPE  MAPE  MASE RMSSE     ACF1
##   <chr>      <chr>    <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 dampedmult Training 471.  3955. 2957.  0.0961  0.638 0.251 0.288 -0.0339 
## 2 regmult    Training -38.8 3909. 2892. -0.00917 0.624 0.245 0.285 -0.0175 
## 3 dampedadd  Training 416.  3967. 2959.  0.0857  0.639 0.251 0.289 -0.00880
## 4 regadd     Training 321.  4004. 2949.  0.0676  0.637 0.250 0.292 -0.00719

The MAN ETS method also has the best RMSE.

gdpetsfc <- gdpetsfit %>% 
  forecast(h=10)
accuracy(gdpetsfc,gdp_test)

## # A tibble: 4 × 10
##   .model     .type      ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr>   <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedadd  Test  -17764. 24855. 17764. -3.30  3.30   NaN   NaN 0.359
## 2 dampedmult Test  -16250. 24237. 16250. -3.04  3.04   NaN   NaN 0.395
## 3 regadd     Test  -21286. 26577. 21286. -3.91  3.91   NaN   NaN 0.246
## 4 regmult    Test  -20726. 26248. 20726. -3.81  3.81   NaN   NaN 0.264

The multiplicative damped was the best here.

gdpets_cv <- gdp %>%
  stretch_tsibble(.init = 10, .step = 1) %>% 
  relocate(Quarter, .id)
gdp_cv %>%
  model(dampedmult=ETS(RGDP~error("M")+trend("Ad")+season("N")),
  regmult=ETS(RGDP~error("M")+trend("A")+season("N")),
  dampedadd=ETS(RGDP~error("A")+trend("Ad")+season("N")),
  regadd=ETS(RGDP~error("A")+trend("A")+season("N"))) %>%
  forecast(h = 1) %>%
  accuracy(gdp)

## Warning: 2 errors (1 unique) encountered for dampedmult
## [2] Not enough data to estimate this ETS model.

## Warning: 1 error encountered for regmult
## [1] Not enough data to estimate this ETS model.

## Warning: 2 errors (1 unique) encountered for dampedadd
## [2] Not enough data to estimate this ETS model.

## Warning: 1 error encountered for regadd
## [1] Not enough data to estimate this ETS model.

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2022 Q4

## # A tibble: 4 × 10
##   .model     .type    ME   RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>      <chr> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 dampedadd  Test  1296. 11817. 5583. 0.226  1.11 0.396 0.651 -0.0752
## 2 dampedmult Test  1323. 11291. 5567. 0.232  1.11 0.395 0.622 -0.0180
## 3 regadd     Test   929. 12023. 5470. 0.157  1.09 0.388 0.663 -0.143 
## 4 regmult    Test   780. 11296. 5309. 0.131  1.06 0.376 0.623 -0.130

Each method chose a different model. I will use the one with the lowest AICc and the lowest RMSE when cross-validating, with is AAN and MAdN

gdp %>% 
  gg_tsdisplay(difference(RGDP),plot_type="partial")

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

The first difference appears to be stationary.

gdp %>% 
  features(RGDP,unitroot_ndiffs)

## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      1

This confirms that one differences is necessary.

gdparfit <- gdp_train %>% 
  model(
    arima110=ARIMA(RGDP~pdq(1,1,0)),
    arima010=ARIMA(RGDP~pdq(0,1,0)),
    arima011=ARIMA(RGDP~pdq(0,1,1)),
    arima111=ARIMA(RGDP~pdq(1,1,1)),
    arima=ARIMA(RGDP),
    arima1=ARIMA(RGDP,stepwise = FALSE)
  )
report(gdparfit)

## Warning in report.mdl_df(gdparfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.

## # A tibble: 6 × 8
##   .model      sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>        <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 arima110 15100791.   -570. 1148. 1149. 1156. <cpl [1]> <cpl [4]>
## 2 arima010 15251663.   -571. 1148. 1148. 1154. <cpl [4]> <cpl [0]>
## 3 arima011 15141770.   -570. 1148. 1149. 1157. <cpl [4]> <cpl [1]>
## 4 arima111 14801345.   -570. 1147. 1148. 1156. <cpl [1]> <cpl [5]>
## 5 arima    14801690.   -561. 1127. 1128. 1134. <cpl [0]> <cpl [5]>
## 6 arima1   14272983.   -559. 1126. 1127. 1134. <cpl [3]> <cpl [0]>

The automatic ARIMA with no stepwise returned the lowest AICc.

accuracy(gdparfit)

## # A tibble: 6 × 10
##   .model   .type        ME  RMSE   MAE     MPE  MAPE  MASE RMSSE     ACF1
##   <chr>    <chr>     <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 arima110 Training  -4.86 3754. 2554. -0.0185 0.544 0.217 0.274 -0.0134 
## 2 arima010 Training  13.1  3806. 2562. -0.0152 0.547 0.217 0.277  0.164  
## 3 arima011 Training   5.83 3759. 2571. -0.0148 0.548 0.218 0.274  0.00301
## 4 arima111 Training 531.   3717. 2741.  0.109  0.589 0.233 0.271 -0.0223 
## 5 arima    Training 156.   3717. 2679.  0.0330 0.577 0.227 0.271  0.00970
## 6 arima1   Training  45.4  3617. 2657.  0.0117 0.577 0.225 0.264  0.0337

Same for this one.

gdparfc <- gdparfit %>% 
  forecast(h=10)
accuracy(gdparfc,gdp_test)

## # A tibble: 6 × 10
##   .model   .type      ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>   <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima    Test  -22151. 27346. 22151. -4.07  4.07   NaN   NaN 0.244
## 2 arima010 Test  -14443. 23980. 15687. -2.73  2.94   NaN   NaN 0.446
## 3 arima011 Test  -13694. 23661. 15512. -2.59  2.90   NaN   NaN 0.454
## 4 arima1   Test  -22147. 27463. 22147. -4.07  4.07   NaN   NaN 0.250
## 5 arima110 Test  -13068. 23663. 15613. -2.49  2.92   NaN   NaN 0.473
## 6 arima111 Test  -15433. 24439. 16159. -2.90  3.02   NaN   NaN 0.436

The best one here was 011.

gdpar_cv <- gdp %>%
  stretch_tsibble(.init = 10, .step = 1) %>% 
  relocate(Quarter, .id)
gdp_cv %>%
  model(arima110=ARIMA(RGDP~pdq(1,1,0)),
    arima010=ARIMA(RGDP~pdq(0,1,0)),
    arima011=ARIMA(RGDP~pdq(0,1,1)),
    arima111=ARIMA(RGDP~pdq(1,1,1)),
    arima=ARIMA(RGDP),
    arima1=ARIMA(RGDP,stepwise = FALSE)) %>%
  forecast(h = 1) %>%
  accuracy(gdp)

## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning: 5 errors (1 unique) encountered for arima111
## [5] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2022 Q4

## # A tibble: 6 × 10
##   .model   .type    ME   RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>    <chr> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 arima    Test  1118. 11475. 5600. 0.193 1.12  0.397 0.632 -0.0645
## 2 arima010 Test   891.  8928. 4648. 0.159 0.932 0.329 0.492 -0.107 
## 3 arima011 Test  1393. 10290. 5094. 0.251 1.01  0.361 0.567 -0.144 
## 4 arima1   Test  1248. 11436. 5451. 0.223 1.09  0.386 0.630 -0.0715
## 5 arima110 Test  1548. 11176. 5228. 0.279 1.04  0.371 0.616 -0.145 
## 6 arima111 Test  1772. 11506. 5196. 0.328 1.02  0.368 0.634 -0.187

The lowest one here with cross-validation was 010.

gdpfin_cv <- gdp %>%
  stretch_tsibble(.init = 10, .step = 1) %>% 
  relocate(Quarter, .id)
gdp_cv %>%
  model(
    arima010=ARIMA(RGDP~pdq(0,1,0)),
    arimacosn=ARIMA(RGDP~1+pdq(0,1,0)),
    dampedmult=ETS(RGDP~error("M")+trend("Ad")+season("N")),
    regadd=ETS(RGDP~error("A")+trend("A")+season("N")),
    Drift = RW(RGDP ~ drift())
    ) %>%
  forecast(h = 1) %>%
  accuracy(gdp)

## Warning: Model specification induces a quadratic or higher order polynomial trend. 
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Model specification induces a quadratic or higher order polynomial trend. 
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Model specification induces a quadratic or higher order polynomial trend. 
## This is generally discouraged, consider removing the constant or reducing the number of differences.

## Warning: 4 errors (1 unique) encountered for arimacosn
## [4] There are no ARIMA models to choose from after imposing the `order_constraint`, please consider allowing more models.

## Warning: 2 errors (1 unique) encountered for dampedmult
## [2] Not enough data to estimate this ETS model.

## Warning: 1 error encountered for regadd
## [1] Not enough data to estimate this ETS model.

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2022 Q4

## # A tibble: 5 × 10
##   .model     .type    ME   RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>      <chr> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 arima010   Test   891.  8928. 4648. 0.159 0.932 0.329 0.492 -0.107 
## 2 arimacosn  Test  1438.  8735. 4369. 0.283 0.863 0.310 0.481 -0.195 
## 3 dampedmult Test  1323. 11291. 5567. 0.232 1.11  0.395 0.622 -0.0180
## 4 Drift      Test  1181.  8741. 4711. 0.216 0.945 0.334 0.482 -0.132 
## 5 regadd     Test   929. 12023. 5470. 0.157 1.09  0.388 0.663 -0.143

The ARIMA(0,1,0) with a constant is the best model, slightly outperforming the drift model.

gdpfinfit <- gdp %>% 
  model(arimacosn=ARIMA(RGDP~1+pdq(0,1,0)))
gdpfinfc <- gdpfinfit %>% 
  forecast(h=12)
gdpfinfc %>% 
  autoplot(gdp)

gg_tsresiduals(gdpfinfit)

These look amazing.

hilo(gdpfinfc) %>% 
  select(-`80%`)

## # A tsibble: 12 x 5 [1Q]
## # Key:       .model [1]
##    .model    Quarter               RGDP   .mean                  `95%`
##    <chr>       <qtr>             <dist>   <dbl>                 <hilo>
##  1 arimacosn 2022 Q4 N(593996, 7.2e+07) 593996. [577397.9, 610593.8]95
##  2 arimacosn 2023 Q1  N(6e+05, 1.4e+08) 596123. [572649.9, 619596.1]95
##  3 arimacosn 2023 Q2  N(6e+05, 2.2e+08) 598250. [569501.6, 626998.7]95
##  4 arimacosn 2023 Q3  N(6e+05, 2.9e+08) 600377. [567181.3, 633573.3]95
##  5 arimacosn 2023 Q4  N(6e+05, 3.6e+08) 602504. [565390.2, 639618.7]95
##  6 arimacosn 2024 Q1  N(6e+05, 4.3e+08) 604632. [563975.0, 645288.2]95
##  7 arimacosn 2024 Q2   N(606759, 5e+08) 606759. [562844.6, 650672.9]95
##  8 arimacosn 2024 Q3 N(608886, 5.7e+08) 608886. [561939.7, 655832.1]95
##  9 arimacosn 2024 Q4 N(611013, 6.5e+08) 611013. [561219.1, 660807.0]95
## 10 arimacosn 2025 Q1 N(613140, 7.2e+08) 613140. [560652.7, 665627.6]95
## 11 arimacosn 2025 Q2 N(615267, 7.9e+08) 615267. [560218.0, 670316.6]95
## 12 arimacosn 2025 Q3 N(617394, 8.6e+08) 617394. [559897.4, 674891.6]95

These are the forecasted values.

unr %>% 
  autoplot(URN)+labs(title="Unemployment Rate in Georgia",x="Month",y="Unemployment Rate (%)")

Extremely high seasonality. Something of a downward trend. Maybe a cyclical component. Not too much heteroskedasticity either.

First, I will compare the four benchmark forecasts and a TSLM model.

unr_train <- unr %>% 
  filter(year(Month)<2020)
unr_test <- unr %>% 
  filter(year(Month)>2019)
unrfit <- unr_train %>% 
  model(
    Mean = MEAN(URN),
    Naive = NAIVE(URN),
    Seasonal_naive = SNAIVE(URN),
    Drift = RW(URN ~ drift()),
    tslm1=TSLM(URN ~ trend()+season()),
    tslm2=TSLM(URN ~ trend() + fourier(K = 6))
  )
accuracy(unrfit)

## # A tibble: 6 × 10
##   .model         .type           ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
##   <chr>          <chr>        <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Mean           Training -1.05e-16 1.77  1.35  -8.02  23.3  1.76  1.69  0.970 
## 2 Naive          Training -1.08e- 2 0.396 0.299 -0.417  5.14 0.390 0.378 0.0684
## 3 Seasonal_naive Training -1.07e- 1 1.05  0.767 -3.20  12.5  1     1     0.959 
## 4 Drift          Training  5.23e-17 0.396 0.299 -0.222  5.12 0.390 0.378 0.0684
## 5 tslm1          Training  7.72e-17 1.75  1.33  -7.77  22.8  1.74  1.67  0.986 
## 6 tslm2          Training -1.20e-16 1.75  1.33  -7.77  22.8  1.74  1.67  0.986

As far as training set accuracy goes, the drift slightly wins out.

unrfc <- unrfit %>% 
  forecast(h=34)
accuracy(unrfc,unr_test)

## # A tibble: 6 × 10
##   .model         .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>          <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift          Test   1.46   2.53  1.54  21.7   25.0   NaN   NaN 0.736
## 2 Mean           Test  -1.43   2.57  2.34 -52.3   61.8   NaN   NaN 0.752
## 3 Naive          Test   1.27   2.49  1.49  16.2   24.4   NaN   NaN 0.752
## 4 Seasonal_naive Test   0.944  2.37  1.43   8.27  24.5   NaN   NaN 0.729
## 5 tslm1          Test  -1.48   2.62  2.37 -53.4   62.6   NaN   NaN 0.739
## 6 tslm2          Test  -1.48   2.62  2.37 -53.4   62.6   NaN   NaN 0.739

The seasonal naive is the best on the test set.

unr_cv <- unr %>%
  stretch_tsibble(.init = 10, .step = 1) %>% 
  relocate(Month, .id)
unr_cv %>%
  model(
    Mean = MEAN(URN),
    Naive = NAIVE(URN),
    Seasonal_naive = SNAIVE(URN),
    Drift = RW(URN ~ drift()),
    tslm1=TSLM(URN ~ trend()+season()),
    tslm2=TSLM(URN ~ trend() + fourier(K = 6))
    ) %>%
  forecast(h = 1) %>%
  accuracy(unr)

## Warning: prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2023 Jan

## # A tibble: 6 × 10
##   .model         .type       ME  RMSE   MAE     MPE  MAPE  MASE RMSSE    ACF1
##   <chr>          <chr>    <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 Drift          Test   0.0112  0.527 0.318  -0.134  5.50 0.365 0.407 -0.0255
## 2 Mean           Test  -0.310   1.86  1.44  -14.3   26.5  1.65  1.43   0.956 
## 3 Naive          Test  -0.00957 0.525 0.318  -0.524  5.52 0.365 0.406 -0.0258
## 4 Seasonal_naive Test  -0.114   1.29  0.871  -4.44  15.0  1     1      0.890 
## 5 tslm1          Test   0.370   1.87  1.33   -0.744 22.8  1.53  1.45   0.966 
## 6 tslm2          Test   0.336   1.96  1.37   -1.17  23.2  1.57  1.51   0.910

Cross-validation says the NAIVE is the best one. Weird.

Now I will compare the more advanced modeling methods.

My prediction for the ETS is additive trend, possibly multiplicative errors, and additive seasonality. I’ll still compare it to a few others.

unretsfit <- unr_train %>% 
  model(
  dampedmult=ETS(URN~error("M")+trend("Ad")+season("M")),
  regmult=ETS(URN~error("M")+trend("A")+season("M")),
  dampedadd=ETS(URN~error("M")+trend("Ad")+season("A")),
  regadd=ETS(URN~error("M")+trend("A")+season("A")),
  ets=ETS(URN)
  )
report(unretsfit)

## Warning in report.mdl_df(unretsfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.

## # A tibble: 5 × 9
##   .model      sigma2 log_lik   AIC  AICc   BIC    MSE  AMSE    MAE
##   <chr>        <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>
## 1 dampedmult 0.00172   -891. 1818. 1819. 1895. 0.0576 0.120 0.0302
## 2 regmult    0.00176   -899. 1831. 1832. 1904. 0.0597 0.126 0.0309
## 3 dampedadd  0.00167   -883. 1802. 1803. 1879. 0.0542 0.113 0.0301
## 4 regadd     0.00173   -893. 1819. 1820. 1892. 0.0556 0.121 0.0304
## 5 ets        0.0552    -882. 1799. 1800. 1876. 0.0534 0.112 0.172

The automatic model was the best.

accuracy(unretsfit)

## # A tibble: 5 × 10
##   .model     .type           ME  RMSE   MAE     MPE  MAPE  MASE RMSSE   ACF1
##   <chr>      <chr>        <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 dampedmult Training -0.00421  0.240 0.176 -0.0977  3.00 0.229 0.229 0.0659
## 2 regmult    Training -0.0111   0.244 0.181 -0.250   3.07 0.235 0.233 0.159 
## 3 dampedadd  Training -0.00496  0.233 0.174 -0.0976  2.99 0.226 0.222 0.0647
## 4 regadd     Training  0.000554 0.236 0.175  0.0758  3.01 0.229 0.225 0.0768
## 5 ets        Training -0.00417  0.231 0.172 -0.0743  2.97 0.224 0.221 0.0364

Same here.

unretsfc <- unretsfit %>% 
  forecast(h=34)
accuracy(unretsfc,unr_test)

## # A tibble: 5 × 10
##   .model     .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedadd  Test   1.12  2.42  1.44  12.8  23.8   NaN   NaN 0.738
## 2 dampedmult Test   1.30  2.47  1.47  17.5  23.6   NaN   NaN 0.736
## 3 ets        Test   1.17  2.42  1.43  14.3  23.3   NaN   NaN 0.734
## 4 regadd     Test   1.74  2.59  1.74  30.7  30.7   NaN   NaN 0.673
## 5 regmult    Test   1.20  2.44  1.45  15.0  23.5   NaN   NaN 0.741

My damped add was actually better here.

unrets_cv <- unr %>%
  stretch_tsibble(.init = 10, .step = 5) %>% 
  relocate(Month, .id)
unrets_cv %>%
  model(dampedmult=ETS(URN~error("M")+trend("Ad")+season("M")),
  regmult=ETS(URN~error("M")+trend("A")+season("M")),
  dampedadd=ETS(URN~error("M")+trend("Ad")+season("A")),
  regadd=ETS(URN~error("M")+trend("A")+season("A")),
  ets=ETS(URN)
  ) %>%
  forecast(h = 1) %>%
  accuracy(unr)

## Warning: 2 errors (2 unique) encountered for dampedmult
## [1] A seasonal ETS model cannot be used for this data.
## [1] Not enough data to estimate this ETS model.

## Warning: 2 errors (2 unique) encountered for regmult
## [1] A seasonal ETS model cannot be used for this data.
## [1] Not enough data to estimate this ETS model.

## Warning: 2 errors (2 unique) encountered for dampedadd
## [1] A seasonal ETS model cannot be used for this data.
## [1] Not enough data to estimate this ETS model.

## Warning: 2 errors (2 unique) encountered for regadd
## [1] A seasonal ETS model cannot be used for this data.
## [1] Not enough data to estimate this ETS model.

## # A tibble: 5 × 10
##   .model     .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
##   <chr>      <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 dampedadd  Test  -0.0382 0.306 0.230 -1.03   4.41 0.263 0.236 0.129 
## 2 dampedmult Test  -0.0346 0.317 0.238 -0.926  4.52 0.272 0.244 0.168 
## 3 ets        Test  -0.0438 0.356 0.252 -0.959  4.62 0.288 0.274 0.196 
## 4 regadd     Test  -0.0384 0.318 0.232 -0.826  4.33 0.265 0.245 0.0494
## 5 regmult    Test  -0.0347 0.325 0.245 -0.804  4.51 0.280 0.250 0.163

The damped additive is the best here.

unr %>% 
  gg_tsdisplay(difference(URN),plot_type="partial")

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

Tough to say if this is stationary or not.

unr %>% 
  features(URN,unitroot_ndiffs)

## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      1

This confirms that one differences is necessary.

I’ll check seasonal differences now.

unr %>% 
  features(URN,unitroot_nsdiffs)

## # A tibble: 1 × 1
##   nsdiffs
##     <int>
## 1       0

No seasonal difference is required.

unrarfit <- unr_train %>% 
  model(
    arima110=ARIMA(URN~pdq(1,1,0)),
    arima010=ARIMA(URN~pdq(0,1,0)),
    arima011=ARIMA(URN~pdq(0,1,1)),
    arima111=ARIMA(URN~pdq(1,1,1)),
    arima=ARIMA(URN),
    arima1=ARIMA(URN,stepwise = FALSE)
  )
report(unrarfit)

## Warning in report.mdl_df(unrarfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.

## # A tibble: 6 × 8
##   .model   sigma2 log_lik    AIC   AICc   BIC ar_roots   ma_roots  
##   <chr>     <dbl>   <dbl>  <dbl>  <dbl> <dbl> <list>     <list>    
## 1 arima110 0.0563    5.07  -4.14  -4.09  8.59 <cpl [1]>  <cpl [12]>
## 2 arima010 0.0564    4.10  -4.19  -4.17  4.30 <cpl [0]>  <cpl [12]>
## 3 arima011 0.0563    4.85  -3.69  -3.65  9.04 <cpl [0]>  <cpl [13]>
## 4 arima111 0.0550   11.6  -15.1  -15.0   1.87 <cpl [1]>  <cpl [13]>
## 5 arima    0.0542   15.4  -18.8  -18.7   6.64 <cpl [2]>  <cpl [14]>
## 6 arima1   0.0545   14.4  -14.8  -14.6  14.9  <cpl [26]> <cpl [13]>

The automatic ARIMA returned the lowest AICc.

accuracy(unrarfit)

## # A tibble: 6 × 10
##   .model   .type          ME  RMSE   MAE     MPE  MAPE  MASE RMSSE     ACF1
##   <chr>    <chr>       <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 arima110 Training  0.00308 0.234 0.168  0.0808  2.91 0.219 0.223 -0.00891
## 2 arima010 Training  0.00328 0.234 0.167  0.0847  2.90 0.218 0.224  0.0616 
## 3 arima011 Training  0.00313 0.234 0.168  0.0817  2.91 0.219 0.223  0.00729
## 4 arima111 Training  0.00225 0.231 0.166  0.0778  2.90 0.217 0.220 -0.0566 
## 5 arima    Training -0.00628 0.229 0.166 -0.139   2.89 0.216 0.219 -0.00123
## 6 arima1   Training -0.00679 0.229 0.167 -0.164   2.91 0.218 0.219 -0.0523

Same for this one.

unrarfc <- unrarfit %>% 
  forecast(h=34)
accuracy(unrarfc,unr_test)

## # A tibble: 6 × 10
##   .model   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima    Test  1.01   2.44  1.49  9.46  25.8   NaN   NaN 0.751
## 2 arima010 Test  1.85   2.67  1.85 33.9   33.9   NaN   NaN 0.666
## 3 arima011 Test  1.86   2.67  1.86 34.0   34.0   NaN   NaN 0.666
## 4 arima1   Test  0.809  2.46  1.60  3.25  29.8   NaN   NaN 0.771
## 5 arima110 Test  1.86   2.67  1.86 34.0   34.0   NaN   NaN 0.665
## 6 arima111 Test  1.82   2.64  1.82 32.9   32.9   NaN   NaN 0.668

The best one here was also the automatic

unrar_cv <- unr %>%
  stretch_tsibble(.init = 10, .step = 5) %>% 
  relocate(Month, .id)
unrar_cv %>%
  model(arima110=ARIMA(URN~pdq(1,1,0)),
    arima010=ARIMA(URN~pdq(0,1,0)),
    arima011=ARIMA(URN~pdq(0,1,1)),
    arima111=ARIMA(URN~pdq(1,1,1)),
    arima=ARIMA(URN)) %>%
  forecast(h = 1) %>%
  accuracy(unr)

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.

## Warning: 1 error encountered for arima111
## [1] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots

## # A tibble: 5 × 10
##   .model   .type      ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>    <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 arima    Test  -0.105  0.588 0.289 -2.72  6.07 0.331 0.453 0.0921
## 2 arima010 Test  -0.0932 0.578 0.287 -2.51  6.06 0.329 0.445 0.0919
## 3 arima011 Test  -0.0838 0.510 0.288 -2.17  5.81 0.329 0.393 0.139 
## 4 arima110 Test  -0.0946 0.583 0.287 -2.55  6.07 0.328 0.449 0.0843
## 5 arima111 Test  -0.0580 0.361 0.256 -1.15  4.72 0.292 0.278 0.153

The lowest one here with cross-validation was 111. Awesome.

unrcons_cv <- unr %>%
  stretch_tsibble(.init = 10, .step = 5) %>% 
  relocate(Month, .id)
unrcons_cv %>%
  model(
    arimanocons=ARIMA(URN~pdq(1,1,1)),
    arima111=ARIMA(URN~1+pdq(1,1,1))
    ) %>%
  forecast(h = 1) %>%
  accuracy(unr)

## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.

## Warning: Model specification induces a quadratic or higher order polynomial trend. 
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Model specification induces a quadratic or higher order polynomial trend. 
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Model specification induces a quadratic or higher order polynomial trend. 
## This is generally discouraged, consider removing the constant or reducing the number of differences.

## Warning: 1 error encountered for arimanocons
## [1] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots

## Warning: 65 errors (1 unique) encountered for arima111
## [65] There are no ARIMA models to choose from after imposing the `order_constraint`, please consider allowing more models.

## # A tibble: 2 × 10
##   .model      .type      ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>       <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima111    Test  -0.143  0.452 0.322 -3.01  6.14 0.368 0.348 0.140
## 2 arimanocons Test  -0.0580 0.361 0.256 -1.15  4.72 0.292 0.278 0.153

I need to see if a constant is necessary. It isn’t.

unrfin_cv <- unr %>%
  stretch_tsibble(.init = 10, .step = 1) %>% 
  relocate(Month, .id)
unrfin_cv %>%
  model(
     arima111=ARIMA(URN~pdq(1,1,1)),
     dampedadd=ETS(URN~error("M")+trend("Ad")+season("A")),
     Naive = NAIVE(URN)
    ) %>%
  forecast(h = 1) %>%
  accuracy(unr)

## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning: 6 errors (1 unique) encountered for arima111
## [6] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots

## Warning: 9 errors (2 unique) encountered for dampedadd
## [3] A seasonal ETS model cannot be used for this data.
## [6] Not enough data to estimate this ETS model.

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2023 Jan

## # A tibble: 3 × 10
##   .model    .type       ME  RMSE   MAE     MPE  MAPE  MASE RMSSE    ACF1
##   <chr>     <chr>    <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 arima111  Test  -0.00822 0.521 0.249 -0.306   4.35 0.286 0.402 -0.131 
## 2 dampedadd Test  -0.00202 0.503 0.222 -0.0949  3.84 0.255 0.388 -0.101 
## 3 Naive     Test  -0.00957 0.525 0.318 -0.524   5.52 0.365 0.406 -0.0258

The damped additive model is the best of all forecasting methods.

unrfinfit <- unr %>% 
  model(dampedadd=ETS(URN~error("M")+trend("Ad")+season("A")))
unrfinfc <- unrfinfit %>% 
  forecast(h=36)
unrview <- unr %>% 
  filter(year(Month)>2020)
unrfinfc %>% 
  autoplot(unrview,level=NULL)

hilo(unrfinfc) %>% 
  select(-`80%`)

## # A tsibble: 36 x 5 [1M]
## # Key:       .model [1]
##    .model       Month           URN .mean                    `95%`
##    <chr>        <mth>        <dist> <dbl>                   <hilo>
##  1 dampedadd 2023 Jan N(3.1, 0.082)  3.11 [ 2.5466100, 3.668533]95
##  2 dampedadd 2023 Feb    N(3, 0.22)  2.97 [ 2.0363325, 3.894918]95
##  3 dampedadd 2023 Mar  N(2.6, 0.43)  2.64 [ 1.3544505, 3.916972]95
##  4 dampedadd 2023 Apr  N(2.7, 0.71)  2.75 [ 1.0983089, 4.391851]95
##  5 dampedadd 2023 May   N(2.9, 1.1)  2.88 [ 0.8533244, 4.909380]95
##  6 dampedadd 2023 Jun   N(3.5, 1.6)  3.51 [ 1.0606319, 5.954903]95
##  7 dampedadd 2023 Jul   N(3.3, 2.2)  3.32 [ 0.4402794, 6.200429]95
##  8 dampedadd 2023 Aug     N(3, 2.9)  3.03 [-0.2875660, 6.351201]95
##  9 dampedadd 2023 Sep   N(2.8, 3.7)  2.84 [-0.9268358, 6.598785]95
## 10 dampedadd 2023 Oct   N(2.9, 4.6)  2.86 [-1.3501111, 7.074651]95
## # … with 26 more rows
## # ℹ Use `print(n = ...)` to see more rows

These are the forecasted values.

unrfinfit2 <- unr %>% 
  model(dampedadd=ETS(URN~error("M")+trend("Ad")+season("A")))
unrfinfc2 <- unrfinfit2 %>% 
  forecast(h=12)
unrview2 <- unr %>% 
  filter(year(Month)>2020)
unrfinfc2 %>% 
  autoplot(unrview2)

unrfinfc2

## # A fable: 12 x 4 [1M]
## # Key:     .model [1]
##    .model       Month           URN .mean
##    <chr>        <mth>        <dist> <dbl>
##  1 dampedadd 2023 Jan N(3.1, 0.082)  3.11
##  2 dampedadd 2023 Feb    N(3, 0.22)  2.97
##  3 dampedadd 2023 Mar  N(2.6, 0.43)  2.64
##  4 dampedadd 2023 Apr  N(2.7, 0.71)  2.75
##  5 dampedadd 2023 May   N(2.9, 1.1)  2.88
##  6 dampedadd 2023 Jun   N(3.5, 1.6)  3.51
##  7 dampedadd 2023 Jul   N(3.3, 2.2)  3.32
##  8 dampedadd 2023 Aug     N(3, 2.9)  3.03
##  9 dampedadd 2023 Sep   N(2.8, 3.7)  2.84
## 10 dampedadd 2023 Oct   N(2.9, 4.6)  2.86
## 11 dampedadd 2023 Nov   N(2.6, 5.7)  2.63
## 12 dampedadd 2023 Dec   N(2.7, 6.8)  2.66

Forecasting Work

Brandon Pascali

2022-11-22

GDP Section