library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.2
## ✔ dplyr 1.0.9 ✔ tsibbledata 0.4.0
## ✔ tidyr 1.2.0 ✔ feasts 0.2.2
## ✔ lubridate 1.8.0 ✔ fable 0.3.1
## ✔ ggplot2 3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(readxl)
I load the dataset here.
gd <- readxl::read_excel("C:\\Users\\PythonAcct\\Downloads\\GARQGSP.xls")
gdp <- gd %>%
mutate(Quarter=yearquarter(Date)) %>%
as_tsibble(index=Quarter)
gdp %>%
autoplot(RGDP)+labs(title="Real GDP of Georgia in millions of chained 2012 dollars",x="Quarter",y="RGDP (Millions)")
This is quarterly real GDP data. It might be seasonally adjusted. I have
no idea.
un <- readxl::read_excel("C:\\Users\\PythonAcct\\Downloads\\GAURN (2).xls")
unr <- un %>%
mutate(Month=yearmonth(Month)) %>%
as_tsibble(index=Month)
unr %>%
autoplot(URN)+labs(title="Unemployment Rate in Georgia",x="Month",y="Unemployment Rate (%)")
This is monthly unemployment rate data. It is NOT seasonally adjusted.
gdp %>%
autoplot(RGDP)+labs(title="Real GDP of Georgia in millions of chained 2012 dollars",x="Quarter",y="RGDP (Millions)")
There is obviously a trend in this data. It is seasonally adjusted already. The only data I could find that isn’t seasonally adjusted is yearly data.
First, I will compare the four benchmark forecasts and a TSLM model.
gdp_train <- gdp %>%
filter(year(Quarter)<2020)
gdp_test <- gdp %>%
filter(year(Quarter)>2019)
gdpfit <- gdp_train %>%
model(
Mean = MEAN(RGDP),
Naive = NAIVE(RGDP),
Seasonal_naive = SNAIVE(RGDP),
Drift = RW(RGDP ~ drift()),
tslm1=TSLM(RGDP ~ trend())
)
accuracy(gdpfit)
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mean Traini… 1.36e-11 39355. 33572. -0.651 6.94 2.85 2.87 0.946
## 2 Naive Traini… 2.02e+ 3 4595. 3633. 0.399 0.763 0.308 0.335 0.261
## 3 Seasonal_naive Traini… 7.87e+ 3 13726. 11784. 1.53 2.44 1 1 0.907
## 4 Drift Traini… -1.18e-11 4129. 2860. -0.0303 0.611 0.243 0.301 0.261
## 5 tslm1 Traini… -3.88e-12 21350. 19724. -0.191 4.23 1.67 1.56 0.949
As far as training set accuracy goes, the drift blows the others out of the water.
gdpfc <- gdpfit %>%
forecast(h=10)
accuracy(gdpfc,gdp_test)
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift Test -10310. 22398. 15412. -2.00 2.87 NaN NaN 0.491
## 2 Mean Test 89833. 93150. 89833. 15.8 15.8 NaN NaN 0.618
## 3 Naive Test 787. 24649. 20456. -0.0604 3.69 NaN NaN 0.618
## 4 Seasonal_naive Test 6255. 26084. 22303. 0.911 3.99 NaN NaN 0.577
## 5 tslm1 Test 23017. 30575. 28305. 3.93 4.98 NaN NaN 0.500
The drift model also outperforms the others on predicting the test set.
gdp_cv <- gdp %>%
stretch_tsibble(.init = 5, .step = 1) %>%
relocate(Quarter, .id)
gdp_cv %>%
model(Mean = MEAN(RGDP),
Naive = NAIVE(RGDP),
Seasonal_naive = SNAIVE(RGDP),
Drift = RW(RGDP ~ drift()),
tslm1=TSLM(RGDP ~ trend()+season())) %>%
forecast(h = 1) %>%
accuracy(gdp)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022 Q4
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift Test 1181. 8741. 4711. 0.216 0.945 0.334 0.482 -0.132
## 2 Mean Test 34460. 52577. 39323. 6.29 7.42 2.79 2.90 0.946
## 3 Naive Test 2142. 8894. 5231. 0.399 1.04 0.371 0.490 -0.114
## 4 Seasonal_naive Test 8457. 18257. 14208. 1.58 2.81 1.01 1.01 0.681
## 5 tslm1 Test 16348. 24805. 21141. 3.09 4.16 1.50 1.37 0.872
Cross-validation confirms that the drift model is the best one.
gdpdrift <- gdp %>%
model(Drift = RW(RGDP ~ drift()))
gg_tsresiduals(gdpdrift)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
Now I will compare the drift model to some of the more advanced modeling methods.
My prediction for the ETS is additive trend, possibly multiplicative errors, and no seasonality. I’ll still compare it to a few others.
gdpetsfit <- gdp_train %>%
model(
dampedmult=ETS(RGDP~error("M")+trend("Ad")+season("N")),
regmult=ETS(RGDP~error("M")+trend("A")+season("N")),
dampedadd=ETS(RGDP~error("A")+trend("Ad")+season("N")),
regadd=ETS(RGDP~error("A")+trend("A")+season("N"))
)
report(gdpetsfit)
## Warning in report.mdl_df(gdpetsfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 4 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult 8.09e-5 -621. 1255. 1256. 1267. 15645423. 35806540. 0.00636
## 2 regmult 7.75e-5 -621. 1251. 1252. 1262. 15279895. 36770386. 0.00621
## 3 dampedadd 1.72e+7 -620. 1252. 1254. 1265. 15739703. 35774390. 2959.
## 4 regadd 1.72e+7 -621. 1251. 1252. 1262. 16028270. 36223950. 2949.
The AAN ETS was the best.
accuracy(gdpetsfit)
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult Training 471. 3955. 2957. 0.0961 0.638 0.251 0.288 -0.0339
## 2 regmult Training -38.8 3909. 2892. -0.00917 0.624 0.245 0.285 -0.0175
## 3 dampedadd Training 416. 3967. 2959. 0.0857 0.639 0.251 0.289 -0.00880
## 4 regadd Training 321. 4004. 2949. 0.0676 0.637 0.250 0.292 -0.00719
The MAN ETS method also has the best RMSE.
gdpetsfc <- gdpetsfit %>%
forecast(h=10)
accuracy(gdpetsfc,gdp_test)
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedadd Test -17764. 24855. 17764. -3.30 3.30 NaN NaN 0.359
## 2 dampedmult Test -16250. 24237. 16250. -3.04 3.04 NaN NaN 0.395
## 3 regadd Test -21286. 26577. 21286. -3.91 3.91 NaN NaN 0.246
## 4 regmult Test -20726. 26248. 20726. -3.81 3.81 NaN NaN 0.264
The multiplicative damped was the best here.
gdpets_cv <- gdp %>%
stretch_tsibble(.init = 10, .step = 1) %>%
relocate(Quarter, .id)
gdp_cv %>%
model(dampedmult=ETS(RGDP~error("M")+trend("Ad")+season("N")),
regmult=ETS(RGDP~error("M")+trend("A")+season("N")),
dampedadd=ETS(RGDP~error("A")+trend("Ad")+season("N")),
regadd=ETS(RGDP~error("A")+trend("A")+season("N"))) %>%
forecast(h = 1) %>%
accuracy(gdp)
## Warning: 2 errors (1 unique) encountered for dampedmult
## [2] Not enough data to estimate this ETS model.
## Warning: 1 error encountered for regmult
## [1] Not enough data to estimate this ETS model.
## Warning: 2 errors (1 unique) encountered for dampedadd
## [2] Not enough data to estimate this ETS model.
## Warning: 1 error encountered for regadd
## [1] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022 Q4
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedadd Test 1296. 11817. 5583. 0.226 1.11 0.396 0.651 -0.0752
## 2 dampedmult Test 1323. 11291. 5567. 0.232 1.11 0.395 0.622 -0.0180
## 3 regadd Test 929. 12023. 5470. 0.157 1.09 0.388 0.663 -0.143
## 4 regmult Test 780. 11296. 5309. 0.131 1.06 0.376 0.623 -0.130
Each method chose a different model. I will use the one with the lowest AICc and the lowest RMSE when cross-validating, with is AAN and MAdN
gdp %>%
gg_tsdisplay(difference(RGDP),plot_type="partial")
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
The first difference appears to be stationary.
gdp %>%
features(RGDP,unitroot_ndiffs)
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 1
This confirms that one differences is necessary.
gdparfit <- gdp_train %>%
model(
arima110=ARIMA(RGDP~pdq(1,1,0)),
arima010=ARIMA(RGDP~pdq(0,1,0)),
arima011=ARIMA(RGDP~pdq(0,1,1)),
arima111=ARIMA(RGDP~pdq(1,1,1)),
arima=ARIMA(RGDP),
arima1=ARIMA(RGDP,stepwise = FALSE)
)
report(gdparfit)
## Warning in report.mdl_df(gdparfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 6 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima110 15100791. -570. 1148. 1149. 1156. <cpl [1]> <cpl [4]>
## 2 arima010 15251663. -571. 1148. 1148. 1154. <cpl [4]> <cpl [0]>
## 3 arima011 15141770. -570. 1148. 1149. 1157. <cpl [4]> <cpl [1]>
## 4 arima111 14801345. -570. 1147. 1148. 1156. <cpl [1]> <cpl [5]>
## 5 arima 14801690. -561. 1127. 1128. 1134. <cpl [0]> <cpl [5]>
## 6 arima1 14272983. -559. 1126. 1127. 1134. <cpl [3]> <cpl [0]>
The automatic ARIMA with no stepwise returned the lowest AICc.
accuracy(gdparfit)
## # A tibble: 6 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima110 Training -4.86 3754. 2554. -0.0185 0.544 0.217 0.274 -0.0134
## 2 arima010 Training 13.1 3806. 2562. -0.0152 0.547 0.217 0.277 0.164
## 3 arima011 Training 5.83 3759. 2571. -0.0148 0.548 0.218 0.274 0.00301
## 4 arima111 Training 531. 3717. 2741. 0.109 0.589 0.233 0.271 -0.0223
## 5 arima Training 156. 3717. 2679. 0.0330 0.577 0.227 0.271 0.00970
## 6 arima1 Training 45.4 3617. 2657. 0.0117 0.577 0.225 0.264 0.0337
Same for this one.
gdparfc <- gdparfit %>%
forecast(h=10)
accuracy(gdparfc,gdp_test)
## # A tibble: 6 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima Test -22151. 27346. 22151. -4.07 4.07 NaN NaN 0.244
## 2 arima010 Test -14443. 23980. 15687. -2.73 2.94 NaN NaN 0.446
## 3 arima011 Test -13694. 23661. 15512. -2.59 2.90 NaN NaN 0.454
## 4 arima1 Test -22147. 27463. 22147. -4.07 4.07 NaN NaN 0.250
## 5 arima110 Test -13068. 23663. 15613. -2.49 2.92 NaN NaN 0.473
## 6 arima111 Test -15433. 24439. 16159. -2.90 3.02 NaN NaN 0.436
The best one here was 011.
gdpar_cv <- gdp %>%
stretch_tsibble(.init = 10, .step = 1) %>%
relocate(Quarter, .id)
gdp_cv %>%
model(arima110=ARIMA(RGDP~pdq(1,1,0)),
arima010=ARIMA(RGDP~pdq(0,1,0)),
arima011=ARIMA(RGDP~pdq(0,1,1)),
arima111=ARIMA(RGDP~pdq(1,1,1)),
arima=ARIMA(RGDP),
arima1=ARIMA(RGDP,stepwise = FALSE)) %>%
forecast(h = 1) %>%
accuracy(gdp)
## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning: 5 errors (1 unique) encountered for arima111
## [5] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022 Q4
## # A tibble: 6 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima Test 1118. 11475. 5600. 0.193 1.12 0.397 0.632 -0.0645
## 2 arima010 Test 891. 8928. 4648. 0.159 0.932 0.329 0.492 -0.107
## 3 arima011 Test 1393. 10290. 5094. 0.251 1.01 0.361 0.567 -0.144
## 4 arima1 Test 1248. 11436. 5451. 0.223 1.09 0.386 0.630 -0.0715
## 5 arima110 Test 1548. 11176. 5228. 0.279 1.04 0.371 0.616 -0.145
## 6 arima111 Test 1772. 11506. 5196. 0.328 1.02 0.368 0.634 -0.187
The lowest one here with cross-validation was 010.
gdpfin_cv <- gdp %>%
stretch_tsibble(.init = 10, .step = 1) %>%
relocate(Quarter, .id)
gdp_cv %>%
model(
arima010=ARIMA(RGDP~pdq(0,1,0)),
arimacosn=ARIMA(RGDP~1+pdq(0,1,0)),
dampedmult=ETS(RGDP~error("M")+trend("Ad")+season("N")),
regadd=ETS(RGDP~error("A")+trend("A")+season("N")),
Drift = RW(RGDP ~ drift())
) %>%
forecast(h = 1) %>%
accuracy(gdp)
## Warning: Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Warning: 4 errors (1 unique) encountered for arimacosn
## [4] There are no ARIMA models to choose from after imposing the `order_constraint`, please consider allowing more models.
## Warning: 2 errors (1 unique) encountered for dampedmult
## [2] Not enough data to estimate this ETS model.
## Warning: 1 error encountered for regadd
## [1] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022 Q4
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima010 Test 891. 8928. 4648. 0.159 0.932 0.329 0.492 -0.107
## 2 arimacosn Test 1438. 8735. 4369. 0.283 0.863 0.310 0.481 -0.195
## 3 dampedmult Test 1323. 11291. 5567. 0.232 1.11 0.395 0.622 -0.0180
## 4 Drift Test 1181. 8741. 4711. 0.216 0.945 0.334 0.482 -0.132
## 5 regadd Test 929. 12023. 5470. 0.157 1.09 0.388 0.663 -0.143
The ARIMA(0,1,0) with a constant is the best model, slightly outperforming the drift model.
gdpfinfit <- gdp %>%
model(arimacosn=ARIMA(RGDP~1+pdq(0,1,0)))
gdpfinfc <- gdpfinfit %>%
forecast(h=12)
gdpfinfc %>%
autoplot(gdp)
gg_tsresiduals(gdpfinfit)
These look amazing.
hilo(gdpfinfc) %>%
select(-`80%`)
## # A tsibble: 12 x 5 [1Q]
## # Key: .model [1]
## .model Quarter RGDP .mean `95%`
## <chr> <qtr> <dist> <dbl> <hilo>
## 1 arimacosn 2022 Q4 N(593996, 7.2e+07) 593996. [577397.9, 610593.8]95
## 2 arimacosn 2023 Q1 N(6e+05, 1.4e+08) 596123. [572649.9, 619596.1]95
## 3 arimacosn 2023 Q2 N(6e+05, 2.2e+08) 598250. [569501.6, 626998.7]95
## 4 arimacosn 2023 Q3 N(6e+05, 2.9e+08) 600377. [567181.3, 633573.3]95
## 5 arimacosn 2023 Q4 N(6e+05, 3.6e+08) 602504. [565390.2, 639618.7]95
## 6 arimacosn 2024 Q1 N(6e+05, 4.3e+08) 604632. [563975.0, 645288.2]95
## 7 arimacosn 2024 Q2 N(606759, 5e+08) 606759. [562844.6, 650672.9]95
## 8 arimacosn 2024 Q3 N(608886, 5.7e+08) 608886. [561939.7, 655832.1]95
## 9 arimacosn 2024 Q4 N(611013, 6.5e+08) 611013. [561219.1, 660807.0]95
## 10 arimacosn 2025 Q1 N(613140, 7.2e+08) 613140. [560652.7, 665627.6]95
## 11 arimacosn 2025 Q2 N(615267, 7.9e+08) 615267. [560218.0, 670316.6]95
## 12 arimacosn 2025 Q3 N(617394, 8.6e+08) 617394. [559897.4, 674891.6]95
These are the forecasted values.
unr %>%
autoplot(URN)+labs(title="Unemployment Rate in Georgia",x="Month",y="Unemployment Rate (%)")
Extremely high seasonality. Something of a downward trend. Maybe a cyclical component. Not too much heteroskedasticity either.
First, I will compare the four benchmark forecasts and a TSLM model.
unr_train <- unr %>%
filter(year(Month)<2020)
unr_test <- unr %>%
filter(year(Month)>2019)
unrfit <- unr_train %>%
model(
Mean = MEAN(URN),
Naive = NAIVE(URN),
Seasonal_naive = SNAIVE(URN),
Drift = RW(URN ~ drift()),
tslm1=TSLM(URN ~ trend()+season()),
tslm2=TSLM(URN ~ trend() + fourier(K = 6))
)
accuracy(unrfit)
## # A tibble: 6 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mean Training -1.05e-16 1.77 1.35 -8.02 23.3 1.76 1.69 0.970
## 2 Naive Training -1.08e- 2 0.396 0.299 -0.417 5.14 0.390 0.378 0.0684
## 3 Seasonal_naive Training -1.07e- 1 1.05 0.767 -3.20 12.5 1 1 0.959
## 4 Drift Training 5.23e-17 0.396 0.299 -0.222 5.12 0.390 0.378 0.0684
## 5 tslm1 Training 7.72e-17 1.75 1.33 -7.77 22.8 1.74 1.67 0.986
## 6 tslm2 Training -1.20e-16 1.75 1.33 -7.77 22.8 1.74 1.67 0.986
As far as training set accuracy goes, the drift slightly wins out.
unrfc <- unrfit %>%
forecast(h=34)
accuracy(unrfc,unr_test)
## # A tibble: 6 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift Test 1.46 2.53 1.54 21.7 25.0 NaN NaN 0.736
## 2 Mean Test -1.43 2.57 2.34 -52.3 61.8 NaN NaN 0.752
## 3 Naive Test 1.27 2.49 1.49 16.2 24.4 NaN NaN 0.752
## 4 Seasonal_naive Test 0.944 2.37 1.43 8.27 24.5 NaN NaN 0.729
## 5 tslm1 Test -1.48 2.62 2.37 -53.4 62.6 NaN NaN 0.739
## 6 tslm2 Test -1.48 2.62 2.37 -53.4 62.6 NaN NaN 0.739
The seasonal naive is the best on the test set.
unr_cv <- unr %>%
stretch_tsibble(.init = 10, .step = 1) %>%
relocate(Month, .id)
unr_cv %>%
model(
Mean = MEAN(URN),
Naive = NAIVE(URN),
Seasonal_naive = SNAIVE(URN),
Drift = RW(URN ~ drift()),
tslm1=TSLM(URN ~ trend()+season()),
tslm2=TSLM(URN ~ trend() + fourier(K = 6))
) %>%
forecast(h = 1) %>%
accuracy(unr)
## Warning: prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2023 Jan
## # A tibble: 6 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift Test 0.0112 0.527 0.318 -0.134 5.50 0.365 0.407 -0.0255
## 2 Mean Test -0.310 1.86 1.44 -14.3 26.5 1.65 1.43 0.956
## 3 Naive Test -0.00957 0.525 0.318 -0.524 5.52 0.365 0.406 -0.0258
## 4 Seasonal_naive Test -0.114 1.29 0.871 -4.44 15.0 1 1 0.890
## 5 tslm1 Test 0.370 1.87 1.33 -0.744 22.8 1.53 1.45 0.966
## 6 tslm2 Test 0.336 1.96 1.37 -1.17 23.2 1.57 1.51 0.910
Cross-validation says the NAIVE is the best one. Weird.
Now I will compare the more advanced modeling methods.
My prediction for the ETS is additive trend, possibly multiplicative errors, and additive seasonality. I’ll still compare it to a few others.
unretsfit <- unr_train %>%
model(
dampedmult=ETS(URN~error("M")+trend("Ad")+season("M")),
regmult=ETS(URN~error("M")+trend("A")+season("M")),
dampedadd=ETS(URN~error("M")+trend("Ad")+season("A")),
regadd=ETS(URN~error("M")+trend("A")+season("A")),
ets=ETS(URN)
)
report(unretsfit)
## Warning in report.mdl_df(unretsfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 5 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult 0.00172 -891. 1818. 1819. 1895. 0.0576 0.120 0.0302
## 2 regmult 0.00176 -899. 1831. 1832. 1904. 0.0597 0.126 0.0309
## 3 dampedadd 0.00167 -883. 1802. 1803. 1879. 0.0542 0.113 0.0301
## 4 regadd 0.00173 -893. 1819. 1820. 1892. 0.0556 0.121 0.0304
## 5 ets 0.0552 -882. 1799. 1800. 1876. 0.0534 0.112 0.172
The automatic model was the best.
accuracy(unretsfit)
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult Training -0.00421 0.240 0.176 -0.0977 3.00 0.229 0.229 0.0659
## 2 regmult Training -0.0111 0.244 0.181 -0.250 3.07 0.235 0.233 0.159
## 3 dampedadd Training -0.00496 0.233 0.174 -0.0976 2.99 0.226 0.222 0.0647
## 4 regadd Training 0.000554 0.236 0.175 0.0758 3.01 0.229 0.225 0.0768
## 5 ets Training -0.00417 0.231 0.172 -0.0743 2.97 0.224 0.221 0.0364
Same here.
unretsfc <- unretsfit %>%
forecast(h=34)
accuracy(unretsfc,unr_test)
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedadd Test 1.12 2.42 1.44 12.8 23.8 NaN NaN 0.738
## 2 dampedmult Test 1.30 2.47 1.47 17.5 23.6 NaN NaN 0.736
## 3 ets Test 1.17 2.42 1.43 14.3 23.3 NaN NaN 0.734
## 4 regadd Test 1.74 2.59 1.74 30.7 30.7 NaN NaN 0.673
## 5 regmult Test 1.20 2.44 1.45 15.0 23.5 NaN NaN 0.741
My damped add was actually better here.
unrets_cv <- unr %>%
stretch_tsibble(.init = 10, .step = 5) %>%
relocate(Month, .id)
unrets_cv %>%
model(dampedmult=ETS(URN~error("M")+trend("Ad")+season("M")),
regmult=ETS(URN~error("M")+trend("A")+season("M")),
dampedadd=ETS(URN~error("M")+trend("Ad")+season("A")),
regadd=ETS(URN~error("M")+trend("A")+season("A")),
ets=ETS(URN)
) %>%
forecast(h = 1) %>%
accuracy(unr)
## Warning: 2 errors (2 unique) encountered for dampedmult
## [1] A seasonal ETS model cannot be used for this data.
## [1] Not enough data to estimate this ETS model.
## Warning: 2 errors (2 unique) encountered for regmult
## [1] A seasonal ETS model cannot be used for this data.
## [1] Not enough data to estimate this ETS model.
## Warning: 2 errors (2 unique) encountered for dampedadd
## [1] A seasonal ETS model cannot be used for this data.
## [1] Not enough data to estimate this ETS model.
## Warning: 2 errors (2 unique) encountered for regadd
## [1] A seasonal ETS model cannot be used for this data.
## [1] Not enough data to estimate this ETS model.
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedadd Test -0.0382 0.306 0.230 -1.03 4.41 0.263 0.236 0.129
## 2 dampedmult Test -0.0346 0.317 0.238 -0.926 4.52 0.272 0.244 0.168
## 3 ets Test -0.0438 0.356 0.252 -0.959 4.62 0.288 0.274 0.196
## 4 regadd Test -0.0384 0.318 0.232 -0.826 4.33 0.265 0.245 0.0494
## 5 regmult Test -0.0347 0.325 0.245 -0.804 4.51 0.280 0.250 0.163
The damped additive is the best here.
unr %>%
gg_tsdisplay(difference(URN),plot_type="partial")
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
Tough to say if this is stationary or not.
unr %>%
features(URN,unitroot_ndiffs)
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 1
This confirms that one differences is necessary.
I’ll check seasonal differences now.
unr %>%
features(URN,unitroot_nsdiffs)
## # A tibble: 1 × 1
## nsdiffs
## <int>
## 1 0
No seasonal difference is required.
unrarfit <- unr_train %>%
model(
arima110=ARIMA(URN~pdq(1,1,0)),
arima010=ARIMA(URN~pdq(0,1,0)),
arima011=ARIMA(URN~pdq(0,1,1)),
arima111=ARIMA(URN~pdq(1,1,1)),
arima=ARIMA(URN),
arima1=ARIMA(URN,stepwise = FALSE)
)
report(unrarfit)
## Warning in report.mdl_df(unrarfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 6 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima110 0.0563 5.07 -4.14 -4.09 8.59 <cpl [1]> <cpl [12]>
## 2 arima010 0.0564 4.10 -4.19 -4.17 4.30 <cpl [0]> <cpl [12]>
## 3 arima011 0.0563 4.85 -3.69 -3.65 9.04 <cpl [0]> <cpl [13]>
## 4 arima111 0.0550 11.6 -15.1 -15.0 1.87 <cpl [1]> <cpl [13]>
## 5 arima 0.0542 15.4 -18.8 -18.7 6.64 <cpl [2]> <cpl [14]>
## 6 arima1 0.0545 14.4 -14.8 -14.6 14.9 <cpl [26]> <cpl [13]>
The automatic ARIMA returned the lowest AICc.
accuracy(unrarfit)
## # A tibble: 6 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima110 Training 0.00308 0.234 0.168 0.0808 2.91 0.219 0.223 -0.00891
## 2 arima010 Training 0.00328 0.234 0.167 0.0847 2.90 0.218 0.224 0.0616
## 3 arima011 Training 0.00313 0.234 0.168 0.0817 2.91 0.219 0.223 0.00729
## 4 arima111 Training 0.00225 0.231 0.166 0.0778 2.90 0.217 0.220 -0.0566
## 5 arima Training -0.00628 0.229 0.166 -0.139 2.89 0.216 0.219 -0.00123
## 6 arima1 Training -0.00679 0.229 0.167 -0.164 2.91 0.218 0.219 -0.0523
Same for this one.
unrarfc <- unrarfit %>%
forecast(h=34)
accuracy(unrarfc,unr_test)
## # A tibble: 6 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima Test 1.01 2.44 1.49 9.46 25.8 NaN NaN 0.751
## 2 arima010 Test 1.85 2.67 1.85 33.9 33.9 NaN NaN 0.666
## 3 arima011 Test 1.86 2.67 1.86 34.0 34.0 NaN NaN 0.666
## 4 arima1 Test 0.809 2.46 1.60 3.25 29.8 NaN NaN 0.771
## 5 arima110 Test 1.86 2.67 1.86 34.0 34.0 NaN NaN 0.665
## 6 arima111 Test 1.82 2.64 1.82 32.9 32.9 NaN NaN 0.668
The best one here was also the automatic
unrar_cv <- unr %>%
stretch_tsibble(.init = 10, .step = 5) %>%
relocate(Month, .id)
unrar_cv %>%
model(arima110=ARIMA(URN~pdq(1,1,0)),
arima010=ARIMA(URN~pdq(0,1,0)),
arima011=ARIMA(URN~pdq(0,1,1)),
arima111=ARIMA(URN~pdq(1,1,1)),
arima=ARIMA(URN)) %>%
forecast(h = 1) %>%
accuracy(unr)
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## Warning: 1 error encountered for arima111
## [1] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima Test -0.105 0.588 0.289 -2.72 6.07 0.331 0.453 0.0921
## 2 arima010 Test -0.0932 0.578 0.287 -2.51 6.06 0.329 0.445 0.0919
## 3 arima011 Test -0.0838 0.510 0.288 -2.17 5.81 0.329 0.393 0.139
## 4 arima110 Test -0.0946 0.583 0.287 -2.55 6.07 0.328 0.449 0.0843
## 5 arima111 Test -0.0580 0.361 0.256 -1.15 4.72 0.292 0.278 0.153
The lowest one here with cross-validation was 111. Awesome.
unrcons_cv <- unr %>%
stretch_tsibble(.init = 10, .step = 5) %>%
relocate(Month, .id)
unrcons_cv %>%
model(
arimanocons=ARIMA(URN~pdq(1,1,1)),
arima111=ARIMA(URN~1+pdq(1,1,1))
) %>%
forecast(h = 1) %>%
accuracy(unr)
## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## Warning: Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Warning: 1 error encountered for arimanocons
## [1] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots
## Warning: 65 errors (1 unique) encountered for arima111
## [65] There are no ARIMA models to choose from after imposing the `order_constraint`, please consider allowing more models.
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima111 Test -0.143 0.452 0.322 -3.01 6.14 0.368 0.348 0.140
## 2 arimanocons Test -0.0580 0.361 0.256 -1.15 4.72 0.292 0.278 0.153
I need to see if a constant is necessary. It isn’t.
unrfin_cv <- unr %>%
stretch_tsibble(.init = 10, .step = 1) %>%
relocate(Month, .id)
unrfin_cv %>%
model(
arima111=ARIMA(URN~pdq(1,1,1)),
dampedadd=ETS(URN~error("M")+trend("Ad")+season("A")),
Naive = NAIVE(URN)
) %>%
forecast(h = 1) %>%
accuracy(unr)
## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning: 6 errors (1 unique) encountered for arima111
## [6] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots
## Warning: 9 errors (2 unique) encountered for dampedadd
## [3] A seasonal ETS model cannot be used for this data.
## [6] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2023 Jan
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima111 Test -0.00822 0.521 0.249 -0.306 4.35 0.286 0.402 -0.131
## 2 dampedadd Test -0.00202 0.503 0.222 -0.0949 3.84 0.255 0.388 -0.101
## 3 Naive Test -0.00957 0.525 0.318 -0.524 5.52 0.365 0.406 -0.0258
The damped additive model is the best of all forecasting methods.
unrfinfit <- unr %>%
model(dampedadd=ETS(URN~error("M")+trend("Ad")+season("A")))
unrfinfc <- unrfinfit %>%
forecast(h=36)
unrview <- unr %>%
filter(year(Month)>2020)
unrfinfc %>%
autoplot(unrview,level=NULL)
hilo(unrfinfc) %>%
select(-`80%`)
## # A tsibble: 36 x 5 [1M]
## # Key: .model [1]
## .model Month URN .mean `95%`
## <chr> <mth> <dist> <dbl> <hilo>
## 1 dampedadd 2023 Jan N(3.1, 0.082) 3.11 [ 2.5466100, 3.668533]95
## 2 dampedadd 2023 Feb N(3, 0.22) 2.97 [ 2.0363325, 3.894918]95
## 3 dampedadd 2023 Mar N(2.6, 0.43) 2.64 [ 1.3544505, 3.916972]95
## 4 dampedadd 2023 Apr N(2.7, 0.71) 2.75 [ 1.0983089, 4.391851]95
## 5 dampedadd 2023 May N(2.9, 1.1) 2.88 [ 0.8533244, 4.909380]95
## 6 dampedadd 2023 Jun N(3.5, 1.6) 3.51 [ 1.0606319, 5.954903]95
## 7 dampedadd 2023 Jul N(3.3, 2.2) 3.32 [ 0.4402794, 6.200429]95
## 8 dampedadd 2023 Aug N(3, 2.9) 3.03 [-0.2875660, 6.351201]95
## 9 dampedadd 2023 Sep N(2.8, 3.7) 2.84 [-0.9268358, 6.598785]95
## 10 dampedadd 2023 Oct N(2.9, 4.6) 2.86 [-1.3501111, 7.074651]95
## # … with 26 more rows
## # ℹ Use `print(n = ...)` to see more rows
These are the forecasted values.
unrfinfit2 <- unr %>%
model(dampedadd=ETS(URN~error("M")+trend("Ad")+season("A")))
unrfinfc2 <- unrfinfit2 %>%
forecast(h=12)
unrview2 <- unr %>%
filter(year(Month)>2020)
unrfinfc2 %>%
autoplot(unrview2)
unrfinfc2
## # A fable: 12 x 4 [1M]
## # Key: .model [1]
## .model Month URN .mean
## <chr> <mth> <dist> <dbl>
## 1 dampedadd 2023 Jan N(3.1, 0.082) 3.11
## 2 dampedadd 2023 Feb N(3, 0.22) 2.97
## 3 dampedadd 2023 Mar N(2.6, 0.43) 2.64
## 4 dampedadd 2023 Apr N(2.7, 0.71) 2.75
## 5 dampedadd 2023 May N(2.9, 1.1) 2.88
## 6 dampedadd 2023 Jun N(3.5, 1.6) 3.51
## 7 dampedadd 2023 Jul N(3.3, 2.2) 3.32
## 8 dampedadd 2023 Aug N(3, 2.9) 3.03
## 9 dampedadd 2023 Sep N(2.8, 3.7) 2.84
## 10 dampedadd 2023 Oct N(2.9, 4.6) 2.86
## 11 dampedadd 2023 Nov N(2.6, 5.7) 2.63
## 12 dampedadd 2023 Dec N(2.7, 6.8) 2.66