library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.2
## ✔ dplyr 1.0.9 ✔ tsibbledata 0.4.0
## ✔ tidyr 1.2.0 ✔ feasts 0.2.2
## ✔ lubridate 1.8.0 ✔ fable 0.3.1
## ✔ ggplot2 3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(readxl)
hoown <- readxl::read_excel("C:\\Users\\PythonAcct\\Downloads\\GAHOWN (1).xls") %>%
mutate(Year=year(Date)) %>%
as_tsibble(index=Year) %>%
mutate(Rate=HomeOwnRate) %>%
select(-Date) %>%
select(-HomeOwnRate)
hoown %>%
autoplot(Rate)
This is yeary data. There doesn’t seem to be much of any trend. Almost seems cyclical.
First, I will compare the four benchmark forecasts and a TSLM model.
ho_train <- hoown %>%
filter(Year<2019)
ho_test <- hoown%>%
filter(Year>2018)
hofit <- ho_train %>%
model(
Mean = MEAN(Rate),
Naive = NAIVE(Rate),
Drift = RW(Rate ~ drift()),
tslm1=TSLM(Rate ~ trend())
)
accuracy(hofit)
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mean Training -5.68e-15 3.03 2.63 -0.206 3.94 2.55 2.24 0.879
## 2 Naive Training 5.88e- 3 1.35 1.03 -0.0114 1.55 1 1 0.110
## 3 Drift Training 3.34e-15 1.35 1.03 -0.0202 1.55 1.00 1.00 0.110
## 4 tslm1 Training 4.06e-16 3.03 2.63 -0.206 3.94 2.55 2.24 0.879
As far as training set accuracy goes, the naive is the best one.
hofc <- hofit %>%
forecast(h=3)
accuracy(hofc,ho_test)
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift Test 1.42 2.05 1.42 2.13 2.13 NaN NaN -0.658
## 2 Mean Test -1.29 1.96 1.81 -2.03 2.80 NaN NaN -0.658
## 3 Naive Test 1.43 2.05 1.43 2.15 2.15 NaN NaN -0.658
## 4 tslm1 Test -1.28 1.95 1.80 -2.01 2.79 NaN NaN -0.658
The TSLM model was the best here.
ho_cv <- hoown%>%
stretch_tsibble(.init = 2, .step = 1) %>%
relocate(Year, .id)
ho_cv %>%
model(Mean = MEAN(Rate),
Naive = NAIVE(Rate),
Drift = RW(Rate ~ drift()),
tslm1=TSLM(Rate ~ trend())) %>%
forecast(h = 1) %>%
accuracy(hoown)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift Test -0.127 1.57 1.22 -0.200 1.84 1.08 1.06 0.0170
## 2 Mean Test 0.746 3.13 2.61 0.947 3.89 2.31 2.10 0.854
## 3 Naive Test 0.0361 1.50 1.14 0.0315 1.71 1.01 1.01 -0.0101
## 4 tslm1 Test -1.73 3.31 2.71 -2.69 4.12 2.40 2.23 0.819
The naive does the best here.
honaive <- hoown %>%
model(Naive = NAIVE(Rate))
gg_tsresiduals(honaive)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
Now I will compare the drift model to some of the more advanced modeling methods.
My prediction for the ETS is no trend trend, possibly multiplicative errors, and no seasonality. I’ll still compare it to a few others.
hoetsfit <- ho_train %>%
model(
dampedmult=ETS(Rate~error("M")+trend("Ad")+season("N")),
regmult=ETS(Rate~error("M")+trend("A")+season("N")),
multno=ETS(Rate~error("M")+trend("N")+season("N")),
regadd=ETS(Rate~error("A")+trend("N")+season("N")),
ets=ETS(Rate)
)
report(hoetsfit)
## Warning in report.mdl_df(hoetsfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 5 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult 0.000465 -72.1 156. 159. 166. 1.77 3.76 0.0155
## 2 regmult 0.000452 -72.2 154. 156. 162. 1.78 3.95 0.0151
## 3 multno 0.000425 -72.2 150. 151. 155. 1.77 3.93 0.0150
## 4 regadd 1.88 -72.3 151. 151. 155. 1.77 3.93 1.00
## 5 ets 0.000425 -72.2 150. 151. 155. 1.77 3.93 0.0150
The automatic ets and the multiplicative errors with no trend were the best.
accuracy(hoetsfit)
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult Training -0.0740 1.33 1.03 -0.130 1.55 0.999 0.985 0.0726
## 2 regmult Training -0.0454 1.33 1.01 -0.0879 1.51 0.978 0.986 0.110
## 3 multno Training 0.00649 1.33 1.00 -0.00984 1.50 0.972 0.986 0.110
## 4 regadd Training 0.00568 1.33 1.00 -0.0111 1.50 0.971 0.986 0.111
## 5 ets Training 0.00649 1.33 1.00 -0.00984 1.50 0.972 0.986 0.110
The MAdN was best here.
hoetsfc <- hoetsfit %>%
forecast(h=3)
accuracy(hoetsfc,ho_test)
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult Test 1.39 2.03 1.39 2.08 2.08 NaN NaN -0.657
## 2 ets Test 1.43 2.05 1.43 2.15 2.15 NaN NaN -0.658
## 3 multno Test 1.43 2.05 1.43 2.15 2.15 NaN NaN -0.658
## 4 regadd Test 1.43 2.05 1.43 2.15 2.15 NaN NaN -0.658
## 5 regmult Test 1.33 1.99 1.33 1.99 1.99 NaN NaN -0.654
The regmult was the best here.
hoets_cv <- hoown %>%
stretch_tsibble(.init = 3, .step = 1) %>%
relocate(Year, .id)
hoets_cv %>%
model(dampedmult=ETS(Rate~error("M")+trend("Ad")+season("N")),
regmult=ETS(Rate~error("M")+trend("A")+season("N")),
multno=ETS(Rate~error("M")+trend("N")+season("N")),
regadd=ETS(Rate~error("A")+trend("N")+season("N")),
ets=ETS(Rate)
) %>%
forecast(h = 1) %>%
accuracy(hoown)
## Warning: 4 errors (1 unique) encountered for dampedmult
## [4] Not enough data to estimate this ETS model.
## Warning: 3 errors (1 unique) encountered for regmult
## [3] Not enough data to estimate this ETS model.
## Warning: 1 error encountered for multno
## [1] Not enough data to estimate this ETS model.
## Warning: 1 error encountered for regadd
## [1] Not enough data to estimate this ETS model.
## Warning: 1 error encountered for ets
## [1] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult Test -0.0554 1.73 1.32 -0.121 1.96 1.16 1.16 0.175
## 2 ets Test 0.0998 1.66 1.25 0.119 1.87 1.10 1.12 0.0928
## 3 multno Test 0.0998 1.66 1.25 0.119 1.87 1.10 1.12 0.0928
## 4 regadd Test 0.101 1.67 1.25 0.120 1.87 1.10 1.12 0.0941
## 5 regmult Test -0.235 1.79 1.34 -0.383 2.00 1.19 1.21 0.250
My multno and the automatic ets were the best here.
hoown %>%
gg_tsdisplay(difference(Rate),plot_type="partial")
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
The first difference appears to be stationary.
hoown %>%
features(Rate,unitroot_ndiffs)
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 0
This confirms that no differences are necessary.
hoarfit <- ho_train %>%
model(
arima100=ARIMA(Rate~pdq(1,0,0)),
arima001=ARIMA(Rate~pdq(0,0,1)),
arima101=ARIMA(Rate~pdq(1,0,1)),
arima100cons=ARIMA(Rate~1+pdq(1,0,0)),
arima001cons=ARIMA(Rate~1+pdq(0,0,1)),
arima101cons=ARIMA(Rate~1+pdq(1,0,1)),
arima=ARIMA(Rate),
arima1=ARIMA(Rate,stepwise = FALSE)
)
report(hoarfit)
## Warning in report.mdl_df(hoarfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 8 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima100 1.82 -59.9 126. 127. 131. <cpl [1]> <cpl [0]>
## 2 arima001 4.14 -74.0 154. 155. 159. <cpl [0]> <cpl [1]>
## 3 arima101 1.82 -59.5 127. 128. 133. <cpl [1]> <cpl [1]>
## 4 arima100cons 1.82 -59.9 126. 127. 131. <cpl [1]> <cpl [0]>
## 5 arima001cons 4.14 -74.0 154. 155. 159. <cpl [0]> <cpl [1]>
## 6 arima101cons 1.82 -59.5 127. 128. 133. <cpl [1]> <cpl [1]>
## 7 arima 1.82 -59.9 126. 127. 131. <cpl [1]> <cpl [0]>
## 8 arima1 1.82 -59.9 126. 127. 131. <cpl [1]> <cpl [0]>
The automatic ARIMA with no stepwise returned the lowest AICc.
accuracy(hoarfit)
## # A tibble: 8 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima100 Training 0.0824 1.31 1.03 0.0842 1.54 0.998 0.970 0.168
## 2 arima001 Training 0.0279 1.98 1.72 -0.0789 2.58 1.67 1.46 0.482
## 3 arima101 Training 0.0722 1.29 1.01 0.0697 1.52 0.984 0.955 0.0305
## 4 arima100cons Training 0.0824 1.31 1.03 0.0842 1.54 0.998 0.970 0.168
## 5 arima001cons Training 0.0279 1.98 1.72 -0.0789 2.58 1.67 1.46 0.482
## 6 arima101cons Training 0.0722 1.29 1.01 0.0697 1.52 0.984 0.955 0.0305
## 7 arima Training 0.0824 1.31 1.03 0.0842 1.54 0.998 0.970 0.168
## 8 arima1 Training 0.0824 1.31 1.03 0.0842 1.54 0.998 0.970 0.168
arima101 and arima101cons was best here
hoarfc <- hoarfit %>%
forecast(h=3)
accuracy(hoarfc,ho_test)
## # A tibble: 8 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima Test 1.09 1.84 1.29 1.62 1.93 NaN NaN -0.641
## 2 arima001 Test -0.809 1.57 1.38 -1.28 2.13 NaN NaN -0.506
## 3 arima001cons Test -0.809 1.57 1.38 -1.28 2.13 NaN NaN -0.506
## 4 arima1 Test 1.09 1.84 1.29 1.62 1.93 NaN NaN -0.641
## 5 arima100 Test 1.09 1.84 1.29 1.62 1.93 NaN NaN -0.641
## 6 arima100cons Test 1.09 1.84 1.29 1.62 1.93 NaN NaN -0.641
## 7 arima101 Test 0.848 1.72 1.24 1.25 1.86 NaN NaN -0.634
## 8 arima101cons Test 0.848 1.72 1.24 1.25 1.86 NaN NaN -0.634
The best one here was 0101 with and without a constant
hoar_cv <- hoown %>%
stretch_tsibble(.init = 2, .step = 1) %>%
relocate(Year, .id)
hoar_cv %>%
model(arima100=ARIMA(Rate~pdq(1,0,0)),
arima001=ARIMA(Rate~pdq(0,0,1)),
arima101=ARIMA(Rate~pdq(1,0,1)),
arima100cons=ARIMA(Rate~1+pdq(1,0,0)),
arima001cons=ARIMA(Rate~1+pdq(0,0,1)),
arima101cons=ARIMA(Rate~1+pdq(1,0,1)),
arima=ARIMA(Rate),
arima1=ARIMA(Rate,stepwise = FALSE)
) %>%
forecast(h = 1) %>%
accuracy(hoown)
## Warning in max(pdq$p): no non-missing arguments to max; returning -Inf
## Warning in max(pdq$q): no non-missing arguments to max; returning -Inf
## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## Warning in max(pdq$p): no non-missing arguments to max; returning -Inf
## Warning in max(pdq$q): no non-missing arguments to max; returning -Inf
## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## Warning in max(pdq$p): no non-missing arguments to max; returning -Inf
## Warning in max(pdq$q): no non-missing arguments to max; returning -Inf
## Warning in max(pdq$p): no non-missing arguments to max; returning -Inf
## Warning in max(pdq$q): no non-missing arguments to max; returning -Inf
## Warning: 1 error encountered for arima100
## [1] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots
## Warning: 8 errors (1 unique) encountered for arima001
## [8] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots
## Warning: 6 errors (1 unique) encountered for arima101
## [6] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots
## Warning: 1 error encountered for arima100cons
## [1] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots
## Warning: 1 error encountered for arima001cons
## [1] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots
## Warning: 1 error encountered for arima101cons
## [1] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022
## # A tibble: 8 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima Test 0.264 1.74 1.31 0.375 1.98 1.16 1.17 0.178
## 2 arima001 Test 0.497 2.43 2.20 0.605 3.26 1.95 1.63 0.587
## 3 arima001cons Test 0.522 2.26 2.00 0.673 2.99 1.77 1.52 0.484
## 4 arima1 Test 0.264 1.74 1.31 0.375 1.98 1.16 1.17 0.178
## 5 arima100 Test 0.304 1.65 1.28 0.417 1.91 1.13 1.11 0.170
## 6 arima100cons Test 0.304 1.65 1.28 0.417 1.91 1.13 1.11 0.170
## 7 arima101 Test 0.276 1.86 1.42 0.354 2.11 1.25 1.25 0.0898
## 8 arima101cons Test 0.402 1.81 1.38 0.558 2.07 1.23 1.22 0.102
The lowest one here with cross-validation was 100 with and without constant.
hofin_cv <- hoown %>%
stretch_tsibble(.init = 2, .step = 1) %>%
relocate(Year, .id)
hofin_cv %>%
model(
arima100cons=ARIMA(Rate~1+pdq(1,0,0)),
multno=ETS(Rate~error("M")+trend("N")+season("N")),
Naive = NAIVE(Rate)
) %>%
forecast(h = 1) %>%
accuracy(hoown)
## Warning in max(pdq$p): no non-missing arguments to max; returning -Inf
## Warning: 1 error encountered for arima100cons
## [1] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots
## Warning: 2 errors (1 unique) encountered for multno
## [2] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima100cons Test 0.304 1.65 1.28 0.417 1.91 1.13 1.11 0.170
## 2 multno Test 0.0998 1.66 1.25 0.119 1.87 1.10 1.12 0.0928
## 3 Naive Test 0.0361 1.50 1.14 0.0315 1.71 1.01 1.01 -0.0101
Wow. The naive easily won here. I don’t really know what to say about that.
hofinfit <- hoown %>%
model(Naive=NAIVE(Rate))
hofinfc <- hofinfit %>%
forecast(h=12)
hofinfc %>%
autoplot(hoown)
gg_tsresiduals(hofinfit)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
These look amazing.
hilo(hofinfc) %>%
select(-`80%`)
## # A tsibble: 12 x 5 [1Y]
## # Key: .model [1]
## .model Year Rate .mean `95%`
## <chr> <dbl> <dist> <dbl> <hilo>
## 1 Naive 2022 N(64, 2.2) 64 [61.08613, 66.91387]95
## 2 Naive 2023 N(64, 4.4) 64 [59.87916, 68.12084]95
## 3 Naive 2024 N(64, 6.6) 64 [58.95302, 69.04698]95
## 4 Naive 2025 N(64, 8.8) 64 [58.17225, 69.82775]95
## 5 Naive 2026 N(64, 11) 64 [57.48438, 70.51562]95
## 6 Naive 2027 N(64, 13) 64 [56.86250, 71.13750]95
## 7 Naive 2028 N(64, 15) 64 [56.29061, 71.70939]95
## 8 Naive 2029 N(64, 18) 64 [55.75832, 72.24168]95
## 9 Naive 2030 N(64, 20) 64 [55.25838, 72.74162]95
## 10 Naive 2031 N(64, 22) 64 [54.78552, 73.21448]95
## 11 Naive 2032 N(64, 24) 64 [54.33577, 73.66423]95
## 12 Naive 2033 N(64, 27) 64 [53.90604, 74.09396]95
These are the forecasted values.