library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.2
## ✔ dplyr 1.0.9 ✔ tsibbledata 0.4.0
## ✔ tidyr 1.2.0 ✔ feasts 0.2.2
## ✔ lubridate 1.8.0 ✔ fable 0.3.1
## ✔ ggplot2 3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(readxl)
inc <- readxl::read_excel("C:\\Users\\PythonAcct\\Downloads\\MEHOINUSGAA672N.xls")
rinc <- inc %>%
mutate(Month=yearmonth(observation_date)) %>%
as_tsibble(index=Month) %>%
mutate(RINC=MEHOINUSGAA672N) %>%
select(-observation_date) %>%
select(-MEHOINUSGAA672N)
rinc %>%
autoplot(RINC)+labs(title="Median Real Household Income",x="Yearly",y="Median Real Household Income")
rinc <- rinc %>%
mutate(Year=year(Month)) %>%
as_tsibble(index=Year) %>%
select(-Month)
rinc
## # A tsibble: 38 x 2 [1Y]
## RINC Year
## <dbl> <dbl>
## 1 49773 1984
## 2 50685 1985
## 3 57673 1986
## 4 61117 1987
## 5 58660 1988
## 6 58268 1989
## 7 55568 1990
## 8 52938 1991
## 9 54662 1992
## 10 58624 1993
## # … with 28 more rows
## # ℹ Use `print(n = ...)` to see more rows
rinc_train <- rinc%>%
filter(Year<2015)
rinc_test <- rinc %>%
filter(Year>2014)
rincfit <- rinc_train %>%
model(
Mean = MEAN(RINC),
Naive = NAIVE(RINC),
Drift = RW(RINC ~ drift()),
tslm1=TSLM(RINC ~ trend(knots=c(1987,1991,2006,2009)))
)
accuracy(rincfit)
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mean Training -9.39e-13 4480. 3761. -0.586 6.42 1.46 1.45 0.699
## 2 Naive Training 2.33e+ 2 3093. 2573. 0.299 4.36 1 1 0.0619
## 3 Drift Training 1.70e-12 3084. 2584. -0.0968 4.38 1.00 0.997 0.0619
## 4 tslm1 Training 4.69e-13 2069. 1585. -0.120 2.67 0.616 0.669 0.344
My knotted linear regression model works the best.
rincfc <- rincfit %>%
forecast(h=7)
accuracy(rincfc,rinc_test)
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift Test 3207. 3595. 3207. 5.20 5.20 NaN NaN -0.129
## 2 Mean Test 2002. 2657. 2244. 3.21 3.63 NaN NaN -0.0962
## 3 Naive Test 4139. 4493. 4139. 6.72 6.72 NaN NaN -0.0962
## 4 tslm1 Test 3402. 3766. 3402. 5.52 5.52 NaN NaN -0.120
The mean method does much better here.
rinc_cv <- rinc %>%
stretch_tsibble(.init = 5, .step = 1) %>%
relocate(Year, .id)
rinc_cv %>%
model(Mean = MEAN(RINC),
Naive = NAIVE(RINC),
Drift = RW(RINC ~ drift()),
tslm1=TSLM(RINC ~ trend()),
tslm2=TSLM(RINC ~ trend(knots=c(1987,1991,2006,2009)))) %>%
forecast(h = 1) %>%
accuracy(rinc)
## Warning: prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift Test -570. 2997. 2568. -1.06 4.32 1.04 1.01 0.00512
## 2 Mean Test 1782. 4255. 3550. 2.61 5.80 1.44 1.43 0.721
## 3 Naive Test 86.0 2803. 2344. 0.0328 3.93 0.951 0.943 -0.0195
## 4 tslm1 Test -2318. 4769. 3783. -4.25 6.55 1.54 1.60 0.705
## 5 tslm2 Test -548. 2956. 2317. -0.911 3.90 0.941 0.994 0.292
Cool. The knotted regression was really good at predicting each new thing.
rinclinear <- rinc %>%
model(tslm2=TSLM(RINC ~ trend(knots=c(1987,1991,2006,2009))))
gg_tsresiduals(rinclinear)
Residuals aren’t too bad either.
Now I will compare the drift model to some of the more advanced modeling methods.
My prediction for the ETS is additive trend, possibly multiplicative errors, and no seasonality. I’ll still compare it to a few others.
rincetsfit <- rinc_train %>%
model(
dampedmult=ETS(RINC~error("M")+trend("Ad")+season("N")),
regmult=ETS(RINC~error("M")+trend("A")+season("N")),
ets=ETS(RINC)
)
report(rincetsfit)
## Warning in report.mdl_df(rincetsfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 3 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult 0.00345 -303. 618. 622. 627. 9654214. 18782143. 4.49e-2
## 2 regmult 0.00339 -304. 617. 620. 624. 9980985. 20034116. 4.53e-2
## 3 ets 9893844. -302. 610. 611. 614. 9255532. 19841027. 2.49e+3
The automatic ets was the best.
accuracy(rincetsfit)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult Training -177. 3107. 2626. -0.443 4.49 1.02 1.00 0.0336
## 2 regmult Training -399. 3159. 2669. -0.819 4.57 1.04 1.02 0.0472
## 3 ets Training 225. 3042. 2490. 0.289 4.22 0.968 0.984 0.0614
Same here.
rincetsfc <- rincetsfit %>%
forecast(h=7)
accuracy(rincetsfc,rinc_test)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult Test 3832. 4189. 3832. 6.22 6.22 NaN NaN -0.117
## 2 ets Test 4140. 4493. 4140. 6.72 6.72 NaN NaN -0.0962
## 3 regmult Test 2188. 2734. 2188. 3.53 3.53 NaN NaN -0.0395
The regmult model did the best here.
rincets_cv <- rinc %>%
stretch_tsibble(.init = 5, .step = 1) %>%
relocate(Year, .id)
rincets_cv %>%
model(dampedmult=ETS(RINC~error("M")+trend("Ad")+season("N")),
regmult=ETS(RINC~error("M")+trend("A")+season("N")),
ets=ETS(RINC)) %>%
forecast(h = 1) %>%
accuracy(rinc)
## Warning: 2 errors (1 unique) encountered for dampedmult
## [2] Not enough data to estimate this ETS model.
## Warning: 1 error encountered for regmult
## [1] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult Test -61.1 3321. 2818. -0.265 4.72 1.14 1.12 0.101
## 2 ets Test 246. 3085. 2509. 0.279 4.19 1.02 1.04 0.0779
## 3 regmult Test -1012. 3962. 2883. -1.91 4.94 1.17 1.33 0.365
The ETS automatic had the best here.
rinc %>%
model(ets=ETS(RINC)) %>%
report()
## Series: RINC
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9998993
##
## Initial states:
## l[0]
## 49779.56
##
## sigma^2: 9080787
##
## AIC AICc BIC
## 750.9972 751.7031 755.9100
rinc_train %>%
model(ets=ETS(RINC)) %>%
report()
## Series: RINC
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9998993
##
## Initial states:
## l[0]
## 49779.56
##
## sigma^2: 9893844
##
## AIC AICc BIC
## 609.7163 610.6052 614.0183
Both select A,N,N as the best model. I guess this makes sense, since there isn’t a noticeable trend.
rinc %>%
gg_tsdisplay(difference(RINC),plot_type="partial")
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
The first difference appears to be stationary.
rinc %>%
features(RINC,unitroot_ndiffs)
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 0
Apparently no differences are needed.
rincarfit <- rinc_train %>%
model(
arima100=ARIMA(RINC~0+pdq(1,0,0)),
arima001=ARIMA(RINC~0+pdq(0,0,1)),
arima101=ARIMA(RINC~0+pdq(1,0,1)),
arima100cons=ARIMA(RINC~1+pdq(1,0,0)),
arima001cons=ARIMA(RINC~1+pdq(0,0,1)),
arima101cons=ARIMA(RINC~1+pdq(1,0,1)),
arima=ARIMA(RINC),
arima1=ARIMA(RINC,stepwise = FALSE)
)
## Warning: 1 error encountered for arima100
## [1] non-stationary AR part from CSS
## Warning: 1 error encountered for arima101
## [1] non-stationary AR part from CSS
report(rincarfit)
## Warning in report.mdl_df(rincarfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 6 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima001 934763826. -365. 735. 735. 738. <cpl [0]> <cpl [1]>
## 2 arima100cons 9201337. -292. 590. 591. 594. <cpl [1]> <cpl [0]>
## 3 arima001cons 11678737. -295. 597. 598. 601. <cpl [0]> <cpl [1]>
## 4 arima101cons 9165322. -291. 591. 592. 596. <cpl [1]> <cpl [1]>
## 5 arima 9201337. -292. 590. 591. 594. <cpl [1]> <cpl [0]>
## 6 arima1 9201337. -292. 590. 591. 594. <cpl [1]> <cpl [0]>
The automatic ARIMAs and my 100 with a constant have the same values. I’m guessing they are the same model.
accuracy(rincarfit)
## # A tibble: 8 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima100 Trai… NaN NaN NaN NaN NaN NaN NaN NA
## 2 arima001 Trai… 29777. 30077. 29777. 50.6 50.6 11.6 9.73 -0.538
## 3 arima101 Trai… NaN NaN NaN NaN NaN NaN NaN NA
## 4 arima100co… Trai… 302. 2934. 2457. 0.283 4.18 0.955 0.949 0.0960
## 5 arima001co… Trai… 78.3 3305. 2798. -0.252 4.78 1.09 1.07 0.260
## 6 arima101co… Trai… 231. 2877. 2386. 0.162 4.07 0.927 0.930 -0.0408
## 7 arima Trai… 302. 2934. 2457. 0.283 4.18 0.955 0.949 0.0960
## 8 arima1 Trai… 302. 2934. 2457. 0.283 4.18 0.955 0.949 0.0960
The 101 with constant was best here.
rincarfc <- rincarfit %>%
forecast(h=7)
accuracy(rincarfc,rinc_test)
## # A tibble: 8 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima Test 3524. 3894. 3524. 5.71 5.71 NaN NaN -0.128
## 2 arima001 Test 56974. 58003. 56974. 93.2 93.2 NaN NaN 0.00531
## 3 arima001cons Test 2065. 2743. 2350. 3.31 3.80 NaN NaN -0.0812
## 4 arima1 Test 3524. 3894. 3524. 5.71 5.71 NaN NaN -0.128
## 5 arima100 Test NaN NaN NaN NaN NaN NaN NaN NA
## 6 arima100cons Test 3524. 3894. 3524. 5.71 5.71 NaN NaN -0.128
## 7 arima101 Test NaN NaN NaN NaN NaN NaN NaN NA
## 8 arima101cons Test 2941. 3377. 2941. 4.76 4.76 NaN NaN -0.127
The best one here was 001 with constant
rincar_cv <- rinc %>%
stretch_tsibble(.init = 5, .step = 1) %>%
relocate(Year, .id)
rincar_cv %>%
model(
arima100=ARIMA(RINC~0+pdq(1,0,0)),
arima001=ARIMA(RINC~0+pdq(0,0,1)),
arima101=ARIMA(RINC~0+pdq(1,0,1)),
arima100cons=ARIMA(RINC~1+pdq(1,0,0)),
arima001cons=ARIMA(RINC~1+pdq(0,0,1)),
arima101cons=ARIMA(RINC~1+pdq(1,0,1)),
arima=ARIMA(RINC),
arima1=ARIMA(RINC,stepwise = FALSE)
) %>%
forecast(h = 1) %>%
accuracy(rinc)
## Warning in wrap_arima(y, order = c(p, d, q), seasonal = list(order = c(P, :
## possible convergence problem: optim gave code = 1
## Warning in wrap_arima(y, order = c(p, d, q), seasonal = list(order = c(P, :
## possible convergence problem: optim gave code = 1
## Warning in wrap_arima(y, order = c(p, d, q), seasonal = list(order = c(P, :
## possible convergence problem: optim gave code = 1
## Warning in wrap_arima(y, order = c(p, d, q), seasonal = list(order = c(P, :
## possible convergence problem: optim gave code = 1
## Warning in wrap_arima(y, order = c(p, d, q), seasonal = list(order = c(P, :
## possible convergence problem: optim gave code = 1
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning: 34 errors (1 unique) encountered for arima100
## [34] non-stationary AR part from CSS
## Warning: 26 errors (1 unique) encountered for arima101
## [26] non-stationary AR part from CSS
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022
## # A tibble: 8 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima Test 726. 3324. 2670. 1.05 4.41 1.08 1.12 0.262
## 2 arima001 Test 30814. 31046. 30814. 51.5 51.5 12.5 10.4 -0.503
## 3 arima001cons Test 1264. 3364. 2833. 1.85 4.65 1.15 1.13 0.234
## 4 arima1 Test 633. 3397. 2747. 0.881 4.54 1.12 1.14 0.156
## 5 arima100 Test NaN NaN NaN NaN NaN NaN NaN NA
## 6 arima100cons Test 763. 2836. 2334. 1.11 3.86 0.947 0.954 0.159
## 7 arima101 Test -369. 3269. 2976. -0.768 5.24 1.21 1.10 -0.364
## 8 arima101cons Test 792. 3072. 2508. 1.15 4.14 1.02 1.03 -0.0689
The lowest one here with cross-validation was 100 with constant.
rincfin_cv <- rinc %>%
stretch_tsibble(.init = 4, .step = 1) %>%
relocate(Year, .id)
rincfin_cv %>%
model(
arima100cons=ARIMA(RINC~1+pdq(1,0,0)),
ets1=ETS(RINC~error("A")+trend("N")+season("N")),
tslm2=TSLM(RINC ~ trend(knots=c(1987,1991,2006,2009))),
ets=ETS(RINC),
arima=ARIMA(RINC,stepwise = FALSE)) %>%
forecast(h = 1) %>%
accuracy(rinc)
## Warning: prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## prediction from a rank-deficient fit may be misleading
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima Test 727. 3411. 2780. 1.05 4.60 1.13 1.15 0.169
## 2 arima100cons Test 749. 2794. 2274. 1.10 3.76 0.923 0.940 0.158
## 3 ets Test 166. 3068. 2508. 0.147 4.19 1.02 1.03 0.0818
## 4 ets1 Test 166. 3068. 2508. 0.147 4.19 1.02 1.03 0.0817
## 5 tslm2 Test -721. 3112. 2437. -1.21 4.10 0.989 1.05 0.215
The ARIMA(1,0,0) with a constant is the best model
rincfinfit <- rinc %>%
model(arima100cons=ARIMA(RINC~1+pdq(1,0,0)))
rincfinfc <- rincfinfit %>%
forecast(h=12)
rincfinfc %>%
autoplot(rinc)
gg_tsresiduals(rincfinfit)
These look weird asf
hilo(rincfinfc) %>%
select(-`80%`)
## # A tsibble: 12 x 5 [1Y]
## # Key: .model [1]
## .model Year RINC .mean `95%`
## <chr> <dbl> <dist> <dbl> <hilo>
## 1 arima100cons 2022 N(60863, 8419070) 60863. [55176.40, 66550.32]95
## 2 arima100cons 2023 N(60374, 1.3e+07) 60374. [53191.55, 67557.38]95
## 3 arima100cons 2024 N(59997, 1.6e+07) 59997. [52056.53, 67938.00]95
## 4 arima100cons 2025 N(59706, 1.8e+07) 59706. [51346.92, 68065.53]95
## 5 arima100cons 2026 N(59482, 1.9e+07) 59482. [50882.87, 68080.49]95
## 6 arima100cons 2027 N(59308, 2e+07) 59308. [50570.16, 68046.70]95
## 7 arima100cons 2028 N(59175, 2e+07) 59175. [50354.52, 67995.00]95
## 8 arima100cons 2029 N(59072, 2e+07) 59072. [50202.95, 67940.30]95
## 9 arima100cons 2030 N(58992, 2.1e+07) 58992. [50094.66, 67889.43]95
## 10 arima100cons 2031 N(58931, 2.1e+07) 58931. [50016.22, 67845.09]95
## 11 arima100cons 2032 N(58883, 2.1e+07) 58883. [49958.72, 67807.85]95
## 12 arima100cons 2033 N(58847, 2.1e+07) 58847. [49916.14, 67777.33]95
These are the forecasted values.