Diberikan data yang berisi pekerja di US dari 1939 Januari sampai 2019 September. Perkirakan jumlah pekerja sampai 3 tahun berikutnya!
#install.packages("fpp3")
library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.4 v tsibble 1.0.1
## v dplyr 1.0.7 v tsibbledata 0.3.0
## v tidyr 1.1.3 v feasts 0.2.2
## v lubridate 1.7.10 v fable 0.3.1
## v ggplot2 3.3.5
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
data("us_employment")
us_employment
## # A tsibble: 143,412 x 4 [1M]
## # Key: Series_ID [148]
## Month Series_ID Title Employed
## <mth> <chr> <chr> <dbl>
## 1 1939 Jan CEU0500000001 Total Private 25338
## 2 1939 Feb CEU0500000001 Total Private 25447
## 3 1939 Mar CEU0500000001 Total Private 25833
## 4 1939 Apr CEU0500000001 Total Private 25801
## 5 1939 May CEU0500000001 Total Private 26113
## 6 1939 Jun CEU0500000001 Total Private 26485
## 7 1939 Jul CEU0500000001 Total Private 26481
## 8 1939 Aug CEU0500000001 Total Private 26848
## 9 1939 Sep CEU0500000001 Total Private 27468
## 10 1939 Oct CEU0500000001 Total Private 27830
## # ... with 143,402 more rows
leisure <- us_employment %>%
filter(Title == "Leisure and Hospitality",
year(Month) > 2000) %>%
mutate(Employed = Employed/1000) %>%
select(Month, Employed)
autoplot(leisure, Employed) +
labs(title = "US employment: leisure and hospitality",
y="Number of people (millions)")
leisure %>%
gg_tsdisplay(difference(Employed, 12),
plot_type='partial', lag=36) +
labs(title="Seasonally differenced", y="")
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
leisure %>%
gg_tsdisplay(difference(Employed, 12) %>% difference(),
plot_type='partial', lag=36) +
labs(title = "Double differenced", y="")
## Warning: Removed 13 row(s) containing missing values (geom_path).
## Warning: Removed 13 rows containing missing values (geom_point).
\[ \text{ARIMA}(p,d,q)(P,D,Q)m\] \[ \text{ARIMA}(p,1,q)(P,1,Q)m\]
fit <- leisure %>%
model(
arima012011 = ARIMA(Employed ~ pdq(0,1,2) + PDQ(0,1,1)),
arima210011 = ARIMA(Employed ~ pdq(2,1,0) + PDQ(0,1,1)),
auto = ARIMA(Employed, stepwise = FALSE, approx = FALSE)
)
fit %>% pivot_longer(everything(), names_to = "Model name",
values_to = "Orders")
## # A mable: 3 x 2
## # Key: Model name [3]
## `Model name` Orders
## <chr> <model>
## 1 arima012011 <ARIMA(0,1,2)(0,1,1)[12]>
## 2 arima210011 <ARIMA(2,1,0)(0,1,1)[12]>
## 3 auto <ARIMA(2,1,0)(1,1,1)[12]>
glance(fit) %>% arrange(AICc) %>% select(.model:BIC)
## # A tibble: 3 x 6
## .model sigma2 log_lik AIC AICc BIC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto 0.00142 395. -780. -780. -763.
## 2 arima210011 0.00145 392. -776. -776. -763.
## 3 arima012011 0.00146 391. -775. -775. -761.
fit %>% select(auto) %>% gg_tsresiduals(lag=36)
augment(fit) %>% features(.innov, ljung_box, lag=24, dof=4)
## # A tibble: 3 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 arima012011 22.4 0.320
## 2 arima210011 18.9 0.527
## 3 auto 16.6 0.680
forecast(fit, h=36) %>%
filter(.model=='auto') %>%
autoplot(leisure) +
labs(title = "US employment: leisure and hospitality",
y="Number of people (millions)")