X-12-ARIMA.knit

Diberikan data yang berisi pekerja di US dari 1939 Januari sampai 2019 September. Perkirakan jumlah pekerja sampai 3 tahun berikutnya!

#install.packages("fpp3")
library(fpp3)

## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --

## v tibble      3.1.4      v tsibble     1.0.1 
## v dplyr       1.0.7      v tsibbledata 0.3.0 
## v tidyr       1.1.3      v feasts      0.2.2 
## v lubridate   1.7.10     v fable       0.3.1 
## v ggplot2     3.3.5

## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date()    masks base::date()
## x dplyr::filter()      masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval()  masks lubridate::interval()
## x dplyr::lag()         masks stats::lag()
## x tsibble::setdiff()   masks base::setdiff()
## x tsibble::union()     masks base::union()

data("us_employment")
us_employment

## # A tsibble: 143,412 x 4 [1M]
## # Key:       Series_ID [148]
##       Month Series_ID     Title         Employed
##       <mth> <chr>         <chr>            <dbl>
##  1 1939 Jan CEU0500000001 Total Private    25338
##  2 1939 Feb CEU0500000001 Total Private    25447
##  3 1939 Mar CEU0500000001 Total Private    25833
##  4 1939 Apr CEU0500000001 Total Private    25801
##  5 1939 May CEU0500000001 Total Private    26113
##  6 1939 Jun CEU0500000001 Total Private    26485
##  7 1939 Jul CEU0500000001 Total Private    26481
##  8 1939 Aug CEU0500000001 Total Private    26848
##  9 1939 Sep CEU0500000001 Total Private    27468
## 10 1939 Oct CEU0500000001 Total Private    27830
## # ... with 143,402 more rows

leisure <- us_employment %>%
  filter(Title == "Leisure and Hospitality",
         year(Month) > 2000) %>%
  mutate(Employed = Employed/1000) %>%
  select(Month, Employed)
autoplot(leisure, Employed) +
  labs(title = "US employment: leisure and hospitality",
       y="Number of people (millions)")

leisure %>%
  gg_tsdisplay(difference(Employed, 12),
               plot_type='partial', lag=36) +
  labs(title="Seasonally differenced", y="")

## Warning: Removed 12 row(s) containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_point).

leisure %>%
  gg_tsdisplay(difference(Employed, 12) %>% difference(),
               plot_type='partial', lag=36) +
  labs(title = "Double differenced", y="")

## Warning: Removed 13 row(s) containing missing values (geom_path).

## Warning: Removed 13 rows containing missing values (geom_point).

\[ \text{ARIMA}(p,d,q)(P,D,Q)m\] \[ \text{ARIMA}(p,1,q)(P,1,Q)m\]

fit <- leisure %>%
  model(
    arima012011 = ARIMA(Employed ~ pdq(0,1,2) + PDQ(0,1,1)),
    arima210011 = ARIMA(Employed ~ pdq(2,1,0) + PDQ(0,1,1)),
    auto = ARIMA(Employed, stepwise = FALSE, approx = FALSE)
  )
fit %>% pivot_longer(everything(), names_to = "Model name",
                     values_to = "Orders")

## # A mable: 3 x 2
## # Key:     Model name [3]
##   `Model name`                    Orders
##   <chr>                          <model>
## 1 arima012011  <ARIMA(0,1,2)(0,1,1)[12]>
## 2 arima210011  <ARIMA(2,1,0)(0,1,1)[12]>
## 3 auto         <ARIMA(2,1,0)(1,1,1)[12]>

glance(fit) %>% arrange(AICc) %>% select(.model:BIC)

## # A tibble: 3 x 6
##   .model       sigma2 log_lik   AIC  AICc   BIC
##   <chr>         <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 auto        0.00142    395. -780. -780. -763.
## 2 arima210011 0.00145    392. -776. -776. -763.
## 3 arima012011 0.00146    391. -775. -775. -761.

fit %>% select(auto) %>% gg_tsresiduals(lag=36)

augment(fit) %>% features(.innov, ljung_box, lag=24, dof=4)

## # A tibble: 3 x 3
##   .model      lb_stat lb_pvalue
##   <chr>         <dbl>     <dbl>
## 1 arima012011    22.4     0.320
## 2 arima210011    18.9     0.527
## 3 auto           16.6     0.680

forecast(fit, h=36) %>%
  filter(.model=='auto') %>%
  autoplot(leisure) +
  labs(title = "US employment: leisure and hospitality",
       y="Number of people (millions)")