Arima

library(tidyverse)
library(fpp3)
library(patchwork)
china <- tidyquant::tq_get("MKTGDPCNA646NWDB",
                           get = "economic.data",
                           from = "1960-01-01")
china
# A tibble: 62 × 3
   symbol           date              price
   <chr>            <date>            <dbl>
 1 MKTGDPCNA646NWDB 1960-01-01 59716467625.
 2 MKTGDPCNA646NWDB 1961-01-01 50056868958.
 3 MKTGDPCNA646NWDB 1962-01-01 47209359006.
 4 MKTGDPCNA646NWDB 1963-01-01 50706799903.
 5 MKTGDPCNA646NWDB 1964-01-01 59708343489.
 6 MKTGDPCNA646NWDB 1965-01-01 70436266147.
 7 MKTGDPCNA646NWDB 1966-01-01 76720285970.
 8 MKTGDPCNA646NWDB 1967-01-01 72881631327.
 9 MKTGDPCNA646NWDB 1968-01-01 70846535056.
10 MKTGDPCNA646NWDB 1969-01-01 79705906247.
# … with 52 more rows
china_tsbl <- china |> 
  mutate(date = year(date)) |> 
  as_tsibble(index = date, key = symbol)

china_tsbl |> 
  autoplot(price)

china_tsbl |> 
  features(price, unitroot_ndiffs)
# A tibble: 1 × 2
  symbol           ndiffs
  <chr>             <int>
1 MKTGDPCNA646NWDB      2
china_tsbl |> 
  autoplot(log(price) |> difference(lag = 1, differences = 2))

china_tsbl |> 
  autoplot(log(price) |> difference(1) |> difference(1))

# difference(lag = 1, differences = 2) es equivalente a hacer 
# difference(lag = 1) |> difference(lag = 1)
china_tsbl |> 
  gg_tsdisplay(price, 
               plot_type = "partial")

china_tsbl |> 
  gg_tsdisplay(log(price) |> difference(1, differences = 2), 
               plot_type = "partial")

china_fit <- china_tsbl |> 
  model(
    arima_121  = ARIMA(price ~ pdq(1,2,1) + PDQ(0,0,0)),
    arima_220  = ARIMA(price ~ pdq(2,2,0) + PDQ(0,0,0)),
    arima_022  = ARIMA(price ~ pdq(0,2,2) + PDQ(0,0,0)),
    auto_arima = ARIMA(price ~ PDQ(0,0,0), stepwise = FALSE, 
                       approximation = FALSE),
    arima_semiauto = ARIMA(price ~ pdq(0:1,2,0:3) + PDQ(0,0,0))
  )

china_fit
# A mable: 1 x 6
# Key:     symbol [1]
  symbol                arima_121    arima_220      arima_022     auto_arima
  <chr>                   <model>      <model>        <model>        <model>
1 MKTGDPCNA646NWDB <ARIMA(1,2,1)> <NULL model> <ARIMA(0,2,2)> <ARIMA(0,2,2)>
# … with 1 more variable: arima_semiauto <model>
china_fit |> 
  select(auto_arima) |> 
  report()
Series: price 
Model: ARIMA(0,2,2) 

Coefficients:
         ma1      ma2
      0.1928  -0.7670
s.e.  0.1429   0.1231

sigma^2 estimated as 1.221e+23:  log likelihood=-1680.32
AIC=3366.65   AICc=3367.08   BIC=3372.93
china_fit |> 
  select(arima_022) |> 
  report()
Series: price 
Model: ARIMA(0,2,2) 

Coefficients:
         ma1      ma2
      0.1928  -0.7670
s.e.  0.1429   0.1231

sigma^2 estimated as 1.221e+23:  log likelihood=-1680.32
AIC=3366.65   AICc=3367.08   BIC=3372.93
china_fit |>
  select(arima_semiauto) |> 
  report()
Series: price 
Model: ARIMA(0,2,2) 

Coefficients:
         ma1      ma2
      0.1928  -0.7670
s.e.  0.1429   0.1231

sigma^2 estimated as 1.221e+23:  log likelihood=-1680.32
AIC=3366.65   AICc=3367.08   BIC=3372.93
accuracy(china_fit) |> 
  arrange(MASE) |> 
  select(symbol:.type, MASE, MAPE, RMSE, MAE)
# A tibble: 5 × 7
  symbol           .model         .type       MASE   MAPE         RMSE       MAE
  <chr>            <chr>          <chr>      <dbl>  <dbl>        <dbl>     <dbl>
1 MKTGDPCNA646NWDB arima_022      Training   0.479  12.7       3.38e11   1.40e11
2 MKTGDPCNA646NWDB auto_arima     Training   0.479  12.7       3.38e11   1.40e11
3 MKTGDPCNA646NWDB arima_semiauto Training   0.479  12.7       3.38e11   1.40e11
4 MKTGDPCNA646NWDB arima_121      Training   0.539   7.21      3.81e11   1.58e11
5 MKTGDPCNA646NWDB arima_220      Training NaN     NaN       NaN       NaN      
china_fit |> 
  glance() |> 
  arrange(AICc)
# A tibble: 4 × 9
  symbol           .model       sigma2 log_lik   AIC  AICc   BIC ar_ro…¹ ma_ro…²
  <chr>            <chr>         <dbl>   <dbl> <dbl> <dbl> <dbl> <list>  <list> 
1 MKTGDPCNA646NWDB arima_022   1.22e23  -1680. 3367. 3367. 3373. <cpl>   <cpl>  
2 MKTGDPCNA646NWDB auto_arima  1.22e23  -1680. 3367. 3367. 3373. <cpl>   <cpl>  
3 MKTGDPCNA646NWDB arima_semi… 1.22e23  -1680. 3367. 3367. 3373. <cpl>   <cpl>  
4 MKTGDPCNA646NWDB arima_121   1.55e23  -1686. 3379. 3379. 3385. <cpl>   <cpl>  
# … with abbreviated variable names ¹​ar_roots, ²​ma_roots
china_fit |> 
  select(arima_121) |> 
  gg_tsresiduals()

china_fit |> 
  select(arima_022) |> 
  gg_tsresiduals()

china_fit |> 
  augment() |> 
  features(.innov, ljung_box, dof = 2, lag = 10)
# A tibble: 5 × 4
  symbol           .model         lb_stat lb_pvalue
  <chr>            <chr>            <dbl>     <dbl>
1 MKTGDPCNA646NWDB arima_022         13.1   0.108  
2 MKTGDPCNA646NWDB arima_121         21.4   0.00622
3 MKTGDPCNA646NWDB arima_220         NA    NA      
4 MKTGDPCNA646NWDB arima_semiauto    13.1   0.108  
5 MKTGDPCNA646NWDB auto_arima        13.1   0.108  
google_stock <- gafa_stock %>%
  filter(Symbol == "GOOG") %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE) %>% 
  mutate(diff_close = difference(Close))

google_2015 <- google_stock %>% filter(year(Date) == 2015)

google_2015 |> 
  autoplot(Close)

google_2015 |> 
  features(Close,unitroot_ndiffs)
# A tibble: 1 × 2
  Symbol ndiffs
  <chr>   <int>
1 GOOG        1
google_2015 |>
  gg_tsdisplay(Close |> difference(),plot_type = "partial")

elec_equip <- as_tsibble(fpp2::elecequip)

elec_dcmp <- elec_equip %>%
  model(STL(value ~ season(window="periodic"))) %>%
  components() %>%
  select(-.model) %>%
  as_tsibble()
elec_dcmp %>%
  autoplot(season_adjust)

elec_dcmp |> 
  features(season_adjust, unitroot_ndiffs)
# A tibble: 1 × 1
  ndiffs
   <int>
1      1
elec_dcmp |> 
  autoplot(season_adjust |> difference())

elec_dcmp |> 
  gg_tsdisplay(season_adjust |> difference(), plot_type = "partial")

elec_fit <- elec_dcmp |> 
  model(
    arima_311  = ARIMA(season_adjust ~ pdq(3,1,1) + PDQ(0,0,0)),
    arima_312  = ARIMA(season_adjust ~ pdq(3,1,2) + PDQ(0,0,0)),
    auto_arima = ARIMA(season_adjust ~ pdq(d = 1) + PDQ(0,0,0))
  )

elec_fit |> 
  select(auto_arima) |> 
  report()
Series: season_adjust 
Model: ARIMA(3,1,0) 

Coefficients:
          ar1      ar2     ar3
      -0.3418  -0.0426  0.3185
s.e.   0.0681   0.0725  0.0682

sigma^2 estimated as 9.639:  log likelihood=-493.8
AIC=995.6   AICc=995.81   BIC=1008.67
elec_fit |> 
  glance() |> 
  arrange(AICc)
# A tibble: 3 × 8
  .model     sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
  <chr>       <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
1 arima_311    9.58   -493.  995.  996. 1012. <cpl [3]> <cpl [1]>
2 auto_arima   9.64   -494.  996.  996. 1009. <cpl [3]> <cpl [0]>
3 arima_312    9.61   -493.  997.  997. 1017. <cpl [3]> <cpl [2]>
accuracy(elec_fit) |> 
  select(.model, .type, RMSE, MAE, MAPE, MASE)
# A tibble: 3 × 6
  .model     .type     RMSE   MAE  MAPE  MASE
  <chr>      <chr>    <dbl> <dbl> <dbl> <dbl>
1 arima_311  Training  3.05  2.36  2.48 0.288
2 arima_312  Training  3.05  2.36  2.48 0.288
3 auto_arima Training  3.07  2.39  2.51 0.292
p <- elec_dcmp |> 
  autoplot(season_adjust) +
  autolayer(elec_fit |> augment(), .fitted) +
  theme(legend.position = "none")

plotly::ggplotly(p)

1 SARIMA

eu_retail <- as_tsibble(fpp2::euretail)
eu_retail %>% autoplot(value) + ylab("Retail index")

eu_retail |> 
  gg_season(value)

eu_retail |> 
  model(stl = STL(value, robust = TRUE)) |> 
  components() |> 
  autoplot()

eu_retail |> 
  gg_tsdisplay(value, plot_type = "partial")

eu_retail |> 
  features(value, unitroot_nsdiffs)
# A tibble: 1 × 1
  nsdiffs
    <int>
1       1
eu_retail |> 
  features(value |> difference(lag = 4), unitroot_ndiffs)
# A tibble: 1 × 1
  ndiffs
   <int>
1      1

\(ARIMA(p,1,q) (P,1,Q)_4\)

eu_retail |> 
  autoplot(value |> difference(4))

eu_retail |> 
  autoplot(value |> difference(4) |> difference())

eu_retail |> 
  gg_tsdisplay(value |> difference(4) |> difference(), plot_type = "partial")

\(ARIMA(1,1,1) (1,1,1)_4\)

eu_fit <- eu_retail |> 
  model(
    sarima_111_111 = ARIMA(value ~ pdq(1,1,1) + PDQ(1,1,1))
  )

eu_fit |> 
  report()
Series: value 
Model: ARIMA(1,1,1)(1,1,1)[4] 

Coefficients:
         ar1      ma1     sar1     sma1
      0.8836  -0.5202  -0.0286  -0.9389
s.e.  0.1376   0.1715   0.1630   0.3874

sigma^2 estimated as 0.1549:  log likelihood=-30.1
AIC=70.2   AICc=71.33   BIC=80.59
eu_fit |> 
  gg_tsresiduals()

eu_fit <- eu_retail |> 
  model(
    sarima_111_111 = ARIMA(value ~ pdq(1,1,1) + PDQ(1,1,1)),
    auto_arima = ARIMA(value)
  )

eu_fit |> 
  select(auto_arima) |> 
  report()
Series: value 
Model: ARIMA(0,1,3)(0,1,1)[4] 

Coefficients:
         ma1     ma2     ma3     sma1
      0.2630  0.3694  0.4200  -0.6636
s.e.  0.1237  0.1255  0.1294   0.1545

sigma^2 estimated as 0.156:  log likelihood=-28.63
AIC=67.26   AICc=68.39   BIC=77.65
eu_fit |> 
  select(auto_arima) |>
  gg_tsresiduals()                             

eu_fit |> 
  glance() |> 
  arrange(AICc) |> 
  relocate(.model,AICc)
# A tibble: 2 × 8
  .model          AICc sigma2 log_lik   AIC   BIC ar_roots  ma_roots 
  <chr>          <dbl>  <dbl>   <dbl> <dbl> <dbl> <list>    <list>   
1 auto_arima      68.4  0.156   -28.6  67.3  77.6 <cpl [0]> <cpl [7]>
2 sarima_111_111  71.3  0.155   -30.1  70.2  80.6 <cpl [5]> <cpl [5]>

0.000000000000

accuracy(eu_fit) |> 
  arrange(MAE)
# A tibble: 2 × 10
  .model         .type         ME  RMSE   MAE     MPE  MAPE  MASE RMSSE     ACF1
  <chr>          <chr>      <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>    <dbl>
1 sarima_111_111 Training -0.0260 0.365 0.275 -0.0243 0.285 0.224 0.248 -0.0406 
2 auto_arima     Training -0.0297 0.366 0.279 -0.0280 0.289 0.227 0.249  0.00646

2 Venta de medicamentos

h02 <- PBS %>%
  filter(ATC2 == "H02") %>%
  summarise(Cost = sum(Cost)/1e6)

h02 %>%
  mutate(log(Cost)) %>%
  gather() %>%
  ggplot(aes(x = Month, y = value)) +
  geom_line() +
  facet_grid(key ~ ., scales = "free_y") +
  xlab("Year") + ylab("") +
  ggtitle("Cortecosteroid drug scripts (H02)")

p1 <- h02 |> 
  autoplot(log(Cost) |> difference(12))

p2 <- h02 |> 
  autoplot(log(Cost) |> difference(1))

p3 <- h02 |> 
  autoplot(log(Cost) |> difference(12) |> difference())

p1 / p2 / p3

h02 %>% 
  gg_tsdisplay(difference(log(Cost), 12), 
               plot_type='partial', lag_max = 48)

Diferenciación no estacional:

\[ y_t' = y_t - y_{t-1} \]

# var |> difference(12) |> difference(1)

\[ ARIMA(3,0,9)(2,1,0)_{12} \]

h02 |> 
  gg_tsdisplay(log(Cost) |> difference(12), plot_type = "partial", lag_max = 48)

h02 |> 
  gg_tsdisplay(log(Cost) |> difference(12) |> difference(), plot_type = "partial", lag_max = 48)

\[ ARIMA(2,1,1:2)(1:2,1,2:3)_{12} \]