library(tidyverse)
library(fpp3)
library(patchwork)Arima
china <- tidyquant::tq_get("MKTGDPCNA646NWDB",
get = "economic.data",
from = "1960-01-01")
china# A tibble: 62 × 3
symbol date price
<chr> <date> <dbl>
1 MKTGDPCNA646NWDB 1960-01-01 59716467625.
2 MKTGDPCNA646NWDB 1961-01-01 50056868958.
3 MKTGDPCNA646NWDB 1962-01-01 47209359006.
4 MKTGDPCNA646NWDB 1963-01-01 50706799903.
5 MKTGDPCNA646NWDB 1964-01-01 59708343489.
6 MKTGDPCNA646NWDB 1965-01-01 70436266147.
7 MKTGDPCNA646NWDB 1966-01-01 76720285970.
8 MKTGDPCNA646NWDB 1967-01-01 72881631327.
9 MKTGDPCNA646NWDB 1968-01-01 70846535056.
10 MKTGDPCNA646NWDB 1969-01-01 79705906247.
# … with 52 more rows
china_tsbl <- china |>
mutate(date = year(date)) |>
as_tsibble(index = date, key = symbol)
china_tsbl |>
autoplot(price)china_tsbl |>
features(price, unitroot_ndiffs)# A tibble: 1 × 2
symbol ndiffs
<chr> <int>
1 MKTGDPCNA646NWDB 2
china_tsbl |>
autoplot(log(price) |> difference(lag = 1, differences = 2))china_tsbl |>
autoplot(log(price) |> difference(1) |> difference(1))# difference(lag = 1, differences = 2) es equivalente a hacer
# difference(lag = 1) |> difference(lag = 1)china_tsbl |>
gg_tsdisplay(price,
plot_type = "partial")china_tsbl |>
gg_tsdisplay(log(price) |> difference(1, differences = 2),
plot_type = "partial")china_fit <- china_tsbl |>
model(
arima_121 = ARIMA(price ~ pdq(1,2,1) + PDQ(0,0,0)),
arima_220 = ARIMA(price ~ pdq(2,2,0) + PDQ(0,0,0)),
arima_022 = ARIMA(price ~ pdq(0,2,2) + PDQ(0,0,0)),
auto_arima = ARIMA(price ~ PDQ(0,0,0), stepwise = FALSE,
approximation = FALSE),
arima_semiauto = ARIMA(price ~ pdq(0:1,2,0:3) + PDQ(0,0,0))
)
china_fit# A mable: 1 x 6
# Key: symbol [1]
symbol arima_121 arima_220 arima_022 auto_arima
<chr> <model> <model> <model> <model>
1 MKTGDPCNA646NWDB <ARIMA(1,2,1)> <NULL model> <ARIMA(0,2,2)> <ARIMA(0,2,2)>
# … with 1 more variable: arima_semiauto <model>
china_fit |>
select(auto_arima) |>
report()Series: price
Model: ARIMA(0,2,2)
Coefficients:
ma1 ma2
0.1928 -0.7670
s.e. 0.1429 0.1231
sigma^2 estimated as 1.221e+23: log likelihood=-1680.32
AIC=3366.65 AICc=3367.08 BIC=3372.93
china_fit |>
select(arima_022) |>
report()Series: price
Model: ARIMA(0,2,2)
Coefficients:
ma1 ma2
0.1928 -0.7670
s.e. 0.1429 0.1231
sigma^2 estimated as 1.221e+23: log likelihood=-1680.32
AIC=3366.65 AICc=3367.08 BIC=3372.93
china_fit |>
select(arima_semiauto) |>
report()Series: price
Model: ARIMA(0,2,2)
Coefficients:
ma1 ma2
0.1928 -0.7670
s.e. 0.1429 0.1231
sigma^2 estimated as 1.221e+23: log likelihood=-1680.32
AIC=3366.65 AICc=3367.08 BIC=3372.93
accuracy(china_fit) |>
arrange(MASE) |>
select(symbol:.type, MASE, MAPE, RMSE, MAE)# A tibble: 5 × 7
symbol .model .type MASE MAPE RMSE MAE
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 MKTGDPCNA646NWDB arima_022 Training 0.479 12.7 3.38e11 1.40e11
2 MKTGDPCNA646NWDB auto_arima Training 0.479 12.7 3.38e11 1.40e11
3 MKTGDPCNA646NWDB arima_semiauto Training 0.479 12.7 3.38e11 1.40e11
4 MKTGDPCNA646NWDB arima_121 Training 0.539 7.21 3.81e11 1.58e11
5 MKTGDPCNA646NWDB arima_220 Training NaN NaN NaN NaN
china_fit |>
glance() |>
arrange(AICc)# A tibble: 4 × 9
symbol .model sigma2 log_lik AIC AICc BIC ar_ro…¹ ma_ro…²
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 MKTGDPCNA646NWDB arima_022 1.22e23 -1680. 3367. 3367. 3373. <cpl> <cpl>
2 MKTGDPCNA646NWDB auto_arima 1.22e23 -1680. 3367. 3367. 3373. <cpl> <cpl>
3 MKTGDPCNA646NWDB arima_semi… 1.22e23 -1680. 3367. 3367. 3373. <cpl> <cpl>
4 MKTGDPCNA646NWDB arima_121 1.55e23 -1686. 3379. 3379. 3385. <cpl> <cpl>
# … with abbreviated variable names ¹ar_roots, ²ma_roots
china_fit |>
select(arima_121) |>
gg_tsresiduals()china_fit |>
select(arima_022) |>
gg_tsresiduals()china_fit |>
augment() |>
features(.innov, ljung_box, dof = 2, lag = 10)# A tibble: 5 × 4
symbol .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 MKTGDPCNA646NWDB arima_022 13.1 0.108
2 MKTGDPCNA646NWDB arima_121 21.4 0.00622
3 MKTGDPCNA646NWDB arima_220 NA NA
4 MKTGDPCNA646NWDB arima_semiauto 13.1 0.108
5 MKTGDPCNA646NWDB auto_arima 13.1 0.108
google_stock <- gafa_stock %>%
filter(Symbol == "GOOG") %>%
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE) %>%
mutate(diff_close = difference(Close))
google_2015 <- google_stock %>% filter(year(Date) == 2015)
google_2015 |>
autoplot(Close)google_2015 |>
features(Close,unitroot_ndiffs)# A tibble: 1 × 2
Symbol ndiffs
<chr> <int>
1 GOOG 1
google_2015 |>
gg_tsdisplay(Close |> difference(),plot_type = "partial")elec_equip <- as_tsibble(fpp2::elecequip)
elec_dcmp <- elec_equip %>%
model(STL(value ~ season(window="periodic"))) %>%
components() %>%
select(-.model) %>%
as_tsibble()
elec_dcmp %>%
autoplot(season_adjust)elec_dcmp |>
features(season_adjust, unitroot_ndiffs)# A tibble: 1 × 1
ndiffs
<int>
1 1
elec_dcmp |>
autoplot(season_adjust |> difference())elec_dcmp |>
gg_tsdisplay(season_adjust |> difference(), plot_type = "partial")elec_fit <- elec_dcmp |>
model(
arima_311 = ARIMA(season_adjust ~ pdq(3,1,1) + PDQ(0,0,0)),
arima_312 = ARIMA(season_adjust ~ pdq(3,1,2) + PDQ(0,0,0)),
auto_arima = ARIMA(season_adjust ~ pdq(d = 1) + PDQ(0,0,0))
)
elec_fit |>
select(auto_arima) |>
report()Series: season_adjust
Model: ARIMA(3,1,0)
Coefficients:
ar1 ar2 ar3
-0.3418 -0.0426 0.3185
s.e. 0.0681 0.0725 0.0682
sigma^2 estimated as 9.639: log likelihood=-493.8
AIC=995.6 AICc=995.81 BIC=1008.67
elec_fit |>
glance() |>
arrange(AICc)# A tibble: 3 × 8
.model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 arima_311 9.58 -493. 995. 996. 1012. <cpl [3]> <cpl [1]>
2 auto_arima 9.64 -494. 996. 996. 1009. <cpl [3]> <cpl [0]>
3 arima_312 9.61 -493. 997. 997. 1017. <cpl [3]> <cpl [2]>
accuracy(elec_fit) |>
select(.model, .type, RMSE, MAE, MAPE, MASE)# A tibble: 3 × 6
.model .type RMSE MAE MAPE MASE
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 arima_311 Training 3.05 2.36 2.48 0.288
2 arima_312 Training 3.05 2.36 2.48 0.288
3 auto_arima Training 3.07 2.39 2.51 0.292
p <- elec_dcmp |>
autoplot(season_adjust) +
autolayer(elec_fit |> augment(), .fitted) +
theme(legend.position = "none")
plotly::ggplotly(p)1 SARIMA
eu_retail <- as_tsibble(fpp2::euretail)
eu_retail %>% autoplot(value) + ylab("Retail index")eu_retail |>
gg_season(value)eu_retail |>
model(stl = STL(value, robust = TRUE)) |>
components() |>
autoplot()eu_retail |>
gg_tsdisplay(value, plot_type = "partial")eu_retail |>
features(value, unitroot_nsdiffs)# A tibble: 1 × 1
nsdiffs
<int>
1 1
eu_retail |>
features(value |> difference(lag = 4), unitroot_ndiffs)# A tibble: 1 × 1
ndiffs
<int>
1 1
\(ARIMA(p,1,q) (P,1,Q)_4\)
eu_retail |>
autoplot(value |> difference(4))eu_retail |>
autoplot(value |> difference(4) |> difference())eu_retail |>
gg_tsdisplay(value |> difference(4) |> difference(), plot_type = "partial")\(ARIMA(1,1,1) (1,1,1)_4\)
eu_fit <- eu_retail |>
model(
sarima_111_111 = ARIMA(value ~ pdq(1,1,1) + PDQ(1,1,1))
)
eu_fit |>
report()Series: value
Model: ARIMA(1,1,1)(1,1,1)[4]
Coefficients:
ar1 ma1 sar1 sma1
0.8836 -0.5202 -0.0286 -0.9389
s.e. 0.1376 0.1715 0.1630 0.3874
sigma^2 estimated as 0.1549: log likelihood=-30.1
AIC=70.2 AICc=71.33 BIC=80.59
eu_fit |>
gg_tsresiduals()eu_fit <- eu_retail |>
model(
sarima_111_111 = ARIMA(value ~ pdq(1,1,1) + PDQ(1,1,1)),
auto_arima = ARIMA(value)
)
eu_fit |>
select(auto_arima) |>
report()Series: value
Model: ARIMA(0,1,3)(0,1,1)[4]
Coefficients:
ma1 ma2 ma3 sma1
0.2630 0.3694 0.4200 -0.6636
s.e. 0.1237 0.1255 0.1294 0.1545
sigma^2 estimated as 0.156: log likelihood=-28.63
AIC=67.26 AICc=68.39 BIC=77.65
eu_fit |>
select(auto_arima) |>
gg_tsresiduals() eu_fit |>
glance() |>
arrange(AICc) |>
relocate(.model,AICc)# A tibble: 2 × 8
.model AICc sigma2 log_lik AIC BIC ar_roots ma_roots
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 auto_arima 68.4 0.156 -28.6 67.3 77.6 <cpl [0]> <cpl [7]>
2 sarima_111_111 71.3 0.155 -30.1 70.2 80.6 <cpl [5]> <cpl [5]>
0.000000000000
accuracy(eu_fit) |>
arrange(MAE)# A tibble: 2 × 10
.model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 sarima_111_111 Training -0.0260 0.365 0.275 -0.0243 0.285 0.224 0.248 -0.0406
2 auto_arima Training -0.0297 0.366 0.279 -0.0280 0.289 0.227 0.249 0.00646
2 Venta de medicamentos
h02 <- PBS %>%
filter(ATC2 == "H02") %>%
summarise(Cost = sum(Cost)/1e6)
h02 %>%
mutate(log(Cost)) %>%
gather() %>%
ggplot(aes(x = Month, y = value)) +
geom_line() +
facet_grid(key ~ ., scales = "free_y") +
xlab("Year") + ylab("") +
ggtitle("Cortecosteroid drug scripts (H02)")p1 <- h02 |>
autoplot(log(Cost) |> difference(12))
p2 <- h02 |>
autoplot(log(Cost) |> difference(1))
p3 <- h02 |>
autoplot(log(Cost) |> difference(12) |> difference())
p1 / p2 / p3h02 %>%
gg_tsdisplay(difference(log(Cost), 12),
plot_type='partial', lag_max = 48)Diferenciación no estacional:
\[ y_t' = y_t - y_{t-1} \]
# var |> difference(12) |> difference(1)\[ ARIMA(3,0,9)(2,1,0)_{12} \]
h02 |>
gg_tsdisplay(log(Cost) |> difference(12), plot_type = "partial", lag_max = 48)h02 |>
gg_tsdisplay(log(Cost) |> difference(12) |> difference(), plot_type = "partial", lag_max = 48)\[ ARIMA(2,1,1:2)(1:2,1,2:3)_{12} \]