Licença
This work is licensed under the Creative Commons
Attribution-ShareAlike 4.0 International License. To view a copy of this
license, visit http://creativecommons.org/licenses/by-sa/4.0/ or send a
letter to Creative Commons, PO Box 1866, Mountain View, CA 94042,
USA.
Introdução
Os dados vem do pacote datasets, AirPassengers, seguindo o exemplo da
vignette do pacote modeltime (Dancho, 2023).
Carregar pacotes
library(xgboost)
library(tidymodels)
library(modeltime)
library(modeltime.ensemble)
library(tidyverse)
library(lubridate)
library(timetk)
library(zoo)
# This toggles plots from plotly (interactive) to ggplot (static)
interactive <- FALSE
Passo 1: dados
# Load the data
data(AirPassengers) # ts format
#
df<-data.frame(date = as.Date(as.yearmon(time(AirPassengers))),
value=as.matrix(AirPassengers))
Visualização de
dados
# visualize
df %>%
plot_time_series(date, value, .interactive = interactive)

Passo 2: separar
amostras treino x teste
# Split the data into train and test sets
# Split Data 80/20
splits <- initial_time_split(df, prop = 0.8) # 80:20
#
Passo 3: modelagem
Model 1: Auto ARIMA
(Modeltime)
## Model 1: auto_arima
model_fit_arima_no_boost <- arima_reg() %>%
set_engine(engine = "auto_arima") %>%
fit(value ~ date, data = training(splits))
# frequency = 12 observations per 1 year
Model 2: Boosted Auto
ARIMA (Modeltime)
## Model 2: arima_boost
model_fit_arima_boosted <- arima_boost(
min_n = 2,
learn_rate = 0.015
) %>%
set_engine(engine = "auto_arima_xgboost") %>%
fit(value ~ date + as.numeric(date) + factor(month(date, label = TRUE), ordered = F),
data = training(splits))
# frequency = 12 observations per 1 year
Model 3: Exponential
Smoothing - ETS (Modeltime)
## Model 3: ets
model_fit_ets <- exp_smoothing() %>%
set_engine(engine = "ets") %>%
fit(value ~ date, data = training(splits))
#> frequency = 12 observations per 1 year
Model 4: Prophet
(Modeltime)
## Model 4: prophet
model_fit_prophet <- prophet_reg() %>%
set_engine(engine = "prophet") %>%
fit(value ~ date, data = training(splits))
#> Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
#> Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
Model 5: Linear
Regression (Parsnip)
# Model 5: lm
model_fit_lm <- linear_reg() %>%
set_engine("lm") %>%
fit(value ~ as.numeric(date) + factor(month(date, label = TRUE), ordered = FALSE),
data = training(splits))
Model 6: MARS
(Workflow)
# Multivariate Adaptive Regression Spline model
# Model 6: earth ----
model_spec_mars <- mars(mode = "regression") %>%
set_engine("earth")
recipe_spec <- recipe(value ~ date, data = training(splits)) %>%
step_date(date, features = "month", ordinal = FALSE) %>%
step_mutate(date_num = as.numeric(date)) %>%
step_normalize(date_num) %>%
step_rm(date)
library(earth)
wflw_fit_mars <- workflow() %>%
add_recipe(recipe_spec) %>%
add_model(model_spec_mars) %>%
fit(training(splits))
Model 7: Theta
method
# Model 7 - Theta method
model_fit_theta <- exp_smoothing() %>%
set_engine(engine = "theta") %>%
fit(value ~ date, data = training(splits))
# frequency = 12 observations per 1 year
Passo 4: Add fitted
models to a Model Table
models_tbl <- modeltime_table(
model_fit_arima_no_boost,
model_fit_arima_boosted,
model_fit_ets,
model_fit_prophet,
model_fit_lm,
wflw_fit_mars,
model_fit_theta
)
models_tbl
Passo 5: Calibrate the
model to a testing set
calibration_tbl <- models_tbl %>%
modeltime_calibrate(new_data = testing(splits))
calibration_tbl
Passo 6: Testing Set
Forecast & Accuracy Evaluation
calibration_tbl %>%
modeltime_forecast(
new_data = testing(splits),
actual_data = df
) %>%
plot_modeltime_forecast(
.legend_max_width = 25, # For mobile screens
.interactive = interactive
)

Accuracy Metrics
calibration_tbl %>%
modeltime_accuracy() %>%
table_modeltime_accuracy(
.interactive = FALSE
)
Accuracy Table |
.model_id |
.model_desc |
.type |
mae |
mape |
mase |
smape |
rmse |
rsq |
1 |
ARIMA(1,1,0)(0,1,0)[12] |
Test |
28.55 |
6.12 |
0.62 |
6.35 |
35.08 |
0.93 |
2 |
ARIMA(1,1,0)(0,1,0)[12] W/ XGBOOST ERRORS |
Test |
28.14 |
6.04 |
0.61 |
6.26 |
34.65 |
0.93 |
3 |
ETS(M,AD,M) |
Test |
29.98 |
6.53 |
0.65 |
6.68 |
35.81 |
0.89 |
4 |
PROPHET |
Test |
33.84 |
7.70 |
0.74 |
7.62 |
41.31 |
0.91 |
5 |
LM |
Test |
34.31 |
7.08 |
0.75 |
7.39 |
47.55 |
0.95 |
6 |
EARTH |
Test |
57.51 |
11.88 |
1.26 |
12.92 |
73.02 |
0.86 |
7 |
THETA METHOD |
Test |
90.63 |
22.70 |
1.98 |
19.86 |
101.55 |
0.20 |
Refit to Full Dataset
& Forecast Forward
refit_tbl <- calibration_tbl %>%
modeltime_refit(data = df)
refit_tbl %>%
modeltime_forecast(h = "3 years", actual_data = df) %>%
plot_modeltime_forecast(
.legend_max_width = 25, # For mobile screens
.interactive = interactive
)

Model 8: Ensemble
Faremos a rotina de combinação de forecasts para os modelos auto
ARIMA, Prophet, Elastic Net e ETS.
# https://business-science.github.io/modeltime.ensemble/articles/getting-started-with-modeltime-ensemble.html
# Recipe
recipe_spec <- recipe(value ~ date, training(splits)) %>%
step_timeseries_signature(date) %>%
step_rm(matches("(.iso$)|(.xts$)")) %>%
step_normalize(matches("(index.num$)|(_year$)")) %>%
step_dummy(all_nominal()) %>%
step_fourier(date, K = 1, period = 12)
recipe_spec %>% prep() %>% juice()
# Model 1 auto ARIMA
model_spec_arima <- arima_reg() %>%
set_engine("auto_arima")
wflw_fit_arima <- workflow() %>%
add_model(model_spec_arima) %>%
add_recipe(recipe_spec %>% step_rm(all_predictors(), -date)) %>%
fit(training(splits))
# Model 2 - Prophet
model_spec_prophet <- prophet_reg() %>%
set_engine("prophet")
wflw_fit_prophet <- workflow() %>%
add_model(model_spec_prophet) %>%
add_recipe(recipe_spec %>% step_rm(all_predictors(), -date)) %>%
fit(training(splits))
# Model 3 - Elastic Net
model_spec_glmnet <- linear_reg(
mixture = 0.9,
penalty = 4.36e-6 ) %>%
set_engine("glmnet")
wflw_fit_glmnet <- workflow() %>%
add_model(model_spec_glmnet) %>%
add_recipe(recipe_spec %>% step_rm(date)) %>%
fit(training(splits))
# Model 4 - ETS
model_spec_ets <- exp_smoothing() %>%
set_engine(engine = "ets")
wflw_fit_ets <- workflow() %>%
add_model(model_spec_ets) %>%
add_recipe(recipe_spec %>%
step_rm(all_predictors(), -date)) %>%
fit(training(splits))
Step 1 - Create a
Modeltime Table
airpass_models <- modeltime_table(
wflw_fit_arima,
wflw_fit_prophet,
wflw_fit_glmnet,
wflw_fit_ets
)
airpass_models
Step 2 - Make an
Ensemble
ensemble_fit <- airpass_models %>%
ensemble_average(type = "mean")
ensemble_fit
## ── Modeltime Ensemble ───────────────────────────────────────────
## Ensemble of 4 Models (MEAN)
##
## # Modeltime Table
## # A tibble: 4 × 3
## .model_id .model .model_desc
## <int> <list> <chr>
## 1 1 <workflow> ARIMA(1,1,0)(0,1,0)[12]
## 2 2 <workflow> PROPHET
## 3 3 <workflow> GLMNET
## 4 4 <workflow> ETS(M,AD,M)
Step 3 - Forecast!
(the Test Data)
# Calibration
calibration_tbl <- modeltime_table(
ensemble_fit
) %>%
modeltime_calibrate(testing(splits))
# Forecast vs Test Set
calibration_tbl %>%
modeltime_forecast(
new_data = testing(splits),
actual_data = df
) %>%
plot_modeltime_forecast(.interactive = T)
Step 4 - Refit on
Full Data & Forecast Future
calibration_tbl %>%
modeltime_accuracy() %>%
table_modeltime_accuracy(
.interactive = T
)
refit_tbl <- calibration_tbl %>%
modeltime_refit(df)
refit_tbl %>%
modeltime_forecast(
h = "2 years",
actual_data = df
) %>%
plot_modeltime_forecast(.interactive = T)
---
title: "Exemplo Séries Temporais: modeltime - AirPassengers"
author: "Adriano Marcos Rodrigues Figueiredo, *e-mail: adriano.figueiredo@ufms.br*"
date: "`r format(Sys.Date(), '%d %B %Y')`"
output:
  html_document:
    code_download: yes
    theme: default
    number_sections: yes
    toc: yes
    toc_float: no
    df_print: paged
    fig_caption: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Licença {#Licença .unnumbered}

This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 International License. To view a copy of this license, visit <http://creativecommons.org/licenses/by-sa/4.0/> or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

![License: CC BY-SA 4.0](https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-sa.png){width="25%"}

# Citação {#Citação .unnumbered}

Sugestão de citação: FIGUEIREDO, Adriano Marcos Rodrigues. Exemplo Séries Temporais: modeltime - AirPassengers. Campo Grande-MS, Brasil: RStudio/Rpubs, 2023. Disponível em <https://rpubs.com/amrofi/modeltime_airpassengers>.

# Introdução {#Introdução .unnumbered}

Os dados vem do pacote datasets, AirPassengers, seguindo o exemplo da vignette do pacote modeltime (Dancho, 2023).

## Carregar pacotes

```{r pacotes, message=FALSE, warning=FALSE}
library(xgboost)
library(tidymodels)
library(modeltime)
library(modeltime.ensemble)
library(tidyverse)
library(lubridate)
library(timetk)
library(zoo)
# This toggles plots from plotly (interactive) to ggplot (static)
interactive <- FALSE
```

# Passo 1: dados

```{r passo1, message=FALSE, warning=FALSE}
# Load the data
data(AirPassengers)  # ts format
#
df<-data.frame(date = as.Date(as.yearmon(time(AirPassengers))),
               value=as.matrix(AirPassengers))
```

## Visualização de dados

```{r visualdados, message=FALSE, warning=FALSE}

# visualize
df %>%
  plot_time_series(date, value, .interactive = interactive)
```

# Passo 2: separar amostras treino x teste

```{r passo2, message=FALSE, warning=FALSE}

# Split the data into train and test sets
# Split Data 80/20
splits <- initial_time_split(df, prop = 0.8)  # 80:20
#
```

# Passo 3: modelagem

## Model 1: Auto ARIMA (Modeltime) 

```{r mod1, message=FALSE, warning=FALSE}

## Model 1: auto_arima
model_fit_arima_no_boost <- arima_reg() %>%
  set_engine(engine = "auto_arima") %>%
  fit(value ~ date, data = training(splits))
# frequency = 12 observations per 1 year

```

## Model 2: Boosted Auto ARIMA (Modeltime)

```{r mod2, message=FALSE, warning=FALSE}
## Model 2: arima_boost
model_fit_arima_boosted <- arima_boost(
  min_n = 2,
  learn_rate = 0.015
) %>%
  set_engine(engine = "auto_arima_xgboost") %>%
  fit(value ~ date + as.numeric(date) + factor(month(date, label = TRUE), ordered = F),
      data = training(splits))
# frequency = 12 observations per 1 year
```

## Model 3: Exponential Smoothing - ETS (Modeltime)

```{r mod3, message=FALSE, warning=FALSE}
## Model 3: ets
model_fit_ets <- exp_smoothing() %>%
  set_engine(engine = "ets") %>%
  fit(value ~ date, data = training(splits))
#> frequency = 12 observations per 1 year
```

## Model 4: Prophet (Modeltime) 

```{r mod4, message=FALSE, warning=FALSE}

## Model 4: prophet 
model_fit_prophet <- prophet_reg() %>%
  set_engine(engine = "prophet") %>%
  fit(value ~ date, data = training(splits))
#> Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
#> Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
```



## Model 5: Linear Regression (Parsnip)

```{r mod5, message=FALSE, warning=FALSE}
# Model 5: lm 
model_fit_lm <- linear_reg() %>%
  set_engine("lm") %>%
  fit(value ~ as.numeric(date) + factor(month(date, label = TRUE), ordered = FALSE),
      data = training(splits))
```


## Model 6: MARS (Workflow)

```{r mod6, message=FALSE, warning=FALSE}
# Multivariate Adaptive Regression Spline model 
# Model 6: earth ----
model_spec_mars <- mars(mode = "regression") %>%
  set_engine("earth") 

recipe_spec <- recipe(value ~ date, data = training(splits)) %>%
  step_date(date, features = "month", ordinal = FALSE) %>%
  step_mutate(date_num = as.numeric(date)) %>%
  step_normalize(date_num) %>%
  step_rm(date)
library(earth)
wflw_fit_mars <- workflow() %>%
  add_recipe(recipe_spec) %>%
  add_model(model_spec_mars) %>%
  fit(training(splits))
```

## Model 7: Theta method

```{r mod7, message=FALSE, warning=FALSE}
# Model 7 - Theta method
model_fit_theta <- exp_smoothing() %>%
  set_engine(engine = "theta") %>%
  fit(value ~ date, data = training(splits))
# frequency = 12 observations per 1 year
```



# Passo 4: Add fitted models to a Model Table

```{r fitted_models, message=FALSE, warning=FALSE}
models_tbl <- modeltime_table(
  model_fit_arima_no_boost,
  model_fit_arima_boosted,
  model_fit_ets,
  model_fit_prophet,
  model_fit_lm,
  wflw_fit_mars,
  model_fit_theta
)

models_tbl
```

# Passo 5: Calibrate the model to a testing set

```{r calibrate, message=FALSE, warning=FALSE}
calibration_tbl <- models_tbl %>%
  modeltime_calibrate(new_data = testing(splits))

calibration_tbl
```

# Passo 6: Testing Set Forecast & Accuracy Evaluation

```{r forecast, message=FALSE, warning=FALSE}
calibration_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = df
  ) %>%
  plot_modeltime_forecast(
    .legend_max_width = 25, # For mobile screens
    .interactive      = interactive
  )
```

## Accuracy Metrics

```{r accuracy, message=FALSE, warning=FALSE}
calibration_tbl %>%
  modeltime_accuracy() %>%
  table_modeltime_accuracy(
    .interactive = FALSE
  )
```

## Refit to Full Dataset & Forecast Forward

```{r refit, message=FALSE, warning=FALSE}
refit_tbl <- calibration_tbl %>%
  modeltime_refit(data = df)

refit_tbl %>%
  modeltime_forecast(h = "3 years", actual_data = df) %>%
  plot_modeltime_forecast(
    .legend_max_width = 25, # For mobile screens
    .interactive      = interactive
  )
```  

# Model 8: Ensemble

Faremos a rotina de combinação de forecasts para os modelos auto ARIMA, Prophet, Elastic Net e ETS. 

```{r mod8, message=FALSE, warning=FALSE}
# https://business-science.github.io/modeltime.ensemble/articles/getting-started-with-modeltime-ensemble.html
# Recipe
recipe_spec <- recipe(value ~ date, training(splits)) %>%
    step_timeseries_signature(date) %>%
    step_rm(matches("(.iso$)|(.xts$)")) %>%
    step_normalize(matches("(index.num$)|(_year$)")) %>%
    step_dummy(all_nominal()) %>%
    step_fourier(date, K = 1, period = 12)

recipe_spec %>% prep() %>% juice()
# Model 1 auto ARIMA
model_spec_arima <- arima_reg() %>%
    set_engine("auto_arima")

wflw_fit_arima <- workflow() %>%
    add_model(model_spec_arima) %>%
    add_recipe(recipe_spec %>% step_rm(all_predictors(), -date)) %>%
    fit(training(splits))

# Model 2 - Prophet
model_spec_prophet <- prophet_reg() %>%
    set_engine("prophet")

wflw_fit_prophet <- workflow() %>%
    add_model(model_spec_prophet) %>%
    add_recipe(recipe_spec %>% step_rm(all_predictors(), -date)) %>%
    fit(training(splits))

# Model 3 - Elastic Net
model_spec_glmnet <- linear_reg(
    mixture = 0.9,
    penalty = 4.36e-6 ) %>%
    set_engine("glmnet")

wflw_fit_glmnet <- workflow() %>%
    add_model(model_spec_glmnet) %>%
    add_recipe(recipe_spec %>% step_rm(date)) %>%
    fit(training(splits))

# Model 4 - ETS
model_spec_ets <- exp_smoothing() %>%
    set_engine(engine = "ets")
  
wflw_fit_ets <- workflow() %>%
    add_model(model_spec_ets) %>%
    add_recipe(recipe_spec %>%
step_rm(all_predictors(), -date)) %>%
    fit(training(splits))

```

## Step 1 - Create a Modeltime Table

```{r create_tableens, message=FALSE, warning=FALSE}
airpass_models <- modeltime_table(
    wflw_fit_arima,
    wflw_fit_prophet,
    wflw_fit_glmnet,
    wflw_fit_ets
)

airpass_models
```


## Step 2 - Make an Ensemble

```{r make_ens, message=FALSE, warning=FALSE}
ensemble_fit <- airpass_models %>%
    ensemble_average(type = "mean")

ensemble_fit
```


## Step 3 - Forecast! (the Test Data)

```{r fcst_ens, message=FALSE, warning=FALSE}
# Calibration
calibration_tbl <- modeltime_table(
    ensemble_fit
) %>%
    modeltime_calibrate(testing(splits))

# Forecast vs Test Set
calibration_tbl %>%
    modeltime_forecast(
        new_data    = testing(splits),
        actual_data = df
    ) %>%
    plot_modeltime_forecast(.interactive = T)
```

## Step 4 - Refit on Full Data & Forecast Future

```{r accu_ens, message=FALSE, warning=FALSE}
calibration_tbl %>%
  modeltime_accuracy() %>%
  table_modeltime_accuracy(
    .interactive = T
  )
```

```{r calib_ens, message=FALSE, warning=FALSE}
refit_tbl <- calibration_tbl %>%
    modeltime_refit(df)

refit_tbl %>%
    modeltime_forecast(
        h = "2 years",
        actual_data = df
    ) %>%
    plot_modeltime_forecast(.interactive = T)
```

# Referências {#Referências .unnumbered}

Dancho M (2023). _modeltime: The Tidymodels Extension for Time Series Modeling_. R package version 1.2.6, <https://CRAN.R-project.org/package=modeltime>.
