Licença

This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 International License. To view a copy of this license, visit http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

License: CC BY-SA 4.0

Citação

Sugestão de citação: FIGUEIREDO, Adriano Marcos Rodrigues. Exemplo Séries Temporais: modeltime - AirPassengers. Campo Grande-MS, Brasil: RStudio/Rpubs, 2023. Disponível em https://rpubs.com/amrofi/modeltime_airpassengers.

Introdução

Os dados vem do pacote datasets, AirPassengers, seguindo o exemplo da vignette do pacote modeltime (Dancho, 2023).

0.1 Carregar pacotes

library(xgboost)
library(tidymodels)
library(modeltime)
library(modeltime.ensemble)
library(tidyverse)
library(lubridate)
library(timetk)
library(zoo)
# This toggles plots from plotly (interactive) to ggplot (static)
interactive <- FALSE

1 Passo 1: dados

# Load the data
data(AirPassengers)  # ts format
#
df<-data.frame(date = as.Date(as.yearmon(time(AirPassengers))),
               value=as.matrix(AirPassengers))

1.1 Visualização de dados

# visualize
df %>%
  plot_time_series(date, value, .interactive = interactive)

2 Passo 2: separar amostras treino x teste

# Split the data into train and test sets
# Split Data 80/20
splits <- initial_time_split(df, prop = 0.8)  # 80:20
#

3 Passo 3: modelagem

3.1 Model 1: Auto ARIMA (Modeltime)

## Model 1: auto_arima
model_fit_arima_no_boost <- arima_reg() %>%
  set_engine(engine = "auto_arima") %>%
  fit(value ~ date, data = training(splits))
# frequency = 12 observations per 1 year

3.2 Model 2: Boosted Auto ARIMA (Modeltime)

## Model 2: arima_boost
model_fit_arima_boosted <- arima_boost(
  min_n = 2,
  learn_rate = 0.015
) %>%
  set_engine(engine = "auto_arima_xgboost") %>%
  fit(value ~ date + as.numeric(date) + factor(month(date, label = TRUE), ordered = F),
      data = training(splits))
# frequency = 12 observations per 1 year

3.3 Model 3: Exponential Smoothing - ETS (Modeltime)

## Model 3: ets
model_fit_ets <- exp_smoothing() %>%
  set_engine(engine = "ets") %>%
  fit(value ~ date, data = training(splits))
#> frequency = 12 observations per 1 year

3.4 Model 4: Prophet (Modeltime)

## Model 4: prophet 
model_fit_prophet <- prophet_reg() %>%
  set_engine(engine = "prophet") %>%
  fit(value ~ date, data = training(splits))
#> Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
#> Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

3.5 Model 5: Linear Regression (Parsnip)

# Model 5: lm 
model_fit_lm <- linear_reg() %>%
  set_engine("lm") %>%
  fit(value ~ as.numeric(date) + factor(month(date, label = TRUE), ordered = FALSE),
      data = training(splits))

3.6 Model 6: MARS (Workflow)

# Multivariate Adaptive Regression Spline model 
# Model 6: earth ----
model_spec_mars <- mars(mode = "regression") %>%
  set_engine("earth") 

recipe_spec <- recipe(value ~ date, data = training(splits)) %>%
  step_date(date, features = "month", ordinal = FALSE) %>%
  step_mutate(date_num = as.numeric(date)) %>%
  step_normalize(date_num) %>%
  step_rm(date)
library(earth)
wflw_fit_mars <- workflow() %>%
  add_recipe(recipe_spec) %>%
  add_model(model_spec_mars) %>%
  fit(training(splits))

3.7 Model 7: Theta method

# Model 7 - Theta method
model_fit_theta <- exp_smoothing() %>%
  set_engine(engine = "theta") %>%
  fit(value ~ date, data = training(splits))
# frequency = 12 observations per 1 year

4 Passo 4: Add fitted models to a Model Table

models_tbl <- modeltime_table(
  model_fit_arima_no_boost,
  model_fit_arima_boosted,
  model_fit_ets,
  model_fit_prophet,
  model_fit_lm,
  wflw_fit_mars,
  model_fit_theta
)

models_tbl

5 Passo 5: Calibrate the model to a testing set

calibration_tbl <- models_tbl %>%
  modeltime_calibrate(new_data = testing(splits))

calibration_tbl

6 Passo 6: Testing Set Forecast & Accuracy Evaluation

calibration_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = df
  ) %>%
  plot_modeltime_forecast(
    .legend_max_width = 25, # For mobile screens
    .interactive      = interactive
  )

6.1 Accuracy Metrics

calibration_tbl %>%
  modeltime_accuracy() %>%
  table_modeltime_accuracy(
    .interactive = FALSE
  )
Accuracy Table
.model_id .model_desc .type mae mape mase smape rmse rsq
1 ARIMA(1,1,0)(0,1,0)[12] Test 28.55 6.12 0.62 6.35 35.08 0.93
2 ARIMA(1,1,0)(0,1,0)[12] W/ XGBOOST ERRORS Test 28.14 6.04 0.61 6.26 34.65 0.93
3 ETS(M,AD,M) Test 29.98 6.53 0.65 6.68 35.81 0.89
4 PROPHET Test 33.84 7.70 0.74 7.62 41.31 0.91
5 LM Test 34.31 7.08 0.75 7.39 47.55 0.95
6 EARTH Test 57.51 11.88 1.26 12.92 73.02 0.86
7 THETA METHOD Test 90.63 22.70 1.98 19.86 101.55 0.20

6.2 Refit to Full Dataset & Forecast Forward

refit_tbl <- calibration_tbl %>%
  modeltime_refit(data = df)

refit_tbl %>%
  modeltime_forecast(h = "3 years", actual_data = df) %>%
  plot_modeltime_forecast(
    .legend_max_width = 25, # For mobile screens
    .interactive      = interactive
  )

7 Model 8: Ensemble

Faremos a rotina de combinação de forecasts para os modelos auto ARIMA, Prophet, Elastic Net e ETS.

# https://business-science.github.io/modeltime.ensemble/articles/getting-started-with-modeltime-ensemble.html
# Recipe
recipe_spec <- recipe(value ~ date, training(splits)) %>%
    step_timeseries_signature(date) %>%
    step_rm(matches("(.iso$)|(.xts$)")) %>%
    step_normalize(matches("(index.num$)|(_year$)")) %>%
    step_dummy(all_nominal()) %>%
    step_fourier(date, K = 1, period = 12)

recipe_spec %>% prep() %>% juice()
# Model 1 auto ARIMA
model_spec_arima <- arima_reg() %>%
    set_engine("auto_arima")

wflw_fit_arima <- workflow() %>%
    add_model(model_spec_arima) %>%
    add_recipe(recipe_spec %>% step_rm(all_predictors(), -date)) %>%
    fit(training(splits))

# Model 2 - Prophet
model_spec_prophet <- prophet_reg() %>%
    set_engine("prophet")

wflw_fit_prophet <- workflow() %>%
    add_model(model_spec_prophet) %>%
    add_recipe(recipe_spec %>% step_rm(all_predictors(), -date)) %>%
    fit(training(splits))

# Model 3 - Elastic Net
model_spec_glmnet <- linear_reg(
    mixture = 0.9,
    penalty = 4.36e-6 ) %>%
    set_engine("glmnet")

wflw_fit_glmnet <- workflow() %>%
    add_model(model_spec_glmnet) %>%
    add_recipe(recipe_spec %>% step_rm(date)) %>%
    fit(training(splits))

# Model 4 - ETS
model_spec_ets <- exp_smoothing() %>%
    set_engine(engine = "ets")
  
wflw_fit_ets <- workflow() %>%
    add_model(model_spec_ets) %>%
    add_recipe(recipe_spec %>%
step_rm(all_predictors(), -date)) %>%
    fit(training(splits))

7.1 Step 1 - Create a Modeltime Table

airpass_models <- modeltime_table(
    wflw_fit_arima,
    wflw_fit_prophet,
    wflw_fit_glmnet,
    wflw_fit_ets
)

airpass_models

7.2 Step 2 - Make an Ensemble

ensemble_fit <- airpass_models %>%
    ensemble_average(type = "mean")

ensemble_fit
## ── Modeltime Ensemble ───────────────────────────────────────────
## Ensemble of 4 Models (MEAN)
## 
## # Modeltime Table
## # A tibble: 4 × 3
##   .model_id .model     .model_desc            
##       <int> <list>     <chr>                  
## 1         1 <workflow> ARIMA(1,1,0)(0,1,0)[12]
## 2         2 <workflow> PROPHET                
## 3         3 <workflow> GLMNET                 
## 4         4 <workflow> ETS(M,AD,M)

7.3 Step 3 - Forecast! (the Test Data)

# Calibration
calibration_tbl <- modeltime_table(
    ensemble_fit
) %>%
    modeltime_calibrate(testing(splits))

# Forecast vs Test Set
calibration_tbl %>%
    modeltime_forecast(
        new_data    = testing(splits),
        actual_data = df
    ) %>%
    plot_modeltime_forecast(.interactive = T)

7.4 Step 4 - Refit on Full Data & Forecast Future

calibration_tbl %>%
  modeltime_accuracy() %>%
  table_modeltime_accuracy(
    .interactive = T
  )
refit_tbl <- calibration_tbl %>%
    modeltime_refit(df)

refit_tbl %>%
    modeltime_forecast(
        h = "2 years",
        actual_data = df
    ) %>%
    plot_modeltime_forecast(.interactive = T)

Referências

Dancho M (2023). modeltime: The Tidymodels Extension for Time Series Modeling. R package version 1.2.6, https://CRAN.R-project.org/package=modeltime.

---
title: "Exemplo Séries Temporais: modeltime - AirPassengers"
author: "Adriano Marcos Rodrigues Figueiredo, *e-mail: adriano.figueiredo@ufms.br*"
date: "`r format(Sys.Date(), '%d %B %Y')`"
output:
  html_document:
    code_download: yes
    theme: default
    number_sections: yes
    toc: yes
    toc_float: no
    df_print: paged
    fig_caption: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Licença {#Licença .unnumbered}

This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 International License. To view a copy of this license, visit <http://creativecommons.org/licenses/by-sa/4.0/> or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

![License: CC BY-SA 4.0](https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-sa.png){width="25%"}

# Citação {#Citação .unnumbered}

Sugestão de citação: FIGUEIREDO, Adriano Marcos Rodrigues. Exemplo Séries Temporais: modeltime - AirPassengers. Campo Grande-MS, Brasil: RStudio/Rpubs, 2023. Disponível em <https://rpubs.com/amrofi/modeltime_airpassengers>.

# Introdução {#Introdução .unnumbered}

Os dados vem do pacote datasets, AirPassengers, seguindo o exemplo da vignette do pacote modeltime (Dancho, 2023).

## Carregar pacotes

```{r pacotes, message=FALSE, warning=FALSE}
library(xgboost)
library(tidymodels)
library(modeltime)
library(modeltime.ensemble)
library(tidyverse)
library(lubridate)
library(timetk)
library(zoo)
# This toggles plots from plotly (interactive) to ggplot (static)
interactive <- FALSE
```

# Passo 1: dados

```{r passo1, message=FALSE, warning=FALSE}
# Load the data
data(AirPassengers)  # ts format
#
df<-data.frame(date = as.Date(as.yearmon(time(AirPassengers))),
               value=as.matrix(AirPassengers))
```

## Visualização de dados

```{r visualdados, message=FALSE, warning=FALSE}

# visualize
df %>%
  plot_time_series(date, value, .interactive = interactive)
```

# Passo 2: separar amostras treino x teste

```{r passo2, message=FALSE, warning=FALSE}

# Split the data into train and test sets
# Split Data 80/20
splits <- initial_time_split(df, prop = 0.8)  # 80:20
#
```

# Passo 3: modelagem

## Model 1: Auto ARIMA (Modeltime) 

```{r mod1, message=FALSE, warning=FALSE}

## Model 1: auto_arima
model_fit_arima_no_boost <- arima_reg() %>%
  set_engine(engine = "auto_arima") %>%
  fit(value ~ date, data = training(splits))
# frequency = 12 observations per 1 year

```

## Model 2: Boosted Auto ARIMA (Modeltime)

```{r mod2, message=FALSE, warning=FALSE}
## Model 2: arima_boost
model_fit_arima_boosted <- arima_boost(
  min_n = 2,
  learn_rate = 0.015
) %>%
  set_engine(engine = "auto_arima_xgboost") %>%
  fit(value ~ date + as.numeric(date) + factor(month(date, label = TRUE), ordered = F),
      data = training(splits))
# frequency = 12 observations per 1 year
```

## Model 3: Exponential Smoothing - ETS (Modeltime)

```{r mod3, message=FALSE, warning=FALSE}
## Model 3: ets
model_fit_ets <- exp_smoothing() %>%
  set_engine(engine = "ets") %>%
  fit(value ~ date, data = training(splits))
#> frequency = 12 observations per 1 year
```

## Model 4: Prophet (Modeltime) 

```{r mod4, message=FALSE, warning=FALSE}

## Model 4: prophet 
model_fit_prophet <- prophet_reg() %>%
  set_engine(engine = "prophet") %>%
  fit(value ~ date, data = training(splits))
#> Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
#> Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
```



## Model 5: Linear Regression (Parsnip)

```{r mod5, message=FALSE, warning=FALSE}
# Model 5: lm 
model_fit_lm <- linear_reg() %>%
  set_engine("lm") %>%
  fit(value ~ as.numeric(date) + factor(month(date, label = TRUE), ordered = FALSE),
      data = training(splits))
```


## Model 6: MARS (Workflow)

```{r mod6, message=FALSE, warning=FALSE}
# Multivariate Adaptive Regression Spline model 
# Model 6: earth ----
model_spec_mars <- mars(mode = "regression") %>%
  set_engine("earth") 

recipe_spec <- recipe(value ~ date, data = training(splits)) %>%
  step_date(date, features = "month", ordinal = FALSE) %>%
  step_mutate(date_num = as.numeric(date)) %>%
  step_normalize(date_num) %>%
  step_rm(date)
library(earth)
wflw_fit_mars <- workflow() %>%
  add_recipe(recipe_spec) %>%
  add_model(model_spec_mars) %>%
  fit(training(splits))
```

## Model 7: Theta method

```{r mod7, message=FALSE, warning=FALSE}
# Model 7 - Theta method
model_fit_theta <- exp_smoothing() %>%
  set_engine(engine = "theta") %>%
  fit(value ~ date, data = training(splits))
# frequency = 12 observations per 1 year
```



# Passo 4: Add fitted models to a Model Table

```{r fitted_models, message=FALSE, warning=FALSE}
models_tbl <- modeltime_table(
  model_fit_arima_no_boost,
  model_fit_arima_boosted,
  model_fit_ets,
  model_fit_prophet,
  model_fit_lm,
  wflw_fit_mars,
  model_fit_theta
)

models_tbl
```

# Passo 5: Calibrate the model to a testing set

```{r calibrate, message=FALSE, warning=FALSE}
calibration_tbl <- models_tbl %>%
  modeltime_calibrate(new_data = testing(splits))

calibration_tbl
```

# Passo 6: Testing Set Forecast & Accuracy Evaluation

```{r forecast, message=FALSE, warning=FALSE}
calibration_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = df
  ) %>%
  plot_modeltime_forecast(
    .legend_max_width = 25, # For mobile screens
    .interactive      = interactive
  )
```

## Accuracy Metrics

```{r accuracy, message=FALSE, warning=FALSE}
calibration_tbl %>%
  modeltime_accuracy() %>%
  table_modeltime_accuracy(
    .interactive = FALSE
  )
```

## Refit to Full Dataset & Forecast Forward

```{r refit, message=FALSE, warning=FALSE}
refit_tbl <- calibration_tbl %>%
  modeltime_refit(data = df)

refit_tbl %>%
  modeltime_forecast(h = "3 years", actual_data = df) %>%
  plot_modeltime_forecast(
    .legend_max_width = 25, # For mobile screens
    .interactive      = interactive
  )
```  

# Model 8: Ensemble

Faremos a rotina de combinação de forecasts para os modelos auto ARIMA, Prophet, Elastic Net e ETS. 

```{r mod8, message=FALSE, warning=FALSE}
# https://business-science.github.io/modeltime.ensemble/articles/getting-started-with-modeltime-ensemble.html
# Recipe
recipe_spec <- recipe(value ~ date, training(splits)) %>%
    step_timeseries_signature(date) %>%
    step_rm(matches("(.iso$)|(.xts$)")) %>%
    step_normalize(matches("(index.num$)|(_year$)")) %>%
    step_dummy(all_nominal()) %>%
    step_fourier(date, K = 1, period = 12)

recipe_spec %>% prep() %>% juice()
# Model 1 auto ARIMA
model_spec_arima <- arima_reg() %>%
    set_engine("auto_arima")

wflw_fit_arima <- workflow() %>%
    add_model(model_spec_arima) %>%
    add_recipe(recipe_spec %>% step_rm(all_predictors(), -date)) %>%
    fit(training(splits))

# Model 2 - Prophet
model_spec_prophet <- prophet_reg() %>%
    set_engine("prophet")

wflw_fit_prophet <- workflow() %>%
    add_model(model_spec_prophet) %>%
    add_recipe(recipe_spec %>% step_rm(all_predictors(), -date)) %>%
    fit(training(splits))

# Model 3 - Elastic Net
model_spec_glmnet <- linear_reg(
    mixture = 0.9,
    penalty = 4.36e-6 ) %>%
    set_engine("glmnet")

wflw_fit_glmnet <- workflow() %>%
    add_model(model_spec_glmnet) %>%
    add_recipe(recipe_spec %>% step_rm(date)) %>%
    fit(training(splits))

# Model 4 - ETS
model_spec_ets <- exp_smoothing() %>%
    set_engine(engine = "ets")
  
wflw_fit_ets <- workflow() %>%
    add_model(model_spec_ets) %>%
    add_recipe(recipe_spec %>%
step_rm(all_predictors(), -date)) %>%
    fit(training(splits))

```

## Step 1 - Create a Modeltime Table

```{r create_tableens, message=FALSE, warning=FALSE}
airpass_models <- modeltime_table(
    wflw_fit_arima,
    wflw_fit_prophet,
    wflw_fit_glmnet,
    wflw_fit_ets
)

airpass_models
```


## Step 2 - Make an Ensemble

```{r make_ens, message=FALSE, warning=FALSE}
ensemble_fit <- airpass_models %>%
    ensemble_average(type = "mean")

ensemble_fit
```


## Step 3 - Forecast! (the Test Data)

```{r fcst_ens, message=FALSE, warning=FALSE}
# Calibration
calibration_tbl <- modeltime_table(
    ensemble_fit
) %>%
    modeltime_calibrate(testing(splits))

# Forecast vs Test Set
calibration_tbl %>%
    modeltime_forecast(
        new_data    = testing(splits),
        actual_data = df
    ) %>%
    plot_modeltime_forecast(.interactive = T)
```

## Step 4 - Refit on Full Data & Forecast Future

```{r accu_ens, message=FALSE, warning=FALSE}
calibration_tbl %>%
  modeltime_accuracy() %>%
  table_modeltime_accuracy(
    .interactive = T
  )
```

```{r calib_ens, message=FALSE, warning=FALSE}
refit_tbl <- calibration_tbl %>%
    modeltime_refit(df)

refit_tbl %>%
    modeltime_forecast(
        h = "2 years",
        actual_data = df
    ) %>%
    plot_modeltime_forecast(.interactive = T)
```

# Referências {#Referências .unnumbered}

Dancho M (2023). _modeltime: The Tidymodels Extension for Time Series Modeling_. R package version 1.2.6, <https://CRAN.R-project.org/package=modeltime>.
