library(pacman)

p_load("lubridate","tidyverse", "data.table", "forecast", "scales", "kableExtra","recipes","sweep",   "tidyquant", "timetk", "magrittr", "yardstick", "modeltime", "tidymodels","TSstudio")

Introducción

set.seed(345)

n <- 200

datos <- tibble    (Paris   = cumsum(sample(c(500, 3500), n, TRUE)),
                    Madrid  = cumsum(sample(c(700, 4500), n, TRUE)),
                    Rio     = cumsum(sample(c(900, 6500), n, TRUE)))

Pronóstico simúltaneo de tres bases de datos

  • La línea Arima_Model_Forecast permite obtener la proyección de las tres bases de datos, junto con los intervalos de confianza al 95% nivel de confianza, y los guarda en una lista por cada base de datos.
  • La línea Forecast permite obtener la proyección de las tres bases de datos, junto con los intervalos de confianza al 95% nivel de confianza.
#Pronóstico

Base_A <- ts(datos[1:3],start=c(2011),frequency=12)

forecasting_function_ARIMA <- function(Z, hrz = 16) {
                timeseries <- msts(Z, start = 2011, seasonal.periods = 12)
                forecast   <- auto.arima(timeseries)
}

Forecasting_list_ARIMA <- lapply(X = Base_A, forecasting_function_ARIMA)
Arima_Model_Forecast   <- lapply(Forecasting_list_ARIMA, forecast,h=6,level=95)

Forecast      <- lapply(Forecasting_list_ARIMA, forecast,h=6,level=95)%>%data.frame()

Del ejercicio anterior, si queremos modificar la estrutura de los encabezados de la matriz de proyecciones, aplicamos la siguiente rutina:

Forecast      <- lapply(Forecasting_list_ARIMA, forecast,h=6,level=95)%>%data.frame()%>%
                  rownames_to_column(var = "FECHA")%>% as_tibble()%>%
                  rename_with(stringr::str_replace, pattern = ".Point.Forecast", replacement = " Pronóstico", 
                              matches(".Point.Forecast"))%>%
                  rename_with(stringr::str_replace, pattern = ".Lo.95", replacement = " Límite Inf", 
                              matches(".Lo.95"))%>%
                  rename_with(stringr::str_replace, pattern = ".Hi.95", replacement = " Límite Sup", 
                              matches(".Hi.95"))%>%
                  rename_at(1:4,~ str_to_title(.))

Gráficos simultáneos

y <- datos
colnames <- dimnames(y)[[2]]

TEMAD <- theme(axis.line   = element_line(size=0.5,  colour = "black"),
               plot.title  = element_text(hjust=0.5, size = 12, face = "bold"),
               legend.text = element_text(size = 20),
               plot.caption       =  element_text(size = 8),
               legend.position    = 'none',
               legend.title       = element_blank(),
               axis.text.x        = element_text(size = 11),
               axis.text.y        = element_text(size = 11),
               text               = element_text(size = 11),
               panel.grid.major   = element_blank(),
               panel.grid.minor   = element_blank(),
               panel.border       = element_blank(),
               panel.background   = element_blank())
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## i Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
GRA <- lapply(seq_along(datos),function(i) {
       autoplot(fit <- forecast(auto.arima(ts(datos[[i]],start=c(2011,1),
                                       end = c(2027,8),frequency = 12)),6,
                                level=95),
                ts.colour = "dodgerblue4",predict.colour = "#999999", 
                predict.linetype = "dashed")+
    labs(x = 'Años', y = "Número de atenciones") +
    ggtitle(colnames[i])+ 
    theme(plot.title = element_text(hjust = 0.5))+
    scale_y_continuous(labels=comma) + TEMAD+
    coord_cartesian(xlim = c(2011, 2027.25))
  
} 
)

GRA <- set_names(GRA, colnames)
GRA
## $Paris

## 
## $Madrid

## 
## $Rio

Creación de fechas:

Ejemplo I:

n <- 6

set <- tibble(Vinces  = cumsum(sample(c(500, 3500), n, TRUE)))%>%
       mutate(FECHA   = seq(as.Date("2022-8-1"), as.Date("2023-1-1"), by = "months"))  

set
## # A tibble: 6 x 2
##   Vinces FECHA     
##    <dbl> <date>    
## 1   3500 2022-08-01
## 2   7000 2022-09-01
## 3   7500 2022-10-01
## 4   8000 2022-11-01
## 5  11500 2022-12-01
## 6  15000 2023-01-01

Ejemplo II:

n <- 20

set_A <- tibble(Babahoyo  = cumsum(sample(c(800, 4500), n, TRUE)))%>%
         mutate(FECHA= seq(as.Date("2018-1-1"),by = "months",length.out = 20))
head(set_A,5)
## # A tibble: 5 x 2
##   Babahoyo FECHA     
##      <dbl> <date>    
## 1      800 2018-01-01
## 2     1600 2018-02-01
## 3     6100 2018-03-01
## 4    10600 2018-04-01
## 5    11400 2018-05-01

Ejemplo III:

Crear variables Fecha que comience con el día final de cada mes

n <- 20

set_A <- tibble(Napo  = cumsum(sample(c(800, 4500), n, TRUE)))%>%
         mutate(FECHA= seq(as.Date("2018-1-1"),by = "months",length.out = 20)-1)
head(set_A,5)
## # A tibble: 5 x 2
##    Napo FECHA     
##   <dbl> <date>    
## 1  4500 2017-12-31
## 2  9000 2018-01-31
## 3  9800 2018-02-28
## 4 10600 2018-03-31
## 5 15100 2018-04-30

Ejemplo IV:

Creación de fechas con la librería lubridate

fecha_de_analisis <- dmy("31-12-2022")

Ejemplo VI:

cambiar Formato fechas

## # A tibble: 3 x 2
##    Napo FECHA     
##   <dbl> <date>    
## 1   800 2017-12-31
## 2  1600 2018-01-31
## 3  6100 2018-02-28

Para formato de fechas, se recomienda el siguiente link: https://www.geeksforgeeks.org/how-to-use-date-formats-in-r/

%b: Abreviación de mes %y: Abreviación de año

AA_set <- A_set %>%
          mutate(FECHA=label_date(format = "%b - %y")(FECHA))

head(AA_set)
## # A tibble: 3 x 2
##    Napo FECHA    
##   <dbl> <chr>    
## 1   800 dic. - 17
## 2  1600 ene. - 18
## 3  6100 feb. - 18

Ejercicio Completo

Jamaica Canada Honduras
-0.3465840 6.303960 11.321528
0.6276359 3.953276 12.066658
0.6437831 4.370569 11.091397
-0.3125563 4.856155 12.298446
1.0578810 5.592618 11.263103
0.3202071 4.931182 13.604049
-0.0607272 4.375798 12.169801
1.4481094 5.346821 13.492133
0.5840688 6.112750 11.751682
-0.5442221 4.188722 13.107554
0.9309310 4.616367 12.809937
-0.2657284 6.157338 11.740550
0.5602174 3.981209 11.738767
-0.1732012 5.980474 12.988104
-0.1593099 5.141921 12.105388
-0.2498411 4.057758 12.214577
1.1909899 4.062254 11.143715
-1.6425621 5.903888 11.533752
-1.2030146 5.369082 13.552865
-0.5381974 5.312796 12.183695
-0.5510144 6.083338 12.100343
-0.1480458 5.207964 14.040965
-0.5208616 4.447765 13.231016
-1.5645956 5.358313 13.398592
0.8559413 6.631799 9.574225
-0.6693584 5.021633 11.259087
-1.1820668 5.756193 12.744493
0.8058029 3.638682 11.141806
-1.2877233 4.391235 11.619835
-0.2914974 4.788253 12.831156

Creamos una función, que identifique valores atípicos y lo transforme por NA. Es importante recalcar que está función es disponible si el conjunto de datos es data.frame, no funciona para tibble.

atipicos = function(df) {
    for (i in 1:ncol(df)) {
        dat = df[which(df[,i] > quantile(df[,i], .1) & df[,i] < quantile(df[,i], .9)),i]
        mean = mean(dat)
        sd = sd(dat)
        df[which(   abs((df[,i]) - mean) > (sd * 3)), i] = NA
    }
    return(df)
}

Transformamos a lista y por data.frame eliminamos NA. En este caso la base de datos denominado Honduras tendrá una observación menos.

AA <- atipicos(country) %>%
      pivot_longer(cols  = Jamaica:Honduras,
                   names_to = c("Países"),
                   values_to = "Saldos")

AB <- split(AA, AA$Países)

AB <- map(AB, ~filter(.x,if_all(everything(),~!is.na(.))))

Para correr auto.arima por listas, se aplica la siguiente rutina

mod <- function(x){
  x$Saldos %>%
    ts(frequency = 12, start = c(2019,1)) %>%
    auto.arima() %>%
    forecast(h=8, level=95) %>%
    as.data.frame()%>%
    mutate_if(is.numeric,round,digits=2)%>%
    rownames_to_column(var="Fecha") %>%
    as_tibble()
}

AC <- lapply(AB, mod)

Introducción libreria sweep

Aplicación de la libreria sweep

## New names:
## * `` -> `...1`
## * `` -> `...225`
## * `` -> `...226`
## # A tibble: 5 x 2
##   FECHA       Saldos
##   <date>       <dbl>
## 1 2014-12-01 103787.
## 2 2015-01-01 101090.
## 3 2015-02-01 107776.
## 4 2015-03-01 115545.
## 5 2015-04-01 119154.

El primer paso consiste en tener la base de datos junto con una variable de tipo Date

BA_ts <- tk_ts(BA, start = 2014, freq = 12, silent = TRUE)
fit_ets <- BA_ts %>%
    ets()

Retorna los parámetros del modelo

library(sweep)

sw_tidy(fit_ets)
## # A tibble: 4 x 2
##   term       estimate
##   <chr>         <dbl>
## 1 alpha      1.00    
## 2 beta       0.000100
## 3 l     107325.      
## 4 b       1500.

Retorna la calidad del modelo

sw_glance(fit_ets)
## # A tibble: 1 x 12
##   model.desc sigma logLik   AIC   BIC    ME  RMSE   MAE    MPE  MAPE  MASE
##   <chr>      <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1 ETS(A,A,N) 3802.  -745. 1501. 1512. -116. 3694. 2928. -0.194  2.13 0.192
## # i 1 more variable: ACF1 <dbl>

w_augment() retorna los valores reales, ajustados y residuos del modelo

augment_fit_ets <- sw_augment(fit_ets)
head(augment_fit_ets,5)
## # A tibble: 5 x 4
##   index     .actual .fitted .resid
##   <yearmon>   <dbl>   <dbl>  <dbl>
## 1 ene. 2014 103787. 108825. -5038.
## 2 feb. 2014 101090. 105288. -4198.
## 3 mar. 2014 107776. 102589.  5186.
## 4 abr. 2014 115545. 109275.  6270.
## 5 may. 2014 119154. 117045.  2109.

Pronóstico del modelo

fcast_ets <- fit_ets %>%
             forecast(h = 12, level=95) 

Tidy the forecast object

Fore <- sw_sweep(fcast_ets, fitted = TRUE)

Gráfico

sw_sweep(fcast_ets, timetk_idx = TRUE) %>%
    ggplot(aes(x = index, y = Saldos, color = key)) +
    geom_ribbon(aes(ymin = lo.95, ymax = hi.95), 
                fill = "#D5DBFF", color = NA, size = 0) +
    #geom_ribbon(aes(ymin = lo.80, ymax = hi.80, fill = key), 
     #           fill = "#596DD5", color = NA, size = 0, alpha = 0.8) +
    geom_line(size = 1) +
    labs(title = "Pronóstico- Obligaciones con el Público", x = "", y = "USD", 
         subtitle = "Irregular Time Index") +
    scale_y_continuous(labels = scales::dollar) +
    scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
    scale_color_tq() +
    scale_fill_tq() +
    theme_tq() 
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Aplicación de Varios modelos.

Tomando el mismo modelo del punto anterior

BA_ts_II <- BA %>%
            tk_ts(select = -FECHA, start = c(1990, 3), freq = 4)

Creamos una lista por cada uno de los modelos a emplear

models_list <- list(
    auto.arima = list(
        y = BA_ts_II
        ),
    ets = list(
        y = BA_ts_II,
        damped = TRUE
    ),
    bats = list(
        y = BA_ts_II
    )
)

Convertimos a datos la lista previamente creada

models_tbl <- enframe(models_list, name = "f", value = "params")
models_tbl_fit <- models_tbl %>%
                  mutate(fit = invoke_map(f, params))
## Warning: There was 1 warning in `mutate()`.
## i In argument: `fit = invoke_map(f, params)`.
## Caused by warning:
## ! `invoke_map()` was deprecated in purrr 1.0.0.
## i Please use map() + exec() instead.

Inspección del Modelo

AA <- models_tbl_fit %>%
      mutate(tidy = map(fit, sw_tidy)) %>%
      unnest(tidy) %>%
      spread(key = f, value = estimate)

SW glance

AB <- models_tbl_fit %>%
      mutate(glance = map(fit, sw_glance)) %>%
      unnest(glance, .drop = TRUE)
## Warning: The `.drop` argument of `unnest()` is deprecated as of tidyr 1.0.0.
## i All list-columns are now preserved.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Pronóstico del Modelo

models_tbl_fcast <- models_tbl_fit %>%
    mutate(fcast = map(fit, forecast, h = 6, level=95))
models_tbl_fcast
## # A tibble: 3 x 4
##   f          params           fit        fcast     
##   <chr>      <list>           <list>     <list>    
## 1 auto.arima <named list [1]> <fr_ARIMA> <forecast>
## 2 ets        <named list [2]> <ets>      <forecast>
## 3 bats       <named list [1]> <bats>     <forecast>

Tidyng the forcast

models_tbl_fcast_tidy <- models_tbl_fcast %>%
                          mutate(sweep = map(fcast, sw_sweep, fitted = FALSE, timetk_idx = TRUE, rename_index = "date"))
models_tbl_fcast_tidy
## # A tibble: 3 x 5
##   f          params           fit        fcast      sweep            
##   <chr>      <list>           <list>     <list>     <list>           
## 1 auto.arima <named list [1]> <fr_ARIMA> <forecast> <tibble [78 x 5]>
## 2 ets        <named list [2]> <ets>      <forecast> <tibble [78 x 5]>
## 3 bats       <named list [1]> <bats>     <forecast> <tibble [78 x 5]>

Obtenemos el resultado por data.frame

AD <- models_tbl_fcast_tidy %>%
                        unnest(sweep)

Gráficamos el Modelo

models_tbl_fcast_tidy %>%
    unnest(sweep) %>%
    ggplot(aes(x = date, y = Saldos, color = key, group = f)) +
    geom_ribbon(aes(ymin = lo.95, ymax = hi.95),
                fill = "#D5DBFF", color = NA, size = 0) +
 #   geom_ribbon(aes(ymin = lo.80, ymax = hi.80, fill = key),
 #               fill = "#596DD5", color = NA, size = 0, alpha = 0.8) +
    geom_line(size = 1) +
    facet_wrap(~f, nrow = 3) +
    labs(title = "Pronóstico Saldos",
         subtitle = "Pronóstico aplicando varios modelos: ARIMA, BATS, ETS",
         x = "", y = "Price") +
    scale_y_continuous(labels = scales::dollar) +
    #scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
    theme_tq() +
    scale_color_tq()

Un modelo para varios series de tiempo

Previo al ejercicio de pronóstico, aplicamos un pequeño ajuste a la base de datos.

files <- list.files(pattern=".xlsx")
files <- files[c(1:4)]
#print(files)

GH <- list()

for (i in seq_along(files)) {
     GH[[i]] <- files[i] %>%
       map_df(
         ~read_xlsx(path=files[i], sheet= "BAL",skip = 4))
}

GH <- set_names(GH, files)

GHA <- map(GH, ~select(.x,CÓDIGO, CUENTA, `43131`:`44804`))    %>%
      map(~dplyr::filter(.x,CUENTA == "CARTERA DE CRÉDITOS")) %>%
      map(~pivot_longer(.x,cols    = `43131`:`44804`,
                         names_to  = c("Años"),
                         values_to = "Créditos"))%>%
      map(~mutate(.x,Fecha= seq(as.Date("2018-1-1"),by =  "months",length.out = nrow(.))))%>%
      map(~select(.x,Fecha,Créditos))

# Ponemos en cada data.frame, el nombre de cada entidad

Nammes <- c("Di", "Gua", "Int", "Pic")

GHA <- map2(GHA, Nammes, ~ .x %>% 
            mutate(Nammes = .y, .before = 1)) 

Bank <- bind_rows(GHA)

#save(Bank, file = "Bank.RData")

Paso 1: Nest data.frame

Bank_nest <- Bank %>%
             group_by(Nammes) %>%
             nest()

Paso 2: Declaramos a la base como time series (ts)

Bank_cat2_ts <- Bank_nest %>%
                mutate(data.ts = map(.x       = data, 
                                     .f       = tk_ts, 
                                     select   = -Fecha, 
                                     start    = 2018,
                                     freq     = 12))

Paso 3: Modelamos la Serie de Tiempo

Para el presente ejercicio aplicamos un modelo Holt-winters.

Bank_cat2_ts_fit <- Bank_cat2_ts %>%
                    mutate(fit.HW = map(data.ts, HoltWinters))

sw_tidy

Esta opción, nos permite ver los parámetros del modelo. En la opción key se debe anotar la variable categórica.

PM <- Bank_cat2_ts_fit  %>%
      mutate(tidy = map(fit.HW, sw_tidy)) %>%
      unnest(tidy) %>%
      spread(key = Nammes, value = estimate)

sw_glance

PSW <- Bank_cat2_ts_fit %>%
       mutate(glance = map(fit.HW, sw_glance)) %>%
       unnest(glance)
PSW
## # A tibble: 4 x 16
## # Groups:   Nammes [4]
##   Nammes data     data.ts       fit.HW     model.desc   sigma logLik AIC   BIC  
##   <chr>  <list>   <list>        <list>     <chr>        <dbl> <lgl>  <lgl> <lgl>
## 1 Di     <tibble> <ts [56 x 1]> <HltWntrs> HoltWinters 3.14e5 NA     NA    NA   
## 2 Gua    <tibble> <ts [56 x 1]> <HltWntrs> HoltWinters 3.68e5 NA     NA    NA   
## 3 Int    <tibble> <ts [56 x 1]> <HltWntrs> HoltWinters 2.24e5 NA     NA    NA   
## 4 Pic    <tibble> <ts [56 x 1]> <HltWntrs> HoltWinters 9.37e5 NA     NA    NA   
## # i 7 more variables: ME <dbl>, RMSE <dbl>, MAE <dbl>, MPE <dbl>, MAPE <dbl>,
## #   MASE <dbl>, ACF1 <dbl>

sw_tidy_decomp

PSWT <- Bank_cat2_ts_fit %>%
        mutate(decomp = map(fit.HW, sw_tidy_decomp, timetk_idx = TRUE,   rename_index = "date")) %>%
        unnest(decomp)

Pronóstico del Modelo

Bank_cat2_fcast <- Bank_cat2_ts_fit %>%
                   mutate(fcast.HW = map(fit.HW, forecast, h = 6, level=95))

Pronóstico del Modelo: Tidy

Bank_cat2_fcast <- Bank_cat2_fcast %>%
    mutate(sweep = map(fcast.HW, sw_sweep, fitted = FALSE, 
                       timetk_idx = TRUE)) %>%
    unnest(sweep)
Bank_cat2_fcast %>%
    ggplot(aes(x = index, y = Créditos, color = key, group = Nammes)) +
    geom_ribbon(aes(ymin = lo.95, ymax = hi.95), 
                fill = "#D5DBFF", color = NA, size = 0) +
    geom_line() +
    labs(title = "Pronóstico Cartera de Crédito",
         subtitle = "Modelo Holt-Winters",
         x = "", y = "Units") +
    scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
    scale_color_tq() +
    scale_fill_tq() +
    facet_wrap(~ Nammes, scales = "free_y", ncol = 3) +
    theme_tq() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Bank_cat2_fcast
## # A tibble: 248 x 10
## # Groups:   Nammes [4]
##    Nammes data     data.ts       fit.HW     fcast.HW   index      key   Créditos
##    <chr>  <list>   <list>        <list>     <list>     <date>     <chr>    <dbl>
##  1 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-01-01 actu~ 1335810.
##  2 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-02-01 actu~ 1336984.
##  3 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-03-01 actu~ 1400660.
##  4 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-04-01 actu~ 1428374.
##  5 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-05-01 actu~ 1458558.
##  6 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-06-01 actu~ 1488693.
##  7 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-07-01 actu~ 1524040.
##  8 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-08-01 actu~ 1633440.
##  9 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-09-01 actu~ 1696293.
## 10 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-10-01 actu~ 1717122.
## # i 238 more rows
## # i 2 more variables: lo.95 <dbl>, hi.95 <dbl>

Varios Modelos - Varias Series de Tiempo

load("D:/Documentos/Estadisticos/R/R_studio/Time_Series/Bank.RData")

#glimpse(Bank)
Bank %>%
ggplot(mapping = aes(x =Fecha, y= Créditos))+
  geom_line()+
  facet_wrap(~Nammes, scale ="free", ncol =2)+
  tidyquant::theme_tq()

Validación Cruzada

# get the min-max of the time index for each sample
test_end   <- max(Bank$Fecha)
test_start <- dmy("01-02-2022")

train_end   <- test_start %m-% months(1)
train_start <- min(Bank$Fecha)

interval_train   <- interval(start = train_start, end = train_end)
interval_intest  <- interval(start = test_start, end = test_end)

Con este gráfico, se identifica dentro de la base, aquella clasificada como train y aquella como test

Bank %>%
  mutate(sample = case_when(
    Fecha %within% interval_train ~ "train",
    Fecha %within% interval_intest ~ "test"
  )) %>%
  drop_na() %>%
  mutate(sample = factor(sample, levels = c("train", "test"))) %>%
  ggplot(aes(x = Fecha, y = Créditos, colour = sample)) +
    geom_line() +
    labs(x = NULL, y = NULL, colour = NULL) +
    facet_wrap(~ Nammes, scale = "free", ncol = 2) +
    tidyquant::theme_tq() +
    tidyquant::scale_colour_tq()

Procesamiento de la Información

Con el objetivo de prevenir que valores atípicos afecten al modelo, se rescala la información aplicando para este fin el paquete recipes.

Bank_I <- Bank %>%
           spread(Nammes, Créditos)
recipe <- recipe(~., filter(Bank_I, Fecha %within% interval_train)) %>% 
          step_sqrt(all_numeric()) %>%
          step_center(all_numeric()) %>%
          step_scale(all_numeric()) %>%
          prep()

Bank_I <- bake(recipe, Bank_I)

Luego, cambiamos la data a la forma original.

Bank_II <- Bank_I %>%
           gather(Bancos, Valores, -Fecha)

Procesamiento de la Base de datos usando la libreria recipe

# revert back function
rec_revert <- function(vector, recipe, varname) {

# store recipe values
rec_center <- recipe$steps[[2]]$means[varname]
rec_scale <-  recipe$steps[[3]]$sds[varname]

# convert back based on the recipe
results <- (vector * rec_scale + rec_center) ^ 2

# add additional adjustment if necessary
results <- round(results)

# return the results
results

}

Modelamiento y Pronóstico

# adjust by sample
Bank_III <- Bank_II %>%
            mutate(sample = case_when(
              Fecha %within% interval_train ~ "train",
              Fecha %within% interval_intest ~ "test"
            )) %>%
            drop_na()

# nest the data train
Bank_IV <- Bank_III %>% 
           group_by(Bancos, sample) %>%
           nest(.key = "data") %>%
           spread(sample, data)

Función

data_funs <- list(
  ts = function(x) ts(x$Valores, frequency = 12)
  )
data_funs <- data_funs %>% 
  rep(length(unique(Bank_III$Bancos))) %>%
  enframe("func_name", "func") %>%
  mutate(
    Bancos = sort(rep(unique(Bank_III$Bancos), length(unique(.$func_name))))
  )
Bank_V <- Bank_IV %>%
     left_join(data_funs)
## Joining with `by = join_by(Bancos)`

Preparación del Modelo de Serie de Tiempo

Corremos la lista con cinco modelos: auto.arima, ets, stlm. tbats y holt.winters

models <- list(
  auto.arima = function(x) auto.arima(x),
  ets = function(x) ets(x),
  stlm = function(x) stlm(x),
  tbats = function(x) tbats(x, use.box.cox = FALSE),
  holt.winter = function (x) HoltWinters(x, seasonal = "additive")
)

Es posible convertirlo en formato agrupado como el anterior ejemplo.

models <- models %>%
  rep(length(unique(Bank_III$Bancos))) %>%
  enframe("model_name", "model") %>%
  mutate(Bancos =
    sort(rep(unique(Bank_III$Bancos), length(unique(.$model_name))))
  )

Combinación de Modelos

Bank_VI <- Bank_V %>% 
           left_join(models)
## Joining with `by = join_by(Bancos)`

Ejecución del Modelo

Bank_VII <- Bank_VI %>% 
            mutate(
              params = map(train, ~ list(x = .x)),
              data = invoke_map(func, params),
              params = map(data, ~ list(x = .x)),
              fitted = invoke_map(model, params)
            ) %>%
            select(-data, -params)

Bank_VII
## # A tibble: 20 x 8
## # Groups:   Bancos [4]
##    Bancos test             train    func_name func   model_name model fitted    
##    <chr>  <list>           <list>   <chr>     <list> <chr>      <lis> <list>    
##  1 Di     <tibble [7 x 2]> <tibble> ts        <fn>   auto.arima <fn>  <fr_ARIMA>
##  2 Di     <tibble [7 x 2]> <tibble> ts        <fn>   ets        <fn>  <ets>     
##  3 Di     <tibble [7 x 2]> <tibble> ts        <fn>   stlm       <fn>  <stlm>    
##  4 Di     <tibble [7 x 2]> <tibble> ts        <fn>   tbats      <fn>  <tbats>   
##  5 Di     <tibble [7 x 2]> <tibble> ts        <fn>   holt.wint~ <fn>  <HltWntrs>
##  6 Gua    <tibble [7 x 2]> <tibble> ts        <fn>   auto.arima <fn>  <fr_ARIMA>
##  7 Gua    <tibble [7 x 2]> <tibble> ts        <fn>   ets        <fn>  <ets>     
##  8 Gua    <tibble [7 x 2]> <tibble> ts        <fn>   stlm       <fn>  <stlm>    
##  9 Gua    <tibble [7 x 2]> <tibble> ts        <fn>   tbats      <fn>  <bats>    
## 10 Gua    <tibble [7 x 2]> <tibble> ts        <fn>   holt.wint~ <fn>  <HltWntrs>
## 11 Int    <tibble [7 x 2]> <tibble> ts        <fn>   auto.arima <fn>  <fr_ARIMA>
## 12 Int    <tibble [7 x 2]> <tibble> ts        <fn>   ets        <fn>  <ets>     
## 13 Int    <tibble [7 x 2]> <tibble> ts        <fn>   stlm       <fn>  <stlm>    
## 14 Int    <tibble [7 x 2]> <tibble> ts        <fn>   tbats      <fn>  <bats>    
## 15 Int    <tibble [7 x 2]> <tibble> ts        <fn>   holt.wint~ <fn>  <HltWntrs>
## 16 Pic    <tibble [7 x 2]> <tibble> ts        <fn>   auto.arima <fn>  <fr_ARIMA>
## 17 Pic    <tibble [7 x 2]> <tibble> ts        <fn>   ets        <fn>  <ets>     
## 18 Pic    <tibble [7 x 2]> <tibble> ts        <fn>   stlm       <fn>  <stlm>    
## 19 Pic    <tibble [7 x 2]> <tibble> ts        <fn>   tbats      <fn>  <bats>    
## 20 Pic    <tibble [7 x 2]> <tibble> ts        <fn>   holt.wint~ <fn>  <HltWntrs>

Cálculo de los errores

Activar librería yardstick

Bank_error <- Bank_VII %>%
              mutate(error =
                            map(fitted, ~ forecast(.x, h = 7)) %>%
                            map2_dbl(test, ~ mae_vec(truth = .y$Valores, estimate = .x$mean))
                          ) %>%
                    arrange(Bancos, error) #arrange based on smallest error for each area

Bank_error
## # A tibble: 20 x 9
## # Groups:   Bancos [4]
##    Bancos test     train    func_name func   model_name  model fitted      error
##    <chr>  <list>   <list>   <chr>     <list> <chr>       <lis> <list>      <dbl>
##  1 Di     <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 0.164 
##  2 Di     <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 0.206 
##  3 Di     <tibble> <tibble> ts        <fn>   tbats       <fn>  <tbats>    0.303 
##  4 Di     <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      0.347 
##  5 Di     <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     0.348 
##  6 Gua    <tibble> <tibble> ts        <fn>   tbats       <fn>  <bats>     0.0988
##  7 Gua    <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 0.155 
##  8 Gua    <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      0.177 
##  9 Gua    <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     0.205 
## 10 Gua    <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 0.219 
## 11 Int    <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     1.24  
## 12 Int    <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 1.47  
## 13 Int    <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      1.51  
## 14 Int    <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 1.57  
## 15 Int    <tibble> <tibble> ts        <fn>   tbats       <fn>  <bats>     2.06  
## 16 Pic    <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      0.297 
## 17 Pic    <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 0.446 
## 18 Pic    <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     1.04  
## 19 Pic    <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 1.07  
## 20 Pic    <tibble> <tibble> ts        <fn>   tbats       <fn>  <bats>     1.30

Escogemos el modelo que tenga el menor error

Bank_best_model <-  Bank_error %>%
                    group_by(Bancos) %>% 
                    arrange(error) %>% 
                    slice(1) %>%  #choose the smallest error & best model for each area directly
                    ungroup() %>% 
                    select(Bancos, ends_with("_name"),error)

Bank_best_model <- Bank_best_model %>% 
                    select(-error) %>% 
                    left_join(Bank_error)%>% 
                    select(-error)
## Joining with `by = join_by(Bancos, func_name, model_name)`

Pasar a de lista a data.frame

Bank_test <-  Bank_error %>% 
              mutate(
                forecast =
                  map(fitted, ~ forecast(.x, h = 7)) %>%
                  map2(test, ~ tibble(
                    Fecha = .y$Fecha,
                    Valores = as.vector(.x$mean)
                  )),
                key = paste(func_name, model_name, sep = "-")
              )
  
Bank_test
## # A tibble: 20 x 11
## # Groups:   Bancos [4]
##    Bancos test     train    func_name func   model_name  model fitted      error
##    <chr>  <list>   <list>   <chr>     <list> <chr>       <lis> <list>      <dbl>
##  1 Di     <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 0.164 
##  2 Di     <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 0.206 
##  3 Di     <tibble> <tibble> ts        <fn>   tbats       <fn>  <tbats>    0.303 
##  4 Di     <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      0.347 
##  5 Di     <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     0.348 
##  6 Gua    <tibble> <tibble> ts        <fn>   tbats       <fn>  <bats>     0.0988
##  7 Gua    <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 0.155 
##  8 Gua    <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      0.177 
##  9 Gua    <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     0.205 
## 10 Gua    <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 0.219 
## 11 Int    <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     1.24  
## 12 Int    <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 1.47  
## 13 Int    <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      1.51  
## 14 Int    <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 1.57  
## 15 Int    <tibble> <tibble> ts        <fn>   tbats       <fn>  <bats>     2.06  
## 16 Pic    <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      0.297 
## 17 Pic    <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 0.446 
## 18 Pic    <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     1.04  
## 19 Pic    <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 1.07  
## 20 Pic    <tibble> <tibble> ts        <fn>   tbats       <fn>  <bats>     1.30  
## # i 2 more variables: forecast <list>, key <chr>
Bank_test_key <- Bank_test %>%
                      select(Bancos, key, actual = test, forecast) %>%
                      spread(key, forecast) %>%
                      gather(key, value, -Bancos)
Bank_test_unnest <- Bank_test_key %>%
                    unnest(value) %>%
                    mutate(Valores = rec_revert(Valores, recipe, Bancos))

Con el resultado en formato tbl, comparamos el pronóstico con la información observada

library(plotly)
## Warning: package 'plotly' was built under R version 4.1.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
p2 <- Bank_test_unnest %>%
  ggplot(aes(x = Fecha, y = Valores, colour = key)) +
  geom_line(data = Bank_test_unnest %>% 
              filter(key == "actual"),aes(y = Valores),alpha = 0.3,size = 0.8)+
   geom_line(data = Bank_test_unnest %>% 
               filter(key != "actual"),aes(frame = key,col = key)) +
  labs(x = "", y = "Valores",title = "Model Prediction Comparison", frame = "Model") +
   facet_wrap(~ Bancos, scale = "free_y", ncol = 1) +
    tidyquant::theme_tq() +
    theme(legend.position = "none",axis.title.y = element_text(size = 12), axis.text.y = element_text(size = 7))
## Warning in geom_line(data = Bank_test_unnest %>% filter(key != "actual"), :
## Ignoring unknown aesthetics: frame
ggplotly(p2)
## Warning in p$x$data[firstFrame] <- p$x$frames[[1]]$data: número de items para
## para sustituir no es un múltiplo de la longitud del reemplazo

Modelo de selección automatizado

Bank_min_error <- Bank_error %>%
                  select(-fitted) %>% # remove unused
                  group_by(Bancos) %>%
                  filter(error == min(error)) %>%
                  ungroup()

Bank_min_error   #same selection with subarea_best_model
## # A tibble: 4 x 8
##   Bancos test             train    func_name func   model_name model   error
##   <chr>  <list>           <list>   <chr>     <list> <chr>      <list>  <dbl>
## 1 Di     <tibble [7 x 2]> <tibble> ts        <fn>   auto.arima <fn>   0.164 
## 2 Gua    <tibble [7 x 2]> <tibble> ts        <fn>   tbats      <fn>   0.0988
## 3 Int    <tibble [7 x 2]> <tibble> ts        <fn>   stlm       <fn>   1.24  
## 4 Pic    <tibble [7 x 2]> <tibble> ts        <fn>   ets        <fn>   0.297

Desempeño final del modelo

Bank_combine <- Bank_min_error %>%
                mutate(fulldata = map2(train, test, ~ bind_rows(.x, .y))) %>%
                select(Bancos, fulldata, everything(), -train, -test)
Bank_combine_nest <- Bank_combine %>%
  mutate(
    params = map(fulldata, ~ list(x = .x)),
    data = invoke_map(func, params),
    params = map(data, ~ list(x = .x)),
    fitted = invoke_map(model, params)
  ) %>%
  select(-data, -params)

Se puede incluir nivel de confianza al 95%, así como también el límite inferior y superior de la estimación

Bank_combine_forecast <- Bank_combine_nest %>%
  mutate(forecast =
    map(fitted, ~ forecast(.x, h = 6,level=95)) %>%
    map2(fulldata, ~ tibble(
      Fecha = timetk::tk_make_future_timeseries(.y$Fecha, 6),
      Inferior= as.vector(.x$lower),
      Valores = as.vector(.x$mean),
      Superior= as.vector(.x$upper)
    ))
  )

Data.frame los resultados

Bank_combine_forecast_unnest <- Bank_combine_forecast %>% 
                                select(Bancos, actual = fulldata, forecast) %>%
                                gather(key, value, -Bancos) %>%
                                unnest(value) %>%
                                mutate(Valores = rec_revert(Valores, recipe, Bancos))

Método gráfico

d <- Bank_combine_forecast_unnest %>%
  ggplot(aes(x = Fecha, y = Valores, colour = key)) +
    geom_line() +
    labs(x = NULL, y = NULL, colour = NULL) +
    facet_wrap(~ Bancos, scale = "free", ncol = 2) +
    theme_light() +
    tidyquant::scale_colour_tq()

ggplotly(d)

MAE results

mae_test_result <- Bank_error %>%
                   group_by(Bancos) %>% 
                   select(Bancos, error) %>% 
                   arrange(error) %>% 
                   slice(1) %>% 
                   ungroup()

mae_test_result <- mae_test_result %>%
                   summarise(Bancos = "Todos los bancos",
                              error = mean(error)) %>%
                    bind_rows(mae_test_result, .)

mae_test_result
## # A tibble: 5 x 2
##   Bancos            error
##   <chr>             <dbl>
## 1 Di               0.164 
## 2 Gua              0.0988
## 3 Int              1.24  
## 4 Pic              0.297 
## 5 Todos los bancos 0.450

Library modeltime

Referencias

# https://github.com/spsanderson/healthyverse_tsa
# https://business-science.github.io/modeltime/articles/nested-forecasting.html
id Store Dept Date Weekly_Sales IsHoliday Type Size Temperature Fuel_Price MarkDown1 MarkDown2 MarkDown3 MarkDown4 MarkDown5 CPI Unemployment
1_13 1 13 2011-02-25 37838.40 FALSE A 151315 62.90 3.065 NA NA NA NA NA 213.5356 7.742
1_93 1 93 2010-11-26 81660.77 TRUE A 151315 64.52 2.735 NA NA NA NA NA 211.7484 7.838
1_93 1 93 2011-09-02 73692.87 FALSE A 151315 87.83 3.533 NA NA NA NA NA 215.7971 7.962
1_93 1 93 2012-03-23 66710.40 FALSE A 151315 65.93 3.787 6118.56 9.48 4.97 426.72 3657.22 221.2864 7.348
1_13 1 13 2012-07-27 37423.21 FALSE A 151315 82.66 3.407 7146.90 389.02 1.59 10267.54 4325.19 221.9413 6.908

Paso I: seleccionar variables y cambiar nombres de variables

Con la opción set_names, es posible cambiar el nombre de las variables, de una forma más sencilla.

data_A <- walmart_sales_weekly %>%
          select(id, Date, Weekly_Sales) %>%
          set_names(c("ID", "Fecha", "Valores"))

Paso II: Gráfico

plot_time_series proviene de la libreria timetk

data_A %>%
  group_by(ID) %>%
  plot_time_series(Fecha, Valores, .interactive = T, .facet_ncol = 2, .title = "Series de Tiempo",
                   .smooth = FALSE)

Paso III: Preparación del conjunto de datos

Para este paso, consideramos las siguientes funciones que provienen de la librería modeltime.

  • extend_timeseries: “Defines how far into the future to extend the time series by each time series group.”

  • nest_timeseries: “Defines which observations should be split into the .future_data.”

  • split_nested_timeseries: “A wrapper for timetk::time_series_split() that generates training/testing splits from the .actual_data column.”

nested_data_tbl <- data_A %>%
    
    # 1. Extending: We'll predict 52 weeks into the future.
    extend_timeseries(
        .id_var        = ID,
        .date_var      = Fecha,
        .length_future = 52
    ) %>%
    
    # 2. Nesting: We'll group by ID, and create a future dataset
    #    that forecasts 52 weeks of extended data and
    #    an actual dataset that contains 104 weeks (2-years of data)
    nest_timeseries(
        .id_var        = ID,
        .length_future = 52,
        .length_actual = 52*2
    ) %>%
    
   # 3. Splitting: We'll take the actual data and create splits
   #    for accuracy and confidence interval estimation of 52 weeks (test)
   #    and the rest is training data
    split_nested_timeseries(
        .length_test = 52
    )

nested_data_tbl
## # A tibble: 7 x 4
##   ID    .actual_data       .future_data      .splits        
##   <fct> <list>             <list>            <list>         
## 1 1_1   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 2 1_3   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 3 1_8   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 4 1_13  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 5 1_38  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 6 1_93  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 7 1_95  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>

Paso IV:Crear flujos de trabajo - Tidymodels Workflows

Arima

rec_arima <- recipe(Valores ~ Fecha, extract_nested_train_split(nested_data_tbl)) 

wflw_auto_arima <- workflow() %>%
                   add_model(arima_reg(mode = "regression") %>% 
                             set_engine(engine = "auto_arima")) %>%
                   add_recipe(rec_arima)

Exponential

rec_exp <- recipe(Valores ~ Fecha, extract_nested_train_split(nested_data_tbl)) 

wflw_auto_exp <- workflow() %>%
                 add_model(exp_smoothing(mode = "regression") %>% 
                             set_engine(engine = "ets")) %>%
                   add_recipe(rec_exp)

Prophet

rec_prophet <- recipe(Valores ~ Fecha, extract_nested_train_split(nested_data_tbl)) 

wflw_prophet <- workflow() %>%
    add_model(
      prophet_reg("regression", seasonality_yearly = TRUE) %>% 
        set_engine("prophet")
    ) %>%
    add_recipe(rec_prophet)

XGBoost

step_timeseries_signature proviene de la libreria timetk. step_timeseries_signature creates a a specification of a recipe step that will convert date or date-time data into many features that can aid in machine learning with time-series data.

rec_xgb <-  recipe(Valores ~ ., extract_nested_train_split(nested_data_tbl)) %>%
            step_timeseries_signature(Fecha) %>%
            step_rm(Fecha) %>%
            step_zv(all_predictors()) %>%
            step_dummy(all_nominal_predictors(), one_hot = TRUE)

wflw_xgb <- workflow() %>%
            add_model(boost_tree("regression") %>% set_engine("xgboost")) %>%
            add_recipe(rec_xgb)

Paso V:Crear flujos de trabajo - Versión II

nested_modeltime_tbl <- modeltime_nested_fit(
  # Nested data 
  nested_data = nested_data_tbl,
  
  # Add workflows
  wflw_auto_arima,  
  wflw_auto_exp, 
  wflw_prophet,
  wflw_xgb
)
## 
Fitting models on training data... ====>-------------------------- 14% |
## ET...

Fitting models on training data... =========>--------------------- 29% |
## ET...

Fitting models on training data... =============>----------------- 43% |
## ET...

Fitting models on training data... =================>------------- 57% |
## ET...

Fitting models on training data... =====================>--------- 71% |
## ET...

Fitting models on training data... ==========================>---- 86% |
## ET...

 
nested_modeltime_tbl
## # Nested Modeltime Table
## 
## Trained on: .splits | Model Errors: [0]
## # A tibble: 7 x 5
##   ID    .actual_data       .future_data      .splits         .modeltime_tables 
##   <fct> <list>             <list>            <list>          <list>            
## 1 1_1   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 2 1_3   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 3 1_8   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 4 1_13  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 5 1_38  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 6 1_93  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 7 1_95  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>

Paso VI:Atributos registrados

Extract Nested Test Accuracy

nested_modeltime_tbl %>% 
  extract_nested_test_accuracy() %>%
  table_modeltime_accuracy(.interactive = F)
Accuracy Table
ID .model_id .model_desc .type mae mape mase smape rmse rsq
1_1 1 ARIMA Test 12316.89 61.60 2.43 46.58 13078.90 NA
1_1 2 ETSMNN Test 12316.32 61.60 2.43 46.58 13078.35 NA
1_1 3 PROPHET Test 10071.47 45.88 1.99 59.97 11776.87 0.38
1_1 4 XGBOOST Test 6232.23 25.28 1.23 24.55 9014.73 0.19
1_3 1 ARIMA Test 4798.16 30.21 1.86 30.47 8716.55 0.02
1_3 2 ETSMNN Test 4791.42 23.75 1.86 29.91 9573.41 NA
1_3 3 PROPHET Test 3539.80 29.87 1.37 25.46 4707.77 0.80
1_3 4 XGBOOST Test 3071.72 18.69 1.19 20.26 5078.50 0.79
1_8 1 ARIMA Test 3558.18 9.19 1.51 9.76 4112.95 NA
1_8 2 ETSMAN Test 4976.42 12.91 2.12 14.01 5567.39 0.12
1_8 3 PROPHET Test 4282.96 11.15 1.82 11.96 4845.08 0.00
1_8 4 XGBOOST Test 3608.13 9.39 1.53 9.95 4025.27 0.30
1_13 1 ARIMA Test 2308.38 5.68 0.85 5.85 3066.11 0.06
1_13 2 ETSANN Test 2601.22 6.37 0.96 6.62 3369.45 NA
1_13 3 PROPHET Test 6861.13 17.02 2.53 18.78 7309.61 0.15
1_13 4 XGBOOST Test 2758.50 6.85 1.02 7.12 3119.72 0.53
1_38 1 ARIMA Test 7922.19 9.78 0.68 10.05 10647.73 NA
1_38 2 ETSMNN Test 7924.30 9.80 0.68 10.05 10624.30 NA
1_38 3 PROPHET Test 26007.21 32.57 2.22 39.60 27938.83 0.08
1_38 4 XGBOOST Test 7492.22 9.23 0.64 9.58 9437.51 0.33
1_93 1 ARIMA Test 8292.72 10.42 0.83 11.02 10365.73 0.06
1_93 2 ETSANN Test 8703.63 10.89 0.88 11.44 10338.66 NA
1_93 3 PROPHET Test 17165.30 21.37 1.73 24.46 19123.17 0.03
1_93 4 XGBOOST Test 7140.45 8.98 0.72 9.53 8859.37 0.49
1_95 1 ARIMA Test 8945.63 7.10 1.08 7.29 11410.65 0.01
1_95 2 ETSMNN Test 8713.50 6.95 1.05 7.10 11224.78 NA
1_95 3 PROPHET Test 22836.06 18.30 2.75 20.37 24094.49 0.48
1_95 4 XGBOOST Test 10794.54 8.49 1.30 8.93 13102.82 0.11

Extract Nested Test Forecast

nested_modeltime_tbl %>% 
  extract_nested_test_forecast() %>%
  group_by(ID) %>%
  plot_modeltime_forecast(
    .facet_ncol  = 2,
    .interactive = FALSE
  )

Extract Nested Error Logs

No se observan errores en los modelos propuestos

nested_modeltime_tbl %>% 
  extract_nested_error_report()
## # A tibble: 0 x 4
## # i 4 variables: ID <fct>, .model_id <int>, .model_desc <chr>,
## #   .error_desc <chr>

Selección del mejor modelo

best_nested_modeltime_tbl <-  nested_modeltime_tbl %>%
                              modeltime_nested_select_best(
                                metric                = "rmse", 
                                minimize              = TRUE, 
                                filter_test_forecasts = TRUE)

Extract Nested Best Model Report

best_nested_modeltime_tbl %>%
  extract_nested_best_model_report()
## # Nested Modeltime Table
## 
## Trained on: .splits | Model Errors: [0]
## # A tibble: 7 x 10
##   ID    .model_id .model_desc .type   mae  mape  mase smape   rmse     rsq
##   <fct>     <int> <chr>       <chr> <dbl> <dbl> <dbl> <dbl>  <dbl>   <dbl>
## 1 1_1           4 XGBOOST     Test  6232. 25.3  1.23  24.5   9015.  0.191 
## 2 1_3           3 PROPHET     Test  3540. 29.9  1.37  25.5   4708.  0.796 
## 3 1_8           4 XGBOOST     Test  3608.  9.39 1.53   9.95  4025.  0.304 
## 4 1_13          1 ARIMA       Test  2308.  5.68 0.850  5.85  3066.  0.0649
## 5 1_38          4 XGBOOST     Test  7492.  9.23 0.640  9.58  9438.  0.332 
## 6 1_93          4 XGBOOST     Test  7140.  8.98 0.719  9.53  8859.  0.490 
## 7 1_95          2 ETSMNN      Test  8714.  6.95 1.05   7.10 11225. NA

Extract Nested Best Test Forecasts

best_nested_modeltime_tbl %>%
  extract_nested_test_forecast() %>%
  group_by(ID) %>%
  plot_modeltime_forecast(
    .facet_ncol  = 2,
    .interactive = TRUE
  )

Re ajustando el modelo

nested_modeltime_refit_tbl <- best_nested_modeltime_tbl %>%
                              modeltime_nested_refit(control = control_nested_refit(verbose = TRUE))
## i [1/7] Starting Modeltime Table: ID 1_1...
## v Model 4 Passed XGBOOST.
## v [1/7] Finished Modeltime Table: ID 1_1
## i [2/7] Starting Modeltime Table: ID 1_3...
## v Model 3 Passed PROPHET.
## v [2/7] Finished Modeltime Table: ID 1_3
## i [3/7] Starting Modeltime Table: ID 1_8...
## v Model 4 Passed XGBOOST.
## v [3/7] Finished Modeltime Table: ID 1_8
## i [4/7] Starting Modeltime Table: ID 1_13...
## v Model 1 Passed ARIMA(3,1,0)(0,0,1)[12].
## v [4/7] Finished Modeltime Table: ID 1_13
## i [5/7] Starting Modeltime Table: ID 1_38...
## v Model 4 Passed XGBOOST.
## v [5/7] Finished Modeltime Table: ID 1_38
## i [6/7] Starting Modeltime Table: ID 1_93...
## v Model 4 Passed XGBOOST.
## v [6/7] Finished Modeltime Table: ID 1_93
## i [7/7] Starting Modeltime Table: ID 1_95...
## v Model 2 Passed ETS(M,N,N).
## v [7/7] Finished Modeltime Table: ID 1_95
## Finished in: 7.304214 secs.
nested_modeltime_refit_tbl
## # Nested Modeltime Table
## 
## Trained on: .actual_data | Model Errors: [0]
## # A tibble: 7 x 5
##   ID    .actual_data       .future_data      .splits         .modeltime_tables 
##   <fct> <list>             <list>            <list>          <list>            
## 1 1_1   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 2 1_3   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 3 1_8   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 4 1_13  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 5 1_38  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 6 1_93  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 7 1_95  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>

Extracción de pronostico futuro

nested_modeltime_refit_tbl %>%
    extract_nested_future_forecast() %>%
    group_by(ID) %>%
    plot_modeltime_forecast(
      .interactive = FALSE,
      .facet_ncol  = 2
    )

Case II: One time series applied to multiple models

For this step, first at all we have to work with this libraries.

library(pacman)
p_load("modeltime","modeltime.resample","plotly","tidymodels","timetk")
date value
2010-01-01 999.9930
2010-02-01 1000.3700
2010-03-01 1001.0245
2010-04-01 1000.0097
2010-05-01 1001.3507
2010-06-01 999.4062
2010-07-01 999.3950
2010-08-01 999.5441

Split Data into Training & Test Sets

Split into training (80%) and test (20%). As Rob J Hyndman and George Athanasopoulos mentioned: “The size of the test is typically about 20% of the total sample, although this value depends on how long the sample is and how far ahead you want to forecast”.

time_series_split(): A convenience function to return a single time series split containing a training/testing sample. This function comes from the library timetk.

ff: Name of the data frame that will be analized.

assess: The length of the test set.

splits <- time_series_split(
  ff, 
  assess = "12 months",  # Test set = last 12 months
  cumulative = TRUE      # Training set = all data before test
)
## Using date_var: date

The tk_time_series_cv_plan(): function provides a simple interface to prepare a time series resample specification (rset) of either rolling_origin or time_series_cv class.

# Visualize split (optional)
splits %>%
  tk_time_series_cv_plan() %>%
  plot_time_series_cv_plan(date, value,
                           .title = "Time Series Cross Validation Plan")

Define & Train Multiple Models

arima_reg(): Is a way to generate a specification of an ARIMA model. exp_smoothing(): Is a way to generate a specification of an Exponential Smoothing model. prophet_reg(): Is a way to generate a specification of a PROPHET model.

# Model 1: Auto ARIMA
model_arima <-  arima_reg() %>%
                set_engine("auto_arima") %>%
                fit(value ~ date, training(splits))

# Model 2: ETS
model_ets <- exp_smoothing() %>%
             set_engine("ets") %>%
             fit(value ~ date, training(splits))

# Model 3: Prophet
model_prophet <- prophet_reg() %>%
                 set_engine("prophet") %>%
                 fit(value ~ date, training(splits))

Time Series Cross-Validation

Create a modeltime table

Creates a table of models

model_table <- modeltime_table(model_arima,
                               model_ets,
                               model_prophet)

Cross-validate on training data

Create rsample cross validation sets for time series. This function produces a sampling plan starting with the most recent time series observations, rolling backwards.

initial=60: Use the first 60 observations (e.g., 5 years if monthly data) to train the model.Ensures the model learns from a sufficiently long history.

assess=12: Evaluate the model on the next 12 observations (e.g., 1 year if monthly data). Simulates real-world forecasting where you predict future periods.Asses must be the same as set before.

resamples <- time_series_cv(
  training(splits),
  initial = 60,   # 5 years
  assess = 12,    # 1 year
  skip = 6        # 6-month increments
)
## Using date_var: date

Evaluate models

Resampled predictions are commonly used for: Analyzing accuracy and stability of models

resample_results <- model_table %>%
                    modeltime_fit_resamples(resamples,
                                            control = control_resamples(verbose = FALSE))

Rank models by RMSE

modeltime_resample_accuracy(): This is a wrapper for yardstick that simplifies time series regression accuracy metric calculations.

modeltime::table_modeltime_accuracy(): To format the results for reporting.

resample_results %>%
  modeltime_resample_accuracy() %>%
  table_modeltime_accuracy()

Select the Best Model & Forecast

best_model <- model_ets 

Forecast on test data

Step I: modeltime_calibrate Calibrates the model on the test set to generate residuals and confidence intervals. Takes your trained best_model. Uses the testing(splits) data (held-out data from initial_time_split()) to:

*Generate predictions

*Compare predictions to actual values (actual_data).

*Compute residuals (errors) and confidence intervals.

Step II: modeltime_forecast(new_data = testing(splits), actual_data = ff)

Generates forecasts for the test set (new_data) and combines them with actual data (actual_data) for visualization/analysis.

forecast_test <- best_model %>%
                 modeltime_calibrate(testing(splits)) %>%
                 modeltime_forecast(new_data = testing(splits),
                                    actual_data = ff)
## Converting to Modeltime Table.

Visualize forecast

forecast_test %>%
  plot_modeltime_forecast(.interactive = TRUE)

Final Forecast (Future Data)

future_df <- ff %>%
             future_frame(.length_out = 12, .date_var = date)

future_forecast <- best_model %>%
                   modeltime_calibrate(ff) %>%  # Refit on full dataset
                   modeltime_forecast(new_data = future_df,
                                      actual_data = ff)
## Converting to Modeltime Table.
future_forecast %>%
  plot_modeltime_forecast(
    .interactive = FALSE,  # Set TRUE for Plotly (zoomable)
    .conf_interval_show = TRUE,  # Default
    .conf_interval_alpha = 0.2   # Transparency
  )

Introducción libreria TSstudio

date value
2010-01-01 999.4895
2010-02-01 999.8698
2010-03-01 1001.7087
2010-04-01 1000.2705
2010-05-01 1000.3793

Ploting time series object

am <- ts_plot(df, 
        title = "Exportaciones cacao",
        Ytitle = "En USD", color = "#0C769E")

am

Seasonality analysis

ap <- ts_seasonal(df, type = "all")

ap

Training forecasting models

First at all, we have to transform data.frame as ts object.

ts_data <- ts(df$value, start = c(2010, 1), frequency = 12)

In second place, we Setting training and testing partitions

df_s  <- ts_split(ts.obj = ts_data, sample.out = 12)
train <- df_s$train
test  <- df_s$test

Forecasting with auto.arima

md <- auto.arima(train)
fc <- forecast(md, h = 12)

Plotting actual vs. fitted and forecasted

test_forecast(actual = ts_data, forecast.obj = fc, test = test)

Plotting the forecast

plot_forecast(fc,
               title = "Gráfico de pronóstico", color = "#0D3512")

Apply multiple models

methods <- list(ets1 = list(method = "ets",
                            method_arg = list(opt.crit = "lik"),
                            notes = "ETS model with opt.crit = lik"),
                ets2 = list(method = "ets",
                            method_arg = list(opt.crit = "amse"),
                            notes = "ETS model with opt.crit = amse"),
                arima1 = list(method = "arima",
                              method_arg = list(order = c(1,1,1)),
                              notes = "ARIMA(1,1,1)"),
                   hw = list(method = "HoltWinters",
                          method_arg = NULL,
                          notes = "HoltWinters Model"),
                tslm = list(method = "tslm",
                            method_arg = list(formula = input ~ trend + season),
                            notes = "tslm model with trend and seasonal components"))

Training the models with backtesting

train_model: Method for train test and compare multiple time series models using either one partition (i.e., sample out) or multipe partitions (backtesting)

input: A univariate time series object (ts class).

md <- train_model(input = ts_data,
                  methods = methods,
                  train_method = list(partitions = 6, 
                                      sample.out = 12, 
                                      space = 3),
                  horizon = 12,
                  error = "MAPE")
## # A tibble: 5 x 7
##   model_id model   notes avg_mape avg_rmse `avg_coverage_80%` `avg_coverage_95%`
##   <chr>    <chr>   <chr>    <dbl>    <dbl>              <dbl>              <dbl>
## 1 tslm     tslm    tslm~ 0.000629    0.810              0.875              0.986
## 2 ets2     ets     ETS ~ 0.000630    0.794              0.861              0.958
## 3 ets1     ets     ETS ~ 0.000638    0.795              0.931              0.958
## 4 arima1   arima   ARIM~ 0.000693    0.920              0.847              0.958
## 5 hw       HoltWi~ Holt~ 0.000705    0.892              0.931              0.986
plot_model(md)