library(pacman)

p_load("lubridate","tidyverse", "data.table", "forecast", "scales", "kableExtra","recipes","sweep",   "tidyquant", "timetk", "magrittr", "yardstick", "modeltime", "tidymodels","TSstudio")

Introducción

set.seed(345)

n <- 200

datos <- tibble    (Paris   = cumsum(sample(c(500, 3500), n, TRUE)),
                    Madrid  = cumsum(sample(c(700, 4500), n, TRUE)),
                    Rio     = cumsum(sample(c(900, 6500), n, TRUE)))

Pronóstico simúltaneo de tres bases de datos

La línea Arima_Model_Forecast permite obtener la proyección de las tres bases de datos, junto con los intervalos de confianza al 95% nivel de confianza, y los guarda en una lista por cada base de datos.
La línea Forecast permite obtener la proyección de las tres bases de datos, junto con los intervalos de confianza al 95% nivel de confianza.

#Pronóstico

Base_A <- ts(datos[1:3],start=c(2011),frequency=12)

forecasting_function_ARIMA <- function(Z, hrz = 16) {
                timeseries <- msts(Z, start = 2011, seasonal.periods = 12)
                forecast   <- auto.arima(timeseries)
}

Forecasting_list_ARIMA <- lapply(X = Base_A, forecasting_function_ARIMA)
Arima_Model_Forecast   <- lapply(Forecasting_list_ARIMA, forecast,h=6,level=95)

Forecast      <- lapply(Forecasting_list_ARIMA, forecast,h=6,level=95)%>%data.frame()

Del ejercicio anterior, si queremos modificar la estrutura de los encabezados de la matriz de proyecciones, aplicamos la siguiente rutina:

Forecast      <- lapply(Forecasting_list_ARIMA, forecast,h=6,level=95)%>%data.frame()%>%
                  rownames_to_column(var = "FECHA")%>% as_tibble()%>%
                  rename_with(stringr::str_replace, pattern = ".Point.Forecast", replacement = " Pronóstico", 
                              matches(".Point.Forecast"))%>%
                  rename_with(stringr::str_replace, pattern = ".Lo.95", replacement = " Límite Inf", 
                              matches(".Lo.95"))%>%
                  rename_with(stringr::str_replace, pattern = ".Hi.95", replacement = " Límite Sup", 
                              matches(".Hi.95"))%>%
                  rename_at(1:4,~ str_to_title(.))

Gráficos simultáneos

y <- datos
colnames <- dimnames(y)[[2]]

TEMAD <- theme(axis.line   = element_line(size=0.5,  colour = "black"),
               plot.title  = element_text(hjust=0.5, size = 12, face = "bold"),
               legend.text = element_text(size = 20),
               plot.caption       =  element_text(size = 8),
               legend.position    = 'none',
               legend.title       = element_blank(),
               axis.text.x        = element_text(size = 11),
               axis.text.y        = element_text(size = 11),
               text               = element_text(size = 11),
               panel.grid.major   = element_blank(),
               panel.grid.minor   = element_blank(),
               panel.border       = element_blank(),
               panel.background   = element_blank())

## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## i Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

GRA <- lapply(seq_along(datos),function(i) {
       autoplot(fit <- forecast(auto.arima(ts(datos[[i]],start=c(2011,1),
                                       end = c(2027,8),frequency = 12)),6,
                                level=95),
                ts.colour = "dodgerblue4",predict.colour = "#999999", 
                predict.linetype = "dashed")+
    labs(x = 'Años', y = "Número de atenciones") +
    ggtitle(colnames[i])+ 
    theme(plot.title = element_text(hjust = 0.5))+
    scale_y_continuous(labels=comma) + TEMAD+
    coord_cartesian(xlim = c(2011, 2027.25))
  
} 
)

GRA <- set_names(GRA, colnames)
GRA

## $Paris

## 
## $Madrid

## 
## $Rio

Creación de fechas:

Ejemplo I:

n <- 6

set <- tibble(Vinces  = cumsum(sample(c(500, 3500), n, TRUE)))%>%
       mutate(FECHA   = seq(as.Date("2022-8-1"), as.Date("2023-1-1"), by = "months"))  

set

## # A tibble: 6 x 2
##   Vinces FECHA     
##    <dbl> <date>    
## 1   3500 2022-08-01
## 2   7000 2022-09-01
## 3   7500 2022-10-01
## 4   8000 2022-11-01
## 5  11500 2022-12-01
## 6  15000 2023-01-01

Ejemplo II:

n <- 20

set_A <- tibble(Babahoyo  = cumsum(sample(c(800, 4500), n, TRUE)))%>%
         mutate(FECHA= seq(as.Date("2018-1-1"),by = "months",length.out = 20))
head(set_A,5)

## # A tibble: 5 x 2
##   Babahoyo FECHA     
##      <dbl> <date>    
## 1      800 2018-01-01
## 2     1600 2018-02-01
## 3     6100 2018-03-01
## 4    10600 2018-04-01
## 5    11400 2018-05-01

Ejemplo III:

Crear variables Fecha que comience con el día final de cada mes

n <- 20

set_A <- tibble(Napo  = cumsum(sample(c(800, 4500), n, TRUE)))%>%
         mutate(FECHA= seq(as.Date("2018-1-1"),by = "months",length.out = 20)-1)
head(set_A,5)

## # A tibble: 5 x 2
##    Napo FECHA     
##   <dbl> <date>    
## 1  4500 2017-12-31
## 2  9000 2018-01-31
## 3  9800 2018-02-28
## 4 10600 2018-03-31
## 5 15100 2018-04-30

Ejemplo IV:

Creación de fechas con la librería lubridate

fecha_de_analisis <- dmy("31-12-2022")

Ejemplo VI:

cambiar Formato fechas

## # A tibble: 3 x 2
##    Napo FECHA     
##   <dbl> <date>    
## 1   800 2017-12-31
## 2  1600 2018-01-31
## 3  6100 2018-02-28

Para formato de fechas, se recomienda el siguiente link: https://www.geeksforgeeks.org/how-to-use-date-formats-in-r/

%b: Abreviación de mes %y: Abreviación de año

AA_set <- A_set %>%
          mutate(FECHA=label_date(format = "%b - %y")(FECHA))

head(AA_set)

## # A tibble: 3 x 2
##    Napo FECHA    
##   <dbl> <chr>    
## 1   800 dic. - 17
## 2  1600 ene. - 18
## 3  6100 feb. - 18

Ejercicio Completo

Jamaica	Canada	Honduras
-0.3465840	6.303960	11.321528
0.6276359	3.953276	12.066658
0.6437831	4.370569	11.091397
-0.3125563	4.856155	12.298446
1.0578810	5.592618	11.263103
0.3202071	4.931182	13.604049
-0.0607272	4.375798	12.169801
1.4481094	5.346821	13.492133
0.5840688	6.112750	11.751682
-0.5442221	4.188722	13.107554
0.9309310	4.616367	12.809937
-0.2657284	6.157338	11.740550
0.5602174	3.981209	11.738767
-0.1732012	5.980474	12.988104
-0.1593099	5.141921	12.105388
-0.2498411	4.057758	12.214577
1.1909899	4.062254	11.143715
-1.6425621	5.903888	11.533752
-1.2030146	5.369082	13.552865
-0.5381974	5.312796	12.183695
-0.5510144	6.083338	12.100343
-0.1480458	5.207964	14.040965
-0.5208616	4.447765	13.231016
-1.5645956	5.358313	13.398592
0.8559413	6.631799	9.574225
-0.6693584	5.021633	11.259087
-1.1820668	5.756193	12.744493
0.8058029	3.638682	11.141806
-1.2877233	4.391235	11.619835
-0.2914974	4.788253	12.831156

Creamos una función, que identifique valores atípicos y lo transforme por NA. Es importante recalcar que está función es disponible si el conjunto de datos es data.frame, no funciona para tibble.

atipicos = function(df) {
    for (i in 1:ncol(df)) {
        dat = df[which(df[,i] > quantile(df[,i], .1) & df[,i] < quantile(df[,i], .9)),i]
        mean = mean(dat)
        sd = sd(dat)
        df[which(   abs((df[,i]) - mean) > (sd * 3)), i] = NA
    }
    return(df)
}

Transformamos a lista y por data.frame eliminamos NA. En este caso la base de datos denominado Honduras tendrá una observación menos.

AA <- atipicos(country) %>%
      pivot_longer(cols  = Jamaica:Honduras,
                   names_to = c("Países"),
                   values_to = "Saldos")

AB <- split(AA, AA$Países)

AB <- map(AB, ~filter(.x,if_all(everything(),~!is.na(.))))

Para correr auto.arima por listas, se aplica la siguiente rutina

mod <- function(x){
  x$Saldos %>%
    ts(frequency = 12, start = c(2019,1)) %>%
    auto.arima() %>%
    forecast(h=8, level=95) %>%
    as.data.frame()%>%
    mutate_if(is.numeric,round,digits=2)%>%
    rownames_to_column(var="Fecha") %>%
    as_tibble()
}

AC <- lapply(AB, mod)

Introducción libreria sweep

Aplicación de la libreria sweep

## New names:
## * `` -> `...1`
## * `` -> `...225`
## * `` -> `...226`

## # A tibble: 5 x 2
##   FECHA       Saldos
##   <date>       <dbl>
## 1 2014-12-01 103787.
## 2 2015-01-01 101090.
## 3 2015-02-01 107776.
## 4 2015-03-01 115545.
## 5 2015-04-01 119154.

El primer paso consiste en tener la base de datos junto con una variable de tipo Date

BA_ts <- tk_ts(BA, start = 2014, freq = 12, silent = TRUE)

fit_ets <- BA_ts %>%
    ets()

Retorna los parámetros del modelo

library(sweep)

sw_tidy(fit_ets)

## # A tibble: 4 x 2
##   term       estimate
##   <chr>         <dbl>
## 1 alpha      1.00    
## 2 beta       0.000100
## 3 l     107325.      
## 4 b       1500.

Retorna la calidad del modelo

sw_glance(fit_ets)

## # A tibble: 1 x 12
##   model.desc sigma logLik   AIC   BIC    ME  RMSE   MAE    MPE  MAPE  MASE
##   <chr>      <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1 ETS(A,A,N) 3802.  -745. 1501. 1512. -116. 3694. 2928. -0.194  2.13 0.192
## # i 1 more variable: ACF1 <dbl>

w_augment() retorna los valores reales, ajustados y residuos del modelo

augment_fit_ets <- sw_augment(fit_ets)
head(augment_fit_ets,5)

## # A tibble: 5 x 4
##   index     .actual .fitted .resid
##   <yearmon>   <dbl>   <dbl>  <dbl>
## 1 ene. 2014 103787. 108825. -5038.
## 2 feb. 2014 101090. 105288. -4198.
## 3 mar. 2014 107776. 102589.  5186.
## 4 abr. 2014 115545. 109275.  6270.
## 5 may. 2014 119154. 117045.  2109.

Pronóstico del modelo

fcast_ets <- fit_ets %>%
             forecast(h = 12, level=95)

Tidy the forecast object

Fore <- sw_sweep(fcast_ets, fitted = TRUE)

Gráfico

sw_sweep(fcast_ets, timetk_idx = TRUE) %>%
    ggplot(aes(x = index, y = Saldos, color = key)) +
    geom_ribbon(aes(ymin = lo.95, ymax = hi.95), 
                fill = "#D5DBFF", color = NA, size = 0) +
    #geom_ribbon(aes(ymin = lo.80, ymax = hi.80, fill = key), 
     #           fill = "#596DD5", color = NA, size = 0, alpha = 0.8) +
    geom_line(size = 1) +
    labs(title = "Pronóstico- Obligaciones con el Público", x = "", y = "USD", 
         subtitle = "Irregular Time Index") +
    scale_y_continuous(labels = scales::dollar) +
    scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
    scale_color_tq() +
    scale_fill_tq() +
    theme_tq()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Aplicación de Varios modelos.

Tomando el mismo modelo del punto anterior

BA_ts_II <- BA %>%
            tk_ts(select = -FECHA, start = c(1990, 3), freq = 4)

Creamos una lista por cada uno de los modelos a emplear

models_list <- list(
    auto.arima = list(
        y = BA_ts_II
        ),
    ets = list(
        y = BA_ts_II,
        damped = TRUE
    ),
    bats = list(
        y = BA_ts_II
    )
)

Convertimos a datos la lista previamente creada

models_tbl <- enframe(models_list, name = "f", value = "params")

models_tbl_fit <- models_tbl %>%
                  mutate(fit = invoke_map(f, params))

## Warning: There was 1 warning in `mutate()`.
## i In argument: `fit = invoke_map(f, params)`.
## Caused by warning:
## ! `invoke_map()` was deprecated in purrr 1.0.0.
## i Please use map() + exec() instead.

Inspección del Modelo

AA <- models_tbl_fit %>%
      mutate(tidy = map(fit, sw_tidy)) %>%
      unnest(tidy) %>%
      spread(key = f, value = estimate)

SW glance

AB <- models_tbl_fit %>%
      mutate(glance = map(fit, sw_glance)) %>%
      unnest(glance, .drop = TRUE)

## Warning: The `.drop` argument of `unnest()` is deprecated as of tidyr 1.0.0.
## i All list-columns are now preserved.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Pronóstico del Modelo

models_tbl_fcast <- models_tbl_fit %>%
    mutate(fcast = map(fit, forecast, h = 6, level=95))
models_tbl_fcast

## # A tibble: 3 x 4
##   f          params           fit        fcast     
##   <chr>      <list>           <list>     <list>    
## 1 auto.arima <named list [1]> <fr_ARIMA> <forecast>
## 2 ets        <named list [2]> <ets>      <forecast>
## 3 bats       <named list [1]> <bats>     <forecast>

Tidyng the forcast

models_tbl_fcast_tidy <- models_tbl_fcast %>%
                          mutate(sweep = map(fcast, sw_sweep, fitted = FALSE, timetk_idx = TRUE, rename_index = "date"))
models_tbl_fcast_tidy

## # A tibble: 3 x 5
##   f          params           fit        fcast      sweep            
##   <chr>      <list>           <list>     <list>     <list>           
## 1 auto.arima <named list [1]> <fr_ARIMA> <forecast> <tibble [78 x 5]>
## 2 ets        <named list [2]> <ets>      <forecast> <tibble [78 x 5]>
## 3 bats       <named list [1]> <bats>     <forecast> <tibble [78 x 5]>

Obtenemos el resultado por data.frame

AD <- models_tbl_fcast_tidy %>%
                        unnest(sweep)

Gráficamos el Modelo

models_tbl_fcast_tidy %>%
    unnest(sweep) %>%
    ggplot(aes(x = date, y = Saldos, color = key, group = f)) +
    geom_ribbon(aes(ymin = lo.95, ymax = hi.95),
                fill = "#D5DBFF", color = NA, size = 0) +
 #   geom_ribbon(aes(ymin = lo.80, ymax = hi.80, fill = key),
 #               fill = "#596DD5", color = NA, size = 0, alpha = 0.8) +
    geom_line(size = 1) +
    facet_wrap(~f, nrow = 3) +
    labs(title = "Pronóstico Saldos",
         subtitle = "Pronóstico aplicando varios modelos: ARIMA, BATS, ETS",
         x = "", y = "Price") +
    scale_y_continuous(labels = scales::dollar) +
    #scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
    theme_tq() +
    scale_color_tq()

Un modelo para varios series de tiempo

Previo al ejercicio de pronóstico, aplicamos un pequeño ajuste a la base de datos.

files <- list.files(pattern=".xlsx")
files <- files[c(1:4)]
#print(files)

GH <- list()

for (i in seq_along(files)) {
     GH[[i]] <- files[i] %>%
       map_df(
         ~read_xlsx(path=files[i], sheet= "BAL",skip = 4))
}

GH <- set_names(GH, files)

GHA <- map(GH, ~select(.x,CÓDIGO, CUENTA, `43131`:`44804`))    %>%
      map(~dplyr::filter(.x,CUENTA == "CARTERA DE CRÉDITOS")) %>%
      map(~pivot_longer(.x,cols    = `43131`:`44804`,
                         names_to  = c("Años"),
                         values_to = "Créditos"))%>%
      map(~mutate(.x,Fecha= seq(as.Date("2018-1-1"),by =  "months",length.out = nrow(.))))%>%
      map(~select(.x,Fecha,Créditos))

# Ponemos en cada data.frame, el nombre de cada entidad

Nammes <- c("Di", "Gua", "Int", "Pic")

GHA <- map2(GHA, Nammes, ~ .x %>% 
            mutate(Nammes = .y, .before = 1)) 

Bank <- bind_rows(GHA)

#save(Bank, file = "Bank.RData")

Paso 1: Nest data.frame

Bank_nest <- Bank %>%
             group_by(Nammes) %>%
             nest()

Paso 2: Declaramos a la base como time series (ts)

Bank_cat2_ts <- Bank_nest %>%
                mutate(data.ts = map(.x       = data, 
                                     .f       = tk_ts, 
                                     select   = -Fecha, 
                                     start    = 2018,
                                     freq     = 12))

Paso 3: Modelamos la Serie de Tiempo

Para el presente ejercicio aplicamos un modelo Holt-winters.

Bank_cat2_ts_fit <- Bank_cat2_ts %>%
                    mutate(fit.HW = map(data.ts, HoltWinters))

sw_tidy

Esta opción, nos permite ver los parámetros del modelo. En la opción key se debe anotar la variable categórica.

PM <- Bank_cat2_ts_fit  %>%
      mutate(tidy = map(fit.HW, sw_tidy)) %>%
      unnest(tidy) %>%
      spread(key = Nammes, value = estimate)

sw_glance

PSW <- Bank_cat2_ts_fit %>%
       mutate(glance = map(fit.HW, sw_glance)) %>%
       unnest(glance)
PSW

## # A tibble: 4 x 16
## # Groups:   Nammes [4]
##   Nammes data     data.ts       fit.HW     model.desc   sigma logLik AIC   BIC  
##   <chr>  <list>   <list>        <list>     <chr>        <dbl> <lgl>  <lgl> <lgl>
## 1 Di     <tibble> <ts [56 x 1]> <HltWntrs> HoltWinters 3.14e5 NA     NA    NA   
## 2 Gua    <tibble> <ts [56 x 1]> <HltWntrs> HoltWinters 3.68e5 NA     NA    NA   
## 3 Int    <tibble> <ts [56 x 1]> <HltWntrs> HoltWinters 2.24e5 NA     NA    NA   
## 4 Pic    <tibble> <ts [56 x 1]> <HltWntrs> HoltWinters 9.37e5 NA     NA    NA   
## # i 7 more variables: ME <dbl>, RMSE <dbl>, MAE <dbl>, MPE <dbl>, MAPE <dbl>,
## #   MASE <dbl>, ACF1 <dbl>

sw_tidy_decomp

PSWT <- Bank_cat2_ts_fit %>%
        mutate(decomp = map(fit.HW, sw_tidy_decomp, timetk_idx = TRUE,   rename_index = "date")) %>%
        unnest(decomp)

Pronóstico del Modelo

Bank_cat2_fcast <- Bank_cat2_ts_fit %>%
                   mutate(fcast.HW = map(fit.HW, forecast, h = 6, level=95))

Pronóstico del Modelo: Tidy

Bank_cat2_fcast <- Bank_cat2_fcast %>%
    mutate(sweep = map(fcast.HW, sw_sweep, fitted = FALSE, 
                       timetk_idx = TRUE)) %>%
    unnest(sweep)

Bank_cat2_fcast %>%
    ggplot(aes(x = index, y = Créditos, color = key, group = Nammes)) +
    geom_ribbon(aes(ymin = lo.95, ymax = hi.95), 
                fill = "#D5DBFF", color = NA, size = 0) +
    geom_line() +
    labs(title = "Pronóstico Cartera de Crédito",
         subtitle = "Modelo Holt-Winters",
         x = "", y = "Units") +
    scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
    scale_color_tq() +
    scale_fill_tq() +
    facet_wrap(~ Nammes, scales = "free_y", ncol = 3) +
    theme_tq() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Bank_cat2_fcast

## # A tibble: 248 x 10
## # Groups:   Nammes [4]
##    Nammes data     data.ts       fit.HW     fcast.HW   index      key   Créditos
##    <chr>  <list>   <list>        <list>     <list>     <date>     <chr>    <dbl>
##  1 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-01-01 actu~ 1335810.
##  2 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-02-01 actu~ 1336984.
##  3 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-03-01 actu~ 1400660.
##  4 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-04-01 actu~ 1428374.
##  5 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-05-01 actu~ 1458558.
##  6 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-06-01 actu~ 1488693.
##  7 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-07-01 actu~ 1524040.
##  8 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-08-01 actu~ 1633440.
##  9 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-09-01 actu~ 1696293.
## 10 Di     <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-10-01 actu~ 1717122.
## # i 238 more rows
## # i 2 more variables: lo.95 <dbl>, hi.95 <dbl>

Varios Modelos - Varias Series de Tiempo

load("D:/Documentos/Estadisticos/R/R_studio/Time_Series/Bank.RData")

#glimpse(Bank)
Bank %>%
ggplot(mapping = aes(x =Fecha, y= Créditos))+
  geom_line()+
  facet_wrap(~Nammes, scale ="free", ncol =2)+
  tidyquant::theme_tq()

Validación Cruzada

# get the min-max of the time index for each sample
test_end   <- max(Bank$Fecha)
test_start <- dmy("01-02-2022")

train_end   <- test_start %m-% months(1)
train_start <- min(Bank$Fecha)

interval_train   <- interval(start = train_start, end = train_end)
interval_intest  <- interval(start = test_start, end = test_end)

Con este gráfico, se identifica dentro de la base, aquella clasificada como train y aquella como test

Bank %>%
  mutate(sample = case_when(
    Fecha %within% interval_train ~ "train",
    Fecha %within% interval_intest ~ "test"
  )) %>%
  drop_na() %>%
  mutate(sample = factor(sample, levels = c("train", "test"))) %>%
  ggplot(aes(x = Fecha, y = Créditos, colour = sample)) +
    geom_line() +
    labs(x = NULL, y = NULL, colour = NULL) +
    facet_wrap(~ Nammes, scale = "free", ncol = 2) +
    tidyquant::theme_tq() +
    tidyquant::scale_colour_tq()

Procesamiento de la Información

Con el objetivo de prevenir que valores atípicos afecten al modelo, se rescala la información aplicando para este fin el paquete recipes.

Bank_I <- Bank %>%
           spread(Nammes, Créditos)

recipe <- recipe(~., filter(Bank_I, Fecha %within% interval_train)) %>% 
          step_sqrt(all_numeric()) %>%
          step_center(all_numeric()) %>%
          step_scale(all_numeric()) %>%
          prep()

Bank_I <- bake(recipe, Bank_I)

Luego, cambiamos la data a la forma original.

Bank_II <- Bank_I %>%
           gather(Bancos, Valores, -Fecha)

Procesamiento de la Base de datos usando la libreria recipe

# revert back function
rec_revert <- function(vector, recipe, varname) {

# store recipe values
rec_center <- recipe$steps[[2]]$means[varname]
rec_scale <-  recipe$steps[[3]]$sds[varname]

# convert back based on the recipe
results <- (vector * rec_scale + rec_center) ^ 2

# add additional adjustment if necessary
results <- round(results)

# return the results
results

}

Modelamiento y Pronóstico

# adjust by sample
Bank_III <- Bank_II %>%
            mutate(sample = case_when(
              Fecha %within% interval_train ~ "train",
              Fecha %within% interval_intest ~ "test"
            )) %>%
            drop_na()

# nest the data train
Bank_IV <- Bank_III %>% 
           group_by(Bancos, sample) %>%
           nest(.key = "data") %>%
           spread(sample, data)

Función

data_funs <- list(
  ts = function(x) ts(x$Valores, frequency = 12)
  )

data_funs <- data_funs %>% 
  rep(length(unique(Bank_III$Bancos))) %>%
  enframe("func_name", "func") %>%
  mutate(
    Bancos = sort(rep(unique(Bank_III$Bancos), length(unique(.$func_name))))
  )

Bank_V <- Bank_IV %>%
     left_join(data_funs)

## Joining with `by = join_by(Bancos)`

Preparación del Modelo de Serie de Tiempo

Corremos la lista con cinco modelos: auto.arima, ets, stlm. tbats y holt.winters

models <- list(
  auto.arima = function(x) auto.arima(x),
  ets = function(x) ets(x),
  stlm = function(x) stlm(x),
  tbats = function(x) tbats(x, use.box.cox = FALSE),
  holt.winter = function (x) HoltWinters(x, seasonal = "additive")
)

Es posible convertirlo en formato agrupado como el anterior ejemplo.

models <- models %>%
  rep(length(unique(Bank_III$Bancos))) %>%
  enframe("model_name", "model") %>%
  mutate(Bancos =
    sort(rep(unique(Bank_III$Bancos), length(unique(.$model_name))))
  )

Combinación de Modelos

Bank_VI <- Bank_V %>% 
           left_join(models)

## Joining with `by = join_by(Bancos)`

Ejecución del Modelo

Bank_VII <- Bank_VI %>% 
            mutate(
              params = map(train, ~ list(x = .x)),
              data = invoke_map(func, params),
              params = map(data, ~ list(x = .x)),
              fitted = invoke_map(model, params)
            ) %>%
            select(-data, -params)

Bank_VII

## # A tibble: 20 x 8
## # Groups:   Bancos [4]
##    Bancos test             train    func_name func   model_name model fitted    
##    <chr>  <list>           <list>   <chr>     <list> <chr>      <lis> <list>    
##  1 Di     <tibble [7 x 2]> <tibble> ts        <fn>   auto.arima <fn>  <fr_ARIMA>
##  2 Di     <tibble [7 x 2]> <tibble> ts        <fn>   ets        <fn>  <ets>     
##  3 Di     <tibble [7 x 2]> <tibble> ts        <fn>   stlm       <fn>  <stlm>    
##  4 Di     <tibble [7 x 2]> <tibble> ts        <fn>   tbats      <fn>  <tbats>   
##  5 Di     <tibble [7 x 2]> <tibble> ts        <fn>   holt.wint~ <fn>  <HltWntrs>
##  6 Gua    <tibble [7 x 2]> <tibble> ts        <fn>   auto.arima <fn>  <fr_ARIMA>
##  7 Gua    <tibble [7 x 2]> <tibble> ts        <fn>   ets        <fn>  <ets>     
##  8 Gua    <tibble [7 x 2]> <tibble> ts        <fn>   stlm       <fn>  <stlm>    
##  9 Gua    <tibble [7 x 2]> <tibble> ts        <fn>   tbats      <fn>  <bats>    
## 10 Gua    <tibble [7 x 2]> <tibble> ts        <fn>   holt.wint~ <fn>  <HltWntrs>
## 11 Int    <tibble [7 x 2]> <tibble> ts        <fn>   auto.arima <fn>  <fr_ARIMA>
## 12 Int    <tibble [7 x 2]> <tibble> ts        <fn>   ets        <fn>  <ets>     
## 13 Int    <tibble [7 x 2]> <tibble> ts        <fn>   stlm       <fn>  <stlm>    
## 14 Int    <tibble [7 x 2]> <tibble> ts        <fn>   tbats      <fn>  <bats>    
## 15 Int    <tibble [7 x 2]> <tibble> ts        <fn>   holt.wint~ <fn>  <HltWntrs>
## 16 Pic    <tibble [7 x 2]> <tibble> ts        <fn>   auto.arima <fn>  <fr_ARIMA>
## 17 Pic    <tibble [7 x 2]> <tibble> ts        <fn>   ets        <fn>  <ets>     
## 18 Pic    <tibble [7 x 2]> <tibble> ts        <fn>   stlm       <fn>  <stlm>    
## 19 Pic    <tibble [7 x 2]> <tibble> ts        <fn>   tbats      <fn>  <bats>    
## 20 Pic    <tibble [7 x 2]> <tibble> ts        <fn>   holt.wint~ <fn>  <HltWntrs>

Cálculo de los errores

Activar librería yardstick

Bank_error <- Bank_VII %>%
              mutate(error =
                            map(fitted, ~ forecast(.x, h = 7)) %>%
                            map2_dbl(test, ~ mae_vec(truth = .y$Valores, estimate = .x$mean))
                          ) %>%
                    arrange(Bancos, error) #arrange based on smallest error for each area

Bank_error

## # A tibble: 20 x 9
## # Groups:   Bancos [4]
##    Bancos test     train    func_name func   model_name  model fitted      error
##    <chr>  <list>   <list>   <chr>     <list> <chr>       <lis> <list>      <dbl>
##  1 Di     <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 0.164 
##  2 Di     <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 0.206 
##  3 Di     <tibble> <tibble> ts        <fn>   tbats       <fn>  <tbats>    0.303 
##  4 Di     <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      0.347 
##  5 Di     <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     0.348 
##  6 Gua    <tibble> <tibble> ts        <fn>   tbats       <fn>  <bats>     0.0988
##  7 Gua    <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 0.155 
##  8 Gua    <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      0.177 
##  9 Gua    <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     0.205 
## 10 Gua    <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 0.219 
## 11 Int    <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     1.24  
## 12 Int    <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 1.47  
## 13 Int    <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      1.51  
## 14 Int    <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 1.57  
## 15 Int    <tibble> <tibble> ts        <fn>   tbats       <fn>  <bats>     2.06  
## 16 Pic    <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      0.297 
## 17 Pic    <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 0.446 
## 18 Pic    <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     1.04  
## 19 Pic    <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 1.07  
## 20 Pic    <tibble> <tibble> ts        <fn>   tbats       <fn>  <bats>     1.30

Escogemos el modelo que tenga el menor error

Bank_best_model <-  Bank_error %>%
                    group_by(Bancos) %>% 
                    arrange(error) %>% 
                    slice(1) %>%  #choose the smallest error & best model for each area directly
                    ungroup() %>% 
                    select(Bancos, ends_with("_name"),error)

Bank_best_model <- Bank_best_model %>% 
                    select(-error) %>% 
                    left_join(Bank_error)%>% 
                    select(-error)

## Joining with `by = join_by(Bancos, func_name, model_name)`

Pasar a de lista a data.frame

Bank_test <-  Bank_error %>% 
              mutate(
                forecast =
                  map(fitted, ~ forecast(.x, h = 7)) %>%
                  map2(test, ~ tibble(
                    Fecha = .y$Fecha,
                    Valores = as.vector(.x$mean)
                  )),
                key = paste(func_name, model_name, sep = "-")
              )
  
Bank_test

## # A tibble: 20 x 11
## # Groups:   Bancos [4]
##    Bancos test     train    func_name func   model_name  model fitted      error
##    <chr>  <list>   <list>   <chr>     <list> <chr>       <lis> <list>      <dbl>
##  1 Di     <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 0.164 
##  2 Di     <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 0.206 
##  3 Di     <tibble> <tibble> ts        <fn>   tbats       <fn>  <tbats>    0.303 
##  4 Di     <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      0.347 
##  5 Di     <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     0.348 
##  6 Gua    <tibble> <tibble> ts        <fn>   tbats       <fn>  <bats>     0.0988
##  7 Gua    <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 0.155 
##  8 Gua    <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      0.177 
##  9 Gua    <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     0.205 
## 10 Gua    <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 0.219 
## 11 Int    <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     1.24  
## 12 Int    <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 1.47  
## 13 Int    <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      1.51  
## 14 Int    <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 1.57  
## 15 Int    <tibble> <tibble> ts        <fn>   tbats       <fn>  <bats>     2.06  
## 16 Pic    <tibble> <tibble> ts        <fn>   ets         <fn>  <ets>      0.297 
## 17 Pic    <tibble> <tibble> ts        <fn>   holt.winter <fn>  <HltWntrs> 0.446 
## 18 Pic    <tibble> <tibble> ts        <fn>   stlm        <fn>  <stlm>     1.04  
## 19 Pic    <tibble> <tibble> ts        <fn>   auto.arima  <fn>  <fr_ARIMA> 1.07  
## 20 Pic    <tibble> <tibble> ts        <fn>   tbats       <fn>  <bats>     1.30  
## # i 2 more variables: forecast <list>, key <chr>

Bank_test_key <- Bank_test %>%
                      select(Bancos, key, actual = test, forecast) %>%
                      spread(key, forecast) %>%
                      gather(key, value, -Bancos)

Bank_test_unnest <- Bank_test_key %>%
                    unnest(value) %>%
                    mutate(Valores = rec_revert(Valores, recipe, Bancos))

Con el resultado en formato tbl, comparamos el pronóstico con la información observada

library(plotly)

## Warning: package 'plotly' was built under R version 4.1.3

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

p2 <- Bank_test_unnest %>%
  ggplot(aes(x = Fecha, y = Valores, colour = key)) +
  geom_line(data = Bank_test_unnest %>% 
              filter(key == "actual"),aes(y = Valores),alpha = 0.3,size = 0.8)+
   geom_line(data = Bank_test_unnest %>% 
               filter(key != "actual"),aes(frame = key,col = key)) +
  labs(x = "", y = "Valores",title = "Model Prediction Comparison", frame = "Model") +
   facet_wrap(~ Bancos, scale = "free_y", ncol = 1) +
    tidyquant::theme_tq() +
    theme(legend.position = "none",axis.title.y = element_text(size = 12), axis.text.y = element_text(size = 7))

## Warning in geom_line(data = Bank_test_unnest %>% filter(key != "actual"), :
## Ignoring unknown aesthetics: frame

ggplotly(p2)

## Warning in p$x$data[firstFrame] <- p$x$frames[[1]]$data: número de items para
## para sustituir no es un múltiplo de la longitud del reemplazo

Modelo de selección automatizado

Bank_min_error <- Bank_error %>%
                  select(-fitted) %>% # remove unused
                  group_by(Bancos) %>%
                  filter(error == min(error)) %>%
                  ungroup()

Bank_min_error   #same selection with subarea_best_model

## # A tibble: 4 x 8
##   Bancos test             train    func_name func   model_name model   error
##   <chr>  <list>           <list>   <chr>     <list> <chr>      <list>  <dbl>
## 1 Di     <tibble [7 x 2]> <tibble> ts        <fn>   auto.arima <fn>   0.164 
## 2 Gua    <tibble [7 x 2]> <tibble> ts        <fn>   tbats      <fn>   0.0988
## 3 Int    <tibble [7 x 2]> <tibble> ts        <fn>   stlm       <fn>   1.24  
## 4 Pic    <tibble [7 x 2]> <tibble> ts        <fn>   ets        <fn>   0.297

Desempeño final del modelo

Bank_combine <- Bank_min_error %>%
                mutate(fulldata = map2(train, test, ~ bind_rows(.x, .y))) %>%
                select(Bancos, fulldata, everything(), -train, -test)

Bank_combine_nest <- Bank_combine %>%
  mutate(
    params = map(fulldata, ~ list(x = .x)),
    data = invoke_map(func, params),
    params = map(data, ~ list(x = .x)),
    fitted = invoke_map(model, params)
  ) %>%
  select(-data, -params)

Se puede incluir nivel de confianza al 95%, así como también el límite inferior y superior de la estimación

Bank_combine_forecast <- Bank_combine_nest %>%
  mutate(forecast =
    map(fitted, ~ forecast(.x, h = 6,level=95)) %>%
    map2(fulldata, ~ tibble(
      Fecha = timetk::tk_make_future_timeseries(.y$Fecha, 6),
      Inferior= as.vector(.x$lower),
      Valores = as.vector(.x$mean),
      Superior= as.vector(.x$upper)
    ))
  )

Data.frame los resultados

Bank_combine_forecast_unnest <- Bank_combine_forecast %>% 
                                select(Bancos, actual = fulldata, forecast) %>%
                                gather(key, value, -Bancos) %>%
                                unnest(value) %>%
                                mutate(Valores = rec_revert(Valores, recipe, Bancos))

Método gráfico

d <- Bank_combine_forecast_unnest %>%
  ggplot(aes(x = Fecha, y = Valores, colour = key)) +
    geom_line() +
    labs(x = NULL, y = NULL, colour = NULL) +
    facet_wrap(~ Bancos, scale = "free", ncol = 2) +
    theme_light() +
    tidyquant::scale_colour_tq()

ggplotly(d)

MAE results

mae_test_result <- Bank_error %>%
                   group_by(Bancos) %>% 
                   select(Bancos, error) %>% 
                   arrange(error) %>% 
                   slice(1) %>% 
                   ungroup()

mae_test_result <- mae_test_result %>%
                   summarise(Bancos = "Todos los bancos",
                              error = mean(error)) %>%
                    bind_rows(mae_test_result, .)

mae_test_result

## # A tibble: 5 x 2
##   Bancos            error
##   <chr>             <dbl>
## 1 Di               0.164 
## 2 Gua              0.0988
## 3 Int              1.24  
## 4 Pic              0.297 
## 5 Todos los bancos 0.450

Library modeltime

Referencias

# https://github.com/spsanderson/healthyverse_tsa
# https://business-science.github.io/modeltime/articles/nested-forecasting.html

id	Store	Dept	Date	Weekly_Sales	IsHoliday	Type	Size	Temperature	Fuel_Price	MarkDown1	MarkDown2	MarkDown3	MarkDown4	MarkDown5	CPI	Unemployment
1_13	1	13	2011-02-25	37838.40	FALSE	A	151315	62.90	3.065	NA	NA	NA	NA	NA	213.5356	7.742
1_93	1	93	2010-11-26	81660.77	TRUE	A	151315	64.52	2.735	NA	NA	NA	NA	NA	211.7484	7.838
1_93	1	93	2011-09-02	73692.87	FALSE	A	151315	87.83	3.533	NA	NA	NA	NA	NA	215.7971	7.962
1_93	1	93	2012-03-23	66710.40	FALSE	A	151315	65.93	3.787	6118.56	9.48	4.97	426.72	3657.22	221.2864	7.348
1_13	1	13	2012-07-27	37423.21	FALSE	A	151315	82.66	3.407	7146.90	389.02	1.59	10267.54	4325.19	221.9413	6.908

Paso I: seleccionar variables y cambiar nombres de variables

Con la opción set_names, es posible cambiar el nombre de las variables, de una forma más sencilla.

data_A <- walmart_sales_weekly %>%
          select(id, Date, Weekly_Sales) %>%
          set_names(c("ID", "Fecha", "Valores"))

Paso II: Gráfico

plot_time_series proviene de la libreria timetk

data_A %>%
  group_by(ID) %>%
  plot_time_series(Fecha, Valores, .interactive = T, .facet_ncol = 2, .title = "Series de Tiempo",
                   .smooth = FALSE)

Paso III: Preparación del conjunto de datos

Para este paso, consideramos las siguientes funciones que provienen de la librería modeltime.

extend_timeseries: “Defines how far into the future to extend the time series by each time series group.”
nest_timeseries: “Defines which observations should be split into the .future_data.”
split_nested_timeseries: “A wrapper for timetk::time_series_split() that generates training/testing splits from the .actual_data column.”

nested_data_tbl <- data_A %>%
    
    # 1. Extending: We'll predict 52 weeks into the future.
    extend_timeseries(
        .id_var        = ID,
        .date_var      = Fecha,
        .length_future = 52
    ) %>%
    
    # 2. Nesting: We'll group by ID, and create a future dataset
    #    that forecasts 52 weeks of extended data and
    #    an actual dataset that contains 104 weeks (2-years of data)
    nest_timeseries(
        .id_var        = ID,
        .length_future = 52,
        .length_actual = 52*2
    ) %>%
    
   # 3. Splitting: We'll take the actual data and create splits
   #    for accuracy and confidence interval estimation of 52 weeks (test)
   #    and the rest is training data
    split_nested_timeseries(
        .length_test = 52
    )

nested_data_tbl

## # A tibble: 7 x 4
##   ID    .actual_data       .future_data      .splits        
##   <fct> <list>             <list>            <list>         
## 1 1_1   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 2 1_3   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 3 1_8   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 4 1_13  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 5 1_38  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 6 1_93  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 7 1_95  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>

Paso IV:Crear flujos de trabajo - Tidymodels Workflows

Arima

rec_arima <- recipe(Valores ~ Fecha, extract_nested_train_split(nested_data_tbl)) 

wflw_auto_arima <- workflow() %>%
                   add_model(arima_reg(mode = "regression") %>% 
                             set_engine(engine = "auto_arima")) %>%
                   add_recipe(rec_arima)

Exponential

rec_exp <- recipe(Valores ~ Fecha, extract_nested_train_split(nested_data_tbl)) 

wflw_auto_exp <- workflow() %>%
                 add_model(exp_smoothing(mode = "regression") %>% 
                             set_engine(engine = "ets")) %>%
                   add_recipe(rec_exp)

Prophet

rec_prophet <- recipe(Valores ~ Fecha, extract_nested_train_split(nested_data_tbl)) 

wflw_prophet <- workflow() %>%
    add_model(
      prophet_reg("regression", seasonality_yearly = TRUE) %>% 
        set_engine("prophet")
    ) %>%
    add_recipe(rec_prophet)

XGBoost

step_timeseries_signature proviene de la libreria timetk. step_timeseries_signature creates a a specification of a recipe step that will convert date or date-time data into many features that can aid in machine learning with time-series data.

rec_xgb <-  recipe(Valores ~ ., extract_nested_train_split(nested_data_tbl)) %>%
            step_timeseries_signature(Fecha) %>%
            step_rm(Fecha) %>%
            step_zv(all_predictors()) %>%
            step_dummy(all_nominal_predictors(), one_hot = TRUE)

wflw_xgb <- workflow() %>%
            add_model(boost_tree("regression") %>% set_engine("xgboost")) %>%
            add_recipe(rec_xgb)

Paso V:Crear flujos de trabajo - Versión II

nested_modeltime_tbl <- modeltime_nested_fit(
  # Nested data 
  nested_data = nested_data_tbl,
  
  # Add workflows
  wflw_auto_arima,  
  wflw_auto_exp, 
  wflw_prophet,
  wflw_xgb
)

## 
Fitting models on training data... ====>-------------------------- 14% |
## ET...

Fitting models on training data... =========>--------------------- 29% |
## ET...

Fitting models on training data... =============>----------------- 43% |
## ET...

Fitting models on training data... =================>------------- 57% |
## ET...

Fitting models on training data... =====================>--------- 71% |
## ET...

Fitting models on training data... ==========================>---- 86% |
## ET...

nested_modeltime_tbl

## # Nested Modeltime Table
##

## Trained on: .splits | Model Errors: [0]

## # A tibble: 7 x 5
##   ID    .actual_data       .future_data      .splits         .modeltime_tables 
##   <fct> <list>             <list>            <list>          <list>            
## 1 1_1   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 2 1_3   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 3 1_8   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 4 1_13  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 5 1_38  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 6 1_93  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 7 1_95  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>

Paso VI:Atributos registrados

Extract Nested Test Accuracy

nested_modeltime_tbl %>% 
  extract_nested_test_accuracy() %>%
  table_modeltime_accuracy(.interactive = F)

Accuracy Table
ID	.model_id	.model_desc	.type	mae	mape	mase	smape	rmse	rsq
1_1	1	ARIMA	Test	12316.89	61.60	2.43	46.58	13078.90	NA
1_1	2	ETSMNN	Test	12316.32	61.60	2.43	46.58	13078.35	NA
1_1	3	PROPHET	Test	10071.47	45.88	1.99	59.97	11776.87	0.38
1_1	4	XGBOOST	Test	6232.23	25.28	1.23	24.55	9014.73	0.19
1_3	1	ARIMA	Test	4798.16	30.21	1.86	30.47	8716.55	0.02
1_3	2	ETSMNN	Test	4791.42	23.75	1.86	29.91	9573.41	NA
1_3	3	PROPHET	Test	3539.80	29.87	1.37	25.46	4707.77	0.80
1_3	4	XGBOOST	Test	3071.72	18.69	1.19	20.26	5078.50	0.79
1_8	1	ARIMA	Test	3558.18	9.19	1.51	9.76	4112.95	NA
1_8	2	ETSMAN	Test	4976.42	12.91	2.12	14.01	5567.39	0.12
1_8	3	PROPHET	Test	4282.96	11.15	1.82	11.96	4845.08	0.00
1_8	4	XGBOOST	Test	3608.13	9.39	1.53	9.95	4025.27	0.30
1_13	1	ARIMA	Test	2308.38	5.68	0.85	5.85	3066.11	0.06
1_13	2	ETSANN	Test	2601.22	6.37	0.96	6.62	3369.45	NA
1_13	3	PROPHET	Test	6861.13	17.02	2.53	18.78	7309.61	0.15
1_13	4	XGBOOST	Test	2758.50	6.85	1.02	7.12	3119.72	0.53
1_38	1	ARIMA	Test	7922.19	9.78	0.68	10.05	10647.73	NA
1_38	2	ETSMNN	Test	7924.30	9.80	0.68	10.05	10624.30	NA
1_38	3	PROPHET	Test	26007.21	32.57	2.22	39.60	27938.83	0.08
1_38	4	XGBOOST	Test	7492.22	9.23	0.64	9.58	9437.51	0.33
1_93	1	ARIMA	Test	8292.72	10.42	0.83	11.02	10365.73	0.06
1_93	2	ETSANN	Test	8703.63	10.89	0.88	11.44	10338.66	NA
1_93	3	PROPHET	Test	17165.30	21.37	1.73	24.46	19123.17	0.03
1_93	4	XGBOOST	Test	7140.45	8.98	0.72	9.53	8859.37	0.49
1_95	1	ARIMA	Test	8945.63	7.10	1.08	7.29	11410.65	0.01
1_95	2	ETSMNN	Test	8713.50	6.95	1.05	7.10	11224.78	NA
1_95	3	PROPHET	Test	22836.06	18.30	2.75	20.37	24094.49	0.48
1_95	4	XGBOOST	Test	10794.54	8.49	1.30	8.93	13102.82	0.11

Extract Nested Test Forecast

nested_modeltime_tbl %>% 
  extract_nested_test_forecast() %>%
  group_by(ID) %>%
  plot_modeltime_forecast(
    .facet_ncol  = 2,
    .interactive = FALSE
  )

Extract Nested Error Logs

No se observan errores en los modelos propuestos

nested_modeltime_tbl %>% 
  extract_nested_error_report()

## # A tibble: 0 x 4
## # i 4 variables: ID <fct>, .model_id <int>, .model_desc <chr>,
## #   .error_desc <chr>

Selección del mejor modelo

best_nested_modeltime_tbl <-  nested_modeltime_tbl %>%
                              modeltime_nested_select_best(
                                metric                = "rmse", 
                                minimize              = TRUE, 
                                filter_test_forecasts = TRUE)

Extract Nested Best Model Report

best_nested_modeltime_tbl %>%
  extract_nested_best_model_report()

## # Nested Modeltime Table
##

## Trained on: .splits | Model Errors: [0]

## # A tibble: 7 x 10
##   ID    .model_id .model_desc .type   mae  mape  mase smape   rmse     rsq
##   <fct>     <int> <chr>       <chr> <dbl> <dbl> <dbl> <dbl>  <dbl>   <dbl>
## 1 1_1           4 XGBOOST     Test  6232. 25.3  1.23  24.5   9015.  0.191 
## 2 1_3           3 PROPHET     Test  3540. 29.9  1.37  25.5   4708.  0.796 
## 3 1_8           4 XGBOOST     Test  3608.  9.39 1.53   9.95  4025.  0.304 
## 4 1_13          1 ARIMA       Test  2308.  5.68 0.850  5.85  3066.  0.0649
## 5 1_38          4 XGBOOST     Test  7492.  9.23 0.640  9.58  9438.  0.332 
## 6 1_93          4 XGBOOST     Test  7140.  8.98 0.719  9.53  8859.  0.490 
## 7 1_95          2 ETSMNN      Test  8714.  6.95 1.05   7.10 11225. NA

Extract Nested Best Test Forecasts

best_nested_modeltime_tbl %>%
  extract_nested_test_forecast() %>%
  group_by(ID) %>%
  plot_modeltime_forecast(
    .facet_ncol  = 2,
    .interactive = TRUE
  )

Re ajustando el modelo

nested_modeltime_refit_tbl <- best_nested_modeltime_tbl %>%
                              modeltime_nested_refit(control = control_nested_refit(verbose = TRUE))

## i [1/7] Starting Modeltime Table: ID 1_1...

## v Model 4 Passed XGBOOST.

## v [1/7] Finished Modeltime Table: ID 1_1

## i [2/7] Starting Modeltime Table: ID 1_3...

## v Model 3 Passed PROPHET.

## v [2/7] Finished Modeltime Table: ID 1_3

## i [3/7] Starting Modeltime Table: ID 1_8...

## v Model 4 Passed XGBOOST.

## v [3/7] Finished Modeltime Table: ID 1_8

## i [4/7] Starting Modeltime Table: ID 1_13...

## v Model 1 Passed ARIMA(3,1,0)(0,0,1)[12].

## v [4/7] Finished Modeltime Table: ID 1_13

## i [5/7] Starting Modeltime Table: ID 1_38...

## v Model 4 Passed XGBOOST.

## v [5/7] Finished Modeltime Table: ID 1_38

## i [6/7] Starting Modeltime Table: ID 1_93...

## v Model 4 Passed XGBOOST.

## v [6/7] Finished Modeltime Table: ID 1_93

## i [7/7] Starting Modeltime Table: ID 1_95...

## v Model 2 Passed ETS(M,N,N).

## v [7/7] Finished Modeltime Table: ID 1_95

## Finished in: 7.304214 secs.

nested_modeltime_refit_tbl

## # Nested Modeltime Table
##

## Trained on: .actual_data | Model Errors: [0]

## # A tibble: 7 x 5
##   ID    .actual_data       .future_data      .splits         .modeltime_tables 
##   <fct> <list>             <list>            <list>          <list>            
## 1 1_1   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 2 1_3   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 3 1_8   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 4 1_13  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 5 1_38  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 6 1_93  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 7 1_95  <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>

Extracción de pronostico futuro

nested_modeltime_refit_tbl %>%
    extract_nested_future_forecast() %>%
    group_by(ID) %>%
    plot_modeltime_forecast(
      .interactive = FALSE,
      .facet_ncol  = 2
    )

Case II: One time series applied to multiple models

For this step, first at all we have to work with this libraries.

library(pacman)
p_load("modeltime","modeltime.resample","plotly","tidymodels","timetk")

date	value
2010-01-01	999.9930
2010-02-01	1000.3700
2010-03-01	1001.0245
2010-04-01	1000.0097
2010-05-01	1001.3507
2010-06-01	999.4062
2010-07-01	999.3950
2010-08-01	999.5441

Split Data into Training & Test Sets

Split into training (80%) and test (20%). As Rob J Hyndman and George Athanasopoulos mentioned: “The size of the test is typically about 20% of the total sample, although this value depends on how long the sample is and how far ahead you want to forecast”.

time_series_split(): A convenience function to return a single time series split containing a training/testing sample. This function comes from the library timetk.

ff: Name of the data frame that will be analized.

assess: The length of the test set.

splits <- time_series_split(
  ff, 
  assess = "12 months",  # Test set = last 12 months
  cumulative = TRUE      # Training set = all data before test
)

## Using date_var: date

The tk_time_series_cv_plan(): function provides a simple interface to prepare a time series resample specification (rset) of either rolling_origin or time_series_cv class.

# Visualize split (optional)
splits %>%
  tk_time_series_cv_plan() %>%
  plot_time_series_cv_plan(date, value,
                           .title = "Time Series Cross Validation Plan")

Define & Train Multiple Models

arima_reg(): Is a way to generate a specification of an ARIMA model. exp_smoothing(): Is a way to generate a specification of an Exponential Smoothing model. prophet_reg(): Is a way to generate a specification of a PROPHET model.

# Model 1: Auto ARIMA
model_arima <-  arima_reg() %>%
                set_engine("auto_arima") %>%
                fit(value ~ date, training(splits))

# Model 2: ETS
model_ets <- exp_smoothing() %>%
             set_engine("ets") %>%
             fit(value ~ date, training(splits))

# Model 3: Prophet
model_prophet <- prophet_reg() %>%
                 set_engine("prophet") %>%
                 fit(value ~ date, training(splits))

Time Series Cross-Validation

Create a modeltime table

Creates a table of models

model_table <- modeltime_table(model_arima,
                               model_ets,
                               model_prophet)

Cross-validate on training data

Create rsample cross validation sets for time series. This function produces a sampling plan starting with the most recent time series observations, rolling backwards.

initial=60: Use the first 60 observations (e.g., 5 years if monthly data) to train the model.Ensures the model learns from a sufficiently long history.

assess=12: Evaluate the model on the next 12 observations (e.g., 1 year if monthly data). Simulates real-world forecasting where you predict future periods.Asses must be the same as set before.

resamples <- time_series_cv(
  training(splits),
  initial = 60,   # 5 years
  assess = 12,    # 1 year
  skip = 6        # 6-month increments
)

## Using date_var: date

Evaluate models

Resampled predictions are commonly used for: Analyzing accuracy and stability of models

resample_results <- model_table %>%
                    modeltime_fit_resamples(resamples,
                                            control = control_resamples(verbose = FALSE))

Rank models by RMSE

modeltime_resample_accuracy(): This is a wrapper for yardstick that simplifies time series regression accuracy metric calculations.

modeltime::table_modeltime_accuracy(): To format the results for reporting.

resample_results %>%
  modeltime_resample_accuracy() %>%
  table_modeltime_accuracy()

Select the Best Model & Forecast

best_model <- model_ets

Forecast on test data

Step I: modeltime_calibrate Calibrates the model on the test set to generate residuals and confidence intervals. Takes your trained best_model. Uses the testing(splits) data (held-out data from initial_time_split()) to:

*Generate predictions

*Compare predictions to actual values (actual_data).

*Compute residuals (errors) and confidence intervals.

Step II: modeltime_forecast(new_data = testing(splits), actual_data = ff)

Generates forecasts for the test set (new_data) and combines them with actual data (actual_data) for visualization/analysis.

forecast_test <- best_model %>%
                 modeltime_calibrate(testing(splits)) %>%
                 modeltime_forecast(new_data = testing(splits),
                                    actual_data = ff)

## Converting to Modeltime Table.

Visualize forecast

forecast_test %>%
  plot_modeltime_forecast(.interactive = TRUE)

Final Forecast (Future Data)

future_df <- ff %>%
             future_frame(.length_out = 12, .date_var = date)

future_forecast <- best_model %>%
                   modeltime_calibrate(ff) %>%  # Refit on full dataset
                   modeltime_forecast(new_data = future_df,
                                      actual_data = ff)

## Converting to Modeltime Table.

future_forecast %>%
  plot_modeltime_forecast(
    .interactive = FALSE,  # Set TRUE for Plotly (zoomable)
    .conf_interval_show = TRUE,  # Default
    .conf_interval_alpha = 0.2   # Transparency
  )

Introducción libreria TSstudio

date	value
2010-01-01	999.4895
2010-02-01	999.8698
2010-03-01	1001.7087
2010-04-01	1000.2705
2010-05-01	1000.3793

Ploting time series object

am <- ts_plot(df, 
        title = "Exportaciones cacao",
        Ytitle = "En USD", color = "#0C769E")

am

Seasonality analysis

ap <- ts_seasonal(df, type = "all")

ap

Training forecasting models

First at all, we have to transform data.frame as ts object.

ts_data <- ts(df$value, start = c(2010, 1), frequency = 12)

In second place, we Setting training and testing partitions

df_s  <- ts_split(ts.obj = ts_data, sample.out = 12)
train <- df_s$train
test  <- df_s$test

Forecasting with auto.arima

md <- auto.arima(train)
fc <- forecast(md, h = 12)

Plotting actual vs. fitted and forecasted

test_forecast(actual = ts_data, forecast.obj = fc, test = test)

Plotting the forecast

plot_forecast(fc,
               title = "Gráfico de pronóstico", color = "#0D3512")

Apply multiple models

methods <- list(ets1 = list(method = "ets",
                            method_arg = list(opt.crit = "lik"),
                            notes = "ETS model with opt.crit = lik"),
                ets2 = list(method = "ets",
                            method_arg = list(opt.crit = "amse"),
                            notes = "ETS model with opt.crit = amse"),
                arima1 = list(method = "arima",
                              method_arg = list(order = c(1,1,1)),
                              notes = "ARIMA(1,1,1)"),
                   hw = list(method = "HoltWinters",
                          method_arg = NULL,
                          notes = "HoltWinters Model"),
                tslm = list(method = "tslm",
                            method_arg = list(formula = input ~ trend + season),
                            notes = "tslm model with trend and seasonal components"))

Training the models with backtesting

train_model: Method for train test and compare multiple time series models using either one partition (i.e., sample out) or multipe partitions (backtesting)

input: A univariate time series object (ts class).

md <- train_model(input = ts_data,
                  methods = methods,
                  train_method = list(partitions = 6, 
                                      sample.out = 12, 
                                      space = 3),
                  horizon = 12,
                  error = "MAPE")

## # A tibble: 5 x 7
##   model_id model   notes avg_mape avg_rmse `avg_coverage_80%` `avg_coverage_95%`
##   <chr>    <chr>   <chr>    <dbl>    <dbl>              <dbl>              <dbl>
## 1 tslm     tslm    tslm~ 0.000629    0.810              0.875              0.986
## 2 ets2     ets     ETS ~ 0.000630    0.794              0.861              0.958
## 3 ets1     ets     ETS ~ 0.000638    0.795              0.931              0.958
## 4 arima1   arima   ARIM~ 0.000693    0.920              0.847              0.958
## 5 hw       HoltWi~ Holt~ 0.000705    0.892              0.931              0.986

plot_model(md)

Time Series Manual

Autor: René Díaz Flores

Introducción

Pronóstico simúltaneo de tres bases de datos

Gráficos simultáneos

Creación de fechas:

Ejemplo I:

Ejemplo II:

Ejemplo III:

Ejemplo IV:

Ejemplo VI:

Ejercicio Completo

Introducción libreria sweep

Pronóstico del modelo

Tidy the forecast object

Gráfico

Aplicación de Varios modelos.

Inspección del Modelo

SW glance

Pronóstico del Modelo

Tidyng the forcast

Gráficamos el Modelo

Un modelo para varios series de tiempo

Paso 1: Nest data.frame

Paso 2: Declaramos a la base como time series (ts)

Paso 3: Modelamos la Serie de Tiempo

sw_tidy

sw_glance

sw_tidy_decomp

Pronóstico del Modelo

Pronóstico del Modelo: Tidy

Varios Modelos - Varias Series de Tiempo

Validación Cruzada

Procesamiento de la Información

Procesamiento de la Base de datos usando la libreria recipe

Modelamiento y Pronóstico

Función

Preparación del Modelo de Serie de Tiempo

Combinación de Modelos

Ejecución del Modelo

Cálculo de los errores

Pasar a de lista a data.frame

Modelo de selección automatizado

Desempeño final del modelo

Data.frame los resultados

Método gráfico

MAE results

Library modeltime

Referencias

Paso I: seleccionar variables y cambiar nombres de variables

Paso II: Gráfico

Paso III: Preparación del conjunto de datos

Paso IV:Crear flujos de trabajo - Tidymodels Workflows

Arima

Exponential

Prophet

XGBoost

Paso V:Crear flujos de trabajo - Versión II

Paso VI:Atributos registrados

Extract Nested Test Accuracy

Extract Nested Test Forecast

Extract Nested Error Logs

Selección del mejor modelo

Extract Nested Best Model Report

Extract Nested Best Test Forecasts

Re ajustando el modelo

Extracción de pronostico futuro

Case II: One time series applied to multiple models

Split Data into Training & Test Sets

Define & Train Multiple Models

Time Series Cross-Validation

Create a modeltime table

Cross-validate on training data

Evaluate models

Rank models by RMSE

Select the Best Model & Forecast

Forecast on test data

Visualize forecast

Final Forecast (Future Data)

Introducción libreria TSstudio