library(pacman)
p_load("lubridate","tidyverse", "data.table", "forecast", "scales", "kableExtra","recipes","sweep", "tidyquant", "timetk", "magrittr", "yardstick", "modeltime", "tidymodels","TSstudio")set.seed(345)
n <- 200
datos <- tibble (Paris = cumsum(sample(c(500, 3500), n, TRUE)),
Madrid = cumsum(sample(c(700, 4500), n, TRUE)),
Rio = cumsum(sample(c(900, 6500), n, TRUE)))#Pronóstico
Base_A <- ts(datos[1:3],start=c(2011),frequency=12)
forecasting_function_ARIMA <- function(Z, hrz = 16) {
timeseries <- msts(Z, start = 2011, seasonal.periods = 12)
forecast <- auto.arima(timeseries)
}
Forecasting_list_ARIMA <- lapply(X = Base_A, forecasting_function_ARIMA)
Arima_Model_Forecast <- lapply(Forecasting_list_ARIMA, forecast,h=6,level=95)
Forecast <- lapply(Forecasting_list_ARIMA, forecast,h=6,level=95)%>%data.frame()Del ejercicio anterior, si queremos modificar la estrutura de los encabezados de la matriz de proyecciones, aplicamos la siguiente rutina:
Forecast <- lapply(Forecasting_list_ARIMA, forecast,h=6,level=95)%>%data.frame()%>%
rownames_to_column(var = "FECHA")%>% as_tibble()%>%
rename_with(stringr::str_replace, pattern = ".Point.Forecast", replacement = " Pronóstico",
matches(".Point.Forecast"))%>%
rename_with(stringr::str_replace, pattern = ".Lo.95", replacement = " Límite Inf",
matches(".Lo.95"))%>%
rename_with(stringr::str_replace, pattern = ".Hi.95", replacement = " Límite Sup",
matches(".Hi.95"))%>%
rename_at(1:4,~ str_to_title(.))y <- datos
colnames <- dimnames(y)[[2]]
TEMAD <- theme(axis.line = element_line(size=0.5, colour = "black"),
plot.title = element_text(hjust=0.5, size = 12, face = "bold"),
legend.text = element_text(size = 20),
plot.caption = element_text(size = 8),
legend.position = 'none',
legend.title = element_blank(),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11),
text = element_text(size = 11),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## i Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
GRA <- lapply(seq_along(datos),function(i) {
autoplot(fit <- forecast(auto.arima(ts(datos[[i]],start=c(2011,1),
end = c(2027,8),frequency = 12)),6,
level=95),
ts.colour = "dodgerblue4",predict.colour = "#999999",
predict.linetype = "dashed")+
labs(x = 'Años', y = "Número de atenciones") +
ggtitle(colnames[i])+
theme(plot.title = element_text(hjust = 0.5))+
scale_y_continuous(labels=comma) + TEMAD+
coord_cartesian(xlim = c(2011, 2027.25))
}
)
GRA <- set_names(GRA, colnames)
GRA## $Paris
##
## $Madrid
##
## $Rio
n <- 6
set <- tibble(Vinces = cumsum(sample(c(500, 3500), n, TRUE)))%>%
mutate(FECHA = seq(as.Date("2022-8-1"), as.Date("2023-1-1"), by = "months"))
set## # A tibble: 6 x 2
## Vinces FECHA
## <dbl> <date>
## 1 3500 2022-08-01
## 2 7000 2022-09-01
## 3 7500 2022-10-01
## 4 8000 2022-11-01
## 5 11500 2022-12-01
## 6 15000 2023-01-01
n <- 20
set_A <- tibble(Babahoyo = cumsum(sample(c(800, 4500), n, TRUE)))%>%
mutate(FECHA= seq(as.Date("2018-1-1"),by = "months",length.out = 20))
head(set_A,5)## # A tibble: 5 x 2
## Babahoyo FECHA
## <dbl> <date>
## 1 800 2018-01-01
## 2 1600 2018-02-01
## 3 6100 2018-03-01
## 4 10600 2018-04-01
## 5 11400 2018-05-01
Crear variables Fecha que comience con el día final de cada mes
n <- 20
set_A <- tibble(Napo = cumsum(sample(c(800, 4500), n, TRUE)))%>%
mutate(FECHA= seq(as.Date("2018-1-1"),by = "months",length.out = 20)-1)
head(set_A,5)## # A tibble: 5 x 2
## Napo FECHA
## <dbl> <date>
## 1 4500 2017-12-31
## 2 9000 2018-01-31
## 3 9800 2018-02-28
## 4 10600 2018-03-31
## 5 15100 2018-04-30
cambiar Formato fechas
## # A tibble: 3 x 2
## Napo FECHA
## <dbl> <date>
## 1 800 2017-12-31
## 2 1600 2018-01-31
## 3 6100 2018-02-28
Para formato de fechas, se recomienda el siguiente link: https://www.geeksforgeeks.org/how-to-use-date-formats-in-r/
%b: Abreviación de mes %y: Abreviación de año
## # A tibble: 3 x 2
## Napo FECHA
## <dbl> <chr>
## 1 800 dic. - 17
## 2 1600 ene. - 18
## 3 6100 feb. - 18
| Jamaica | Canada | Honduras |
|---|---|---|
| -0.3465840 | 6.303960 | 11.321528 |
| 0.6276359 | 3.953276 | 12.066658 |
| 0.6437831 | 4.370569 | 11.091397 |
| -0.3125563 | 4.856155 | 12.298446 |
| 1.0578810 | 5.592618 | 11.263103 |
| 0.3202071 | 4.931182 | 13.604049 |
| -0.0607272 | 4.375798 | 12.169801 |
| 1.4481094 | 5.346821 | 13.492133 |
| 0.5840688 | 6.112750 | 11.751682 |
| -0.5442221 | 4.188722 | 13.107554 |
| 0.9309310 | 4.616367 | 12.809937 |
| -0.2657284 | 6.157338 | 11.740550 |
| 0.5602174 | 3.981209 | 11.738767 |
| -0.1732012 | 5.980474 | 12.988104 |
| -0.1593099 | 5.141921 | 12.105388 |
| -0.2498411 | 4.057758 | 12.214577 |
| 1.1909899 | 4.062254 | 11.143715 |
| -1.6425621 | 5.903888 | 11.533752 |
| -1.2030146 | 5.369082 | 13.552865 |
| -0.5381974 | 5.312796 | 12.183695 |
| -0.5510144 | 6.083338 | 12.100343 |
| -0.1480458 | 5.207964 | 14.040965 |
| -0.5208616 | 4.447765 | 13.231016 |
| -1.5645956 | 5.358313 | 13.398592 |
| 0.8559413 | 6.631799 | 9.574225 |
| -0.6693584 | 5.021633 | 11.259087 |
| -1.1820668 | 5.756193 | 12.744493 |
| 0.8058029 | 3.638682 | 11.141806 |
| -1.2877233 | 4.391235 | 11.619835 |
| -0.2914974 | 4.788253 | 12.831156 |
Creamos una función, que identifique valores atípicos y lo transforme por NA. Es importante recalcar que está función es disponible si el conjunto de datos es data.frame, no funciona para tibble.
atipicos = function(df) {
for (i in 1:ncol(df)) {
dat = df[which(df[,i] > quantile(df[,i], .1) & df[,i] < quantile(df[,i], .9)),i]
mean = mean(dat)
sd = sd(dat)
df[which( abs((df[,i]) - mean) > (sd * 3)), i] = NA
}
return(df)
}Transformamos a lista y por data.frame eliminamos NA. En este caso la base de datos denominado Honduras tendrá una observación menos.
AA <- atipicos(country) %>%
pivot_longer(cols = Jamaica:Honduras,
names_to = c("Países"),
values_to = "Saldos")
AB <- split(AA, AA$Países)
AB <- map(AB, ~filter(.x,if_all(everything(),~!is.na(.))))Para correr auto.arima por listas, se aplica la siguiente rutina
Aplicación de la libreria sweep
## New names:
## * `` -> `...1`
## * `` -> `...225`
## * `` -> `...226`
## # A tibble: 5 x 2
## FECHA Saldos
## <date> <dbl>
## 1 2014-12-01 103787.
## 2 2015-01-01 101090.
## 3 2015-02-01 107776.
## 4 2015-03-01 115545.
## 5 2015-04-01 119154.
El primer paso consiste en tener la base de datos junto con una variable de tipo Date
Retorna los parámetros del modelo
## # A tibble: 4 x 2
## term estimate
## <chr> <dbl>
## 1 alpha 1.00
## 2 beta 0.000100
## 3 l 107325.
## 4 b 1500.
Retorna la calidad del modelo
## # A tibble: 1 x 12
## model.desc sigma logLik AIC BIC ME RMSE MAE MPE MAPE MASE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(A,A,N) 3802. -745. 1501. 1512. -116. 3694. 2928. -0.194 2.13 0.192
## # i 1 more variable: ACF1 <dbl>
w_augment() retorna los valores reales, ajustados y residuos del modelo
## # A tibble: 5 x 4
## index .actual .fitted .resid
## <yearmon> <dbl> <dbl> <dbl>
## 1 ene. 2014 103787. 108825. -5038.
## 2 feb. 2014 101090. 105288. -4198.
## 3 mar. 2014 107776. 102589. 5186.
## 4 abr. 2014 115545. 109275. 6270.
## 5 may. 2014 119154. 117045. 2109.
sw_sweep(fcast_ets, timetk_idx = TRUE) %>%
ggplot(aes(x = index, y = Saldos, color = key)) +
geom_ribbon(aes(ymin = lo.95, ymax = hi.95),
fill = "#D5DBFF", color = NA, size = 0) +
#geom_ribbon(aes(ymin = lo.80, ymax = hi.80, fill = key),
# fill = "#596DD5", color = NA, size = 0, alpha = 0.8) +
geom_line(size = 1) +
labs(title = "Pronóstico- Obligaciones con el Público", x = "", y = "USD",
subtitle = "Irregular Time Index") +
scale_y_continuous(labels = scales::dollar) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
scale_color_tq() +
scale_fill_tq() +
theme_tq() ## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Tomando el mismo modelo del punto anterior
Creamos una lista por cada uno de los modelos a emplear
models_list <- list(
auto.arima = list(
y = BA_ts_II
),
ets = list(
y = BA_ts_II,
damped = TRUE
),
bats = list(
y = BA_ts_II
)
)Convertimos a datos la lista previamente creada
## Warning: There was 1 warning in `mutate()`.
## i In argument: `fit = invoke_map(f, params)`.
## Caused by warning:
## ! `invoke_map()` was deprecated in purrr 1.0.0.
## i Please use map() + exec() instead.
AA <- models_tbl_fit %>%
mutate(tidy = map(fit, sw_tidy)) %>%
unnest(tidy) %>%
spread(key = f, value = estimate)## Warning: The `.drop` argument of `unnest()` is deprecated as of tidyr 1.0.0.
## i All list-columns are now preserved.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
models_tbl_fcast <- models_tbl_fit %>%
mutate(fcast = map(fit, forecast, h = 6, level=95))
models_tbl_fcast## # A tibble: 3 x 4
## f params fit fcast
## <chr> <list> <list> <list>
## 1 auto.arima <named list [1]> <fr_ARIMA> <forecast>
## 2 ets <named list [2]> <ets> <forecast>
## 3 bats <named list [1]> <bats> <forecast>
models_tbl_fcast_tidy <- models_tbl_fcast %>%
mutate(sweep = map(fcast, sw_sweep, fitted = FALSE, timetk_idx = TRUE, rename_index = "date"))
models_tbl_fcast_tidy## # A tibble: 3 x 5
## f params fit fcast sweep
## <chr> <list> <list> <list> <list>
## 1 auto.arima <named list [1]> <fr_ARIMA> <forecast> <tibble [78 x 5]>
## 2 ets <named list [2]> <ets> <forecast> <tibble [78 x 5]>
## 3 bats <named list [1]> <bats> <forecast> <tibble [78 x 5]>
Obtenemos el resultado por data.frame
models_tbl_fcast_tidy %>%
unnest(sweep) %>%
ggplot(aes(x = date, y = Saldos, color = key, group = f)) +
geom_ribbon(aes(ymin = lo.95, ymax = hi.95),
fill = "#D5DBFF", color = NA, size = 0) +
# geom_ribbon(aes(ymin = lo.80, ymax = hi.80, fill = key),
# fill = "#596DD5", color = NA, size = 0, alpha = 0.8) +
geom_line(size = 1) +
facet_wrap(~f, nrow = 3) +
labs(title = "Pronóstico Saldos",
subtitle = "Pronóstico aplicando varios modelos: ARIMA, BATS, ETS",
x = "", y = "Price") +
scale_y_continuous(labels = scales::dollar) +
#scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
theme_tq() +
scale_color_tq()Previo al ejercicio de pronóstico, aplicamos un pequeño ajuste a la base de datos.
files <- list.files(pattern=".xlsx")
files <- files[c(1:4)]
#print(files)
GH <- list()
for (i in seq_along(files)) {
GH[[i]] <- files[i] %>%
map_df(
~read_xlsx(path=files[i], sheet= "BAL",skip = 4))
}
GH <- set_names(GH, files)
GHA <- map(GH, ~select(.x,CÓDIGO, CUENTA, `43131`:`44804`)) %>%
map(~dplyr::filter(.x,CUENTA == "CARTERA DE CRÉDITOS")) %>%
map(~pivot_longer(.x,cols = `43131`:`44804`,
names_to = c("Años"),
values_to = "Créditos"))%>%
map(~mutate(.x,Fecha= seq(as.Date("2018-1-1"),by = "months",length.out = nrow(.))))%>%
map(~select(.x,Fecha,Créditos))
# Ponemos en cada data.frame, el nombre de cada entidad
Nammes <- c("Di", "Gua", "Int", "Pic")
GHA <- map2(GHA, Nammes, ~ .x %>%
mutate(Nammes = .y, .before = 1))
Bank <- bind_rows(GHA)
#save(Bank, file = "Bank.RData")Para el presente ejercicio aplicamos un modelo Holt-winters.
Esta opción, nos permite ver los parámetros del modelo. En la opción key se debe anotar la variable categórica.
## # A tibble: 4 x 16
## # Groups: Nammes [4]
## Nammes data data.ts fit.HW model.desc sigma logLik AIC BIC
## <chr> <list> <list> <list> <chr> <dbl> <lgl> <lgl> <lgl>
## 1 Di <tibble> <ts [56 x 1]> <HltWntrs> HoltWinters 3.14e5 NA NA NA
## 2 Gua <tibble> <ts [56 x 1]> <HltWntrs> HoltWinters 3.68e5 NA NA NA
## 3 Int <tibble> <ts [56 x 1]> <HltWntrs> HoltWinters 2.24e5 NA NA NA
## 4 Pic <tibble> <ts [56 x 1]> <HltWntrs> HoltWinters 9.37e5 NA NA NA
## # i 7 more variables: ME <dbl>, RMSE <dbl>, MAE <dbl>, MPE <dbl>, MAPE <dbl>,
## # MASE <dbl>, ACF1 <dbl>
Bank_cat2_fcast <- Bank_cat2_fcast %>%
mutate(sweep = map(fcast.HW, sw_sweep, fitted = FALSE,
timetk_idx = TRUE)) %>%
unnest(sweep)Bank_cat2_fcast %>%
ggplot(aes(x = index, y = Créditos, color = key, group = Nammes)) +
geom_ribbon(aes(ymin = lo.95, ymax = hi.95),
fill = "#D5DBFF", color = NA, size = 0) +
geom_line() +
labs(title = "Pronóstico Cartera de Crédito",
subtitle = "Modelo Holt-Winters",
x = "", y = "Units") +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
scale_color_tq() +
scale_fill_tq() +
facet_wrap(~ Nammes, scales = "free_y", ncol = 3) +
theme_tq() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))## # A tibble: 248 x 10
## # Groups: Nammes [4]
## Nammes data data.ts fit.HW fcast.HW index key Créditos
## <chr> <list> <list> <list> <list> <date> <chr> <dbl>
## 1 Di <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-01-01 actu~ 1335810.
## 2 Di <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-02-01 actu~ 1336984.
## 3 Di <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-03-01 actu~ 1400660.
## 4 Di <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-04-01 actu~ 1428374.
## 5 Di <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-05-01 actu~ 1458558.
## 6 Di <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-06-01 actu~ 1488693.
## 7 Di <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-07-01 actu~ 1524040.
## 8 Di <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-08-01 actu~ 1633440.
## 9 Di <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-09-01 actu~ 1696293.
## 10 Di <tibble> <ts [56 x 1]> <HltWntrs> <forecast> 2018-10-01 actu~ 1717122.
## # i 238 more rows
## # i 2 more variables: lo.95 <dbl>, hi.95 <dbl>
load("D:/Documentos/Estadisticos/R/R_studio/Time_Series/Bank.RData")
#glimpse(Bank)
Bank %>%
ggplot(mapping = aes(x =Fecha, y= Créditos))+
geom_line()+
facet_wrap(~Nammes, scale ="free", ncol =2)+
tidyquant::theme_tq()# get the min-max of the time index for each sample
test_end <- max(Bank$Fecha)
test_start <- dmy("01-02-2022")
train_end <- test_start %m-% months(1)
train_start <- min(Bank$Fecha)
interval_train <- interval(start = train_start, end = train_end)
interval_intest <- interval(start = test_start, end = test_end)Con este gráfico, se identifica dentro de la base, aquella clasificada como train y aquella como test
Bank %>%
mutate(sample = case_when(
Fecha %within% interval_train ~ "train",
Fecha %within% interval_intest ~ "test"
)) %>%
drop_na() %>%
mutate(sample = factor(sample, levels = c("train", "test"))) %>%
ggplot(aes(x = Fecha, y = Créditos, colour = sample)) +
geom_line() +
labs(x = NULL, y = NULL, colour = NULL) +
facet_wrap(~ Nammes, scale = "free", ncol = 2) +
tidyquant::theme_tq() +
tidyquant::scale_colour_tq()Con el objetivo de prevenir que valores atípicos afecten al modelo, se rescala la información aplicando para este fin el paquete recipes.
recipe <- recipe(~., filter(Bank_I, Fecha %within% interval_train)) %>%
step_sqrt(all_numeric()) %>%
step_center(all_numeric()) %>%
step_scale(all_numeric()) %>%
prep()
Bank_I <- bake(recipe, Bank_I)Luego, cambiamos la data a la forma original.
# revert back function
rec_revert <- function(vector, recipe, varname) {
# store recipe values
rec_center <- recipe$steps[[2]]$means[varname]
rec_scale <- recipe$steps[[3]]$sds[varname]
# convert back based on the recipe
results <- (vector * rec_scale + rec_center) ^ 2
# add additional adjustment if necessary
results <- round(results)
# return the results
results
}# adjust by sample
Bank_III <- Bank_II %>%
mutate(sample = case_when(
Fecha %within% interval_train ~ "train",
Fecha %within% interval_intest ~ "test"
)) %>%
drop_na()
# nest the data train
Bank_IV <- Bank_III %>%
group_by(Bancos, sample) %>%
nest(.key = "data") %>%
spread(sample, data)Corremos la lista con cinco modelos: auto.arima, ets, stlm. tbats y holt.winters
models <- list(
auto.arima = function(x) auto.arima(x),
ets = function(x) ets(x),
stlm = function(x) stlm(x),
tbats = function(x) tbats(x, use.box.cox = FALSE),
holt.winter = function (x) HoltWinters(x, seasonal = "additive")
)Es posible convertirlo en formato agrupado como el anterior ejemplo.
## Joining with `by = join_by(Bancos)`
Bank_VII <- Bank_VI %>%
mutate(
params = map(train, ~ list(x = .x)),
data = invoke_map(func, params),
params = map(data, ~ list(x = .x)),
fitted = invoke_map(model, params)
) %>%
select(-data, -params)
Bank_VII## # A tibble: 20 x 8
## # Groups: Bancos [4]
## Bancos test train func_name func model_name model fitted
## <chr> <list> <list> <chr> <list> <chr> <lis> <list>
## 1 Di <tibble [7 x 2]> <tibble> ts <fn> auto.arima <fn> <fr_ARIMA>
## 2 Di <tibble [7 x 2]> <tibble> ts <fn> ets <fn> <ets>
## 3 Di <tibble [7 x 2]> <tibble> ts <fn> stlm <fn> <stlm>
## 4 Di <tibble [7 x 2]> <tibble> ts <fn> tbats <fn> <tbats>
## 5 Di <tibble [7 x 2]> <tibble> ts <fn> holt.wint~ <fn> <HltWntrs>
## 6 Gua <tibble [7 x 2]> <tibble> ts <fn> auto.arima <fn> <fr_ARIMA>
## 7 Gua <tibble [7 x 2]> <tibble> ts <fn> ets <fn> <ets>
## 8 Gua <tibble [7 x 2]> <tibble> ts <fn> stlm <fn> <stlm>
## 9 Gua <tibble [7 x 2]> <tibble> ts <fn> tbats <fn> <bats>
## 10 Gua <tibble [7 x 2]> <tibble> ts <fn> holt.wint~ <fn> <HltWntrs>
## 11 Int <tibble [7 x 2]> <tibble> ts <fn> auto.arima <fn> <fr_ARIMA>
## 12 Int <tibble [7 x 2]> <tibble> ts <fn> ets <fn> <ets>
## 13 Int <tibble [7 x 2]> <tibble> ts <fn> stlm <fn> <stlm>
## 14 Int <tibble [7 x 2]> <tibble> ts <fn> tbats <fn> <bats>
## 15 Int <tibble [7 x 2]> <tibble> ts <fn> holt.wint~ <fn> <HltWntrs>
## 16 Pic <tibble [7 x 2]> <tibble> ts <fn> auto.arima <fn> <fr_ARIMA>
## 17 Pic <tibble [7 x 2]> <tibble> ts <fn> ets <fn> <ets>
## 18 Pic <tibble [7 x 2]> <tibble> ts <fn> stlm <fn> <stlm>
## 19 Pic <tibble [7 x 2]> <tibble> ts <fn> tbats <fn> <bats>
## 20 Pic <tibble [7 x 2]> <tibble> ts <fn> holt.wint~ <fn> <HltWntrs>
Activar librería yardstick
Bank_error <- Bank_VII %>%
mutate(error =
map(fitted, ~ forecast(.x, h = 7)) %>%
map2_dbl(test, ~ mae_vec(truth = .y$Valores, estimate = .x$mean))
) %>%
arrange(Bancos, error) #arrange based on smallest error for each area
Bank_error## # A tibble: 20 x 9
## # Groups: Bancos [4]
## Bancos test train func_name func model_name model fitted error
## <chr> <list> <list> <chr> <list> <chr> <lis> <list> <dbl>
## 1 Di <tibble> <tibble> ts <fn> auto.arima <fn> <fr_ARIMA> 0.164
## 2 Di <tibble> <tibble> ts <fn> holt.winter <fn> <HltWntrs> 0.206
## 3 Di <tibble> <tibble> ts <fn> tbats <fn> <tbats> 0.303
## 4 Di <tibble> <tibble> ts <fn> ets <fn> <ets> 0.347
## 5 Di <tibble> <tibble> ts <fn> stlm <fn> <stlm> 0.348
## 6 Gua <tibble> <tibble> ts <fn> tbats <fn> <bats> 0.0988
## 7 Gua <tibble> <tibble> ts <fn> auto.arima <fn> <fr_ARIMA> 0.155
## 8 Gua <tibble> <tibble> ts <fn> ets <fn> <ets> 0.177
## 9 Gua <tibble> <tibble> ts <fn> stlm <fn> <stlm> 0.205
## 10 Gua <tibble> <tibble> ts <fn> holt.winter <fn> <HltWntrs> 0.219
## 11 Int <tibble> <tibble> ts <fn> stlm <fn> <stlm> 1.24
## 12 Int <tibble> <tibble> ts <fn> holt.winter <fn> <HltWntrs> 1.47
## 13 Int <tibble> <tibble> ts <fn> ets <fn> <ets> 1.51
## 14 Int <tibble> <tibble> ts <fn> auto.arima <fn> <fr_ARIMA> 1.57
## 15 Int <tibble> <tibble> ts <fn> tbats <fn> <bats> 2.06
## 16 Pic <tibble> <tibble> ts <fn> ets <fn> <ets> 0.297
## 17 Pic <tibble> <tibble> ts <fn> holt.winter <fn> <HltWntrs> 0.446
## 18 Pic <tibble> <tibble> ts <fn> stlm <fn> <stlm> 1.04
## 19 Pic <tibble> <tibble> ts <fn> auto.arima <fn> <fr_ARIMA> 1.07
## 20 Pic <tibble> <tibble> ts <fn> tbats <fn> <bats> 1.30
Escogemos el modelo que tenga el menor error
Bank_best_model <- Bank_error %>%
group_by(Bancos) %>%
arrange(error) %>%
slice(1) %>% #choose the smallest error & best model for each area directly
ungroup() %>%
select(Bancos, ends_with("_name"),error)
Bank_best_model <- Bank_best_model %>%
select(-error) %>%
left_join(Bank_error)%>%
select(-error)## Joining with `by = join_by(Bancos, func_name, model_name)`
Bank_test <- Bank_error %>%
mutate(
forecast =
map(fitted, ~ forecast(.x, h = 7)) %>%
map2(test, ~ tibble(
Fecha = .y$Fecha,
Valores = as.vector(.x$mean)
)),
key = paste(func_name, model_name, sep = "-")
)
Bank_test## # A tibble: 20 x 11
## # Groups: Bancos [4]
## Bancos test train func_name func model_name model fitted error
## <chr> <list> <list> <chr> <list> <chr> <lis> <list> <dbl>
## 1 Di <tibble> <tibble> ts <fn> auto.arima <fn> <fr_ARIMA> 0.164
## 2 Di <tibble> <tibble> ts <fn> holt.winter <fn> <HltWntrs> 0.206
## 3 Di <tibble> <tibble> ts <fn> tbats <fn> <tbats> 0.303
## 4 Di <tibble> <tibble> ts <fn> ets <fn> <ets> 0.347
## 5 Di <tibble> <tibble> ts <fn> stlm <fn> <stlm> 0.348
## 6 Gua <tibble> <tibble> ts <fn> tbats <fn> <bats> 0.0988
## 7 Gua <tibble> <tibble> ts <fn> auto.arima <fn> <fr_ARIMA> 0.155
## 8 Gua <tibble> <tibble> ts <fn> ets <fn> <ets> 0.177
## 9 Gua <tibble> <tibble> ts <fn> stlm <fn> <stlm> 0.205
## 10 Gua <tibble> <tibble> ts <fn> holt.winter <fn> <HltWntrs> 0.219
## 11 Int <tibble> <tibble> ts <fn> stlm <fn> <stlm> 1.24
## 12 Int <tibble> <tibble> ts <fn> holt.winter <fn> <HltWntrs> 1.47
## 13 Int <tibble> <tibble> ts <fn> ets <fn> <ets> 1.51
## 14 Int <tibble> <tibble> ts <fn> auto.arima <fn> <fr_ARIMA> 1.57
## 15 Int <tibble> <tibble> ts <fn> tbats <fn> <bats> 2.06
## 16 Pic <tibble> <tibble> ts <fn> ets <fn> <ets> 0.297
## 17 Pic <tibble> <tibble> ts <fn> holt.winter <fn> <HltWntrs> 0.446
## 18 Pic <tibble> <tibble> ts <fn> stlm <fn> <stlm> 1.04
## 19 Pic <tibble> <tibble> ts <fn> auto.arima <fn> <fr_ARIMA> 1.07
## 20 Pic <tibble> <tibble> ts <fn> tbats <fn> <bats> 1.30
## # i 2 more variables: forecast <list>, key <chr>
Bank_test_key <- Bank_test %>%
select(Bancos, key, actual = test, forecast) %>%
spread(key, forecast) %>%
gather(key, value, -Bancos)Bank_test_unnest <- Bank_test_key %>%
unnest(value) %>%
mutate(Valores = rec_revert(Valores, recipe, Bancos))Con el resultado en formato tbl, comparamos el pronóstico con la información observada
## Warning: package 'plotly' was built under R version 4.1.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
p2 <- Bank_test_unnest %>%
ggplot(aes(x = Fecha, y = Valores, colour = key)) +
geom_line(data = Bank_test_unnest %>%
filter(key == "actual"),aes(y = Valores),alpha = 0.3,size = 0.8)+
geom_line(data = Bank_test_unnest %>%
filter(key != "actual"),aes(frame = key,col = key)) +
labs(x = "", y = "Valores",title = "Model Prediction Comparison", frame = "Model") +
facet_wrap(~ Bancos, scale = "free_y", ncol = 1) +
tidyquant::theme_tq() +
theme(legend.position = "none",axis.title.y = element_text(size = 12), axis.text.y = element_text(size = 7))## Warning in geom_line(data = Bank_test_unnest %>% filter(key != "actual"), :
## Ignoring unknown aesthetics: frame
## Warning in p$x$data[firstFrame] <- p$x$frames[[1]]$data: número de items para
## para sustituir no es un múltiplo de la longitud del reemplazo
Bank_min_error <- Bank_error %>%
select(-fitted) %>% # remove unused
group_by(Bancos) %>%
filter(error == min(error)) %>%
ungroup()
Bank_min_error #same selection with subarea_best_model## # A tibble: 4 x 8
## Bancos test train func_name func model_name model error
## <chr> <list> <list> <chr> <list> <chr> <list> <dbl>
## 1 Di <tibble [7 x 2]> <tibble> ts <fn> auto.arima <fn> 0.164
## 2 Gua <tibble [7 x 2]> <tibble> ts <fn> tbats <fn> 0.0988
## 3 Int <tibble [7 x 2]> <tibble> ts <fn> stlm <fn> 1.24
## 4 Pic <tibble [7 x 2]> <tibble> ts <fn> ets <fn> 0.297
Bank_combine <- Bank_min_error %>%
mutate(fulldata = map2(train, test, ~ bind_rows(.x, .y))) %>%
select(Bancos, fulldata, everything(), -train, -test)Bank_combine_nest <- Bank_combine %>%
mutate(
params = map(fulldata, ~ list(x = .x)),
data = invoke_map(func, params),
params = map(data, ~ list(x = .x)),
fitted = invoke_map(model, params)
) %>%
select(-data, -params)Se puede incluir nivel de confianza al 95%, así como también el límite inferior y superior de la estimación
mae_test_result <- Bank_error %>%
group_by(Bancos) %>%
select(Bancos, error) %>%
arrange(error) %>%
slice(1) %>%
ungroup()
mae_test_result <- mae_test_result %>%
summarise(Bancos = "Todos los bancos",
error = mean(error)) %>%
bind_rows(mae_test_result, .)
mae_test_result## # A tibble: 5 x 2
## Bancos error
## <chr> <dbl>
## 1 Di 0.164
## 2 Gua 0.0988
## 3 Int 1.24
## 4 Pic 0.297
## 5 Todos los bancos 0.450
# https://github.com/spsanderson/healthyverse_tsa
# https://business-science.github.io/modeltime/articles/nested-forecasting.html| id | Store | Dept | Date | Weekly_Sales | IsHoliday | Type | Size | Temperature | Fuel_Price | MarkDown1 | MarkDown2 | MarkDown3 | MarkDown4 | MarkDown5 | CPI | Unemployment |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1_13 | 1 | 13 | 2011-02-25 | 37838.40 | FALSE | A | 151315 | 62.90 | 3.065 | NA | NA | NA | NA | NA | 213.5356 | 7.742 |
| 1_93 | 1 | 93 | 2010-11-26 | 81660.77 | TRUE | A | 151315 | 64.52 | 2.735 | NA | NA | NA | NA | NA | 211.7484 | 7.838 |
| 1_93 | 1 | 93 | 2011-09-02 | 73692.87 | FALSE | A | 151315 | 87.83 | 3.533 | NA | NA | NA | NA | NA | 215.7971 | 7.962 |
| 1_93 | 1 | 93 | 2012-03-23 | 66710.40 | FALSE | A | 151315 | 65.93 | 3.787 | 6118.56 | 9.48 | 4.97 | 426.72 | 3657.22 | 221.2864 | 7.348 |
| 1_13 | 1 | 13 | 2012-07-27 | 37423.21 | FALSE | A | 151315 | 82.66 | 3.407 | 7146.90 | 389.02 | 1.59 | 10267.54 | 4325.19 | 221.9413 | 6.908 |
Con la opción set_names, es posible cambiar el nombre de las variables, de una forma más sencilla.
plot_time_series proviene de la libreria timetk
Para este paso, consideramos las siguientes funciones que provienen de la librería modeltime.
extend_timeseries: “Defines how far into the future to extend the time series by each time series group.”
nest_timeseries: “Defines which observations should be split into the .future_data.”
split_nested_timeseries: “A wrapper for timetk::time_series_split() that generates training/testing splits from the .actual_data column.”
nested_data_tbl <- data_A %>%
# 1. Extending: We'll predict 52 weeks into the future.
extend_timeseries(
.id_var = ID,
.date_var = Fecha,
.length_future = 52
) %>%
# 2. Nesting: We'll group by ID, and create a future dataset
# that forecasts 52 weeks of extended data and
# an actual dataset that contains 104 weeks (2-years of data)
nest_timeseries(
.id_var = ID,
.length_future = 52,
.length_actual = 52*2
) %>%
# 3. Splitting: We'll take the actual data and create splits
# for accuracy and confidence interval estimation of 52 weeks (test)
# and the rest is training data
split_nested_timeseries(
.length_test = 52
)
nested_data_tbl## # A tibble: 7 x 4
## ID .actual_data .future_data .splits
## <fct> <list> <list> <list>
## 1 1_1 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 2 1_3 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 3 1_8 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 4 1_13 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 5 1_38 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 6 1_93 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
## 7 1_95 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]>
step_timeseries_signature proviene de la libreria timetk. step_timeseries_signature creates a a specification of a recipe step that will convert date or date-time data into many features that can aid in machine learning with time-series data.
rec_xgb <- recipe(Valores ~ ., extract_nested_train_split(nested_data_tbl)) %>%
step_timeseries_signature(Fecha) %>%
step_rm(Fecha) %>%
step_zv(all_predictors()) %>%
step_dummy(all_nominal_predictors(), one_hot = TRUE)
wflw_xgb <- workflow() %>%
add_model(boost_tree("regression") %>% set_engine("xgboost")) %>%
add_recipe(rec_xgb)nested_modeltime_tbl <- modeltime_nested_fit(
# Nested data
nested_data = nested_data_tbl,
# Add workflows
wflw_auto_arima,
wflw_auto_exp,
wflw_prophet,
wflw_xgb
)##
Fitting models on training data... ====>-------------------------- 14% |
## ET...
Fitting models on training data... =========>--------------------- 29% |
## ET...
Fitting models on training data... =============>----------------- 43% |
## ET...
Fitting models on training data... =================>------------- 57% |
## ET...
Fitting models on training data... =====================>--------- 71% |
## ET...
Fitting models on training data... ==========================>---- 86% |
## ET...
## # Nested Modeltime Table
##
## Trained on: .splits | Model Errors: [0]
## # A tibble: 7 x 5
## ID .actual_data .future_data .splits .modeltime_tables
## <fct> <list> <list> <list> <list>
## 1 1_1 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 2 1_3 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 3 1_8 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 4 1_13 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 5 1_38 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 6 1_93 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
## 7 1_95 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [4 x 5]>
nested_modeltime_tbl %>%
extract_nested_test_accuracy() %>%
table_modeltime_accuracy(.interactive = F)| Accuracy Table | |||||||||
|---|---|---|---|---|---|---|---|---|---|
| ID | .model_id | .model_desc | .type | mae | mape | mase | smape | rmse | rsq |
| 1_1 | 1 | ARIMA | Test | 12316.89 | 61.60 | 2.43 | 46.58 | 13078.90 | NA |
| 1_1 | 2 | ETSMNN | Test | 12316.32 | 61.60 | 2.43 | 46.58 | 13078.35 | NA |
| 1_1 | 3 | PROPHET | Test | 10071.47 | 45.88 | 1.99 | 59.97 | 11776.87 | 0.38 |
| 1_1 | 4 | XGBOOST | Test | 6232.23 | 25.28 | 1.23 | 24.55 | 9014.73 | 0.19 |
| 1_3 | 1 | ARIMA | Test | 4798.16 | 30.21 | 1.86 | 30.47 | 8716.55 | 0.02 |
| 1_3 | 2 | ETSMNN | Test | 4791.42 | 23.75 | 1.86 | 29.91 | 9573.41 | NA |
| 1_3 | 3 | PROPHET | Test | 3539.80 | 29.87 | 1.37 | 25.46 | 4707.77 | 0.80 |
| 1_3 | 4 | XGBOOST | Test | 3071.72 | 18.69 | 1.19 | 20.26 | 5078.50 | 0.79 |
| 1_8 | 1 | ARIMA | Test | 3558.18 | 9.19 | 1.51 | 9.76 | 4112.95 | NA |
| 1_8 | 2 | ETSMAN | Test | 4976.42 | 12.91 | 2.12 | 14.01 | 5567.39 | 0.12 |
| 1_8 | 3 | PROPHET | Test | 4282.96 | 11.15 | 1.82 | 11.96 | 4845.08 | 0.00 |
| 1_8 | 4 | XGBOOST | Test | 3608.13 | 9.39 | 1.53 | 9.95 | 4025.27 | 0.30 |
| 1_13 | 1 | ARIMA | Test | 2308.38 | 5.68 | 0.85 | 5.85 | 3066.11 | 0.06 |
| 1_13 | 2 | ETSANN | Test | 2601.22 | 6.37 | 0.96 | 6.62 | 3369.45 | NA |
| 1_13 | 3 | PROPHET | Test | 6861.13 | 17.02 | 2.53 | 18.78 | 7309.61 | 0.15 |
| 1_13 | 4 | XGBOOST | Test | 2758.50 | 6.85 | 1.02 | 7.12 | 3119.72 | 0.53 |
| 1_38 | 1 | ARIMA | Test | 7922.19 | 9.78 | 0.68 | 10.05 | 10647.73 | NA |
| 1_38 | 2 | ETSMNN | Test | 7924.30 | 9.80 | 0.68 | 10.05 | 10624.30 | NA |
| 1_38 | 3 | PROPHET | Test | 26007.21 | 32.57 | 2.22 | 39.60 | 27938.83 | 0.08 |
| 1_38 | 4 | XGBOOST | Test | 7492.22 | 9.23 | 0.64 | 9.58 | 9437.51 | 0.33 |
| 1_93 | 1 | ARIMA | Test | 8292.72 | 10.42 | 0.83 | 11.02 | 10365.73 | 0.06 |
| 1_93 | 2 | ETSANN | Test | 8703.63 | 10.89 | 0.88 | 11.44 | 10338.66 | NA |
| 1_93 | 3 | PROPHET | Test | 17165.30 | 21.37 | 1.73 | 24.46 | 19123.17 | 0.03 |
| 1_93 | 4 | XGBOOST | Test | 7140.45 | 8.98 | 0.72 | 9.53 | 8859.37 | 0.49 |
| 1_95 | 1 | ARIMA | Test | 8945.63 | 7.10 | 1.08 | 7.29 | 11410.65 | 0.01 |
| 1_95 | 2 | ETSMNN | Test | 8713.50 | 6.95 | 1.05 | 7.10 | 11224.78 | NA |
| 1_95 | 3 | PROPHET | Test | 22836.06 | 18.30 | 2.75 | 20.37 | 24094.49 | 0.48 |
| 1_95 | 4 | XGBOOST | Test | 10794.54 | 8.49 | 1.30 | 8.93 | 13102.82 | 0.11 |
best_nested_modeltime_tbl <- nested_modeltime_tbl %>%
modeltime_nested_select_best(
metric = "rmse",
minimize = TRUE,
filter_test_forecasts = TRUE)## # Nested Modeltime Table
##
## Trained on: .splits | Model Errors: [0]
## # A tibble: 7 x 10
## ID .model_id .model_desc .type mae mape mase smape rmse rsq
## <fct> <int> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1_1 4 XGBOOST Test 6232. 25.3 1.23 24.5 9015. 0.191
## 2 1_3 3 PROPHET Test 3540. 29.9 1.37 25.5 4708. 0.796
## 3 1_8 4 XGBOOST Test 3608. 9.39 1.53 9.95 4025. 0.304
## 4 1_13 1 ARIMA Test 2308. 5.68 0.850 5.85 3066. 0.0649
## 5 1_38 4 XGBOOST Test 7492. 9.23 0.640 9.58 9438. 0.332
## 6 1_93 4 XGBOOST Test 7140. 8.98 0.719 9.53 8859. 0.490
## 7 1_95 2 ETSMNN Test 8714. 6.95 1.05 7.10 11225. NA
nested_modeltime_refit_tbl <- best_nested_modeltime_tbl %>%
modeltime_nested_refit(control = control_nested_refit(verbose = TRUE))## i [1/7] Starting Modeltime Table: ID 1_1...
## v Model 4 Passed XGBOOST.
## v [1/7] Finished Modeltime Table: ID 1_1
## i [2/7] Starting Modeltime Table: ID 1_3...
## v Model 3 Passed PROPHET.
## v [2/7] Finished Modeltime Table: ID 1_3
## i [3/7] Starting Modeltime Table: ID 1_8...
## v Model 4 Passed XGBOOST.
## v [3/7] Finished Modeltime Table: ID 1_8
## i [4/7] Starting Modeltime Table: ID 1_13...
## v Model 1 Passed ARIMA(3,1,0)(0,0,1)[12].
## v [4/7] Finished Modeltime Table: ID 1_13
## i [5/7] Starting Modeltime Table: ID 1_38...
## v Model 4 Passed XGBOOST.
## v [5/7] Finished Modeltime Table: ID 1_38
## i [6/7] Starting Modeltime Table: ID 1_93...
## v Model 4 Passed XGBOOST.
## v [6/7] Finished Modeltime Table: ID 1_93
## i [7/7] Starting Modeltime Table: ID 1_95...
## v Model 2 Passed ETS(M,N,N).
## v [7/7] Finished Modeltime Table: ID 1_95
## Finished in: 7.304214 secs.
## # Nested Modeltime Table
##
## Trained on: .actual_data | Model Errors: [0]
## # A tibble: 7 x 5
## ID .actual_data .future_data .splits .modeltime_tables
## <fct> <list> <list> <list> <list>
## 1 1_1 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 2 1_3 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 3 1_8 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 4 1_13 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 5 1_38 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 6 1_93 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
## 7 1_95 <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_tm_t [1 x 5]>
For this step, first at all we have to work with this libraries.
| date | value |
|---|---|
| 2010-01-01 | 999.9930 |
| 2010-02-01 | 1000.3700 |
| 2010-03-01 | 1001.0245 |
| 2010-04-01 | 1000.0097 |
| 2010-05-01 | 1001.3507 |
| 2010-06-01 | 999.4062 |
| 2010-07-01 | 999.3950 |
| 2010-08-01 | 999.5441 |
Split into training (80%) and test (20%). As Rob J Hyndman and George Athanasopoulos mentioned: “The size of the test is typically about 20% of the total sample, although this value depends on how long the sample is and how far ahead you want to forecast”.
time_series_split(): A convenience function to return a single time series split containing a training/testing sample. This function comes from the library timetk.
ff: Name of the data frame that will be analized.
assess: The length of the test set.
splits <- time_series_split(
ff,
assess = "12 months", # Test set = last 12 months
cumulative = TRUE # Training set = all data before test
)## Using date_var: date
The tk_time_series_cv_plan(): function provides a simple interface to prepare a time series resample specification (rset) of either rolling_origin or time_series_cv class.
arima_reg(): Is a way to generate a specification of an ARIMA model. exp_smoothing(): Is a way to generate a specification of an Exponential Smoothing model. prophet_reg(): Is a way to generate a specification of a PROPHET model.
# Model 1: Auto ARIMA
model_arima <- arima_reg() %>%
set_engine("auto_arima") %>%
fit(value ~ date, training(splits))
# Model 2: ETS
model_ets <- exp_smoothing() %>%
set_engine("ets") %>%
fit(value ~ date, training(splits))
# Model 3: Prophet
model_prophet <- prophet_reg() %>%
set_engine("prophet") %>%
fit(value ~ date, training(splits))Creates a table of models
Create rsample cross validation sets for time series. This function produces a sampling plan starting with the most recent time series observations, rolling backwards.
initial=60: Use the first 60 observations (e.g., 5 years if monthly data) to train the model.Ensures the model learns from a sufficiently long history.
assess=12: Evaluate the model on the next 12 observations (e.g., 1 year if monthly data). Simulates real-world forecasting where you predict future periods.Asses must be the same as set before.
resamples <- time_series_cv(
training(splits),
initial = 60, # 5 years
assess = 12, # 1 year
skip = 6 # 6-month increments
)## Using date_var: date
Resampled predictions are commonly used for: Analyzing accuracy and stability of models
modeltime_resample_accuracy(): This is a wrapper for yardstick that simplifies time series regression accuracy metric calculations.
modeltime::table_modeltime_accuracy(): To format the results for reporting.
Step I: modeltime_calibrate Calibrates the model on the test set to generate residuals and confidence intervals. Takes your trained best_model. Uses the testing(splits) data (held-out data from initial_time_split()) to:
*Generate predictions
*Compare predictions to actual values (actual_data).
*Compute residuals (errors) and confidence intervals.
Step II: modeltime_forecast(new_data = testing(splits), actual_data = ff)
Generates forecasts for the test set (new_data) and combines them with actual data (actual_data) for visualization/analysis.
forecast_test <- best_model %>%
modeltime_calibrate(testing(splits)) %>%
modeltime_forecast(new_data = testing(splits),
actual_data = ff)## Converting to Modeltime Table.
future_df <- ff %>%
future_frame(.length_out = 12, .date_var = date)
future_forecast <- best_model %>%
modeltime_calibrate(ff) %>% # Refit on full dataset
modeltime_forecast(new_data = future_df,
actual_data = ff)## Converting to Modeltime Table.
future_forecast %>%
plot_modeltime_forecast(
.interactive = FALSE, # Set TRUE for Plotly (zoomable)
.conf_interval_show = TRUE, # Default
.conf_interval_alpha = 0.2 # Transparency
)| date | value |
|---|---|
| 2010-01-01 | 999.4895 |
| 2010-02-01 | 999.8698 |
| 2010-03-01 | 1001.7087 |
| 2010-04-01 | 1000.2705 |
| 2010-05-01 | 1000.3793 |
First at all, we have to transform data.frame as ts object.
In second place, we Setting training and testing partitions
methods <- list(ets1 = list(method = "ets",
method_arg = list(opt.crit = "lik"),
notes = "ETS model with opt.crit = lik"),
ets2 = list(method = "ets",
method_arg = list(opt.crit = "amse"),
notes = "ETS model with opt.crit = amse"),
arima1 = list(method = "arima",
method_arg = list(order = c(1,1,1)),
notes = "ARIMA(1,1,1)"),
hw = list(method = "HoltWinters",
method_arg = NULL,
notes = "HoltWinters Model"),
tslm = list(method = "tslm",
method_arg = list(formula = input ~ trend + season),
notes = "tslm model with trend and seasonal components"))train_model: Method for train test and compare multiple time series models using either one partition (i.e., sample out) or multipe partitions (backtesting)
input: A univariate time series object (ts class).
md <- train_model(input = ts_data,
methods = methods,
train_method = list(partitions = 6,
sample.out = 12,
space = 3),
horizon = 12,
error = "MAPE")## # A tibble: 5 x 7
## model_id model notes avg_mape avg_rmse `avg_coverage_80%` `avg_coverage_95%`
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 tslm tslm tslm~ 0.000629 0.810 0.875 0.986
## 2 ets2 ets ETS ~ 0.000630 0.794 0.861 0.958
## 3 ets1 ets ETS ~ 0.000638 0.795 0.931 0.958
## 4 arima1 arima ARIM~ 0.000693 0.920 0.847 0.958
## 5 hw HoltWi~ Holt~ 0.000705 0.892 0.931 0.986