library(tidymodels)
library(modeltime)
library(tidyverse)
library(lubridate)
library(timetk)
library(forecast)
library(modeltime.resample)
data_tbl <- read_csv2("data_11_23.csv")
data_tbl <- data_tbl %>%
mutate(Fecha = dmy(Mes)) %>%
select(-Mes)
Logaritmo del PPCord:
data_tbl %>%
plot_time_series(.date_var = Fecha,
.value = log(PPCord),
.smooth = FALSE,
.title = "Log PPCord Patagónico"
)
data_transformed_tbl <- data_tbl %>%
mutate(log_PPCord = log(PPCord)) %>%
select(-PPCord)
st_mean <-mean(data_transformed_tbl$log_PPCord)
st_sd <-sd(data_transformed_tbl$log_PPCord)
data_transformed_tbl <- data_transformed_tbl %>%
mutate(PPCord_trans = standardize_vec(log_PPCord)) %>%
select(-log_PPCord)
## Standardization Parameters
## mean: 5.16910803360126
## standard deviation: 1.10873014074664
data_transformed_tbl %>% head()
## # A tibble: 6 × 2
## Fecha PPCord_trans
## <date> <dbl>
## 1 2014-04-01 -1.49
## 2 2014-05-01 -1.49
## 3 2014-06-01 -1.50
## 4 2014-07-01 -1.46
## 5 2014-08-01 -1.40
## 6 2014-09-01 -1.38
horizon <- 12
data_prepared_full_tbl <- data_transformed_tbl %>%
bind_rows(
future_frame(.data = ., .date_var = Fecha, .length_out = horizon)
)
data_prepared_tbl <- data_prepared_full_tbl %>%
filter(!is.na(PPCord_trans))
forecast_tbl <- data_prepared_full_tbl %>%
filter(is.na(PPCord_trans))
splits <- time_series_split(data_prepared_tbl,
assess = "24 months",
cumulative = TRUE # para que trabaje con origen fijo
)
## Using date_var: Fecha
splits
## <Analysis/Assess/Total>
## <92/24/116>
mod_fit_arima <- arima_reg(
non_seasonal_ar = 1,
non_seasonal_differences = 1,
seasonal_period = 12,
seasonal_ar = 1,
seasonal_differences = 1,
seasonal_ma = 0
) %>%
set_engine("arima") %>%
fit(PPCord_trans ~ Fecha, training(splits))
modeltime_table(
mod_fit_arima
) %>%
modeltime_calibrate(testing(splits)) %>%
modeltime_forecast(
new_data = testing(splits),
actual_data = data_prepared_tbl
) %>%
plot_modeltime_forecast(.conf_interval_show = FALSE,.title = NULL)
modeltime_table(
mod_fit_arima
) %>%
modeltime_calibrate(testing(splits)) %>%
modeltime_accuracy(
metric_set = metric_set(mae, rmse, mape)
)
## # A tibble: 1 × 6
## .model_id .model_desc .type mae rmse mape
## <int> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1 ARIMA(1,1,0)(1,1,0)[12] Test 0.136 0.199 7.88
set.seed(123)
num_series <- 100
# Generamos 100 series simuladas
simulated_series <- map_dfr(1:num_series, function(i) {
noise <- rnorm(n = nrow(data_transformed_tbl), mean = 0, sd = 0.03)
data_transformed_tbl %>%
mutate(
Serie_Simulada = PPCord_trans + noise,
Simulation_ID = paste0("Sim_", i)
)
})
simulated_series %>%
filter(Simulation_ID %in% c("Sim_1", "Sim_2", "Sim_3")) %>%
bind_rows(
data_transformed_tbl %>%
mutate(
Serie_Simulada = PPCord_trans,
Simulation_ID = "Log_PPCord"
)
) %>%
plot_time_series(Fecha, Serie_Simulada,
.facet_vars = Simulation_ID,
.smooth = FALSE,
.title = NULL,
.facet_ncol = 2,
.interactive = FALSE)
# Método 1: Origen fijo y testeo deslizante
cv_fixed_origin <- time_series_cv(
data = simulated_series,
# Primeros 4 años como entrenamiento inicial
assess = "12 months", # 1 año de testeo cada vez
skip = "12 months", # Deslizar 1 año cada vez
slice_limit = 4,
cumulative = TRUE
) # el grupo de entrenamiento minimo tiene 43 obs y
# el maximo tiene 92 obs. Luego el tamaño promedio es 67
# Método 2: Ventanas deslizantes
cv_sliding_windows <- time_series_cv(
data = simulated_series,
initial = "66 months", # usamos el tamano promedio del anterior
assess = "12 months", # 1 año de testeo en cada deslizamiento
skip = "12 months", # Deslizar 1 año cada vez
slice_limit = 4
)
# Seleccionamos una serie simulada
selected_series <- simulated_series %>%
filter(Simulation_ID == "Sim_1")
cv_fixed_origin_selected <- time_series_cv(
data = selected_series,
assess = "12 months",
skip = "12 months",
slice_limit = 4,
cumulative = TRUE
)
## Using date_var: Fecha
# Representación gráfica
cv_fixed_origin_selected %>%
tk_time_series_cv_plan() %>%
plot_time_series_cv_plan(Fecha, Serie_Simulada, .facet_ncol = 1)
# Seleccionamos una serie simulada
selected_series <- simulated_series %>%
filter(Simulation_ID == "Sim_1")
cv_sliding_windows_selected <- time_series_cv(
data = selected_series,
initial = "67 months",
assess = "12 months",
skip = "12 months",
slice_limit = 4,
cumulative = FALSE
)
## Using date_var: Fecha
# Representación gráfica
cv_sliding_windows_selected %>%
tk_time_series_cv_plan() %>%
plot_time_series_cv_plan(Fecha, Serie_Simulada, .facet_ncol = 1)
# Crear la función para entrenar y evaluar el modelo ARIMA
train_arima <- function(data) {
arima_model <- arima_reg(
non_seasonal_ar = 1,
non_seasonal_differences = 1,
seasonal_period = 12,
seasonal_ar = 1,
seasonal_differences = 1,
seasonal_ma = 0
) %>%
set_engine("arima") %>%
fit(Serie_Simulada ~ Fecha, data = data)
return(arima_model)
}
# Crear tabla de modelos usando train_arima
model_tbl_fixed_origin <- map_dfr(unique(simulated_series$Simulation_ID), function(sim_id) {
sim_data <- simulated_series %>% filter(Simulation_ID == sim_id)
# Entrenar el modelo ARIMA
mod_fit_arima <- train_arima(sim_data)
# Estructurarlo en una modeltime_table
model_tbl <- modeltime_table(mod_fit_arima)
return(model_tbl)
})
# Verificamos que se haya creado correctamente la tabla de modelos
model_tbl_fixed_origin
## # Modeltime Table
## # A tibble: 100 × 3
## .model_id .model .model_desc
## <int> <list> <chr>
## 1 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 2 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 3 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 4 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 5 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 6 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 7 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 8 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 9 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 10 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## # … with 90 more rows
# Evaluar el rendimiento del ARIMA en las series simuladas usando el método de origen fijo
model_resamples_tbl_fixed_origin <- model_tbl_fixed_origin %>%
modeltime_fit_resamples(
resamples = cv_fixed_origin, # Utilizamos los resamples del método de origen fijo
control = control_resamples(verbose = TRUE, allow_par = TRUE)
)
## ── Fitting Resamples ────────────────────────────────────────────
##
## 299.226 sec elapsed
# Calcular las métricas de evaluación
accuracy_metrics_fixed_origin <- model_resamples_tbl_fixed_origin %>%
modeltime_resample_accuracy(
metric_set = metric_set(mae, mape, rmse),
summary_fns = list(mean = mean, sd = sd)
) %>%
arrange(mae_mean)
# Visualización de los resultados
model_resamples_tbl_fixed_origin %>%
plot_modeltime_resamples(
.metric_set = metric_set(mae, mape, rmse),
.point_size = 2,
.point_alpha = 0.75,
.facet_ncol = 2
)
accuracy_metrics_fixed_origin
## # A tibble: 1 × 10
## .model_id .model_desc .type n mae_mean mae_sd mape_mean mape_sd rmse_mean
## <int> <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 ARIMA(1,1,0… Resa… 4 1.39 1.17 325. 483. 1.59
## # ℹ 1 more variable: rmse_sd <dbl>
# Crear tabla de modelos usando train_arima para el método de ventanas deslizantes
model_tbl_sliding_windows <- map_dfr(unique(simulated_series$Simulation_ID), function(sim_id) {
sim_data <- simulated_series %>% filter(Simulation_ID == sim_id)
# Entrenar el modelo ARIMA
mod_fit_arima <- train_arima(sim_data)
# Estructurarlo en una modeltime_table
model_tbl <- modeltime_table(mod_fit_arima)
return(model_tbl)
})
# Verificamos que se haya creado correctamente la tabla de modelos
model_tbl_sliding_windows
## # Modeltime Table
## # A tibble: 100 × 3
## .model_id .model .model_desc
## <int> <list> <chr>
## 1 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 2 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 3 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 4 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 5 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 6 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 7 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 8 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 9 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## 10 1 <fit[+]> ARIMA(1,1,0)(1,1,0)[12]
## # … with 90 more rows
# Evaluar el rendimiento del ARIMA en las series simuladas usando el método de ventanas deslizantes
model_resamples_tbl_sliding_windows <- model_tbl_sliding_windows %>%
modeltime_fit_resamples(
resamples = cv_sliding_windows, # Utilizamos los resamples del método de ventanas deslizantes
control = control_resamples(verbose = TRUE, allow_par = TRUE)
)
## ── Fitting Resamples ────────────────────────────────────────────
##
## 248.555 sec elapsed
# Calcular las métricas de evaluación
accuracy_metrics_sliding_windows <- model_resamples_tbl_sliding_windows %>%
modeltime_resample_accuracy(
metric_set = metric_set(mae, mape, rmse),
summary_fns = list(mean = mean, sd = sd)
) %>%
arrange(mae_mean)
# Visualización de los resultados
model_resamples_tbl_sliding_windows %>%
plot_modeltime_resamples(
.metric_set = metric_set(mae, mape, rmse),
.point_size = 2,
.point_alpha = 0.75,
.facet_ncol = 2
)
accuracy_metrics_sliding_windows
## # A tibble: 1 × 10
## .model_id .model_desc .type n mae_mean mae_sd mape_mean mape_sd rmse_mean
## <int> <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 ARIMA(1,1,0… Resa… 4 1.39 1.16 326. 485. 1.59
## # ℹ 1 more variable: rmse_sd <dbl>