Versión rápida del informe de Random Forest (RF): se esperan los mismos resultados (R², RMSE, MAE y pronóstico a 24 h), pero con menor costo computacional, para poder publicarlo en RPubs.
if (!require("pacman")) install.packages("pacman")
## Cargando paquete requerido: pacman
pacman::p_load(
tidyverse,
lubridate,
caret,
randomForest,
ggplot2
)
# Forecasting configuration: the columns to model, and the number of
# observations used for the lag window, the hold-out test block and the
# forecast horizon.
targets <- c("generation.solar", "generation.wind.onshore", "total.load.actual")
window_size <- 24
test_size <- 24
forecast_horizon <- 24

# Download the energy dataset, parse the timestamp column, sort by time, keep
# only the timestamp plus the target columns, and drop every row with an NA.
url <- "https://raw.githubusercontent.com/vneumannufprbr/TrabajosRStudio/main/energy_dataset.csv"
data <- read.csv(url, stringsAsFactors = FALSE) %>%
  dplyr::mutate(time = lubridate::ymd_hms(time)) %>%
  dplyr::arrange(time) %>%
  dplyr::select(time, dplyr::all_of(targets)) %>%
  na.omit()
#' Build a lagged (sliding-window) data frame from a univariate series
#'
#' Row `t` holds `serie[t], ..., serie[t + window - 1]` in columns
#' `X1 ... X<window>` and the next observation, `serie[t + window]`, in
#' column `target`.
#'
#' @param serie Numeric vector of observations, in time order.
#' @param window Number of lagged values per row; a single whole number >= 1.
#' @return A data.frame with `length(serie) - window` rows, `window`
#'   predictor columns (`X1`, `X2`, ...) and a `target` column.
create_features <- function(serie, window) {
  # Fail early with a clear message instead of an obscure subscript error
  # (e.g. for window = 0, where 1:window would iterate over c(1, 0)).
  stopifnot(
    is.numeric(serie),
    is.numeric(window), length(window) == 1L,
    is.finite(window), window >= 1, window == round(window)
  )
  n <- length(serie)
  stopifnot(n > window)

  # embed() yields one row per run of `window + 1` consecutive values, newest
  # value first: column 1 is the target, columns 2..(window + 1) are the lags
  # from most recent to oldest.
  lagged <- stats::embed(as.double(serie), window + 1L)

  # Reverse the lag columns so that X1 is the oldest value in the window.
  X <- lagged[, rev(seq_len(window)) + 1L, drop = FALSE]
  colnames(X) <- paste0("X", seq_len(window))

  as.data.frame(cbind(X, target = lagged[, 1L]))
}
#' Compute R2, RMSE and MAE, falling back to NA when they cannot be computed
#'
#' Pairs where either value is non-finite (NA, NaN, Inf) are dropped before
#' the metrics are computed.
#'
#' @param actual Numeric vector of observed values.
#' @param predicted Numeric vector of predictions, same length as `actual`.
#' @return A one-row tibble with columns `R2`, `RMSE` and `MAE`. All three are
#'   `NA_real_` when the lengths differ or fewer than two finite pairs remain;
#'   `R2` alone is `NA_real_` when the observed values have (numerically) zero
#'   variance.
safe_calculate_metrics <- function(actual, predicted) {
  na_metrics <- tibble::tibble(R2 = NA_real_, RMSE = NA_real_, MAE = NA_real_)

  if (length(actual) != length(predicted) || length(actual) < 2) {
    return(na_metrics)
  }

  valid <- is.finite(actual) & is.finite(predicted)
  if (sum(valid) < 2) {
    return(na_metrics)
  }

  a <- actual[valid]
  errors <- a - predicted[valid]

  ss_res <- sum(errors^2)
  ss_tot <- sum((a - mean(a))^2)
  # Scalar condition: plain if/else rather than the vectorised ifelse().
  # R2 is undefined when the observed values do not vary.
  r2 <- if (ss_tot < .Machine$double.eps) NA_real_ else 1 - ss_res / ss_tot

  tibble::tibble(
    R2 = r2,
    RMSE = sqrt(mean(errors^2)),
    MAE = mean(abs(errors))
  )
}
# Seed the RNG before any resampling or model fitting takes place.
set.seed(1912)

# Lightweight resampling scheme: 3-fold cross-validation, with parallel
# processing allowed.
train_control <- trainControl(method = "cv", number = 3, allowParallel = TRUE)
# Per-target outputs, keyed by the target column name:
#   results_forecast[[name]] - numeric vector of `forecast_horizon` predictions
#   results_metrics[[name]]  - one-row tibble of test-set metrics
results_forecast <- list()
results_metrics <- list()

# Predictor column names produced by create_features(); loop-invariant.
lag_names <- paste0("X", seq_len(window_size))

for (target_var in targets) {
  serie <- as.numeric(data[[target_var]])
  n <- length(serie)

  # After holding out the last `test_size` observations, the training part
  # must still be longer than one lag window; otherwise the index arithmetic
  # below would silently produce wrong slices.
  stopifnot(
    test_size >= 1,
    forecast_horizon >= 0,
    n > test_size + window_size
  )

  train_series <- head(serie, n - test_size)
  test_series <- tail(serie, test_size)

  train_data <- create_features(train_series, window_size)
  # Prefix the test block with the last training window so that every test
  # observation gets a full set of lagged predictors.
  test_data <- create_features(
    c(tail(train_series, window_size), test_series),
    window_size
  )

  # Cross-validated fit with a reduced number of trees; its selected `mtry`
  # is reused for the final model below.
  rf_model <- train(
    target ~ .,
    data = train_data,
    method = "rf",
    trControl = train_control,
    ntree = 100
  )

  # Test metrics: each prediction uses the observed lags from test_data.
  test_preds <- predict(rf_model, newdata = dplyr::select(test_data, -target))
  results_metrics[[target_var]] <-
    safe_calculate_metrics(test_data$target, test_preds) %>%
    dplyr::mutate(
      Variable = target_var,
      Conjunto = "Test",
      Modelo = "RandomForest (Fast)"
    )

  # Refit on the whole series with the tuned mtry before forecasting.
  full_data <- create_features(serie, window_size)
  rf_full <- randomForest::randomForest(
    x = dplyr::select(full_data, -target),
    y = full_data$target,
    ntree = 100,
    mtry = rf_model$bestTune$mtry
  )

  # Recursive forecast: each prediction is appended to the window and the
  # oldest value dropped, so later steps are fed earlier predictions.
  last_window <- tail(serie, window_size)
  future_preds <- numeric(forecast_horizon)
  for (i in seq_len(forecast_horizon)) {
    input <- as.data.frame(t(last_window))
    colnames(input) <- lag_names
    future_preds[i] <- as.numeric(predict(rf_full, newdata = input))
    last_window <- c(last_window[-1], future_preds[i])
  }
  results_forecast[[target_var]] <- future_preds
}
# Stack the per-target metric rows into one table and put the identifying
# columns first.
metrics_df <- dplyr::select(
  dplyr::bind_rows(results_metrics),
  Modelo, Variable, Conjunto, R2, RMSE, MAE
)

# Replace the raw column names with short display labels.
metrics_df$Variable <- dplyr::recode(
  metrics_df$Variable,
  "generation.solar" = "Solar",
  "generation.wind.onshore" = "Eólica",
  "total.load.actual" = "Carga"
)

# Print the table.
metrics_df
# Hourly timestamps for the forecast, starting one hour after the last
# observed timestamp.
last_time <- max(data$time)
future_dates <- seq(
  last_time + lubridate::hours(1),
  by = "1 hour",
  length.out = forecast_horizon
)

# Long-format forecast table: the timestamps are repeated once per target,
# and the values are concatenated in the same order as the display labels.
forecast_df <- tibble::tibble(
  time = rep(future_dates, length(targets)),
  variable = rep(c("Solar", "Eólica", "Carga"), each = forecast_horizon),
  value = unlist(
    results_forecast[c(
      "generation.solar",
      "generation.wind.onshore",
      "total.load.actual"
    )],
    use.names = FALSE
  )
)
# Forecast line chart: one stacked panel per variable, each with its own
# y scale; the legend is hidden because the panels are already labelled.
ggplot(
  data = forecast_df,
  mapping = aes(x = time, y = value, colour = variable)
) +
  geom_line(linewidth = 0.9) +
  facet_wrap(vars(variable), ncol = 1, scales = "free_y") +
  labs(
    title = "Pronóstico 24 horas con Random Forest (Versión Rápida)",
    x = "Tiempo",
    y = "Valor"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")
# Reshape the metrics table to long format: one row per (variable, metric).
metrics_long <- metrics_df %>%
  tidyr::pivot_longer(
    cols = c(R2, RMSE, MAE),
    names_to = "Métrica",
    values_to = "Valor"
  )

# Dodged bar chart of the test metrics, one panel per metric with its own
# y scale.
ggplot(
  data = metrics_long,
  mapping = aes(x = Variable, y = Valor, fill = Métrica)
) +
  geom_col(position = "dodge") +
  facet_wrap(vars(Métrica), scales = "free_y") +
  labs(
    title = "Métricas en Test - RF (Versión Rápida)",
    x = NULL,
    y = NULL
  ) +
  theme_minimal(base_size = 13)