# 📦 Instalación y carga de paquetes
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
tidyverse,
lubridate,
caret,
randomForest,
ggplot2
)
# 🔍 Lectura y preparación de datos
# Experiment settings: lags per training row, size of the hold-out tail,
# and number of future steps to forecast.
window_size <- 24
test_size <- 48
forecast_horizon <- 24

# Columns kept from the raw file: the timestamp plus the modelled series.
targets <- c("generation.solar", "generation.wind.onshore", "total.load.actual")
keep_cols <- c("time", targets)

# Download the CSV, parse the timestamps, sort chronologically, keep only
# the columns above and drop every row that still contains a missing value.
url <- "https://raw.githubusercontent.com/vneumannufprbr/TrabajosRStudio/main/energy_dataset.csv"
raw_energy <- read.csv(url, stringsAsFactors = FALSE)
data <- raw_energy %>%
  mutate(time = ymd_hms(time)) %>%
  arrange(time) %>%
  select(all_of(keep_cols)) %>%
  na.omit()
# 🛠️ Funciones auxiliares
#' Build a lagged feature table for one-step-ahead forecasting
#'
#' Row `t` holds the `window` consecutive values `serie[t]` to
#' `serie[t + window - 1]` as columns `X1..X<window>`, and the value that
#' immediately follows them, `serie[t + window]`, as `target`.
#'
#' @param serie Vector of observations in time order.
#' @param window Number of lagged values per row; a single number >= 1.
#' @return A data frame with `length(serie) - window` rows and the columns
#'   `X1..X<window>` plus `target`.
create_features <- function(serie, window) {
  if (!is.numeric(window) || length(window) != 1L || is.na(window) ||
    window < 1) {
    stop("`window` must be a single number >= 1", call. = FALSE)
  }
  n <- length(serie)
  if (n <= window) {
    # Without this guard `(window + 1):n` counts downwards and the failure
    # only surfaces later as an unrelated row-count error.
    stop(
      "`serie` needs more than `window` observations (got ", n,
      ", window = ", window, ")",
      call. = FALSE
    )
  }

  n_rows <- n - window
  features <- matrix(NA, nrow = n_rows, ncol = window)
  for (i in seq_len(window)) {
    # Column i is the series shifted by i - 1 positions.
    features[, i] <- serie[i:(n_rows + i - 1)]
  }
  colnames(features) <- paste0("X", seq_len(window))

  data.frame(features, target = serie[(window + 1):n])
}
#' Compute R2, RMSE and MAE over the finite observation pairs
#'
#' Pairs where either value is `NA`, `NaN` or infinite are dropped before
#' any metric is computed.
#'
#' @param actual Numeric vector of observed values.
#' @param predicted Numeric vector of predictions, same length as `actual`.
#' @return A one-row data frame with numeric columns `R2`, `RMSE` and `MAE`.
#'   `R2` is `NA` when the retained `actual` values have numerically zero
#'   spread around their mean, since the ratio is undefined there.
safe_calculate_metrics <- function(actual, predicted) {
  if (length(actual) != length(predicted)) {
    # Recycling here would silently pair up unrelated observations.
    stop("`actual` and `predicted` must have the same length", call. = FALSE)
  }

  valid <- is.finite(actual) & is.finite(predicted)
  actual <- actual[valid]
  predicted <- predicted[valid]
  errors <- actual - predicted

  rmse <- sqrt(mean(errors^2))
  mae <- mean(abs(errors))
  ss_res <- sum(errors^2)
  ss_tot <- sum((actual - mean(actual))^2)

  # Scalar condition: plain if/else with a typed NA keeps `R2` a double
  # column in every case.
  r2 <- if (ss_tot < .Machine$double.eps) NA_real_ else 1 - (ss_res / ss_tot)

  data.frame(R2 = r2, RMSE = rmse, MAE = mae)
}
# 🧠 Entrenamiento con Random Forest
# For each target series: fit a Random Forest on lagged values, score it on
# the held-out tail, then refit on the whole series and forecast
# `forecast_horizon` steps ahead.
# Future forecasts, one numeric vector per target (filled in by the loop).
results <- list()
# Hold-out metrics (R2, RMSE, MAE), one one-row data frame per target.
metrics <- list()
# 5-fold cross-validation used by caret when fitting `rf_model` below.
# NOTE(review): the rows are overlapping lag windows of a time series and
# method = "cv" does not look time-ordered — confirm this is intended.
train_control <- trainControl(method = "cv", number = 5)
for (target_var in targets) {
# Current target column as a plain numeric vector.
serie <- data[[target_var]] %>% as.numeric()
n <- length(serie)
# Hold out the last `test_size` observations; everything before is training.
train_series <- serie[1:(n - test_size)]
test_series <- serie[(n - test_size + 1):n]
train_data <- create_features(train_series, window_size)
# The test windows are built from the test series alone, so its first
# `window_size` values are used only as inputs and only the remaining
# `test_size - window_size` values are scored.
test_data <- create_features(test_series, window_size)
# Fixed seed, reset for every target before the cross-validated fit.
set.seed(1912)
rf_model <- train(
target ~ ., data = train_data,
method = "rf",
trControl = train_control,
ntree = 100
)
# Predict from the lag columns only and score against the held-out targets.
test_preds <- predict(rf_model, newdata = test_data %>% select(-target))
metrics[[target_var]] <- safe_calculate_metrics(test_data$target, test_preds)
# Future forecast
# Refit on every available window without resampling (method = "none"),
# reusing the tuning values selected by the cross-validated fit above.
full_data <- create_features(serie, window_size)
rf_full <- train(
target ~ ., data = full_data,
method = "rf",
trControl = trainControl(method = "none"),
tuneGrid = rf_model$bestTune,
ntree = 100
)
# Recursive multi-step forecast seeded with the last `window_size`
# observed values.
last_window <- tail(serie, window_size)
future_preds <- numeric(forecast_horizon)
for (i in 1:forecast_horizon) {
# One-row input whose column names match the training features X1..X<window_size>.
input <- as.data.frame(matrix(last_window, nrow = 1))
colnames(input) <- paste0("X", 1:window_size)
pred <- predict(rf_full, newdata = input)
future_preds[i] <- pred
# Slide the window: drop the oldest value and append the new prediction,
# so later steps are predicted from earlier predictions.
last_window <- c(last_window[-1], pred)
}
results[[target_var]] <- future_preds
}
# 📊 Gráfico 1: Pronóstico de 24 horas
# Hourly timestamps for the forecast, starting one hour after the last
# observed timestamp.
last_date <- tail(data$time, 1)
future_dates <- seq(
  from = last_date + hours(1),
  by = "hour",
  length.out = forecast_horizon
)

# Stack the three forecasts in long format, one labelled block per series,
# in the order Solar, Eólica, Carga.
series_labels <- c(
  generation.solar = "Solar",
  generation.wind.onshore = "Eólica",
  total.load.actual = "Carga"
)
forecast_df <- bind_rows(
  lapply(names(series_labels), function(series_name) {
    data.frame(
      time = future_dates,
      variable = series_labels[[series_name]],
      value = results[[series_name]]
    )
  })
)

# One stacked panel per series, each with its own y scale; the legend is
# hidden because the panel titles already name the series.
ggplot(
  data = forecast_df,
  mapping = aes(x = time, y = value, color = variable)
) +
  geom_line(linewidth = 1) +
  facet_wrap(~variable, scales = "free_y", ncol = 1) +
  labs(
    title = "Pronóstico de 24 horas con Random Forest",
    x = "Fecha",
    y = "Valor"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# 📊 Gráfico 2: Comparación de R2
# R2 obtained by the Random Forest models on each hold-out set.
metrics_rf_solar <- metrics[["generation.solar"]][["R2"]]
metrics_rf_wind <- metrics[["generation.wind.onshore"]][["R2"]]
metrics_rf_load <- metrics[["total.load.actual"]][["R2"]]

# R2 per algorithm, each vector ordered Solar, Eólica, Carga. The first three
# rows are hard-coded reference values; the last comes from this script.
algorithm_names <- c("KNN", "SVM", "XGBoost", "Random Forest")
variable_names <- c("Solar", "Eólica", "Carga")
r2_by_algorithm <- list(
  c(0.292213, -0.9057994, -0.228126),
  c(0.503, -0.698, -0.334),
  c(0.988, 0.938134, 0.9765706),
  c(metrics_rf_solar, metrics_rf_wind, metrics_rf_load)
)

metrics_comp_df <- data.frame(
  Algoritmo = rep(algorithm_names, each = 3),
  Variable = rep(variable_names, times = 4),
  R2 = unlist(r2_by_algorithm)
)

# Grouped bars per variable; the dashed red line marks R2 = 0.
ggplot(
  data = metrics_comp_df,
  mapping = aes(x = Variable, y = R2, fill = Algoritmo)
) +
  geom_col(position = "dodge") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(title = "Comparación de R² entre Algoritmos", y = "R²") +
  theme_minimal()

# 📊 Gráfico 3: Comparación completa (R², RMSE, MAE)
# Full metric rows (R2, RMSE, MAE) of the Random Forest models.
metrics_rf_solar_all <- metrics[["generation.solar"]]
metrics_rf_wind_all <- metrics[["generation.wind.onshore"]]
metrics_rf_load_all <- metrics[["total.load.actual"]]

# Hard-coded reference values, laid out as (R2, RMSE, MAE) triplets in the
# order Solar, Eólica, Carga for each of KNN, SVM and XGBoost.
baseline_values <- c(
  # KNN
  0.292213, 1556.813, 1101.061,
  -0.9057994, 3953.569, 3243.733,
  -0.228126, 4468.826, 3584.862,
  # SVM
  0.503, 1304.703, 966.0549,
  -0.698, 3731.805, 2952.332,
  -0.334, 4657.628, 3913.831,
  # XGBoost
  0.988, 201.488, 137.3568,
  0.938134, 640.2372, 380.0813,
  0.9765706, 563.9765, 408.3985
)

# Random Forest triplets in the same (R2, RMSE, MAE) layout.
rf_values <- unlist(
  lapply(
    list(metrics_rf_solar_all, metrics_rf_wind_all, metrics_rf_load_all),
    function(m) c(m$R2, m$RMSE, m$MAE)
  )
)

metrics_comp_df_full <- data.frame(
  Algoritmo = rep(c("KNN", "SVM", "XGBoost", "Random Forest"), each = 9),
  Variable = rep(rep(c("Solar", "Eólica", "Carga"), each = 3), times = 4),
  Metrica = rep(c("R2", "RMSE", "MAE"), times = 12),
  Valor = c(baseline_values, rf_values)
)

# The zero reference line is drawn only in the R2 panel, so it is fed just
# the R2 rows.
r2_rows <- metrics_comp_df_full[metrics_comp_df_full$Metrica == "R2", ]

ggplot(
  data = metrics_comp_df_full,
  mapping = aes(x = Variable, y = Valor, fill = Algoritmo)
) +
  geom_col(position = "dodge") +
  facet_wrap(~Metrica, scales = "free_y") +
  geom_hline(
    data = r2_rows,
    mapping = aes(yintercept = 0),
    linetype = "dashed",
    color = "red"
  ) +
  labs(title = "Comparación de R², RMSE y MAE entre Algoritmos") +
  theme_minimal(base_size = 13)
