Instalación y carga de paquetes
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
tidyverse,
lubridate,
xgboost,
ggplot2
)
## package 'xgboost' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\a4ama\AppData\Local\Temp\RtmpGA2kR7\downloaded_packages
Lectura y preparación de datos
# Download the energy dataset, parse the timestamp column, sort the rows by
# time and keep only the timestamp plus the three series that are modelled.
url <- "https://raw.githubusercontent.com/vneumannufprbr/TrabajosRStudio/main/energy_dataset.csv"
data <- read.csv(url, stringsAsFactors = FALSE)
# `time` arrives as text; lubridate turns it into a date-time.
data <- mutate(data, time = ymd_hms(time))
data <- arrange(data, time)
data <- select(
  data,
  time, generation.solar, generation.wind.onshore, total.load.actual
)
# Drop every row that has a missing value in any of the kept columns.
data <- na.omit(data)
Configuración de parámetros
# Columns of `data` that get their own model and forecast.
targets <- c(
  "generation.solar",
  "generation.wind.onshore",
  "total.load.actual"
)
# Number of consecutive past observations used as predictors for one step.
window_size <- 24
# Number of trailing observations held out of training for evaluation.
test_size <- 48
# Number of future steps predicted recursively after the last observation.
forecast_horizon <- 24
Funciones auxiliares
#' Build a sliding-window table for one-step-ahead supervised learning.
#'
#' Row j holds the `window` consecutive values
#' `serie[j], ..., serie[j + window - 1]` as predictors and
#' `serie[j + window]` as the value to predict.
#'
#' @param serie Atomic vector of observations in time order.
#' @param window Number of consecutive past values used as predictors;
#'   a single number >= 1 and strictly smaller than `length(serie)`.
#' @return A data.frame with `length(serie) - window` rows: `window`
#'   predictor columns followed by a `target` column.
create_features <- function(serie, window) {
  n <- length(serie)
  if (length(window) != 1 || is.na(window) || window < 1) {
    stop("`window` must be a single number >= 1.", call. = FALSE)
  }
  # At least one complete (window, target) pair is required; without this
  # check the matrix allocation / column assignment below fails with an
  # unrelated, hard-to-read error.
  if (n <= window) {
    stop(
      "`serie` must be longer than `window` (length ", n,
      ", window ", window, ").",
      call. = FALSE
    )
  }
  n_rows <- n - window
  features <- matrix(NA, nrow = n_rows, ncol = window)
  for (i in seq_len(window)) {
    # Column i is the series shifted by i - 1 positions.
    features[, i] <- serie[i:(i + n_rows - 1)]
  }
  target <- serie[(window + 1):n]
  data.frame(features, target)
}
#' Compute R2, RMSE and MAE without failing on degenerate input.
#'
#' @param actual Numeric vector of observed values.
#' @param predicted Numeric vector of predictions, same length as `actual`.
#' @return A one-row data.frame with numeric columns `R2`, `RMSE` and `MAE`.
#'   All three are `NA` when the lengths differ or the input is empty; `R2`
#'   alone is `NA` when `actual` has zero variance (or contains `NA`).
safe_calculate_metrics <- function(actual, predicted) {
  if (length(actual) != length(predicted) || length(actual) == 0) {
    # NA_real_ keeps the columns numeric so every call returns the same types.
    return(data.frame(R2 = NA_real_, RMSE = NA_real_, MAE = NA_real_))
  }
  errors <- actual - predicted
  ss_res <- sum(errors^2)
  ss_tot <- sum((actual - mean(actual))^2)
  rmse <- sqrt(mean(errors^2))
  mae <- mean(abs(errors))
  # R2 is undefined for a constant series (division by zero); an NA total sum
  # of squares (missing values in `actual`) likewise yields NA.
  r_squared <- if (is.na(ss_tot) || ss_tot == 0) {
    NA_real_
  } else {
    1 - (ss_res / ss_tot)
  }
  data.frame(R2 = r_squared, RMSE = rmse, MAE = mae)
}
Modelado y pronóstico
# For every target series:
#   1. split it chronologically into train / validation / test blocks,
#   2. fit XGBoost on the train block, using the validation block only to
#      pick the number of boosting rounds (early stopping),
#   3. score the model on the untouched test block,
#   4. refit on the complete series with the selected number of rounds and
#      forecast `forecast_horizon` steps recursively.
# `metrics[[target]]` receives the test-set scores and `results[[target]]`
# the numeric vector of forecasts.
results <- list()
metrics <- list()

# The hyper-parameters do not depend on the target, so build them once.
params <- list(
  objective = "reg:squarederror", max_depth = 6, eta = 0.1,
  subsample = 0.8, colsample_bytree = 0.8, eval_metric = "rmse"
)

# Early stopping needs its own hold-out block. Stopping on the test block and
# then scoring on that same block would tune the model to the test data and
# make the reported metrics optimistic.
val_size <- test_size

# Every block must be longer than the window, otherwise it yields no
# (window, target) rows.
stopifnot(
  window_size >= 1,
  forecast_horizon >= 1,
  test_size > window_size,
  nrow(data) > test_size + val_size + window_size
)

for (target_var in targets) {
  serie <- as.numeric(data[[target_var]])
  n <- length(serie)
  train_end <- n - test_size - val_size
  val_end <- n - test_size

  train_series <- serie[seq_len(train_end)]
  val_series <- serie[(train_end + 1):val_end]
  test_series <- serie[(val_end + 1):n]

  # Each block is windowed on its own, so the first `window_size` values of
  # the validation and test blocks serve only as predictors: the metrics
  # cover the last `test_size - window_size` observations.
  train_data <- create_features(train_series, window_size)
  val_data <- create_features(val_series, window_size)
  test_data <- create_features(test_series, window_size)

  feature_cols <- seq_len(window_size)
  x_train <- as.matrix(train_data[, feature_cols, drop = FALSE])
  x_val <- as.matrix(val_data[, feature_cols, drop = FALSE])
  x_test <- as.matrix(test_data[, feature_cols, drop = FALSE])

  dtrain <- xgb.DMatrix(data = x_train, label = train_data$target)
  dval <- xgb.DMatrix(data = x_val, label = val_data$target)

  # The validation set is listed last so that it is the one early stopping
  # monitors.
  xgb_model <- xgb.train(
    params, dtrain,
    nrounds = 500,
    watchlist = list(train = dtrain, validation = dval),
    early_stopping_rounds = 20, verbose = 0
  )

  test_preds <- predict(xgb_model, x_test)
  metrics[[target_var]] <- safe_calculate_metrics(test_data$target, test_preds)

  # Final model: all available data, number of rounds chosen above.
  full_data <- create_features(serie, window_size)
  x_full <- as.matrix(full_data[, feature_cols, drop = FALSE])
  dfull <- xgb.DMatrix(data = x_full, label = full_data$target)
  xgb_full <- xgb.train(
    params, dfull,
    nrounds = xgb_model$best_iteration, verbose = 0
  )

  # Recursive forecast: each prediction is appended to the window and the
  # oldest value is dropped before predicting the next step.
  last_window <- tail(serie, window_size)
  future_preds <- numeric(forecast_horizon)
  for (i in seq_len(forecast_horizon)) {
    # Carry the training column names so the input lines up with the
    # features the model was fitted on.
    current_input <- matrix(
      last_window,
      nrow = 1,
      dimnames = list(NULL, colnames(x_full))
    )
    future_preds[i] <- predict(xgb_full, current_input)
    last_window <- c(last_window[-1], future_preds[i])
  }
  results[[target_var]] <- future_preds
}
Resultados y visualización
# Stack the per-target metric rows into one table; the list names become the
# `variable` column, which is then mapped to its display label (names without
# a label become NA) and tagged with the algorithm name.
metrics_df <- bind_rows(metrics, .id = "variable")
metrics_df <- mutate(
  metrics_df,
  variable = unname(
    c(
      "generation.solar" = "Solar",
      "generation.wind.onshore" = "Eólica",
      "total.load.actual" = "Carga"
    )[variable]
  ),
  Algoritmo = "XGBoost"
)
metrics_df
## variable R2 RMSE MAE Algoritmo
## 1 Solar 0.9804853 185.0957 145.4345 XGBoost
## 2 Eólica 0.9193023 448.4580 359.3936 XGBoost
## 3 Carga 0.9791759 411.9730 308.3706 XGBoost
Gráfico 1: Pronóstico por variable
# Timestamps for the forecast: one per hour, starting one hour after the last
# observed timestamp.
last_date <- tail(data$time, 1)
future_dates <- seq(
  last_date + hours(1),
  by = "hour",
  length.out = forecast_horizon
)

# One column per series, reshaped to long form (time, variable, value) so
# every series can be drawn in its own facet.
forecast_df <- data.frame(
  time = future_dates,
  solar = results$generation.solar,
  wind = results$generation.wind.onshore,
  load = results$total.load.actual
)
forecast_df <- pivot_longer(
  forecast_df,
  -time,
  names_to = "variable",
  values_to = "value"
)
# Replace the short column keys with the labels shown in the facet strips.
forecast_df <- mutate(
  forecast_df,
  variable = unname(
    c(solar = "Solar", wind = "Eólica", load = "Carga")[variable]
  )
)

ggplot(forecast_df, aes(x = time, y = value, color = variable)) +
  labs(
    title = "Pronóstico a 1 día usando XGBoost",
    x = "Fecha", y = "Valor", color = "Variable"
  ) +
  facet_wrap(~variable, scales = "free_y", ncol = 1) +
  geom_line(linewidth = 1) +
  theme_minimal() +
  # The facet strips already name each series, so the legend is redundant.
  theme(legend.position = "none")

Gráfico 2: Comparación de R²
# R2 per algorithm and series. The KNN and SVM figures are hard-coded here;
# only the XGBoost figures come from this script.
# NOTE(review): relies on `metrics_df` rows being ordered Solar, Eólica,
# Carga so that they line up with `Variable` - confirm if `targets` changes.
r2_comparacion <- data.frame(
  Algoritmo = rep(c("KNN", "SVM", "XGBoost"), each = 3),
  Variable = rep(c("Solar", "Eólica", "Carga"), times = 3),
  R2 = c(
    0.292, -0.906, -0.228,
    0.503, -0.698, -0.334,
    metrics_df$R2
  )
)

ggplot(r2_comparacion, aes(x = Variable, y = R2, fill = Algoritmo)) +
  labs(
    title = "Comparación de R² entre Algoritmos",
    y = "R²"
  ) +
  geom_col(position = "dodge") +
  # Dashed reference line at R2 = 0.
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  theme_minimal()

Gráfico 3: Comparación de todas las métricas
# Long table (Variable, Algoritmo, Métrica, Valor) with the XGBoost metrics.
# `metrics_df` names its series column `variable` (lower case) while the
# hard-coded table below uses `Variable`. bind_rows() matches columns by
# name, so without the rename the XGBoost rows would end up with
# `Variable = NA` and be plotted under an "NA" category.
metricas_todas <- metrics_df %>%
  rename(Variable = variable) %>%
  pivot_longer(
    cols = c("R2", "RMSE", "MAE"),
    names_to = "Métrica",
    values_to = "Valor"
  )

# Hard-coded metrics for the other two algorithms. `Valor` is laid out as one
# line per (algorithm, series) holding R2, RMSE, MAE: first the three series
# for KNN, then the three series for SVM.
otros_algoritmos <- data.frame(
  Algoritmo = rep(c("KNN", "SVM"), each = 9),
  Variable = rep(c("Solar", "Eólica", "Carga"), each = 3, times = 2),
  Métrica = rep(c("R2", "RMSE", "MAE"), times = 6),
  Valor = c(
    0.292213, 1556.813, 1101.061,
    -0.9057994, 3953.569, 3243.733,
    -0.228126, 4468.826, 3584.862,
    0.503, 1304.703, 966.0549,
    -0.698, 3731.805, 2952.332,
    -0.334, 4657.628, 3913.831
  )
)

comparacion_metricas <- bind_rows(metricas_todas, otros_algoritmos)

# One facet per metric with independent y scales, since R2 and the error
# metrics differ by orders of magnitude.
ggplot(comparacion_metricas, aes(x = Variable, y = Valor, fill = Algoritmo)) +
  geom_col(position = "dodge") +
  facet_wrap(~Métrica, scales = "free_y") +
  labs(
    title = "Comparación de R², RMSE y MAE entre Algoritmos",
    y = "Valor de la Métrica", x = "Variable"
  ) +
  theme_minimal()
