Instalación y carga de paquetes

if (!require("pacman")) install.packages("pacman")
pacman::p_load(
  tidyverse,
  lubridate,
  xgboost,
  ggplot2
)
## package 'xgboost' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\a4ama\AppData\Local\Temp\RtmpGA2kR7\downloaded_packages

Lectura y preparación de datos

# Download the energy dataset and prepare the modelling table:
#   1. parse the `time` column with ymd_hms(),
#   2. order the rows by that timestamp,
#   3. keep only the timestamp and the three modelled series,
#   4. drop every row that has a missing value in any kept column.
url <- "https://raw.githubusercontent.com/vneumannufprbr/TrabajosRStudio/main/energy_dataset.csv"

raw_energy <- read.csv(url, stringsAsFactors = FALSE)
parsed_energy <- mutate(raw_energy, time = ymd_hms(time))
ordered_energy <- arrange(parsed_energy, time)

data <- ordered_energy %>%
  select(time, generation.solar, generation.wind.onshore, total.load.actual) %>%
  na.omit()

Configuración de parámetros

# Columns of `data` that are modelled, one model per column.
targets <- c(
  "generation.solar",
  "generation.wind.onshore",
  "total.load.actual"
)

# Number of consecutive past observations used as predictors for one sample.
window_size <- 24

# Number of steps predicted beyond the end of the series.
forecast_horizon <- 24

# Number of trailing observations withheld from the training split.
test_size <- 48

Funciones auxiliares

#' Build a lagged-feature table from a univariate series
#'
#' Row `r` of the result holds the `window` consecutive values
#' `serie[r], ..., serie[r + window - 1]` as predictors and the value that
#' immediately follows them, `serie[r + window]`, as `target`.
#'
#' @param serie Vector of observations in chronological order.
#' @param window Number of lagged values per row; a single whole number >= 1.
#' @return A data frame with `length(serie) - window` rows, `window` predictor
#'   columns followed by a `target` column. When the series is not longer
#'   than `window` no complete sample exists and a zero-row data frame with
#'   the same columns is returned.
create_features <- function(serie, window) {
  stopifnot(
    is.numeric(window),
    length(window) == 1,
    !is.na(window),
    window >= 1,
    window == floor(window)
  )

  n <- length(serie)
  # Clamp at zero so short inputs give an empty table; `1:k`-style ranges
  # would count backwards here and index the wrong elements.
  n_rows <- max(n - window, 0)

  # idx[r, j] is the position in `serie` of the j-th lag of sample r.
  idx <- outer(seq_len(n_rows), seq_len(window) - 1, `+`)
  features <- matrix(serie[as.vector(idx)], nrow = n_rows, ncol = window)
  target <- serie[window + seq_len(n_rows)]

  # Arguments are passed unnamed on purpose: data.frame() derives the column
  # names from them, and callers rely on the final column being `target`.
  data.frame(features, target)
}

#' Compute R2, RMSE and MAE without failing on degenerate input
#'
#' @param actual Numeric vector of observed values.
#' @param predicted Numeric vector of predictions, same length as `actual`.
#' @return A one-row data frame with double columns `R2`, `RMSE` and `MAE`.
#'   All three are `NA` when the inputs are empty or differ in length. `R2`
#'   alone is `NA` when the total sum of squares of `actual` is zero or
#'   missing, because the ratio is undefined in that case.
safe_calculate_metrics <- function(actual, predicted) {
  if (length(actual) != length(predicted) || length(actual) == 0) {
    # NA_real_ keeps the column types identical to the normal return value.
    return(data.frame(R2 = NA_real_, RMSE = NA_real_, MAE = NA_real_))
  }

  residuals <- actual - predicted
  ss_res <- sum(residuals^2)
  ss_tot <- sum((actual - mean(actual))^2)

  # Scalar condition, so a plain if/else is used; the is.na() guard keeps
  # missing values from turning the comparison into an error.
  r_squared <- if (is.na(ss_tot) || ss_tot == 0) {
    NA_real_
  } else {
    1 - (ss_res / ss_tot)
  }

  data.frame(
    R2 = r_squared,
    RMSE = sqrt(mean(residuals^2)),
    MAE = mean(abs(residuals))
  )
}

Modelado y pronóstico

# For every target series: fit an XGBoost model on lagged values, score it
# on the held-out tail, refit on the whole series and forecast recursively.
#   results[[name]]  numeric vector with `forecast_horizon` future predictions
#   metrics[[name]]  one-row data frame with R2 / RMSE / MAE on the test split
results <- list()
metrics <- list()

# The hyper-parameters do not depend on the series, so they are built once.
params <- list(objective = "reg:squarederror", max_depth = 6, eta = 0.1,
               subsample = 0.8, colsample_bytree = 0.8, eval_metric = "rmse")

# Positions of the lag columns returned by create_features(); `target` is last.
feature_cols <- seq_len(window_size)

for (target_var in targets) {
  serie <- as.numeric(data[[target_var]])
  n <- length(serie)
  if (n <= test_size + window_size) {
    stop("Series '", target_var, "' has ", n, " observations; more than ",
         test_size + window_size, " are required.", call. = FALSE)
  }

  train_series <- serie[seq_len(n - test_size)]
  test_series <- tail(serie, test_size)

  train_data <- create_features(train_series, window_size)

  # Prepend the last `window_size` training observations as lag context so
  # that every one of the `test_size` held-out points becomes a target.
  # Building the windows from `test_series` alone would leave the first
  # `window_size` held-out points unscored. Only the targets come from the
  # test split; the extra context is already-observed history.
  test_context <- c(tail(train_series, window_size), test_series)
  test_data <- create_features(test_context, window_size)

  dtrain <- xgb.DMatrix(
    data = as.matrix(train_data[, feature_cols, drop = FALSE]),
    label = train_data$target
  )
  dtest <- xgb.DMatrix(
    data = as.matrix(test_data[, feature_cols, drop = FALSE]),
    label = test_data$target
  )

  # NOTE(review): the same held-out rows drive early stopping here and are
  # scored below, so the reported metrics are optimistic. A separate
  # validation split would remove that coupling.
  xgb_model <- xgb.train(params, dtrain, nrounds = 500,
                         watchlist = list(train = dtrain, test = dtest),
                         early_stopping_rounds = 20, verbose = 0)

  test_preds <- predict(
    xgb_model,
    as.matrix(test_data[, feature_cols, drop = FALSE])
  )
  metrics[[target_var]] <- safe_calculate_metrics(test_data$target, test_preds)

  # Refit on the complete series, reusing the number of boosting rounds
  # selected by early stopping.
  full_data <- create_features(serie, window_size)
  dfull <- xgb.DMatrix(
    data = as.matrix(full_data[, feature_cols, drop = FALSE]),
    label = full_data$target
  )

  xgb_full <- xgb.train(params, dfull, nrounds = xgb_model$best_iteration,
                        verbose = 0)

  # Recursive multi-step forecast: each prediction is appended to the window
  # and the oldest value is dropped before predicting the next step.
  last_window <- tail(serie, window_size)
  future_preds <- numeric(forecast_horizon)
  for (i in seq_len(forecast_horizon)) {
    current_input <- matrix(last_window, nrow = 1)
    future_preds[i] <- predict(xgb_full, current_input)
    last_window <- c(last_window[-1], future_preds[i])
  }

  results[[target_var]] <- future_preds
}

Resultados y visualización

# Stack the per-series metric rows into one table, replace the raw column
# names with display labels and tag every row with the algorithm name.
# Names without an entry in the lookup become NA.
target_labels <- c(
  "generation.solar" = "Solar",
  "generation.wind.onshore" = "Eólica",
  "total.load.actual" = "Carga"
)

stacked_metrics <- bind_rows(metrics, .id = "variable")

metrics_df <- mutate(
  stacked_metrics,
  variable = unname(target_labels[variable]),
  Algoritmo = "XGBoost"
)

metrics_df
##   variable        R2     RMSE      MAE Algoritmo
## 1    Solar 0.9804853 185.0957 145.4345   XGBoost
## 2   Eólica 0.9193023 448.4580 359.3936   XGBoost
## 3    Carga 0.9791759 411.9730 308.3706   XGBoost

Gráfico 1: Pronóstico por variable

# Timestamps for the forecast: hourly steps starting one hour after the
# last observation in `data`.
last_date <- data$time[length(data$time)]
first_forecast_time <- last_date + hours(1)
future_dates <- seq(first_forecast_time, by = "hour",
                    length.out = forecast_horizon)

# Display labels for the short column names used in the wide table.
series_labels <- c(solar = "Solar", wind = "Eólica", load = "Carga")

# One column per series first, then one row per (time, series) pair.
forecast_wide <- data.frame(
  time = future_dates,
  solar = results[["generation.solar"]],
  wind = results[["generation.wind.onshore"]],
  load = results[["total.load.actual"]]
)

forecast_df <- forecast_wide %>%
  pivot_longer(-time, names_to = "variable", values_to = "value") %>%
  mutate(variable = unname(series_labels[variable]))

# One panel per series with independent y scales; the legend is hidden
# because the panel titles already name each series.
ggplot(forecast_df, aes(x = time, y = value, color = variable)) +
  facet_wrap(~variable, scales = "free_y", ncol = 1) +
  geom_line(linewidth = 1) +
  labs(
    title = "Pronóstico a 1 día usando XGBoost",
    x = "Fecha",
    y = "Valor",
    color = "Variable"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Gráfico 2: Comparación de R²

# R2 per algorithm and series. The KNN and SVM figures are fixed values
# written into this script; the XGBoost figures are read from `metrics_df`
# and are taken in its row order, which must be Solar, Eólica, Carga.
series_names <- c("Solar", "Eólica", "Carga")
algorithm_names <- c("KNN", "SVM", "XGBoost")

r2_knn <- c(0.292, -0.906, -0.228)
r2_svm <- c(0.503, -0.698, -0.334)
r2_xgboost <- metrics_df$R2

r2_comparacion <- data.frame(
  Algoritmo = rep(algorithm_names, each = length(series_names)),
  Variable = rep(series_names, times = length(algorithm_names)),
  R2 = c(r2_knn, r2_svm, r2_xgboost)
)

# Grouped bars per series; the dashed red line marks R2 = 0.
ggplot(r2_comparacion, aes(x = Variable, y = R2, fill = Algoritmo)) +
  geom_col(position = "dodge") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Comparación de R² entre Algoritmos",
    y = "R²"
  ) +
  theme_minimal()

Gráfico 3: Comparación de todas las métricas

# Long-format XGBoost metrics: one row per (series, metric) pair.
# `metrics_df` names its series column `variable`, while the benchmark table
# below uses `Variable`. bind_rows() matches columns by name, so without the
# rename the XGBoost rows would get `Variable = NA` and be drawn under an
# "NA" category in the plot.
metricas_todas <- metrics_df %>%
  rename(Variable = variable) %>%
  pivot_longer(
    cols = c("R2", "RMSE", "MAE"),
    names_to = "Métrica",
    values_to = "Valor"
  )

# Fixed benchmark figures for KNN and SVM, laid out as
# algorithm > series > metric (R2, RMSE, MAE).
otros_algoritmos <- data.frame(
  Algoritmo = rep(c("KNN", "SVM"), each = 9),
  Variable = rep(c("Solar", "Eólica", "Carga"), each = 3, times = 2),
  Métrica = rep(c("R2", "RMSE", "MAE"), times = 6),
  Valor = c(
    0.292213, 1556.813, 1101.061,
    -0.9057994, 3953.569, 3243.733,
    -0.228126, 4468.826, 3584.862,
    0.503, 1304.703, 966.0549,
    -0.698, 3731.805, 2952.332,
    -0.334, 4657.628, 3913.831
  )
)

comparacion_metricas <- bind_rows(metricas_todas, otros_algoritmos)

# One panel per metric with independent y scales, bars grouped by algorithm.
ggplot(comparacion_metricas, aes(x = Variable, y = Valor, fill = Algoritmo)) +
  geom_col(position = "dodge") +
  facet_wrap(~Métrica, scales = "free_y") +
  labs(title = "Comparación de R², RMSE y MAE entre Algoritmos",
       y = "Valor de la Métrica", x = "Variable") +
  theme_minimal()