# 📦 Instalación y carga de paquetes ----

if (!require("pacman")) install.packages("pacman")
pacman::p_load(
  tidyverse,
  lubridate,
  caret,
  randomForest,
  ggplot2
)

# 🔍 Lectura y preparación de datos ----

# Series to model and the sizes (in observations) used for windowing,
# hold-out evaluation and forecasting.
targets <- c("generation.solar", "generation.wind.onshore", "total.load.actual")
window_size <- 24
test_size <- 48
forecast_horizon <- 24

# Download the dataset, keep only the timestamp plus the target series,
# parse the timestamp, sort chronologically and drop incomplete rows.
url <- "https://raw.githubusercontent.com/vneumannufprbr/TrabajosRStudio/main/energy_dataset.csv"
data <- url %>%
  read.csv(stringsAsFactors = FALSE) %>%
  select(time, all_of(targets)) %>%
  mutate(time = ymd_hms(time)) %>%
  arrange(time) %>%
  na.omit()

# 🛠️ Funciones auxiliares ----

#' Build a sliding-window (lagged) design matrix from a univariate series
#'
#' Row `j` of the result holds `serie[j], ..., serie[j + window - 1]` in the
#' columns `X1..X<window>` and `serie[j + window]` in the column `target`.
#'
#' @param serie A vector of observations in chronological order.
#' @param window Number of consecutive observations used as predictors
#'   (a single number >= 1).
#' @return A data frame with `length(serie) - window` rows and the columns
#'   `X1..X<window>` followed by `target`.
create_features <- function(serie, window) {
  stopifnot(is.numeric(window), length(window) == 1L, window >= 1)

  n <- length(serie)
  if (n <= window) {
    # At least one observation must remain after the first full window,
    # otherwise there is no target to pair with the predictors.
    stop(
      "`serie` must be longer than `window` (length ", n,
      ", window ", window, ").",
      call. = FALSE
    )
  }

  n_rows <- n - window
  # idx[j, k] = j + k - 1: position of the k-th predictor of the j-th sample.
  idx <- outer(seq_len(n_rows), seq_len(window) - 1L, `+`)
  features <- matrix(serie[idx], nrow = n_rows, ncol = window)
  colnames(features) <- paste0("X", seq_len(window))

  data.frame(features, target = serie[(window + 1):n])
}

#' Compute R2, RMSE and MAE while ignoring non-finite pairs
#'
#' Pairs in which either value is `NA`, `NaN` or infinite are dropped before
#' the metrics are computed.
#'
#' @param actual Numeric vector of observed values.
#' @param predicted Numeric vector of predictions, same length as `actual`.
#' @return A one-row data frame with the numeric columns `R2`, `RMSE` and
#'   `MAE`. `R2` is `NA` when the retained observed values have (numerically)
#'   zero variance; all three are `NA` when no finite pair remains.
safe_calculate_metrics <- function(actual, predicted) {
  if (length(actual) != length(predicted)) {
    # Recycling would misalign the validity mask and the paired values.
    stop("`actual` and `predicted` must have the same length.", call. = FALSE)
  }

  valid <- is.finite(actual) & is.finite(predicted)
  actual <- actual[valid]
  predicted <- predicted[valid]

  if (length(actual) == 0L) {
    return(data.frame(R2 = NA_real_, RMSE = NA_real_, MAE = NA_real_))
  }

  errors <- actual - predicted
  ss_res <- sum(errors^2)
  ss_tot <- sum((actual - mean(actual))^2)

  # R2 is undefined for a constant observed series (division by ~0).
  r2 <- if (ss_tot < .Machine$double.eps) NA_real_ else 1 - ss_res / ss_tot

  data.frame(
    R2 = r2,
    RMSE = sqrt(mean(errors^2)),
    MAE = mean(abs(errors))
  )
}

# 🧠 Entrenamiento con Random Forest ----

# For every target series: tune a Random Forest on lagged features, score it
# on the hold-out tail, refit on the full series and forecast recursively.
# `results` maps each target name to its forecast vector and `metrics` maps
# each target name to a one-row data frame with R2, RMSE and MAE.
results <- list()
metrics <- list()

# NOTE(review): "cv" is k-fold cross-validation; folds presumably ignore the
# temporal order of the samples. Consider method = "timeslice" - confirm.
train_control <- trainControl(method = "cv", number = 5)

# The hold-out tail must contain at least one full window plus one target,
# otherwise create_features() cannot build a single test sample.
stopifnot(test_size > window_size, forecast_horizon >= 0)

# Predictor names produced by create_features(); reused for forecast inputs.
feature_names <- paste0("X", seq_len(window_size))

for (target_var in targets) {
  serie <- as.numeric(data[[target_var]])
  n <- length(serie)
  train_series <- serie[1:(n - test_size)]
  test_series <- serie[(n - test_size + 1):n]

  train_data <- create_features(train_series, window_size)
  # NOTE(review): the test windows are built from test_series alone, so the
  # first window_size hold-out points only serve as lag context and just
  # test_size - window_size points are actually scored.
  test_data <- create_features(test_series, window_size)

  # Seed once per target so tuning and refit are reproducible.
  set.seed(1912)
  rf_model <- train(
    target ~ ., data = train_data,
    method = "rf",
    trControl = train_control,
    ntree = 100
  )

  test_preds <- predict(rf_model, newdata = test_data %>% select(-target))
  metrics[[target_var]] <- safe_calculate_metrics(test_data$target, test_preds)

  # Future forecast: refit on the whole series with the tuned parameters.
  full_data <- create_features(serie, window_size)
  rf_full <- train(
    target ~ ., data = full_data,
    method = "rf",
    trControl = trainControl(method = "none"),
    tuneGrid = rf_model$bestTune,
    ntree = 100
  )

  # Recursive forecast: each prediction is appended to the window and the
  # oldest value is dropped, so later steps are fed earlier predictions.
  last_window <- tail(serie, window_size)
  future_preds <- numeric(forecast_horizon)

  for (i in seq_len(forecast_horizon)) {
    input <- as.data.frame(matrix(last_window, nrow = 1))
    colnames(input) <- feature_names
    pred <- unname(predict(rf_full, newdata = input))
    future_preds[i] <- pred
    last_window <- c(last_window[-1], pred)
  }
  results[[target_var]] <- future_preds
}

# 📊 Gráfico 1: Pronóstico de 24 horas ----

# Hourly timestamps for the forecast, starting one hour after the last
# observation in the prepared data.
last_date <- data$time[length(data$time)]
future_dates <- seq(
  last_date + hours(1),
  by = "hour",
  length.out = forecast_horizon
)

# Display label for each forecasted series, in the order they are stacked.
series_labels <- c(
  generation.solar = "Solar",
  generation.wind.onshore = "Eólica",
  total.load.actual = "Carga"
)

# Long-format table: one row per (timestamp, series) pair.
forecast_df <- bind_rows(
  lapply(names(series_labels), function(series) {
    data.frame(
      time = future_dates,
      variable = series_labels[[series]],
      value = results[[series]]
    )
  })
)

# One panel per series, each with its own y scale; the colour legend is
# redundant with the panel titles and therefore hidden.
ggplot(forecast_df, aes(x = time, y = value, color = variable)) +
  geom_line(linewidth = 1) +
  facet_wrap(vars(variable), scales = "free_y", ncol = 1) +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(
    title = "Pronóstico de 24 horas con Random Forest",
    x = "Fecha",
    y = "Valor"
  )

# 📊 Gráfico 2: Comparación de R2 ----

# Hold-out R2 of the Random Forest for each series.
metrics_rf_solar <- metrics[["generation.solar"]][["R2"]]
metrics_rf_wind <- metrics[["generation.wind.onshore"]][["R2"]]
metrics_rf_load <- metrics[["total.load.actual"]][["R2"]]

# R2 per algorithm, ordered Solar / Eólica / Carga. The values of the first
# three algorithms are hard-coded; only Random Forest is computed here.
r2_by_algorithm <- list(
  "KNN" = c(0.292213, -0.9057994, -0.228126),
  "SVM" = c(0.503, -0.698, -0.334),
  "XGBoost" = c(0.988, 0.938134, 0.9765706),
  "Random Forest" = c(metrics_rf_solar, metrics_rf_wind, metrics_rf_load)
)

metrics_comp_df <- data.frame(
  Algoritmo = rep(names(r2_by_algorithm), each = 3),
  Variable = rep(c("Solar", "Eólica", "Carga"), times = 4),
  R2 = unlist(r2_by_algorithm, use.names = FALSE)
)

# Grouped bars per series; the dashed red line marks R2 = 0.
ggplot(metrics_comp_df, aes(x = Variable, y = R2, fill = Algoritmo)) +
  geom_col(position = "dodge") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  theme_minimal() +
  labs(title = "Comparación de R² entre Algoritmos", y = "R²")

# 📊 Gráfico 3: Comparación completa (R², RMSE, MAE) ----

# Full hold-out metric rows (R2, RMSE, MAE) of the Random Forest per series.
metrics_rf_solar_all <- metrics[["generation.solar"]]
metrics_rf_wind_all <- metrics[["generation.wind.onshore"]]
metrics_rf_load_all <- metrics[["total.load.actual"]]

# Order in which the three metrics are laid out for every series.
metric_names <- c("R2", "RMSE", "MAE")

# Long-format table: algorithm (slowest) x series x metric (fastest).
# Each line of `Valor` is one (R2, RMSE, MAE) triplet; within an algorithm
# the triplets are ordered Solar / Eólica / Carga. The first three
# algorithms are hard-coded; Random Forest is taken from `metrics`.
metrics_comp_df_full <- data.frame(
  Algoritmo = rep(c("KNN", "SVM", "XGBoost", "Random Forest"), each = 9),
  Variable = rep(rep(c("Solar", "Eólica", "Carga"), each = 3), times = 4),
  Metrica = rep(metric_names, times = 12),
  Valor = c(
    # KNN
    0.292213, 1556.813, 1101.061,
    -0.9057994, 3953.569, 3243.733,
    -0.228126, 4468.826, 3584.862,
    # SVM
    0.503, 1304.703, 966.0549,
    -0.698, 3731.805, 2952.332,
    -0.334, 4657.628, 3913.831,
    # XGBoost
    0.988, 201.488, 137.3568,
    0.938134, 640.2372, 380.0813,
    0.9765706, 563.9765, 408.3985,
    # Random Forest
    unlist(metrics_rf_solar_all[metric_names], use.names = FALSE),
    unlist(metrics_rf_wind_all[metric_names], use.names = FALSE),
    unlist(metrics_rf_load_all[metric_names], use.names = FALSE)
  )
)

# One panel per metric with independent y scales; the dashed zero line is
# drawn only in the R2 panel because its data is restricted to R2 rows.
r2_rows <- metrics_comp_df_full[metrics_comp_df_full$Metrica == "R2", ]

ggplot(metrics_comp_df_full, aes(x = Variable, y = Valor, fill = Algoritmo)) +
  geom_col(position = "dodge") +
  facet_wrap(vars(Metrica), scales = "free_y") +
  geom_hline(
    data = r2_rows,
    aes(yintercept = 0),
    linetype = "dashed",
    color = "red"
  ) +
  theme_minimal(base_size = 13) +
  labs(title = "Comparación de R², RMSE y MAE entre Algoritmos")