#Evaluación robusta del rendimiento de tu 
#modelo de regresión lineal utilizando 
#la validación cruzada con 10 pliegues.

# Librerías necesarias
library(readr)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rsample)
library(caret)
## Cargando paquete requerido: ggplot2
## Cargando paquete requerido: lattice
## 
## Adjuntando el paquete: 'caret'
## The following object is masked from 'package:rsample':
## 
##     calibration
library(ggplot2)

# Load the data set and convert it to a base data frame. Character columns
# become factors, and as.data.frame() makes the column names syntactic
# (e.g. "Sleep duration" -> "Sleep.duration").
Sleep <- read_csv("Sleep_Efficiency.csv") %>%
  unclass() %>%
  as.data.frame(stringsAsFactors = TRUE)
## Rows: 452 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (2): Gender, Smoking status
## dbl  (11): ID, Age, Sleep duration, Sleep efficiency, REM sleep percentage, ...
## dttm  (2): Bedtime, Wakeup time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Build the 10-fold cross-validation splits, stratified on sleep duration.
# The seed is fixed first so the fold assignment is reproducible.
set.seed(123)
vfold_x <- vfold_cv(data = Sleep, v = 10, strata = "Sleep.duration")

# Pre-allocate one row of metrics per fold. The row count is taken from
# `vfold_x` (one row per split) so it always matches the number of folds
# that were actually created instead of repeating the literal 10.
results <- data.frame(
  RMSE = numeric(nrow(vfold_x)),
  R2 = numeric(nrow(vfold_x)),
  MAE = numeric(nrow(vfold_x))
)

# Mean absolute error between predictions and observations.
#
# This definition comes after library(caret) and therefore masks caret's
# own MAE(); the signature mirrors caret::MAE(pred, obs, na.rm = FALSE) so
# both can be called the same way.
#
# Arguments:
#   pred   Numeric vector of predicted values.
#   obs    Numeric vector of observed values.
#   na.rm  If TRUE, missing absolute errors are dropped before averaging.
#          With the default FALSE any NA makes the result NA.
# Returns:
#   A single numeric value, mean(abs(pred - obs)).
MAE <- function(pred, obs, na.rm = FALSE) {
  mean(abs(pred - obs), na.rm = na.rm)
}

# Train and evaluate the model on every fold.
#
# For each split the linear model Sleep.duration ~ Sleep.efficiency is fitted
# on the analysis rows and scored on the held-out assessment rows. RMSE, R2
# (squared correlation between predictions and observations) and MAE are
# written to row i of `results`.

# `results` must hold exactly one row per fold; otherwise unused rows would
# keep their initial zeros and silently distort the averages.
stopifnot(nrow(results) == nrow(vfold_x))

for (i in seq_len(nrow(vfold_x))) {
  split_i <- vfold_x$splits[[i]]

  # Use the rsample accessors instead of rebuilding the hold-out rows by hand
  # from the split's `in_id` field.
  Sleep_train <- analysis(split_i)
  Sleep_test <- assessment(split_i)

  model <- lm(Sleep.duration ~ Sleep.efficiency, data = Sleep_train)

  pred <- predict(model, newdata = Sleep_test)
  obs <- Sleep_test$Sleep.duration

  results$RMSE[i] <- RMSE(pred, obs)
  results$R2[i] <- cor(pred, obs)^2
  results$MAE[i] <- MAE(pred, obs)
}

# Show the per-fold metrics (one row per fold); the lines starting with `##`
# are the recorded console output.
results
##         RMSE          R2       MAE
## 1  0.9030784 0.008467377 0.6774206
## 2  0.9529268 0.009710587 0.7292493
## 3  0.9894874 0.004822418 0.7127898
## 4  0.8855210 0.010647322 0.6816742
## 5  0.8392129 0.007670700 0.6184981
## 6  0.7678726 0.004012230 0.5920409
## 7  0.8740886 0.008219702 0.6498083
## 8  0.7925972 0.023766378 0.6034066
## 9  0.8316208 0.015193491 0.6390870
## 10 0.7876147 0.000868994 0.6162722
# Average each metric over the folds.
mean_rmse <- mean(results[["RMSE"]])
mean_r2 <- mean(results[["R2"]])
mean_mae <- mean(results[["MAE"]])

# Visualise the cross-validation results: one bar chart per metric.

# Bar chart of a single metric across folds.
#
# Arguments:
#   results  Data frame with one row per fold and one column per metric.
#   metric   Name of the column to plot; also used in the title and as the
#            y-axis label.
#   fill     Fill colour of the bars.
# Returns:
#   The ggplot object (drawn when auto-printed at top level).
plot_fold_metric <- function(results, metric, fill) {
  ggplot(
    results,
    # Folds are numbered from the rows of `results` rather than a literal 1:10.
    aes(x = factor(seq_len(nrow(results))), y = .data[[metric]])
  ) +
    geom_col(fill = fill, alpha = 0.6) +
    labs(
      title = paste(metric, "en cada pliegue"),
      x = "Pliegue",
      y = metric
    ) +
    theme_minimal()
}

plot_fold_metric(results, "RMSE", fill = "blue")

plot_fold_metric(results, "R2", fill = "green")

plot_fold_metric(results, "MAE", fill = "red")

# Print the metrics averaged over all folds; the lines starting with `##`
# are the recorded console output of the preceding cat() call.
cat("Resultados Promedio de la Validación Cruzada:\n")
## Resultados Promedio de la Validación Cruzada:
cat("RMSE Promedio:", mean_rmse, "\n")
## RMSE Promedio: 0.862402
cat("R2 Promedio:", mean_r2, "\n")
## R2 Promedio: 0.00933792
cat("MAE Promedio:", mean_mae, "\n")
## MAE Promedio: 0.6520247