#Evaluación robusta del rendimiento de tu
#modelo de regresión lineal utilizando
#la validación cruzada con 10 pliegues.
# Librerías necesarias
library(readr)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rsample)
library(caret)
## Cargando paquete requerido: ggplot2
## Cargando paquete requerido: lattice
##
## Adjuntando el paquete: 'caret'
## The following object is masked from 'package:rsample':
##
## calibration
library(ggplot2)
# Load the data set and turn it into a base data.frame in a single pipeline.
# unclass() drops the tibble class so as.data.frame() rebuilds the columns:
# character columns become factors (stringsAsFactors = TRUE) and the column
# names are made syntactic (e.g. `Sleep duration` -> `Sleep.duration`).
Sleep <- read_csv("Sleep_Efficiency.csv") %>%
  unclass() %>%
  as.data.frame(stringsAsFactors = TRUE)
## Rows: 452 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gender, Smoking status
## dbl (11): ID, Age, Sleep duration, Sleep efficiency, REM sleep percentage, ...
## dttm (2): Bedtime, Wakeup time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# V-fold cross-validation: 10 folds, stratified on the response variable.
set.seed(123) # fixed seed so the fold assignment is reproducible
vfold_x <- vfold_cv(
  Sleep,
  v = 10,
  strata = "Sleep.duration"
)
# Preallocate one row of metrics per fold. The size is taken from the
# resampling object so it cannot drift from `v` above.
results <- data.frame(
  RMSE = numeric(nrow(vfold_x)),
  R2 = numeric(nrow(vfold_x)),
  MAE = numeric(nrow(vfold_x))
)
# Mean absolute error between predictions and observations.
#
# This definition masks caret's MAE(), so it keeps the same argument list
# (pred, obs, na.rm = FALSE) to stay a drop-in replacement.
#
# Arguments:
#   pred   numeric vector of predicted values.
#   obs    numeric vector of observed values.
#   na.rm  if TRUE, pairs whose absolute error is NA are dropped before
#          averaging; the default FALSE propagates NA as before.
# Returns:
#   A single numeric value: mean(|pred - obs|).
MAE <- function(pred, obs, na.rm = FALSE) {
  mean(abs(pred - obs), na.rm = na.rm)
}
# Train and evaluate the linear model on every fold.
# The loop length comes from the resampling object instead of a literal 10,
# and the train/test rows are obtained with rsample's public accessors
# rather than by reading the split's internal `in_id` field.
for (i in seq_along(vfold_x$splits)) {
  fold_split <- vfold_x$splits[[i]]
  Sleep_train <- analysis(fold_split) # rows used to fit the model
  Sleep_test <- assessment(fold_split) # held-out rows of this fold

  model <- lm(Sleep.duration ~ Sleep.efficiency, data = Sleep_train)
  pred <- predict(model, Sleep_test)

  # RMSE() is not defined in this script; presumably caret's (loaded above).
  results$RMSE[i] <- RMSE(pred, Sleep_test$Sleep.duration)
  # R2 here is the squared correlation between predictions and observations.
  results$R2[i] <- cor(pred, Sleep_test$Sleep.duration)^2
  results$MAE[i] <- MAE(pred, Sleep_test$Sleep.duration)
}
# Show the per-fold metrics.
print(results)
## RMSE R2 MAE
## 1 0.9030784 0.008467377 0.6774206
## 2 0.9529268 0.009710587 0.7292493
## 3 0.9894874 0.004822418 0.7127898
## 4 0.8855210 0.010647322 0.6816742
## 5 0.8392129 0.007670700 0.6184981
## 6 0.7678726 0.004012230 0.5920409
## 7 0.8740886 0.008219702 0.6498083
## 8 0.7925972 0.023766378 0.6034066
## 9 0.8316208 0.015193491 0.6390870
## 10 0.7876147 0.000868994 0.6162722
# Average each metric over the folds.
mean_rmse <- mean(results[["RMSE"]])
mean_r2 <- mean(results[["R2"]])
mean_mae <- mean(results[["MAE"]])
# Visualise the cross-validation results: one bar chart per metric.

# Bar chart of one metric against the fold number.
#
# Arguments:
#   data    data frame with one row per fold (here `results`).
#   metric  name of the column to plot, as a string; also used for the
#           y-axis label and the plot title.
#   fill    bar fill colour.
# Returns:
#   A ggplot object; it is drawn when auto-printed at top level.
plot_metric_by_fold <- function(data, metric, fill) {
  # Fold labels follow the number of rows, not a hard-coded 1:10.
  data$fold <- factor(seq_len(nrow(data)))
  ggplot(data, aes(x = fold, y = .data[[metric]])) +
    geom_col(fill = fill, alpha = 0.6) + # same as geom_bar(stat = "identity")
    labs(
      title = paste(metric, "en cada pliegue"),
      x = "Pliegue",
      y = metric
    ) +
    theme_minimal()
}

plot_metric_by_fold(results, "RMSE", "blue")

plot_metric_by_fold(results, "R2", "green")

plot_metric_by_fold(results, "MAE", "red")

# Report the fold-averaged metrics, one line per metric.
cat("Resultados Promedio de la Validación Cruzada:\n")
## Resultados Promedio de la Validación Cruzada:
# local() keeps the lookup table and loop variable out of the global
# environment.
local({
  fold_averages <- c(
    "RMSE Promedio:" = mean_rmse,
    "R2 Promedio:" = mean_r2,
    "MAE Promedio:" = mean_mae
  )
  for (label in names(fold_averages)) {
    cat(label, fold_averages[[label]], "\n")
  }
})
## RMSE Promedio: 0.862402
## R2 Promedio: 0.00933792
## MAE Promedio: 0.6520247