# Decision tree (rpart) on the GermanCredit data, evaluated over several
# 70/30 train/test splits, one split per seed. Each run records accuracy,
# precision, specificity, sensitivity and the model fitting time.
data(GermanCredit)
seeds <- c(123, 456, 789, 101112, 131415)

# One single-row data frame per run; they are bound together after the loop
# instead of growing `results_dt` with rbind() on every iteration.
runs_dt <- vector("list", length(seeds))

for (i in seq_along(seeds)) {
  # Seeding right before the partition makes each split reproducible
  set.seed(seeds[i])
  idx <- createDataPartition(GermanCredit$Class, p = 0.7, list = FALSE)
  train <- GermanCredit[idx, ]
  test <- GermanCredit[-idx, ]

  # Only the model fit is timed; prediction and scoring are excluded
  start_time <- Sys.time()
  model <- rpart(Class ~ ., data = train, method = "class")
  end_time <- Sys.time()

  pred <- predict(model, test, type = "class")
  cm <- confusionMatrix(pred, test$Class, positive = "Good")

  # `[[` drops the element name. With `[` the value stays a named vector and
  # data.frame() uses that name as the row name, which shows up in the final
  # table as the row labels "Accuracy", "Accuracy1", "Accuracy2", ...
  runs_dt[[i]] <- data.frame(
    Run = i,
    Seed = seeds[i],
    Accuracy = cm$overall[["Accuracy"]],
    # The confusion-matrix summary labels precision "Pos Pred Value"
    Precision = cm$byClass[["Pos Pred Value"]],
    Specificity = cm$byClass[["Specificity"]],
    Sensitivity = cm$byClass[["Sensitivity"]],
    Time = as.numeric(difftime(end_time, start_time, units = "secs"))
  )
}

results_dt <- do.call(rbind, runs_dt)
kable(results_dt, caption = "Resultados por corrida - Árbol de Decisión", digits = 3)
Resultados por corrida - Árbol de Decisión

|          | Run|   Seed| Accuracy| Precision| Specificity| Sensitivity|  Time|
|:---------|---:|------:|--------:|---------:|-----------:|-----------:|-----:|
|Accuracy  |   1|    123|    0.687|     0.746|       0.333|       0.838| 0.114|
|Accuracy1 |   2|    456|    0.720|     0.769|       0.400|       0.857| 0.112|
|Accuracy2 |   3|    789|    0.700|     0.744|       0.300|       0.871| 0.128|
|Accuracy3 |   4| 101112|    0.693|     0.759|       0.389|       0.824| 0.105|
|Accuracy4 |   5| 131415|    0.703|     0.769|       0.422|       0.824| 0.118|
# Average each metric over all decision-tree runs into a one-row summary.
# Run and Seed are identifiers, not metrics, so they are left out.
avg_dt <- as.data.frame(lapply(
  results_dt[c("Accuracy", "Precision", "Specificity", "Sensitivity", "Time")],
  mean
))
kable(avg_dt, caption = "Promedio de métricas - Árbol de Decisión", digits = 3)
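Promedio de métricas - Árbol de Decisión

| Accuracy| Precision| Specificity| Sensitivity|  Time|
|--------:|---------:|-----------:|-----------:|-----:|
|    0.701|     0.757|       0.369|       0.843| 0.115|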
## Regresión logística
# Logistic regression (binomial glm) on the GermanCredit data, evaluated on
# the same seeds as the decision tree. Each run records accuracy, precision,
# specificity, sensitivity and the model fitting time.

# One single-row data frame per run; they are bound together after the loop
# instead of growing `results_glm` with rbind() on every iteration.
runs_glm <- vector("list", length(seeds))

for (i in seq_along(seeds)) {
  # Seeding right before the partition makes each split reproducible
  set.seed(seeds[i])
  idx <- createDataPartition(GermanCredit$Class, p = 0.7, list = FALSE)
  train <- GermanCredit[idx, ]
  test <- GermanCredit[-idx, ]

  # Only the model fit is timed; prediction and scoring are excluded
  start_time <- Sys.time()
  model <- glm(Class ~ ., data = train, family = "binomial")
  end_time <- Sys.time()

  # NOTE(review): assumes the levels of Class are c("Bad", "Good"), so the
  # fitted probability is P(Class == "Good") - confirm against the data.
  prob <- predict(model, test, type = "response")
  pred <- ifelse(prob > 0.5, "Good", "Bad")
  # Fix both levels so the factor is well-formed even if one class is never
  # predicted
  pred <- factor(pred, levels = c("Bad", "Good"))
  cm <- confusionMatrix(pred, test$Class, positive = "Good")

  # `[[` drops the element name. With `[` the value stays a named vector and
  # data.frame() uses that name as the row name, which shows up in the final
  # table as the row labels "Accuracy", "Accuracy1", "Accuracy2", ...
  runs_glm[[i]] <- data.frame(
    Run = i,
    Seed = seeds[i],
    Accuracy = cm$overall[["Accuracy"]],
    # The confusion-matrix summary labels precision "Pos Pred Value"
    Precision = cm$byClass[["Pos Pred Value"]],
    Specificity = cm$byClass[["Specificity"]],
    Sensitivity = cm$byClass[["Sensitivity"]],
    Time = as.numeric(difftime(end_time, start_time, units = "secs"))
  )
}

results_glm <- do.call(rbind, runs_glm)
kable(results_glm, caption = "Resultados por corrida - Regresión Logística", digits = 3)
Resultados por corrida - Regresión Logística

|          | Run|   Seed| Accuracy| Precision| Specificity| Sensitivity|  Time|
|:---------|---:|------:|--------:|---------:|-----------:|-----------:|-----:|
|Accuracy  |   1|    123|    0.713|     0.782|       0.467|       0.819| 0.048|
|Accuracy1 |   2|    456|    0.740|     0.789|       0.467|       0.857| 0.087|
|Accuracy2 |   3|    789|    0.737|     0.810|       0.556|       0.814| 0.116|
|Accuracy3 |   4| 101112|    0.723|     0.777|       0.433|       0.848| 0.041|
|Accuracy4 |   5| 131415|    0.773|     0.814|       0.533|       0.876| 0.072|
# Average each metric over all logistic-regression runs into a one-row
# summary. Run and Seed are identifiers, not metrics, so they are left out.
avg_glm <- as.data.frame(lapply(
  results_glm[c("Accuracy", "Precision", "Specificity", "Sensitivity", "Time")],
  mean
))
kable(avg_glm, caption = "Promedio de métricas - Regresión Logística", digits = 3)
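Promedio de métricas - Regresión Logística

| Accuracy| Precision| Specificity| Sensitivity|  Time|
|--------:|---------:|-----------:|-----------:|-----:|
|    0.737|     0.795|       0.491|       0.843| 0.073|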