# Libraries ----

# Install only the packages that are missing, so re-running the script does
# not reinstall everything on each execution.
required_pkgs <- c("caTools", "ROCR", "rpart", "rpart.plot")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

library(caTools)    # sample.split()
library(ROCR)       # prediction(), performance()
library(rpart)      # rpart(), prune(), printcp()
library(rpart.plot) # rpart.plot()

# Dataset exploration ----

# Read the data from the working directory and print its structure and
# per-column summary statistics.
heart <- read.csv("framingham.csv")
str(heart)
summary(heart)

# Missing-value check ----

# Report the number of missing values per column, then keep only the rows
# that are complete in every column.
colSums(is.na(heart))
heart <- na.omit(heart)

# Outcome as factor for CART classification ----

# Recode the 0/1 outcome as a two-level factor; "No" is the first
# (reference) level and "Yes" the second.
heart$TenYearCHD <- factor(
  heart$TenYearCHD,
  levels = c(0, 1),
  labels = c("No", "Yes")
)

# Train / test split (65% / 35%) ----

# Fix the RNG seed so the split is reproducible.
set.seed(937)
split <- sample.split(heart$TenYearCHD, SplitRatio = 0.65)
train <- subset(heart, split == TRUE)
test <- subset(heart, split == FALSE)

# Sanity checks: partition sizes and outcome counts in each partition.
nrow(train)
nrow(test)
table(train$TenYearCHD)
table(test$TenYearCHD)

# Logistic regression model ----

# Fit on all predictors in the training set, then predict on the test set.
# type = "response" returns probabilities rather than log-odds.
log_model <- glm(TenYearCHD ~ ., data = train, family = binomial())
pred_log <- predict(log_model, newdata = test, type = "response")

# CART model (decision tree) ----

# Grow a classification tree with a small complexity parameter and 10-fold
# cross-validation (xval = 10); the cross-validated error in the cp table is
# used for pruning.
tree_model <- rpart(
  TenYearCHD ~ .,
  data = train,
  method = "class",
  control = rpart.control(cp = 0.001, xval = 10)
)

# Complexity check ----

printcp(tree_model)

# Pick the cp value with the lowest cross-validated error and prune the
# tree at that value.
cp_table <- tree_model$cptable
best_cp <- cp_table[which.min(cp_table[, "xerror"]), "CP"]
tree_pruned <- prune(tree_model, cp = best_cp)

# Tree plot ----

# Draw the pruned tree.
rpart.plot(tree_pruned, type = 2, extra = 104, main = "CART (Pruned)")

# CART predictions for class "Yes" ----

# type = "prob" returns one column per class; keep the "Yes" column as the
# score for the positive class.
pred_tree <- predict(tree_pruned, newdata = test, type = "prob")[, "Yes"]

# ROC + AUC for CART ----

# Numeric 0/1 test-set labels (1 = "Yes"). Defined here so this section runs
# on its own; the name was previously used without ever being assigned.
y_num <- as.integer(test$TenYearCHD == "Yes")

pred_rocr_tree <- prediction(pred_tree, y_num)
perf_roc_tree <- performance(pred_rocr_tree, "tpr", "fpr")
plot(perf_roc_tree, main = "ROC – CART (Pruned)")

# The AUC is read from the y.values slot of the performance object.
auc_tree <- performance(pred_rocr_tree, "auc")
auc_tree_val <- as.numeric(auc_tree@y.values[[1]])
auc_tree_val

# ROC + AUC for logistic regression (comparison) ----

# Numeric 0/1 test-set labels (1 = "Yes"). Defined here so this section runs
# on its own; the name was previously used without ever being assigned.
y_num <- as.integer(test$TenYearCHD == "Yes")

pred_rocr_log <- prediction(pred_log, y_num)
perf_roc_log <- performance(pred_rocr_log, "tpr", "fpr")
plot(perf_roc_log, main = "ROC – Logistic Regression")

# The AUC is read from the y.values slot of the performance object.
auc_log <- performance(pred_rocr_log, "auc")
auc_log_val <- as.numeric(auc_log@y.values[[1]])
auc_log_val

# AUC table ----

# Side-by-side comparison of the two models' test-set AUC.
data.frame(
  Model = c("Logistic Regression", "CART (Pruned)"),
  AUC = c(auc_log_val, auc_tree_val)
)