library(rpart.plot)
library(caret)
library(e1071)
library(corrplot)
library(ggplot2)
library(reshape2)
# Load the data set from the clipboard; the first row holds the column names.
data <- read.csv("clipboard", header = TRUE)

# Correct the misspelled column names so later code can use the right ones.
names(data)[names(data) == "severe_maleria"] <- "severe_malaria"
names(data)[names(data) == "headace"] <- "headache"
names(data)[names(data) == "prostraction"] <- "prostration"

# Quick look at the column types and value distributions.
str(data)
summary(data)

# Convert the target to a factor so the models treat it as a class label.
data$severe_malaria <- as.factor(data$severe_malaria)

# Min-max scale age to [0, 1]. na.rm = TRUE keeps a single missing age from
# turning the whole column into NA (missing ages themselves stay NA).
data$age <- (data$age - min(data$age, na.rm = TRUE)) /
  (max(data$age, na.rm = TRUE) - min(data$age, na.rm = TRUE))
## THE MEASURE OF CORRELATION
# Keep only the numeric columns; drop = FALSE keeps a data frame even when
# just one column qualifies.
data_num <- data[, vapply(data, is.numeric, logical(1)), drop = FALSE]

# Pairwise correlations computed on the rows without missing values.
corr_matrix <- cor(data_num, use = "complete.obs")

# Show the matrix
corr_matrix

# Draw the matrix as a colour-coded grid with the coefficients printed in
# each cell.
corrplot(corr_matrix,
         method = "color",
         type = "full",
         col = colorRampPalette(c("blue", "white", "red"))(200),
         addCoef.col = "black",
         number.cex = 0.5,
         tl.col = "black",
         tl.cex = 0.7,
         tl.srt = 45)
# Plot the CART tree.
# The model is fitted further down in this script, so only draw the tree when
# a fitted model already exists (e.g. on a re-run) instead of failing here.
if (exists("model_cart")) {
  rpart.plot(model_cart,
             type = 2,
             extra = 104,
             fallen.leaves = TRUE,
             main = "CART Tree for Severe Malaria Classification")
}
# Distribution of the Naive Bayes predictions, as percentages per class.
# Runs only when predictions are available.
if (exists("pred_nb")) {
  # Count how often each class was predicted
  table_pred <- table(pred_nb)
  # Convert the counts to percentages
  prob_pred <- prop.table(table_pred) * 100
  # Combine labels and percentages. The table is turned into a plain numeric
  # vector first; passing a table to data.frame() would expand it into two
  # columns (label and frequency) instead of a single Percentage column.
  data.frame(Complication = names(prob_pred),
             Percentage = as.numeric(round(prob_pred, 2)))
}
# Overall accuracy from the confusion matrix `cm_nb`: correctly classified
# cases (the diagonal) divided by all cases.
# NOTE(review): `cm_nb` is not created in the visible part of this script, so
# this only runs when it already exists in the session.
if (exists("cm_nb")) {
  accuracy <- sum(diag(cm_nb)) / sum(cm_nb)
  accuracy
}
# ---- CART evaluation on a single train/test split ----
# NOTE(review): a truncated copy of the repeated-split loop (an unclosed
# `for` with duplicated split/fit code) was pasted here and made the script
# unparseable. The complete loop appears further down in this script, so only
# the single-split evaluation is kept in this section.
set.seed(123)

# 70/30 split of the rows, partitioned on the target variable.
train_index <- createDataPartition(data$severe_malaria,
                                   p = 0.7,
                                   list = FALSE)
train_data <- data[train_index, ]
test_data <- data[-train_index, ]

# Classification tree limited to depth 4.
model_cart <- rpart(severe_malaria ~ .,
                    data = train_data,
                    method = "class",
                    control = rpart.control(maxdepth = 4))
pred_cart <- predict(model_cart, test_data, type = "class")

# Test rows predicted as class 1.
# NOTE(review): assumes the positive (severe) class is labelled 1 - confirm.
data_nb <- test_data[pred_cart == 1, ]

# Confusion matrix: rows are predictions, columns are the true classes.
cm <- table(Predicted = pred_cart, Actual = test_data$severe_malaria)
cm

# NOTE(review): the indexing assumes a 2x2 matrix whose second level is the
# positive class - confirm against the levels of severe_malaria.
TP <- cm[2, 2]
TN <- cm[1, 1]
FP <- cm[2, 1]
FN <- cm[1, 2]

precision <- TP / (TP + FP)
recall <- TP / (TP + FN)
f1 <- 2 * precision * recall / (precision + recall)

data.frame(Precision = precision, Recall = recall, F1_Score = f1)
## NAIVE BAYES (if there is enough data)
# Repeat the train/test split three times and record the accuracy of the CART
# model and of the follow-up Naive Bayes model for every attempt.
set.seed(123)
results <- data.frame(Attempt = 1:3,
                      Accuracy_CART = NA,
                      Accuracy_NB = NA)

for (i in seq_len(nrow(results))) {
  # Fresh split for this attempt
  train_index <- createDataPartition(data$severe_malaria,
                                     p = 0.7,
                                     list = FALSE)
  train_data <- data[train_index, ]
  test_data <- data[-train_index, ]

  # CART
  model_cart <- rpart(severe_malaria ~ .,
                      data = train_data,
                      method = "class",
                      control = rpart.control(maxdepth = 4))
  pred_cart <- predict(model_cart, test_data, type = "class")
  acc_cart <- mean(pred_cart == test_data$severe_malaria)

  # Naive Bayes on the test rows predicted as class 1, only when there are
  # enough of them.
  # NOTE(review): assumes the positive (severe) class is labelled 1 - confirm.
  data_nb <- test_data[pred_cart == 1, ]
  if (nrow(data_nb) > 2) {
    data_nb$complication <- as.factor(data_nb$complication)
    train_index_nb <- createDataPartition(data_nb$complication,
                                          p = 0.85,
                                          list = FALSE)
    train_nb <- data_nb[train_index_nb, ]
    test_nb <- data_nb[-train_index_nb, ]

    model_nb <- naiveBayes(complication ~ ., data = train_nb)
    pred_nb <- predict(model_nb, test_nb)

    # With so few rows the hold-out part can be empty; record NA rather than
    # the NaN that mean() returns for an empty comparison.
    if (nrow(test_nb) > 0) {
      acc_nb <- mean(pred_nb == test_nb$complication)
    } else {
      acc_nb <- NA
    }
  } else {
    acc_nb <- NA
  }

  results$Accuracy_CART[i] <- acc_cart
  results$Accuracy_NB[i] <- acc_nb
}

results