# Class balance of the target before any preprocessing.
table(data$y)
##
## 0 1
## 36548 4640

# Encoding: convert every character column to a factor and drop `duration`,
# both in a single pipeline.
library(tidyverse)
data <- data %>%
  mutate(across(where(is.character), as.factor)) %>%
  select(-duration)

# Duplicates: keep only the first occurrence of each row. This runs after
# `duration` has been dropped, so rows that differed only in `duration` are
# treated as duplicates as well.
data <- data[which(!duplicated(data)), ]
# Split data: 80% of the rows go to `train`, the remaining rows to `test`.
library(caret)
set.seed(123) # fixed seed so the partition can be reproduced
# NOTE(review): createDataPartition() samples within classes only when `y` is
# a factor. The counts printed below are close to, but not exactly, 80% of
# each class, so `y` may still be numeric here -- confirm whether a strictly
# stratified split was intended.
trainIndex <- createDataPartition(y = data$y, p = 0.8, list = FALSE)
train <- data[trainIndex, ]
test <- data[-trainIndex, ]

# Check: class counts in each partition
table(train$y)
##
## 0 1
## 27848 3676
table(test$y)
##
## 0 1
## 6958 922
# Logistic model: regress `y` on every other column of `train`.
model_log <- glm(
  formula = y ~ .,
  family = binomial(link = "logit"),
  data = train
)

# Prediction + evaluation: score the test set, then label a row "1" when its
# predicted probability exceeds 0.5 and "0" otherwise.
pred_log_prob <- predict(model_log, newdata = test, type = "response")
pred_log <- factor(
  ifelse(pred_log_prob > 0.5, "1", "0"),
  levels = c("0", "1")
)

# Give the reference labels the same level set as the predictions.
test$y <- factor(test$y, levels = c("0", "1"))

# No `positive` argument is passed; the printed result reports "0" as the
# positive class, so Sensitivity refers to class "0".
conf_log <- confusionMatrix(data = pred_log, reference = test$y)
conf_log
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 6846 688
## 1 112 234
##
## Accuracy : 0.8985
## 95% CI : (0.8916, 0.9051)
## No Information Rate : 0.883
## P-Value [Acc > NIR] : 7.13e-06
##
## Kappa : 0.3261
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9839
## Specificity : 0.2538
## Pos Pred Value : 0.9087
## Neg Pred Value : 0.6763
## Prevalence : 0.8830
## Detection Rate : 0.8688
## Detection Prevalence : 0.9561
## Balanced Accuracy : 0.6188
##
## 'Positive' Class : 0
##
# Random forest: classification model evaluated on the same train/test split.
# randomForest() comes from the randomForest package, which is not attached
# anywhere in the visible part of this script, so attach it here (this is a
# no-op if it is already attached).
library(randomForest)

# randomForest() fits a classifier only when the response is a factor. Use the
# same explicit level set as the logistic-regression evaluation so that the
# train and test labels always share identical levels.
train$y <- factor(train$y, levels = c("0", "1"))
test$y <- factor(test$y, levels = c("0", "1"))

# NOTE(review): no seed is set in this step, so the fitted forest depends on
# the RNG state left by earlier code. Re-running this step on its own will not
# reproduce the recorded numbers exactly -- consider a dedicated set.seed().
model_rf <- randomForest(y ~ ., data = train)
pred_rf <- predict(model_rf, test)

# As in the logistic evaluation, no `positive` class is specified.
conf_rf <- confusionMatrix(pred_rf, test$y)
conf_rf
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 6775 647
## 1 183 275
##
## Accuracy : 0.8947
## 95% CI : (0.8877, 0.9014)
## No Information Rate : 0.883
## P-Value [Acc > NIR] : 0.0005762
##
## Kappa : 0.3479
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9737
## Specificity : 0.2983
## Pos Pred Value : 0.9128
## Neg Pred Value : 0.6004
## Prevalence : 0.8830
## Detection Rate : 0.8598
## Detection Prevalence : 0.9419
## Balanced Accuracy : 0.6360
##
## 'Positive' Class : 0
##
# Overall test-set accuracy of the logistic-regression model
conf_log[["overall"]]["Accuracy"]
## Accuracy
## 0.8984772
# Overall test-set accuracy of the random-forest model
conf_rf[["overall"]]["Accuracy"]
## Accuracy
## 0.8946701