# Library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(DT)
# Import the data
data = read.csv("data_uas.csv")
data$Purchase = factor(data$Purchase)
data$Purchase <- relevel(data$Purchase, ref = "1")
data %>% datatable(caption = "Table 1. Dataset")
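# Optional sketch (output omitted): a quick structural check of the imported data,
# confirming the column types and that "1" is now the reference level of Purchase
# after relevel().
glimpse(data)
levels(data$Purchase)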
# Split the dataset into training and validation sets
set.seed(123)
trainIndex <- createDataPartition(data$Purchase, p = 0.7, list = FALSE)
trainData <- data[trainIndex, ]
validationData <- data[-trainIndex, ]
trainData %>%
datatable(caption = "Tabel 2. Dataset training")
validationData %>%
datatable(caption = "Tabel 3. Dataset validasi")
# Model evaluation
# 1. Logistic Regression
logistic_model <- train(Purchase ~ Age + Income + Education_Level + Marital_Status + Job_Experience,
data = trainData, method = "glm", family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
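# The repeated "fitted probabilities numerically 0 or 1 occurred" warnings usually
# point to (quasi-)complete separation: in some bootstrap resamples a combination of
# predictors splits the two classes perfectly. A rough check (sketch, output omitted)
# is to look at the fitted coefficients; extreme estimates with very large standard
# errors are the typical symptom.
summary(logistic_model$finalModel)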
# Now make predictions on the validation set
logistic_predictions <- predict(logistic_model, newdata = validationData)
# Evaluate each model's performance using a confusion matrix and accuracy
logistic_cm <- confusionMatrix(logistic_predictions, validationData$Purchase)
# Accuracy
logistic_cm$overall['Accuracy']
## Accuracy
## 0.9491525
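# Sketch: the same accuracy can be recovered by hand from the confusion matrix, as
# correct predictions (the diagonal) divided by all predictions.
sum(diag(logistic_cm$table)) / sum(logistic_cm$table)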
# 2. k-Nearest Neighbors (kNN)
knn_model <- train(Purchase ~ Age + Income + Education_Level + Marital_Status + Job_Experience,
data = trainData, method = "knn", tuneLength = 5)
# Make predictions on the validation set
knn_predictions <- predict(knn_model, newdata = validationData)
# Evaluate each model's performance using a confusion matrix and accuracy
knn_cm <- confusionMatrix(knn_predictions, validationData$Purchase)
# Accuracy
knn_cm$overall['Accuracy']
## Accuracy
## 0.9491525
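# With tuneLength = 5, caret tries five values of k and keeps the one with the best
# resampled accuracy. Sketch to inspect the selected k (output omitted):
knn_model$bestTune
# Note: kNN is distance-based, so a common refinement (not applied here) is to add
# preProcess = c("center", "scale") to the train() call.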
# 3. Decision Tree
tree_model <- train(Purchase ~ Age + Income + Education_Level + Marital_Status + Job_Experience,
data = trainData, method = "rpart")
# Make predictions on the validation set
tree_predictions <- predict(tree_model, newdata = validationData)
# Evaluate each model's performance using a confusion matrix and accuracy
tree_cm <- confusionMatrix(tree_predictions, validationData$Purchase)
# Accuracy
tree_cm$overall['Accuracy']
## Accuracy
## 0.9491525
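# Sketch (output omitted): inspect the fitted tree and the complexity parameter (cp)
# that caret selected by resampling.
tree_model$finalModel
tree_model$bestTune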
# Confusion matrix
Predicted = c("Yes", "No")
Actual_Yes = c("True Positives", "False Negatives")
Actual_No = c("False Positives", "True Negatives")
data.frame(Predicted, Actual_Yes, Actual_No) %>%
datatable(caption = "Tabel 4. Kategori dalam confusion matrix")
# Evaluation metrics
conmat.logistic = logistic_cm$table %>%
as.data.frame() %>%
mutate(Prediction = recode(Prediction, "0" = "No", "1" = "Yes"),
Reference = recode(Reference, "0" = "No", "1" = "Yes")) %>%
mutate(Category = case_when(
Prediction == "No" & Reference == "No" ~ "True Negative (TN)",
Prediction == "Yes" & Reference == "No" ~ "False Positive (FP)",
Prediction == "No" & Reference == "Yes" ~ "False Negative (FN)",
Prediction == "Yes" & Reference == "Yes" ~ "True Positive (TP)"
))
conmat.logistic %>% datatable(caption = "Table 5. Confusion matrix for logistic regression")
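# Sketch: derive the metrics reported in Table 8 by hand for the logistic model,
# taking "1" as the positive class (set via relevel() above, so it is the first
# factor level and therefore caret's default positive class). The result should
# match logistic_cm$byClass. Output omitted.
tp <- logistic_cm$table["1", "1"]; fp <- logistic_cm$table["1", "0"]
fn <- logistic_cm$table["0", "1"]; tn <- logistic_cm$table["0", "0"]
c(Accuracy  = (tp + tn) / (tp + tn + fp + fn),
  Precision = tp / (tp + fp),
  Recall    = tp / (tp + fn),
  F1        = 2 * tp / (2 * tp + fp + fn))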
conmat.knn = knn_cm$table %>%
as.data.frame() %>%
mutate(Prediction = recode(Prediction, "0" = "No", "1" = "Yes"),
Reference = recode(Reference, "0" = "No", "1" = "Yes")) %>%
mutate(Category = case_when(
Prediction == "No" & Reference == "No" ~ "True Negative (TN)",
Prediction == "Yes" & Reference == "No" ~ "False Positive (FP)",
Prediction == "No" & Reference == "Yes" ~ "False Negative (FN)",
Prediction == "Yes" & Reference == "Yes" ~ "True Positive (TP)"
))
conmat.knn %>% datatable(caption = "Table 6. Confusion matrix for kNN")
conmat.tree = tree_cm$table %>%
as.data.frame() %>%
mutate(Prediction = recode(Prediction, "0" = "No", "1" = "Yes"),
Reference = recode(Reference, "0" = "No", "1" = "Yes")) %>%
mutate(Category = case_when(
Prediction == "No" & Reference == "No" ~ "True Negative (TN)",
Prediction == "Yes" & Reference == "No" ~ "False Positive (FP)",
Prediction == "No" & Reference == "Yes" ~ "False Negative (FN)",
Prediction == "Yes" & Reference == "Yes" ~ "True Positive (TP)"
))
conmat.tree %>% datatable(caption = "Table 7. Confusion matrix for the decision tree")
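# Optional sketch (output omitted): the three confusion matrices can also be stacked
# into one long table for side-by-side comparison.
bind_rows(
  conmat.logistic %>% mutate(Model = "Logistic Regression"),
  conmat.knn %>% mutate(Model = "KNN"),
  conmat.tree %>% mutate(Model = "Decision Tree")
)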
# Build the model-evaluation data frame
evaluation_results <- data.frame(
Model = c("Logistic Regression", "KNN", "Decision Tree"),
Accuracy = c(
logistic_cm$overall["Accuracy"],
knn_cm$overall["Accuracy"],
tree_cm$overall["Accuracy"]
),
Precision = c(
logistic_cm$byClass["Precision"],
knn_cm$byClass["Precision"],
tree_cm$byClass["Precision"]
),
Recall = c(
logistic_cm$byClass["Recall"],
knn_cm$byClass["Recall"],
tree_cm$byClass["Recall"]
),
F1 = c(
logistic_cm$byClass["F1"],
knn_cm$byClass["F1"],
tree_cm$byClass["F1"]
)
)
evaluation_results %>% datatable(
caption = "Tabel 8. Perbandingan Accuracy, Precision, Recall, dan F1",
rownames = FALSE
)
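# Sketch: rank the models by F1 to choose between them; since all three report the
# same accuracy here, precision, recall, and F1 become the deciding columns.
evaluation_results %>% arrange(desc(F1))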