library(tidyverse)
## Warning: package 'purrr' was built under R version 4.5.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(DT)
## Warning: package 'DT' was built under R version 4.5.1
# Load the dataset from the working directory (e.g. the folder where this
# R Markdown file is saved).
data <- read.csv(file = "depression_data.csv")
# Store the outcome (dependent variable) as a factor.
data$depression <- factor(data$depression)
# Show the whole dataset as an interactive table, with columns 5 to 8
# rounded to two decimals.
formatRound(
  datatable(data, caption = "Tabel 1. Dataset"),
  columns = 5:8,
  digits = 2
)
# Fix the RNG state so the random split below is reproducible.
set.seed(123)
# Draw the row indices of a 70% training partition based on the outcome;
# the remaining 30% of rows are held out for validation.
trainIndex <- createDataPartition(
  y = data$depression,
  p = 0.7,
  list = FALSE
)
# Selected rows form the training set, every other row the validation set.
trainData <- data[trainIndex, ]
validationData <- data[-trainIndex, ]
# Inspect the training set as an interactive table, with columns 5 to 8
# rounded to two decimals.
formatRound(
  datatable(trainData, caption = "Tabel 2. Dataset training"),
  columns = 5:8,
  digits = 2
)
# Inspect the validation set as an interactive table, with columns 5 to 8
# rounded to two decimals.
validationData %>%
  datatable(caption = "Tabel 3. Dataset validasi") %>%
  # `columns` is spelled out in full: the abbreviated `column` only worked
  # through R's partial argument matching.
  formatRound(columns = 5:8, digits = 2)
# 1. Logistic regression
# Fit a binomial GLM through caret's train() on the training set, using the
# seven predictors listed in the formula.
logistic_model <- train(
  depression ~ age + gender + socioeconomic_status + stress_level +
    sleep_quality + self_esteem + social_support,
  data = trainData,
  method = "glm",
  family = "binomial"
)
# Predict the outcome class for every row of the validation set.
logistic_predictions <- predict(
  object = logistic_model,
  newdata = validationData
)
# Cross-tabulate predicted against observed classes.
logistic_cm <- confusionMatrix(
  data = logistic_predictions,
  reference = validationData$depression
)
# Overall accuracy on the validation set
logistic_cm$overall["Accuracy"]
## Accuracy
## 0.7986577
# 2. k-Nearest Neighbors (kNN)
# Fit a kNN classifier through caret's train() on the training set, using the
# seven predictors listed in the formula; tuneLength = 5 sets the size of the
# tuning grid caret evaluates.
# NOTE(review): the predictors are passed to kNN without centering/scaling;
# consider preProcess = c("center", "scale") -- this would change the accuracy
# recorded below, so confirm before applying.
knn_model <- train(
  depression ~ age + gender + socioeconomic_status + stress_level +
    sleep_quality + self_esteem + social_support,
  data = trainData,
  method = "knn",
  tuneLength = 5
)
# Predict the outcome class for every row of the validation set.
knn_predictions <- predict(
  object = knn_model,
  newdata = validationData
)
# Cross-tabulate predicted against observed classes.
knn_cm <- confusionMatrix(
  data = knn_predictions,
  reference = validationData$depression
)
# Overall accuracy on the validation set
knn_cm$overall["Accuracy"]
## Accuracy
## 0.6845638
# 3. Decision tree
# Fit an rpart decision tree through caret's train() on the training set,
# using the seven predictors listed in the formula.
tree_model <- train(
  depression ~ age + gender + socioeconomic_status + stress_level +
    sleep_quality + self_esteem + social_support,
  data = trainData,
  method = "rpart"
)
# Predict the outcome class for every row of the validation set.
tree_predictions <- predict(
  object = tree_model,
  newdata = validationData
)
# Cross-tabulate predicted against observed classes.
tree_cm <- confusionMatrix(
  data = tree_predictions,
  reference = validationData$depression
)
# Overall accuracy on the validation set
tree_cm$overall["Accuracy"]
## Accuracy
## 0.8791946
# Legend for reading a 2x2 confusion matrix: one row per predicted class,
# one column per actual class.
Predicted <- c("Yes", "No")
Actual_Yes <- c("True Positives", "False Negatives")
Actual_No <- c("False Positives", "True Negatives")
datatable(
  data.frame(Predicted, Actual_Yes, Actual_No),
  caption = "Tabel 4. Kategori dalam confusion matrix"
)
# Convert a caret confusion matrix into a long, labelled data frame.
#
# Args:
#   cm: object returned by confusionMatrix(); only `cm$table` is read.
#
# Returns:
#   A data frame with one row per Prediction/Reference cell of the table.
#   The class codes "0"/"1" are relabelled "No"/"Yes", and a `Category`
#   column names each cell (TN, FP, FN or TP), treating "Yes" as the
#   positive class.
label_confusion_cells <- function(cm) {
  cm$table %>%
    as.data.frame() %>%
    mutate(
      Prediction = recode(Prediction, "0" = "No", "1" = "Yes"),
      Reference = recode(Reference, "0" = "No", "1" = "Yes")
    ) %>%
    # Derive each label from the row's own Prediction/Reference values rather
    # than from row position, so the labels stay correct if the order of the
    # factor levels ever changes. Rows whose classes are not "No"/"Yes" after
    # recoding get NA instead of a silently wrong label.
    mutate(
      Category = case_when(
        Prediction == "No" & Reference == "No" ~ "True Negative (TN)",
        Prediction == "Yes" & Reference == "No" ~ "False Positive (FP)",
        Prediction == "No" & Reference == "Yes" ~ "False Negative (FN)",
        Prediction == "Yes" & Reference == "Yes" ~ "True Positive (TP)"
      )
    )
}

# Labelled confusion matrix for the logistic regression model
conmat.logistic <- label_confusion_cells(logistic_cm)
conmat.logistic %>% datatable(caption = "Tabel 5. Confusion matrix logistic regression")
# Labelled confusion matrix for the kNN model
conmat.kNN <- label_confusion_cells(knn_cm)
conmat.kNN %>% datatable(caption = "Tabel 6. Confusion matrix kNN")
# Labelled confusion matrix for the decision tree model
conmat.tree_model <- label_confusion_cells(tree_cm)
conmat.tree_model %>% datatable(caption = "Tabel 7. Confusion matrix Decision Tree")