library(tidyverse)
## Warning: package 'purrr' was built under R version 4.5.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(DT)
## Warning: package 'DT' was built under R version 4.5.1
# Load the dataset from the working directory (e.g. the folder in which this
# R Markdown file is saved)
data <- read.csv(file = "depression_data.csv")

# Store the dependent variable as a factor
data$depression <- factor(data$depression)

# Show the dataset as an interactive table, rounding columns 5 to 8 to
# two decimals
datatable(data, caption = "Tabel 1. Dataset") %>%
  formatRound(columns = 5:8, digits = 2)
# Fix the random seed so the partition below is reproducible
set.seed(123)

# Pick the rows that form the training portion (p = 0.7, i.e. 70% training
# and 30% validation), partitioning on the outcome column
trainIndex <- createDataPartition(y = data$depression, p = 0.7, list = FALSE)

# Training set = the selected rows; validation set = every remaining row
trainData <- data[trainIndex, ]
validationData <- data[-trainIndex, ]

# Inspect the training data (columns 5 to 8 rounded to two decimals)
trainData %>%
  datatable(caption = "Tabel 2. Dataset training") %>%
  formatRound(columns = 5:8, digits = 2)

# Inspect the validation data. The argument name is spelled out in full as
# `columns` instead of relying on partial matching of `column`.
validationData %>%
  datatable(caption = "Tabel 3. Dataset validasi") %>%
  formatRound(columns = 5:8, digits = 2)
# 1. Logistic Regression
# Fit a binomial GLM on the training data via train()
logistic_model <- train(
  depression ~ age + gender + socioeconomic_status + stress_level +
    sleep_quality + self_esteem + social_support,
  data = trainData,
  method = "glm",
  family = "binomial"
)

# Predict the class of every row in the validation set
logistic_predictions <- predict(logistic_model, newdata = validationData)

# Cross-tabulate predictions against the observed classes.
# `positive = "1"` tells caret that level "1" is the positive class, which is
# how this script labels TP/FP/FN/TN ("1" = "Yes"). Without it caret takes the
# first factor level as positive. Accuracy and the table are not affected by
# this choice; only the per-class statistics are.
logistic_cm <- confusionMatrix(
  logistic_predictions,
  validationData$depression,
  positive = "1"
)

# Accuracy
logistic_cm$overall["Accuracy"]
##  Accuracy
## 0.7986577
# 2. k-Nearest Neighbors (kNN)
# Fit kNN on the training data; tuneLength = 5 asks train() to try five
# candidate tuning values.
# NOTE(review): no `preProcess` is given, so predictors enter the model on
# their raw scales. Consider preProcess = c("center", "scale") - this would
# change the accuracy recorded below, so confirm before enabling.
knn_model <- train(
  depression ~ age + gender + socioeconomic_status + stress_level +
    sleep_quality + self_esteem + social_support,
  data = trainData,
  method = "knn",
  tuneLength = 5
)

# Predict the class of every row in the validation set
knn_predictions <- predict(knn_model, newdata = validationData)

# Cross-tabulate predictions against the observed classes.
# `positive = "1"` tells caret that level "1" is the positive class, which is
# how this script labels TP/FP/FN/TN ("1" = "Yes"). Without it caret takes the
# first factor level as positive. Accuracy and the table are not affected by
# this choice; only the per-class statistics are.
knn_cm <- confusionMatrix(
  knn_predictions,
  validationData$depression,
  positive = "1"
)

# Accuracy
knn_cm$overall["Accuracy"]
##  Accuracy
## 0.6845638
# 3. Decision Tree
# Fit an rpart decision tree on the training data via train()
tree_model <- train(
  depression ~ age + gender + socioeconomic_status + stress_level +
    sleep_quality + self_esteem + social_support,
  data = trainData,
  method = "rpart"
)

# Predict the class of every row in the validation set
tree_predictions <- predict(tree_model, newdata = validationData)

# Cross-tabulate predictions against the observed classes.
# `positive = "1"` tells caret that level "1" is the positive class, which is
# how this script labels TP/FP/FN/TN ("1" = "Yes"). Without it caret takes the
# first factor level as positive. Accuracy and the table are not affected by
# this choice; only the per-class statistics are.
tree_cm <- confusionMatrix(
  tree_predictions,
  validationData$depression,
  positive = "1"
)

# Accuracy
tree_cm$overall["Accuracy"]
##  Accuracy
## 0.8791946
# Legend for reading a confusion matrix: one row per predicted class, one
# column per actual class
Predicted <- c("Yes", "No")
Actual_Yes <- c("True Positives", "False Negatives")
Actual_No <- c("False Positives", "True Negatives")

datatable(
  data.frame(
    Predicted = Predicted,
    Actual_Yes = Actual_Yes,
    Actual_No = Actual_No
  ),
  caption = "Tabel 4. Kategori dalam confusion matrix"
)
# Turn a confusionMatrix object into a long table with readable labels.
#
# cm: an object whose `$table` element is the Prediction x Reference
#     cross-tabulation, with class levels "0" and "1".
# Returns a data frame with columns Prediction, Reference, Freq and Category.
# Levels "0"/"1" are relabelled "No"/"Yes". Category is derived from each
# row's own Prediction/Reference pair rather than from row position, so a
# label cannot end up on the wrong cell if the row order differs.
label_confusion_cells <- function(cm) {
  cm$table %>%
    as.data.frame() %>%
    mutate(
      Prediction = recode(Prediction, "0" = "No", "1" = "Yes"),
      Reference = recode(Reference, "0" = "No", "1" = "Yes"),
      Category = case_when(
        Prediction == "No" & Reference == "No" ~ "True Negative (TN)",
        Prediction == "Yes" & Reference == "No" ~ "False Positive (FP)",
        Prediction == "No" & Reference == "Yes" ~ "False Negative (FN)",
        Prediction == "Yes" & Reference == "Yes" ~ "True Positive (TP)"
      )
    )
}

# Logistic regression
conmat.logistic <- label_confusion_cells(logistic_cm)
conmat.logistic %>%
  datatable(caption = "Tabel 5. Confusion matrix logistic regression")

# kNN
conmat.kNN <- label_confusion_cells(knn_cm)
conmat.kNN %>%
  datatable(caption = "Tabel 6. Confusion matrix kNN")

# Decision tree
conmat.tree_model <- label_confusion_cells(tree_cm)
conmat.tree_model %>%
  datatable(caption = "Tabel 7. Confusion matrix Decision Tree")