# =========================================================
# ANALISIS STRESS LEVEL DENGAN REGRESI LOGISTIK MULTIKELAS
# =========================================================
# 1. PERSIAPAN DAN IMPORT DATA
library(nnet) # multinom model
file_csv <- "Student Stress Factors.csv"
df <- read.csv(file_csv, stringsAsFactors = FALSE)
names(df) <- make.names(names(df)) # Buat nama kolom valid
target_col <- "How.would.you.rate.your.stress.levels."
df[[target_col]] <- factor(df[[target_col]])
cat("Data berhasil di-load dengan dimensi:", dim(df), "\n")
## Data berhasil di-load dengan dimensi: 53 7
df
## Timestamp Kindly.Rate.your.Sleep.Quality.
## 1 27/10/2023 21:54:15 3
## 2 28/10/2023 12:24:40 4
## 3 28/10/2023 12:24:51 2
## 4 28/10/2023 12:26:11 3
## 5 28/10/2023 12:26:45 2
## 6 28/10/2023 12:31:02 3
## 7 28/10/2023 12:34:45 3
## 8 28/10/2023 12:35:43 4
## 9 28/10/2023 12:36:07 2
## 10 28/10/2023 12:36:20 1
## 11 28/10/2023 12:37:22 2
## 12 28/10/2023 12:38:40 3
## 13 28/10/2023 12:39:43 2
## 14 28/10/2023 12:40:50 4
## 15 28/10/2023 12:41:12 4
## 16 28/10/2023 12:41:56 2
## 17 28/10/2023 12:48:22 3
## 18 28/10/2023 13:26:41 1
## 19 28/10/2023 13:28:47 2
## 20 28/10/2023 13:29:54 2
## 21 28/10/2023 13:30:57 3
## 22 28/10/2023 13:32:41 3
## 23 28/10/2023 13:33:15 2
## 24 28/10/2023 13:33:59 3
## 25 28/10/2023 13:41:28 5
## 26 28/10/2023 13:42:52 3
## 27 28/10/2023 13:44:07 5
## 28 28/10/2023 13:47:44 5
## 29 28/10/2023 13:52:22 2
## 30 28/10/2023 13:54:48 3
## 31 28/10/2023 13:57:20 5
## 32 28/10/2023 14:09:05 5
## 33 28/10/2023 14:27:19 5
## 34 28/10/2023 14:40:55 2
## 35 28/10/2023 14:58:39 4
## 36 28/10/2023 15:10:02 5
## 37 28/10/2023 15:10:43 4
## 38 28/10/2023 15:39:59 3
## 39 28/10/2023 16:16:13 3
## 40 28/10/2023 16:34:16 3
## 41 28/10/2023 16:55:49 4
## 42 28/10/2023 17:24:10 4
## 43 28/10/2023 17:26:18 4
## 44 28/10/2023 17:29:55 2
## 45 28/10/2023 17:31:52 5
## 46 28/10/2023 17:56:58 4
## 47 28/10/2023 22:33:48 2
## 48 28/10/2023 23:06:00 4
## 49 28/10/2023 23:45:09 3
## 50 29/10/2023 00:32:26 1
## 51 29/10/2023 11:40:20 5
## 52 31/10/2023 10:10:03 1
## 53 31/10/2023 20:49:11 3
## How.many.times.a.week.do.you.suffer.headaches.
## 1 1
## 2 1
## 3 1
## 4 2
## 5 3
## 6 1
## 7 5
## 8 3
## 9 1
## 10 2
## 11 3
## 12 1
## 13 3
## 14 1
## 15 1
## 16 3
## 17 1
## 18 1
## 19 1
## 20 3
## 21 4
## 22 3
## 23 1
## 24 1
## 25 1
## 26 1
## 27 1
## 28 5
## 29 3
## 30 4
## 31 1
## 32 2
## 33 5
## 34 3
## 35 1
## 36 1
## 37 1
## 38 2
## 39 4
## 40 1
## 41 4
## 42 1
## 43 2
## 44 1
## 45 1
## 46 1
## 47 1
## 48 3
## 49 3
## 50 1
## 51 1
## 52 1
## 53 2
## How.would.you.rate.you.academic.performance.
## 1 3
## 2 2
## 3 2
## 4 3
## 5 1
## 6 3
## 7 1
## 8 1
## 9 4
## 10 3
## 11 5
## 12 5
## 13 3
## 14 4
## 15 3
## 16 3
## 17 3
## 18 1
## 19 3
## 20 3
## 21 2
## 22 4
## 23 2
## 24 4
## 25 4
## 26 3
## 27 5
## 28 5
## 29 3
## 30 2
## 31 3
## 32 3
## 33 4
## 34 4
## 35 5
## 36 4
## 37 1
## 38 3
## 39 4
## 40 4
## 41 3
## 42 4
## 43 3
## 44 4
## 45 4
## 46 3
## 47 4
## 48 3
## 49 4
## 50 1
## 51 5
## 52 5
## 53 3
## how.would.you.rate.your.study.load.
## 1 4
## 2 3
## 3 1
## 4 2
## 5 5
## 6 2
## 7 4
## 8 4
## 9 4
## 10 2
## 11 5
## 12 1
## 13 4
## 14 4
## 15 2
## 16 5
## 17 3
## 18 1
## 19 1
## 20 2
## 21 2
## 22 4
## 23 3
## 24 4
## 25 2
## 26 2
## 27 1
## 28 1
## 29 1
## 30 1
## 31 5
## 32 2
## 33 5
## 34 1
## 35 4
## 36 3
## 37 2
## 38 1
## 39 5
## 40 4
## 41 2
## 42 5
## 43 3
## 44 4
## 45 5
## 46 2
## 47 4
## 48 2
## 49 4
## 50 2
## 51 1
## 52 1
## 53 2
## How.many.times.a.week.you.practice.extracurricular.activities.
## 1 2
## 2 3
## 3 4
## 4 3
## 5 5
## 6 1
## 7 3
## 8 1
## 9 5
## 10 5
## 11 2
## 12 4
## 13 4
## 14 1
## 15 5
## 16 3
## 17 3
## 18 1
## 19 2
## 20 3
## 21 1
## 22 3
## 23 2
## 24 2
## 25 1
## 26 2
## 27 5
## 28 1
## 29 1
## 30 3
## 31 5
## 32 3
## 33 2
## 34 2
## 35 5
## 36 2
## 37 4
## 38 1
## 39 3
## 40 4
## 41 2
## 42 1
## 43 5
## 44 1
## 45 4
## 46 5
## 47 1
## 48 2
## 49 4
## 50 3
## 51 3
## 52 5
## 53 5
## How.would.you.rate.your.stress.levels.
## 1 3
## 2 2
## 3 4
## 4 3
## 5 3
## 6 1
## 7 5
## 8 1
## 9 1
## 10 2
## 11 4
## 12 1
## 13 3
## 14 1
## 15 2
## 16 4
## 17 4
## 18 2
## 19 3
## 20 4
## 21 2
## 22 3
## 23 2
## 24 3
## 25 1
## 26 1
## 27 5
## 28 1
## 29 2
## 30 1
## 31 5
## 32 4
## 33 5
## 34 2
## 35 2
## 36 5
## 37 5
## 38 3
## 39 1
## 40 3
## 41 2
## 42 5
## 43 4
## 44 3
## 45 5
## 46 4
## 47 4
## 48 2
## 49 3
## 50 1
## 51 3
## 52 1
## 53 2
# 2. PRA-PROSES DATA
df <- na.omit(df)
cat("Jumlah data setelah dihapus NA:", nrow(df), "\n")
## Jumlah data setelah dihapus NA: 53
# Jika ada kolom Timestamp, ubah ke POSIXct dan ekstrak fitur baru
if ("Timestamp" %in% names(df)) {
df$Timestamp <- as.POSIXct(df$Timestamp, format="%d/%m/%Y %H:%M:%S") # sesuaikan format jika berbeda
df$Hour <- as.numeric(format(df$Timestamp, "%H"))
df$DayOfWeek <- factor(weekdays(df$Timestamp))
df$Timestamp <- NULL # hilangkan kolom asli Timestamp supaya gak error faktor baru
cat("Kolom Timestamp ditemukan dan diolah menjadi Hour dan DayOfWeek\n")
}
## Kolom Timestamp ditemukan dan diolah menjadi Hour dan DayOfWeek
predictor_cols <- setdiff(names(df), target_col)
# Scaling hanya untuk kolom numerik
num_cols <- sapply(df[predictor_cols], is.numeric)
df[predictor_cols][num_cols] <- scale(df[predictor_cols][num_cols])
cat("Summary data numerik setelah scaling:\n")
## Summary data numerik setelah scaling:
print(summary(df[, predictor_cols[num_cols]]))
## Kindly.Rate.your.Sleep.Quality. How.many.times.a.week.do.you.suffer.headaches.
## Min. :-1.7935 Min. :-0.7767
## 1st Qu.:-0.9597 1st Qu.:-0.7767
## Median :-0.1259 Median :-0.7767
## Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.7080 3rd Qu.: 0.8065
## Max. : 1.5418 Max. : 2.3898
## How.would.you.rate.you.academic.performance.
## Min. :-1.9288
## 1st Qu.:-0.1962
## Median :-0.1962
## Mean : 0.0000
## 3rd Qu.: 0.6702
## Max. : 1.5365
## how.would.you.rate.your.study.load.
## Min. :-1.2680
## 1st Qu.:-0.5679
## Median :-0.5679
## Mean : 0.0000
## 3rd Qu.: 0.8321
## Max. : 1.5321
## How.many.times.a.week.you.practice.extracurricular.activities.
## Min. :-1.30129
## 1st Qu.:-0.61161
## Median : 0.07808
## Mean : 0.00000
## 3rd Qu.: 0.76776
## Max. : 1.45745
## Hour
## Min. :-3.8801
## 1st Qu.:-0.5363
## Median :-0.2576
## Mean : 0.0000
## 3rd Qu.: 0.2997
## Max. : 2.5289
# Ubah kolom non-numerik (kecuali target) menjadi faktor
factor_cols <- sapply(df[predictor_cols], function(x) is.character(x) || is.factor(x))
factor_names <- names(factor_cols)[factor_cols]
for (col in factor_names) {
df[[col]] <- factor(df[[col]])
cat("Kolom", col, "diubah menjadi faktor dengan level:", levels(df[[col]]), "\n")
}
## Kolom DayOfWeek diubah menjadi faktor dengan level: Friday Saturday Sunday Tuesday
# Pastikan target juga faktor
df[[target_col]] <- factor(df[[target_col]])
cat("Target variable levels:", levels(df[[target_col]]), "\n")
## Target variable levels: 1 2 3 4 5
# 3. SPLIT DATA (TRAIN 60% - TEST 40%)
set.seed(123)
train_idx <- sample(seq_len(nrow(df)), 0.6 * nrow(df))
train <- df[train_idx, ]
test <- df[-train_idx, ]
# Sinkronisasi faktor level di test dengan train untuk menghindari error faktor baru
factor_cols_all <- sapply(train, is.factor)
factor_names_all <- names(factor_cols_all)[factor_cols_all]
for (col in factor_names_all) {
test[[col]] <- factor(test[[col]], levels=levels(train[[col]]))
}
cat("Ukuran data train :", nrow(train), "\n")
## Ukuran data train : 31
cat("Ukuran data test :", nrow(test), "\n")
## Ukuran data test : 22
# 4. LATIH MODEL MULTINOMIAL LOGISTIC REGRESSION
logit_mod <- multinom(as.formula(paste(target_col, "~ .")), data = train, trace = FALSE)
cat("Model berhasil dilatih:\n")
## Model berhasil dilatih:
logit_mod
## Call:
## multinom(formula = as.formula(paste(target_col, "~ .")), data = train,
## trace = FALSE)
##
## Coefficients:
## (Intercept) Kindly.Rate.your.Sleep.Quality.
## 2 0.2516130 -2.029659
## 3 3.9339726 -2.231933
## 4 0.4086354 -2.353742
## 5 -1.4638098 2.621609
## How.many.times.a.week.do.you.suffer.headaches.
## 2 0.8239384
## 3 -0.3192163
## 4 -0.3316504
## 5 0.5279213
## How.would.you.rate.you.academic.performance.
## 2 0.6843111
## 3 -0.6253891
## 4 0.2433560
## 5 -0.9193150
## how.would.you.rate.your.study.load.
## 2 -0.66354965
## 3 -0.01320639
## 4 0.23199355
## 5 1.23614728
## How.many.times.a.week.you.practice.extracurricular.activities. Hour
## 2 1.8051529 5.888951
## 3 0.6952313 4.963428
## 4 1.3664681 5.905651
## 5 1.4748917 4.875895
## DayOfWeekSaturday DayOfWeekSunday DayOfWeekTuesday
## 2 1.1802862 -0.9286732 0
## 3 -3.0670679 7.0010405 0
## 4 0.8786019 -0.4699665 0
## 5 0.6161171 -2.0799269 0
##
## Residual Deviance: 55.93425
## AIC: 119.9342
# 5. PREDIKSI DAN CONFUSION MATRIX
pred <- predict(logit_mod, test)
cm <- table(Actual = test[[target_col]], Predicted = pred)
cat("Confusion Matrix:\n")
## Confusion Matrix:
print(cm)
## Predicted
## Actual 1 2 3 4 5
## 1 1 2 1 0 0
## 2 1 2 3 0 0
## 3 1 3 1 2 0
## 4 0 2 0 1 1
## 5 0 0 0 0 1
# 6. METRIK EVALUASI
acc <- sum(diag(cm)) / sum(cm)
prec <- diag(cm) / colSums(cm)
rec <- diag(cm) / rowSums(cm)
f1 <- 2 * prec * rec / (prec + rec)
macro_f1 <- mean(f1, na.rm = TRUE)
p0 <- acc
pe <- sum(rowSums(cm) * colSums(cm)) / (sum(cm)^2)
kappa <- (p0 - pe) / (1 - pe)
nice_num <- function(x, digits = 4) formatC(x, format = "f", digits = digits)
# 7. OUTPUT METRIK
cat("\n================ CONFUSION MATRIX ================\n")
##
## ================ CONFUSION MATRIX ================
print(cm)
## Predicted
## Actual 1 2 3 4 5
## 1 1 2 1 0 0
## 2 1 2 3 0 0
## 3 1 3 1 2 0
## 4 0 2 0 1 1
## 5 0 0 0 0 1
metrics_tbl <- data.frame(
Kelas = rownames(cm),
Presisi = nice_num(prec),
Recall = nice_num(rec),
F1 = nice_num(f1)
)
cat("\n================ METRIK PER KELAS ================\n")
##
## ================ METRIK PER KELAS ================
print(metrics_tbl, row.names = FALSE)
## Kelas Presisi Recall F1
## 1 0.3333 0.2500 0.2857
## 2 0.2222 0.3333 0.2667
## 3 0.2000 0.1429 0.1667
## 4 0.3333 0.2500 0.2857
## 5 0.5000 1.0000 0.6667
cat("\n================ RINGKASAN EVALUASI ==============\n")
##
## ================ RINGKASAN EVALUASI ==============
cat("Akurasi :", nice_num(acc), "\n")
## Akurasi : 0.2727
cat("Macro-F1 :", nice_num(macro_f1), "\n")
## Macro-F1 : 0.3343
cat("Cohen’s Kappa :", nice_num(kappa), "\n")
## Cohen’s Kappa : 0.0461
# 8. CROSS-VALIDATION 5-FOLD
k <- 5
folds <- sample(rep(1:k, length.out = nrow(df)))
fold_acc <- numeric(k)
for (i in seq_len(k)) {
train_cv <- df[folds != i, ]
test_cv <- df[folds == i, ]
# Sinkronisasi faktor level di test_cv dengan train_cv
factor_cols_cv <- sapply(train_cv, is.factor)
factor_names_cv <- names(factor_cols_cv)[factor_cols_cv]
for (col in factor_names_cv) {
test_cv[[col]] <- factor(test_cv[[col]], levels=levels(train_cv[[col]]))
}
mod_cv <- multinom(as.formula(paste(target_col, "~ .")), data = train_cv, trace = FALSE)
pred_cv <- predict(mod_cv, test_cv)
fold_acc[i] <- mean(pred_cv == test_cv[[target_col]])
cat(sprintf("Fold %d accuracy: %.4f\n", i, fold_acc[i]))
}
## Fold 1 accuracy: 0.2727
## Fold 2 accuracy: 0.1818
## Fold 3 accuracy: 0.1818
## Fold 4 accuracy: 0.1000
## Fold 5 accuracy: 0.2000
cat("\n========= 5-FOLD CROSS-VALIDATION ACCURACY =======\n")
##
## ========= 5-FOLD CROSS-VALIDATION ACCURACY =======
cat("Akurasi rata-rata 5-Fold CV :", nice_num(mean(fold_acc)), " ± ", nice_num(sd(fold_acc)), "\n")
## Akurasi rata-rata 5-Fold CV : 0.1873 ± 0.0615