# =========================================================
# ANALISIS STRESS LEVEL DENGAN REGRESI LOGISTIK MULTIKELAS
# =========================================================

# 1. PERSIAPAN DAN IMPORT DATA
library(nnet)  # multinom model

file_csv <- "Student Stress Factors.csv"
df <- read.csv(file_csv, stringsAsFactors = FALSE)

names(df) <- make.names(names(df))  # Buat nama kolom valid
target_col <- "How.would.you.rate.your.stress.levels."
df[[target_col]] <- factor(df[[target_col]])
cat("Data berhasil di-load dengan dimensi:", dim(df), "\n")
## Data berhasil di-load dengan dimensi: 53 7
df
##              Timestamp Kindly.Rate.your.Sleep.Quality.
## 1  27/10/2023 21:54:15                               3
## 2  28/10/2023 12:24:40                               4
## 3  28/10/2023 12:24:51                               2
## 4  28/10/2023 12:26:11                               3
## 5  28/10/2023 12:26:45                               2
## 6  28/10/2023 12:31:02                               3
## 7  28/10/2023 12:34:45                               3
## 8  28/10/2023 12:35:43                               4
## 9  28/10/2023 12:36:07                               2
## 10 28/10/2023 12:36:20                               1
## 11 28/10/2023 12:37:22                               2
## 12 28/10/2023 12:38:40                               3
## 13 28/10/2023 12:39:43                               2
## 14 28/10/2023 12:40:50                               4
## 15 28/10/2023 12:41:12                               4
## 16 28/10/2023 12:41:56                               2
## 17 28/10/2023 12:48:22                               3
## 18 28/10/2023 13:26:41                               1
## 19 28/10/2023 13:28:47                               2
## 20 28/10/2023 13:29:54                               2
## 21 28/10/2023 13:30:57                               3
## 22 28/10/2023 13:32:41                               3
## 23 28/10/2023 13:33:15                               2
## 24 28/10/2023 13:33:59                               3
## 25 28/10/2023 13:41:28                               5
## 26 28/10/2023 13:42:52                               3
## 27 28/10/2023 13:44:07                               5
## 28 28/10/2023 13:47:44                               5
## 29 28/10/2023 13:52:22                               2
## 30 28/10/2023 13:54:48                               3
## 31 28/10/2023 13:57:20                               5
## 32 28/10/2023 14:09:05                               5
## 33 28/10/2023 14:27:19                               5
## 34 28/10/2023 14:40:55                               2
## 35 28/10/2023 14:58:39                               4
## 36 28/10/2023 15:10:02                               5
## 37 28/10/2023 15:10:43                               4
## 38 28/10/2023 15:39:59                               3
## 39 28/10/2023 16:16:13                               3
## 40 28/10/2023 16:34:16                               3
## 41 28/10/2023 16:55:49                               4
## 42 28/10/2023 17:24:10                               4
## 43 28/10/2023 17:26:18                               4
## 44 28/10/2023 17:29:55                               2
## 45 28/10/2023 17:31:52                               5
## 46 28/10/2023 17:56:58                               4
## 47 28/10/2023 22:33:48                               2
## 48 28/10/2023 23:06:00                               4
## 49 28/10/2023 23:45:09                               3
## 50 29/10/2023 00:32:26                               1
## 51 29/10/2023 11:40:20                               5
## 52 31/10/2023 10:10:03                               1
## 53 31/10/2023 20:49:11                               3
##    How.many.times.a.week.do.you.suffer.headaches.
## 1                                               1
## 2                                               1
## 3                                               1
## 4                                               2
## 5                                               3
## 6                                               1
## 7                                               5
## 8                                               3
## 9                                               1
## 10                                              2
## 11                                              3
## 12                                              1
## 13                                              3
## 14                                              1
## 15                                              1
## 16                                              3
## 17                                              1
## 18                                              1
## 19                                              1
## 20                                              3
## 21                                              4
## 22                                              3
## 23                                              1
## 24                                              1
## 25                                              1
## 26                                              1
## 27                                              1
## 28                                              5
## 29                                              3
## 30                                              4
## 31                                              1
## 32                                              2
## 33                                              5
## 34                                              3
## 35                                              1
## 36                                              1
## 37                                              1
## 38                                              2
## 39                                              4
## 40                                              1
## 41                                              4
## 42                                              1
## 43                                              2
## 44                                              1
## 45                                              1
## 46                                              1
## 47                                              1
## 48                                              3
## 49                                              3
## 50                                              1
## 51                                              1
## 52                                              1
## 53                                              2
##    How.would.you.rate.you.academic.performance.
## 1                                             3
## 2                                             2
## 3                                             2
## 4                                             3
## 5                                             1
## 6                                             3
## 7                                             1
## 8                                             1
## 9                                             4
## 10                                            3
## 11                                            5
## 12                                            5
## 13                                            3
## 14                                            4
## 15                                            3
## 16                                            3
## 17                                            3
## 18                                            1
## 19                                            3
## 20                                            3
## 21                                            2
## 22                                            4
## 23                                            2
## 24                                            4
## 25                                            4
## 26                                            3
## 27                                            5
## 28                                            5
## 29                                            3
## 30                                            2
## 31                                            3
## 32                                            3
## 33                                            4
## 34                                            4
## 35                                            5
## 36                                            4
## 37                                            1
## 38                                            3
## 39                                            4
## 40                                            4
## 41                                            3
## 42                                            4
## 43                                            3
## 44                                            4
## 45                                            4
## 46                                            3
## 47                                            4
## 48                                            3
## 49                                            4
## 50                                            1
## 51                                            5
## 52                                            5
## 53                                            3
##    how.would.you.rate.your.study.load.
## 1                                    4
## 2                                    3
## 3                                    1
## 4                                    2
## 5                                    5
## 6                                    2
## 7                                    4
## 8                                    4
## 9                                    4
## 10                                   2
## 11                                   5
## 12                                   1
## 13                                   4
## 14                                   4
## 15                                   2
## 16                                   5
## 17                                   3
## 18                                   1
## 19                                   1
## 20                                   2
## 21                                   2
## 22                                   4
## 23                                   3
## 24                                   4
## 25                                   2
## 26                                   2
## 27                                   1
## 28                                   1
## 29                                   1
## 30                                   1
## 31                                   5
## 32                                   2
## 33                                   5
## 34                                   1
## 35                                   4
## 36                                   3
## 37                                   2
## 38                                   1
## 39                                   5
## 40                                   4
## 41                                   2
## 42                                   5
## 43                                   3
## 44                                   4
## 45                                   5
## 46                                   2
## 47                                   4
## 48                                   2
## 49                                   4
## 50                                   2
## 51                                   1
## 52                                   1
## 53                                   2
##    How.many.times.a.week.you.practice.extracurricular.activities.
## 1                                                               2
## 2                                                               3
## 3                                                               4
## 4                                                               3
## 5                                                               5
## 6                                                               1
## 7                                                               3
## 8                                                               1
## 9                                                               5
## 10                                                              5
## 11                                                              2
## 12                                                              4
## 13                                                              4
## 14                                                              1
## 15                                                              5
## 16                                                              3
## 17                                                              3
## 18                                                              1
## 19                                                              2
## 20                                                              3
## 21                                                              1
## 22                                                              3
## 23                                                              2
## 24                                                              2
## 25                                                              1
## 26                                                              2
## 27                                                              5
## 28                                                              1
## 29                                                              1
## 30                                                              3
## 31                                                              5
## 32                                                              3
## 33                                                              2
## 34                                                              2
## 35                                                              5
## 36                                                              2
## 37                                                              4
## 38                                                              1
## 39                                                              3
## 40                                                              4
## 41                                                              2
## 42                                                              1
## 43                                                              5
## 44                                                              1
## 45                                                              4
## 46                                                              5
## 47                                                              1
## 48                                                              2
## 49                                                              4
## 50                                                              3
## 51                                                              3
## 52                                                              5
## 53                                                              5
##    How.would.you.rate.your.stress.levels.
## 1                                       3
## 2                                       2
## 3                                       4
## 4                                       3
## 5                                       3
## 6                                       1
## 7                                       5
## 8                                       1
## 9                                       1
## 10                                      2
## 11                                      4
## 12                                      1
## 13                                      3
## 14                                      1
## 15                                      2
## 16                                      4
## 17                                      4
## 18                                      2
## 19                                      3
## 20                                      4
## 21                                      2
## 22                                      3
## 23                                      2
## 24                                      3
## 25                                      1
## 26                                      1
## 27                                      5
## 28                                      1
## 29                                      2
## 30                                      1
## 31                                      5
## 32                                      4
## 33                                      5
## 34                                      2
## 35                                      2
## 36                                      5
## 37                                      5
## 38                                      3
## 39                                      1
## 40                                      3
## 41                                      2
## 42                                      5
## 43                                      4
## 44                                      3
## 45                                      5
## 46                                      4
## 47                                      4
## 48                                      2
## 49                                      3
## 50                                      1
## 51                                      3
## 52                                      1
## 53                                      2
# 2. PRA-PROSES DATA
df <- na.omit(df)
cat("Jumlah data setelah dihapus NA:", nrow(df), "\n")
## Jumlah data setelah dihapus NA: 53
# Jika ada kolom Timestamp, ubah ke POSIXct dan ekstrak fitur baru
if ("Timestamp" %in% names(df)) {
  df$Timestamp <- as.POSIXct(df$Timestamp, format="%d/%m/%Y %H:%M:%S")  # sesuaikan format jika berbeda
  df$Hour <- as.numeric(format(df$Timestamp, "%H"))
  df$DayOfWeek <- factor(weekdays(df$Timestamp))
  df$Timestamp <- NULL  # hilangkan kolom asli Timestamp supaya gak error faktor baru
  cat("Kolom Timestamp ditemukan dan diolah menjadi Hour dan DayOfWeek\n")
}
## Kolom Timestamp ditemukan dan diolah menjadi Hour dan DayOfWeek
predictor_cols <- setdiff(names(df), target_col)

# Scaling hanya untuk kolom numerik
num_cols <- sapply(df[predictor_cols], is.numeric)
df[predictor_cols][num_cols] <- scale(df[predictor_cols][num_cols])
cat("Summary data numerik setelah scaling:\n")
## Summary data numerik setelah scaling:
print(summary(df[, predictor_cols[num_cols]]))
##  Kindly.Rate.your.Sleep.Quality. How.many.times.a.week.do.you.suffer.headaches.
##  Min.   :-1.7935                 Min.   :-0.7767                               
##  1st Qu.:-0.9597                 1st Qu.:-0.7767                               
##  Median :-0.1259                 Median :-0.7767                               
##  Mean   : 0.0000                 Mean   : 0.0000                               
##  3rd Qu.: 0.7080                 3rd Qu.: 0.8065                               
##  Max.   : 1.5418                 Max.   : 2.3898                               
##  How.would.you.rate.you.academic.performance.
##  Min.   :-1.9288                             
##  1st Qu.:-0.1962                             
##  Median :-0.1962                             
##  Mean   : 0.0000                             
##  3rd Qu.: 0.6702                             
##  Max.   : 1.5365                             
##  how.would.you.rate.your.study.load.
##  Min.   :-1.2680                    
##  1st Qu.:-0.5679                    
##  Median :-0.5679                    
##  Mean   : 0.0000                    
##  3rd Qu.: 0.8321                    
##  Max.   : 1.5321                    
##  How.many.times.a.week.you.practice.extracurricular.activities.
##  Min.   :-1.30129                                              
##  1st Qu.:-0.61161                                              
##  Median : 0.07808                                              
##  Mean   : 0.00000                                              
##  3rd Qu.: 0.76776                                              
##  Max.   : 1.45745                                              
##       Hour        
##  Min.   :-3.8801  
##  1st Qu.:-0.5363  
##  Median :-0.2576  
##  Mean   : 0.0000  
##  3rd Qu.: 0.2997  
##  Max.   : 2.5289
# Ubah kolom non-numerik (kecuali target) menjadi faktor
factor_cols <- sapply(df[predictor_cols], function(x) is.character(x) || is.factor(x))
factor_names <- names(factor_cols)[factor_cols]
for (col in factor_names) {
  df[[col]] <- factor(df[[col]])
  cat("Kolom", col, "diubah menjadi faktor dengan level:", levels(df[[col]]), "\n")
}
## Kolom DayOfWeek diubah menjadi faktor dengan level: Friday Saturday Sunday Tuesday
# Pastikan target juga faktor
df[[target_col]] <- factor(df[[target_col]])
cat("Target variable levels:", levels(df[[target_col]]), "\n")
## Target variable levels: 1 2 3 4 5
# 3. SPLIT DATA (TRAIN 60% - TEST 40%)
set.seed(123)
train_idx <- sample(seq_len(nrow(df)), 0.6 * nrow(df))
train <- df[train_idx, ]
test  <- df[-train_idx, ]

# Sinkronisasi faktor level di test dengan train untuk menghindari error faktor baru
factor_cols_all <- sapply(train, is.factor)
factor_names_all <- names(factor_cols_all)[factor_cols_all]

for (col in factor_names_all) {
  test[[col]] <- factor(test[[col]], levels=levels(train[[col]]))
}

cat("Ukuran data train :", nrow(train), "\n")
## Ukuran data train : 31
cat("Ukuran data test  :", nrow(test), "\n")
## Ukuran data test  : 22
# 4. LATIH MODEL MULTINOMIAL LOGISTIC REGRESSION
logit_mod <- multinom(as.formula(paste(target_col, "~ .")), data = train, trace = FALSE)
cat("Model berhasil dilatih:\n")
## Model berhasil dilatih:
logit_mod
## Call:
## multinom(formula = as.formula(paste(target_col, "~ .")), data = train, 
##     trace = FALSE)
## 
## Coefficients:
##   (Intercept) Kindly.Rate.your.Sleep.Quality.
## 2   0.2516130                       -2.029659
## 3   3.9339726                       -2.231933
## 4   0.4086354                       -2.353742
## 5  -1.4638098                        2.621609
##   How.many.times.a.week.do.you.suffer.headaches.
## 2                                      0.8239384
## 3                                     -0.3192163
## 4                                     -0.3316504
## 5                                      0.5279213
##   How.would.you.rate.you.academic.performance.
## 2                                    0.6843111
## 3                                   -0.6253891
## 4                                    0.2433560
## 5                                   -0.9193150
##   how.would.you.rate.your.study.load.
## 2                         -0.66354965
## 3                         -0.01320639
## 4                          0.23199355
## 5                          1.23614728
##   How.many.times.a.week.you.practice.extracurricular.activities.     Hour
## 2                                                      1.8051529 5.888951
## 3                                                      0.6952313 4.963428
## 4                                                      1.3664681 5.905651
## 5                                                      1.4748917 4.875895
##   DayOfWeekSaturday DayOfWeekSunday DayOfWeekTuesday
## 2         1.1802862      -0.9286732                0
## 3        -3.0670679       7.0010405                0
## 4         0.8786019      -0.4699665                0
## 5         0.6161171      -2.0799269                0
## 
## Residual Deviance: 55.93425 
## AIC: 119.9342
# 5. PREDIKSI DAN CONFUSION MATRIX
pred <- predict(logit_mod, test)
cm <- table(Actual = test[[target_col]], Predicted = pred)
cat("Confusion Matrix:\n")
## Confusion Matrix:
print(cm)
##       Predicted
## Actual 1 2 3 4 5
##      1 1 2 1 0 0
##      2 1 2 3 0 0
##      3 1 3 1 2 0
##      4 0 2 0 1 1
##      5 0 0 0 0 1
# 6. METRIK EVALUASI
acc  <- sum(diag(cm)) / sum(cm)
prec <- diag(cm) / colSums(cm)
rec  <- diag(cm) / rowSums(cm)
f1   <- 2 * prec * rec / (prec + rec)
macro_f1 <- mean(f1, na.rm = TRUE)
p0 <- acc
pe <- sum(rowSums(cm) * colSums(cm)) / (sum(cm)^2)
kappa <- (p0 - pe) / (1 - pe)

nice_num <- function(x, digits = 4) formatC(x, format = "f", digits = digits)
# 7. OUTPUT METRIK
cat("\n================ CONFUSION MATRIX ================\n")
## 
## ================ CONFUSION MATRIX ================
print(cm)
##       Predicted
## Actual 1 2 3 4 5
##      1 1 2 1 0 0
##      2 1 2 3 0 0
##      3 1 3 1 2 0
##      4 0 2 0 1 1
##      5 0 0 0 0 1
metrics_tbl <- data.frame(
  Kelas   = rownames(cm),
  Presisi = nice_num(prec),
  Recall  = nice_num(rec),
  F1      = nice_num(f1)
)

cat("\n================ METRIK PER KELAS ================\n")
## 
## ================ METRIK PER KELAS ================
print(metrics_tbl, row.names = FALSE)
##  Kelas Presisi Recall     F1
##      1  0.3333 0.2500 0.2857
##      2  0.2222 0.3333 0.2667
##      3  0.2000 0.1429 0.1667
##      4  0.3333 0.2500 0.2857
##      5  0.5000 1.0000 0.6667
cat("\n================ RINGKASAN EVALUASI ==============\n")
## 
## ================ RINGKASAN EVALUASI ==============
cat("Akurasi         :", nice_num(acc), "\n")
## Akurasi         : 0.2727
cat("Macro-F1        :", nice_num(macro_f1), "\n")
## Macro-F1        : 0.3343
cat("Cohen’s Kappa   :", nice_num(kappa), "\n")
## Cohen’s Kappa   : 0.0461
# 8. CROSS-VALIDATION 5-FOLD
k <- 5
folds <- sample(rep(1:k, length.out = nrow(df)))
fold_acc <- numeric(k)

for (i in seq_len(k)) {
  train_cv <- df[folds != i, ]
  test_cv  <- df[folds == i, ]
  
  # Sinkronisasi faktor level di test_cv dengan train_cv
  factor_cols_cv <- sapply(train_cv, is.factor)
  factor_names_cv <- names(factor_cols_cv)[factor_cols_cv]
  for (col in factor_names_cv) {
    test_cv[[col]] <- factor(test_cv[[col]], levels=levels(train_cv[[col]]))
  }
  
  mod_cv  <- multinom(as.formula(paste(target_col, "~ .")), data = train_cv, trace = FALSE)
  pred_cv <- predict(mod_cv, test_cv)
  fold_acc[i] <- mean(pred_cv == test_cv[[target_col]])
  cat(sprintf("Fold %d accuracy: %.4f\n", i, fold_acc[i]))
}
## Fold 1 accuracy: 0.2727
## Fold 2 accuracy: 0.1818
## Fold 3 accuracy: 0.1818
## Fold 4 accuracy: 0.1000
## Fold 5 accuracy: 0.2000
cat("\n========= 5-FOLD CROSS-VALIDATION ACCURACY =======\n")
## 
## ========= 5-FOLD CROSS-VALIDATION ACCURACY =======
cat("Akurasi rata-rata 5-Fold CV :", nice_num(mean(fold_acc)), " ± ", nice_num(sd(fold_acc)), "\n")
## Akurasi rata-rata 5-Fold CV : 0.1873  ±  0.0615