# 1. LOAD & TIDY THE DATA --------------------------------
# Path to the survey CSV; change it if the file lives elsewhere.
file_csv <- "Student Stress Factors.csv"
df <- read.csv(file_csv, stringsAsFactors = FALSE)
# Rewrite the column names so they are syntactically valid in an R formula.
names(df) <- make.names(names(df))
# Name of the target column as it appears after make.names().
target_col <- "How.would.you.rate.your.stress.levels."
# Fail early with a readable message if the header does not match; otherwise
# the assignment below stops with an opaque "replacement has 0 rows" error.
if (!target_col %in% names(df)) {
  stop(
    "Target column '", target_col, "' not found in ", file_csv,
    ". Available columns: ", paste(names(df), collapse = ", "),
    call. = FALSE
  )
}
# Make sure the target is a factor.
df[[target_col]] <- factor(df[[target_col]])
# 2. PRE-PROCESSING ---------------------------------------
# Drop every row that contains an NA.
df <- na.omit(df)
# Standardise the numeric columns (everything except the target).
predictor_cols <- setdiff(names(df), target_col)
# vapply() always returns a logical vector, even when there are no predictors.
num_cols <- vapply(df[predictor_cols], is.numeric, logical(1))
scale_cols <- predictor_cols[num_cols]
if (length(scale_cols) > 0) {
  # scale() divides by the column sd, so a constant column would become NaN
  # after the NA rows have already been removed. Leave such columns untouched.
  col_sd <- vapply(df[scale_cols], sd, numeric(1))
  scale_cols <- scale_cols[is.finite(col_sd) & col_sd > 0]
}
if (length(scale_cols) > 0) {
  # Centre each remaining column on its mean and divide by its sd.
  df[scale_cols] <- scale(df[scale_cols])
}
# 3. TRAIN : TEST SPLIT (60 : 40) -------------------------
# Fix the RNG seed so the partition is reproducible.
set.seed(123)
n_obs <- nrow(df)
# Draw 60% of the row numbers for the training set.
train_idx <- sample(seq_len(n_obs), size = 0.6 * n_obs)
# Every row that was not drawn is held out for testing.
test <- df[-train_idx, ]
train <- df[train_idx, ]
# 4. FIT THE MULTICLASS LOGISTIC REGRESSION MODEL ---------
library(nnet) # provides multinom()
# Regress the target on every other column of the training set.
model_formula <- reformulate(".", response = target_col)
logit_mod <- multinom(model_formula, data = train, trace = FALSE)
# 5. PREDICTION & CONFUSION MATRIX ------------------------
pred <- predict(logit_mod, test)
# Re-level the predictions on the target's full set of classes. predict() can
# only return classes the model was fitted with; if a class is missing from
# the training split the table would otherwise be non-square and diag(cm)
# would no longer line up with the class labels.
pred <- factor(as.character(pred), levels = levels(test[[target_col]]))
# Rows are the observed classes, columns the predicted ones.
cm <- table(Actual = test[[target_col]], Predicted = pred)
# 6. COMPUTE THE METRICS ----------------------------------
# Accuracy: correctly classified cases (the diagonal) over all cases.
acc <- sum(diag(cm)) / sum(cm)
# Per-class precision (diagonal over column totals) and recall (diagonal over
# row totals). Either is NaN when the corresponding total is zero.
prec <- diag(cm) / colSums(cm)
rec <- diag(cm) / rowSums(cm)
# Per-class F1 written as 2*TP / (2*TP + FP + FN). This equals
# 2 * prec * rec / (prec + rec) whenever that expression is defined, but gives
# 0 instead of NaN for a class with no true positives, so such a class still
# counts in the macro average rather than being dropped by na.rm = TRUE.
# F1 stays NaN only for a class with no observed and no predicted cases.
f1 <- 2 * diag(cm) / (rowSums(cm) + colSums(cm))
macro_f1 <- mean(f1, na.rm = TRUE)
# Cohen's kappa: observed agreement (p0) corrected for the agreement expected
# by chance (pe), computed from the row and column totals.
# NOTE: the name `kappa` masks base::kappa() for the rest of the script.
p0 <- acc
pe <- sum(rowSums(cm) * colSums(cm)) / (sum(cm)^2)
kappa <- (p0 - pe) / (1 - pe)
# 7. HELPERS & TIDY OUTPUT --------------------------------
# Format numbers as fixed-point text with `digits` decimals (default 4).
nice_num <- function(x, digits = 4) {
  formatC(x, digits = digits, format = "f")
}
# Horizontal rule of 62 "=" characters used to frame the report sections.
border <- paste(rep("=", 62), collapse = "")
# Section header: blank line, rule, then the title.
cat(paste0("\n", border, "\nCONFUSION MATRIX\n"))
##
## ==============================================================
## CONFUSION MATRIX
# Print the table itself (recorded output of one run is kept below).
print(cm)
## Predicted
## Actual 1 2 3 4 5
## 1 18 14 5 5 3
## 2 12 20 5 0 8
## 3 3 8 23 11 13
## 4 8 10 7 1 4
## 5 6 1 10 0 13
# Closing rule followed by an empty line.
cat(paste0(border, "\n\n"))
## ==============================================================
# Per-class metrics, formatted as text via nice_num() for printing.
metrics_tbl <- data.frame(
  Kelas = rownames(cm),
  Presisi = nice_num(prec),
  Recall = nice_num(rec),
  F1 = nice_num(f1)
)
cat("METRIK PER KELAS\n")
## METRIK PER KELAS
print(metrics_tbl, row.names = FALSE)
## Kelas Presisi Recall F1
## 1 0.3830 0.4000 0.3913
## 2 0.3774 0.4444 0.4082
## 3 0.4600 0.3966 0.4259
## 4 0.0588 0.0333 0.0426
## 5 0.3171 0.4333 0.3662
# Overall summary figures.
cat("\nAkurasi : ", nice_num(acc), "\n", sep = "")
##
## Akurasi : 0.3606
cat("Macro-F1 : ", nice_num(macro_f1), "\n", sep = "")
## Macro-F1 : 0.3268
cat("Cohen’s Kappa : ", nice_num(kappa), "\n", sep = "")
## Cohen’s Kappa : 0.1893
# sep = "" keeps this rule free of a trailing space, matching the other
# border lines in the report.
cat(border, "\n", sep = "")
## ==============================================================
# 8. 5-FOLD CROSS-VALIDATION ------------------------------
k <- 5
# Randomly assign every row to one of the k folds (sizes differ by at most 1).
folds <- sample(rep(seq_len(k), length.out = nrow(df)))
# The formula is the same for every fold, so build it once.
cv_formula <- as.formula(paste(target_col, "~ ."))
fold_acc <- numeric(k)
for (i in seq_len(k)) {
  # Fold i is held out; the remaining folds are used for fitting.
  train_cv <- df[folds != i, ]
  test_cv <- df[folds == i, ]
  mod_cv <- multinom(cv_formula, data = train_cv, trace = FALSE)
  pred_cv <- predict(mod_cv, test_cv)
  # Compare the labels as character: `==` on two factors stops with
  # "level sets of factors are different" when a class is missing from the
  # training folds and therefore from the prediction levels.
  actual_cv <- as.character(test_cv[[target_col]])
  fold_acc[i] <- mean(as.character(pred_cv) == actual_cv)
}
# Mean and standard deviation of the per-fold accuracies.
cat("\nAkurasi 5-Fold CV : ",
  nice_num(mean(fold_acc)), " ± ", nice_num(sd(fold_acc)), "\n",
  border, "\n",
  sep = ""
)
##
## Akurasi 5-Fold CV : 0.3788 ± 0.0629
## ==============================================================