Load Library & Data

pkgs <- c("MASS", "ggplot2", "caret", "knitr", "dplyr",
          "ordinal", "car", "MVN", "biotools")
invisible(lapply(pkgs, function(p) {
  if (!require(p, character.only = TRUE))
    install.packages(p, repos = "https://cloud.r-project.org", quiet = TRUE)
  library(p, character.only = TRUE)
}))
df <- read.csv("sleep_health_dataset.csv", stringsAsFactors = TRUE)


df$disorder_ord <- factor(df$sleep_disorder_risk,
                           levels = c("Healthy","Mild","Moderate","Severe"),
                           ordered = TRUE)
df$disorder_fct <- factor(df$sleep_disorder_risk,
                           levels = c("Healthy","Mild","Moderate","Severe"),
                           ordered = FALSE)

# Prediktor numerik (dipakai di kedua model)
pred_vars <- c("sleep_duration_hrs","stress_score","rem_percentage",
               "deep_sleep_percentage","sleep_latency_mins",
               "wake_episodes_per_night","heart_rate_resting_bpm",
               "steps_that_day","caffeine_mg_before_bed","bmi")

kable(head(df[, c("age", pred_vars[1:4], "disorder_ord")], 6),
      caption = "Pratampil Data (6 Baris)")
Pratampil Data (6 Baris)
age sleep_duration_hrs stress_score rem_percentage deep_sleep_percentage disorder_ord
29 6.19 4.4 22.5 19.3 Healthy
55 8.32 4.0 26.9 14.9 Healthy
42 3.74 7.8 20.2 16.2 Severe
37 6.79 4.9 17.7 17.7 Healthy
23 5.02 7.4 23.3 18.3 Mild
23 8.16 5.5 17.3 20.1 Mild

Eksplorasi Data

Statistika Deskriptif

# Ringkasan numerik seluruh prediktor
desc <- do.call(rbind, lapply(pred_vars, function(v) {
  x <- df[[v]]
  data.frame(
    Variabel = v,
    N        = length(x),
    Mean     = round(mean(x, na.rm = TRUE), 3),
    SD       = round(sd(x,   na.rm = TRUE), 3),
    Min      = round(min(x,  na.rm = TRUE), 3),
    Q1       = round(quantile(x, .25, na.rm = TRUE), 3),
    Median   = round(median(x,   na.rm = TRUE), 3),
    Q3       = round(quantile(x, .75, na.rm = TRUE), 3),
    Max      = round(max(x,  na.rm = TRUE), 3),
    Skewness = round(moments::skewness(x, na.rm = TRUE), 3),
    Kurtosis = round(moments::kurtosis(x, na.rm = TRUE), 3)
  )
}))
kable(desc, caption = "Statistika Deskriptif – Variabel Prediktor",
      row.names = FALSE)
Statistika Deskriptif – Variabel Prediktor
Variabel N Mean SD Min Q1 Median Q3 Max Skewness Kurtosis
sleep_duration_hrs 1e+05 6.424 1.275 3 5.53 6.36 7.27 10.5 0.217 2.921
stress_score 1e+05 5.733 1.619 1 4.80 5.80 6.80 10.0 -0.364 3.251
rem_percentage 1e+05 20.244 3.411 10 18.00 20.30 22.60 30.0 -0.113 2.939
deep_sleep_percentage 1e+05 20.253 4.251 5 17.40 20.30 23.20 30.0 -0.109 2.826
sleep_latency_mins 1e+05 19.837 7.584 1 14.00 19.00 25.00 58.0 0.242 2.924
wake_episodes_per_night 1e+05 3.347 1.921 0 2.00 3.00 5.00 8.0 0.455 2.691
heart_rate_resting_bpm 1e+05 66.570 7.228 45 62.00 67.00 71.00 99.0 0.006 2.965
steps_that_day 1e+05 7496.860 3460.424 500 5045.00 7442.00 9887.00 20000.0 0.135 2.669
caffeine_mg_before_bed 1e+05 38.850 69.396 0 0.00 0.00 80.00 400.0 2.384 9.665
bmi 1e+05 26.290 4.480 16 23.20 26.30 29.30 45.0 0.072 2.819
# Frekuensi & proporsi variabel target
freq_tbl <- df %>%
  count(disorder_ord) %>%
  mutate(Proporsi = paste0(round(n / sum(n) * 100, 1), "%")) %>%
  rename(Kategori = disorder_ord, Frekuensi = n)
kable(freq_tbl, caption = "Distribusi Frekuensi – Sleep Disorder Risk")
Distribusi Frekuensi – Sleep Disorder Risk
Kategori Frekuensi Proporsi
Healthy 54156 54.2%
Mild 33479 33.5%
Moderate 8299 8.3%
Severe 4066 4.1%
# Statistika deskriptif prediktor per kelompok disorder
desc_grp <- df %>%
  group_by(disorder_fct) %>%
  summarise(across(all_of(pred_vars),
                   list(Mean = ~round(mean(.),2),
                        SD   = ~round(sd(.),2)),
                   .names = "{.col}_{.fn}"),
            .groups = "drop")

# Tampilkan transpose agar lebih mudah dibaca
desc_t <- as.data.frame(t(desc_grp[, -1]))
colnames(desc_t) <- levels(df$disorder_fct)
desc_t$Statistik <- rownames(desc_t)
rownames(desc_t) <- NULL
kable(desc_t[, c("Statistik", levels(df$disorder_fct))],
      caption = "Mean & SD Prediktor per Kelompok Sleep Disorder Risk")
Mean & SD Prediktor per Kelompok Sleep Disorder Risk
Statistik Healthy Mild Moderate Severe
sleep_duration_hrs_Mean 6.93 6.05 5.41 4.84
sleep_duration_hrs_SD 1.02 1.25 1.22 1.02
stress_score_Mean 5.04 6.32 7.05 7.48
stress_score_SD 1.48 1.34 1.22 1.16
rem_percentage_Mean 20.80 19.87 19.05 18.29
rem_percentage_SD 3.19 3.45 3.61 3.74
deep_sleep_percentage_Mean 20.62 20.00 19.44 19.03
deep_sleep_percentage_SD 4.17 4.27 4.30 4.38
sleep_latency_mins_Mean 17.74 21.38 23.94 26.62
sleep_latency_mins_SD 6.87 7.39 7.61 7.69
wake_episodes_per_night_Mean 2.75 3.86 4.40 4.89
wake_episodes_per_night_SD 1.73 1.87 1.87 1.80
heart_rate_resting_bpm_Mean 66.27 66.84 67.04 67.40
heart_rate_resting_bpm_SD 7.22 7.23 7.18 7.29
steps_that_day_Mean 7500.15 7517.39 7463.23 7352.69
steps_that_day_SD 3459.76 3461.84 3448.38 3479.47
caffeine_mg_before_bed_Mean 33.45 42.68 48.68 59.14
caffeine_mg_before_bed_SD 60.70 74.63 81.86 93.36
bmi_Mean 25.37 27.03 27.97 28.96
bmi_SD 4.15 4.58 4.59 4.46
ggplot(df, aes(x = disorder_fct, fill = disorder_fct)) +
  geom_bar(show.legend = FALSE) +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.4) +
  labs(title = "Distribusi Sleep Disorder Risk", x = NULL, y = "Frekuensi") +
  theme_minimal()

ggplot(df, aes(x = disorder_fct, y = stress_score, fill = disorder_fct)) +
  geom_boxplot(show.legend = FALSE, alpha = .7) +
  labs(title = "Stress Score per Kategori Sleep Disorder Risk",
       x = NULL, y = "Stress Score") +
  theme_minimal()

ggplot(df, aes(x = sleep_duration_hrs, y = stress_score, colour = disorder_fct)) +
  geom_point(alpha = .3, size = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(title = "Sleep Duration vs Stress Score per Disorder Risk",
       x = "Sleep Duration (hrs)", y = "Stress Score",
       colour = "Disorder Risk") +
  theme_minimal()


Uji Asumsi — Analisis Diskriminan (LDA)

LDA mensyaratkan tiga asumsi utama: (1) normalitas multivariat, (2) homogenitas matriks kovarians, dan (3) tidak ada multikolinearitas ekstrem.

1. Normalitas Multivariat (Uji Mardia)

# Uji normalitas univariat per variabel (Shapiro-Wilk, n dibatasi 500)
set.seed(1)
sub <- df[sample(nrow(df), 500), pred_vars]

sw_res <- lapply(pred_vars, function(v) {
  t <- shapiro.test(sub[[v]])
  data.frame(Variabel  = v,
             Statistik = round(t$statistic, 4),
             p_value   = round(t$p.value, 4),
             Normal    = ifelse(t$p.value > 0.05, "Ya", "Tidak"))
})
kable(do.call(rbind, sw_res),
      caption = "Uji Normalitas Univariat – Shapiro-Wilk (n = 500)")
Uji Normalitas Univariat – Shapiro-Wilk (n = 500)
Variabel Statistik p_value Normal
W sleep_duration_hrs 0.9934 0.0273 Tidak
W1 stress_score 0.9861 0.0001 Tidak
W2 rem_percentage 0.9973 0.5765 Ya
W3 deep_sleep_percentage 0.9956 0.1724 Ya
W4 sleep_latency_mins 0.9911 0.0042 Tidak
W5 wake_episodes_per_night 0.9551 0.0000 Tidak
W6 heart_rate_resting_bpm 0.9971 0.5380 Ya
W7 steps_that_day 0.9921 0.0096 Tidak
W8 caffeine_mg_before_bed 0.5758 0.0000 Tidak
W9 bmi 0.9936 0.0320 Tidak
# Uji Mardia manual (skewness & kurtosis multivariat) via moments
if (!require("moments", quietly = TRUE))
  install.packages("moments", repos = "https://cloud.r-project.org", quiet = TRUE)
library(moments)

X   <- as.matrix(scale(sub))
n   <- nrow(X); p <- ncol(X)
S   <- cov(X)
D   <- X %*% solve(S) %*% t(X)

# Mardia skewness
b1p  <- mean(D^3) / 6
chi2 <- n * b1p
df_m <- p * (p + 1) * (p + 2) / 6
p_sk <- pchisq(chi2, df_m, lower.tail = FALSE)

# Mardia kurtosis
b2p  <- mean(diag(D^2))
z_ku <- (b2p - p*(p+2)) / sqrt(8*p*(p+2)/n)
p_ku <- 2 * pnorm(abs(z_ku), lower.tail = FALSE)

kable(data.frame(
  Uji        = c("Mardia Skewness","Mardia Kurtosis"),
  Statistik  = round(c(chi2, z_ku), 4),
  p_value    = round(c(p_sk, p_ku), 4),
  Kesimpulan = ifelse(c(p_sk, p_ku) > 0.05,
                      "Normal Multivariat", "Tidak Normal Multivariat")
), caption = "Uji Normalitas Multivariat – Mardia (manual)")
Uji Normalitas Multivariat – Mardia (manual)
Uji Statistik p_value Kesimpulan
Mardia Skewness 899.7327 0 Tidak Normal Multivariat
Mardia Kurtosis 6.6593 0 Tidak Normal Multivariat

Catatan: Jika asumsi normalitas multivariat dilanggar, LDA masih relatif robust pada ukuran sampel besar (n ≥ 100 per kelompok). Alternatif: QDA atau metode non-parametrik.

2. Homogenitas Matriks Kovarians (Box’s M Test)

X   <- as.matrix(df[, pred_vars])
grp <- df$disorder_fct

boxm_res <- boxM(X, grp)
print(boxm_res)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  X
## Chi-Sq (approx.) = 11021, df = 165, p-value < 2.2e-16
cat("\nKesimpulan: p-value =", round(boxm_res$p.value, 4),
    ifelse(boxm_res$p.value < 0.05,
           "→ Matriks kovarians TIDAK homogen (tolak H0).",
           "→ Matriks kovarians homogen (gagal tolak H0)."), "\n")
## 
## Kesimpulan: p-value = 0 → Matriks kovarians TIDAK homogen (tolak H0).

Catatan: Box’s M sangat sensitif pada sampel besar; p-value kecil belum tentu berarti perbedaan praktis. Jika matriks tidak homogen, pertimbangkan QDA sebagai alternatif.

3. Multikolinearitas (VIF via Regresi Linear Bantu)

# Gunakan regresi linear bantu (target numerik dummy) untuk hitung VIF
lm_aux <- lm(as.numeric(disorder_fct) ~ ., data = df[, c("disorder_fct", pred_vars)])
vif_val <- vif(lm_aux)

kable(data.frame(Variabel = names(vif_val),
                 VIF      = round(vif_val, 3),
                 Status   = ifelse(vif_val > 10, "âš  Tinggi",
                             ifelse(vif_val > 5, "Sedang", "✓ Aman"))),
      caption = "Variance Inflation Factor (VIF) – Prediktor LDA")
Variance Inflation Factor (VIF) – Prediktor LDA
Variabel VIF Status
sleep_duration_hrs sleep_duration_hrs 1.339 ✓ Aman
stress_score stress_score 1.426 ✓ Aman
rem_percentage rem_percentage 1.047 ✓ Aman
deep_sleep_percentage deep_sleep_percentage 1.049 ✓ Aman
sleep_latency_mins sleep_latency_mins 1.154 ✓ Aman
wake_episodes_per_night wake_episodes_per_night 1.101 ✓ Aman
heart_rate_resting_bpm heart_rate_resting_bpm 1.070 ✓ Aman
steps_that_day steps_that_day 1.092 ✓ Aman
caffeine_mg_before_bed caffeine_mg_before_bed 1.119 ✓ Aman
bmi bmi 1.063 ✓ Aman

Acuan: VIF < 5 = aman, 5–10 = perlu perhatian, > 10 = multikolinearitas serius.


Uji Asumsi — Regresi Logistik Ordinal (POLR)

POLR mensyaratkan dua asumsi utama: (1) proportional odds (paralel lines) dan (2) tidak ada multikolinearitas ekstrem.

1. Uji Proportional Odds (Brant Test)

# Fit model POLR sementara untuk uji Brant
# Paket brant diperlukan
if (!require("brant", quietly = TRUE))
  install.packages("brant", repos = "https://cloud.r-project.org", quiet = TRUE)
library(brant)

polr_tmp <- polr(
  disorder_ord ~ sleep_duration_hrs + stress_score + rem_percentage +
                  deep_sleep_percentage + sleep_latency_mins +
                  wake_episodes_per_night + heart_rate_resting_bpm +
                  steps_that_day + caffeine_mg_before_bed + bmi,
  data = df, Hess = TRUE
)

brant_res <- brant(polr_tmp)
## ------------------------------------------------------------ 
## Test for         X2  df  probability 
## ------------------------------------------------------------ 
## Omnibus              811.61  20  0
## sleep_duration_hrs       557.93  2   0
## stress_score         37.63   2   0
## rem_percentage           21.99   2   0
## deep_sleep_percentage        2.03    2   0.36
## sleep_latency_mins       54.35   2   0
## wake_episodes_per_night  108.76  2   0
## heart_rate_resting_bpm   2.31    2   0.31
## steps_that_day           6.51    2   0.04
## caffeine_mg_before_bed   4.59    2   0.1
## bmi              23.35   2   0
## ------------------------------------------------------------ 
## 
## H0: Parallel Regression Assumption holds
print(brant_res)
##                                 X2 df   probability
## Omnibus                 811.606567 20 4.860332e-159
## sleep_duration_hrs      557.934361  2 7.016054e-122
## stress_score             37.628043  2  6.747989e-09
## rem_percentage           21.987225  2  1.680873e-05
## deep_sleep_percentage     2.031046  2  3.622130e-01
## sleep_latency_mins       54.352045  2  1.576171e-12
## wake_episodes_per_night 108.760784  2  2.414882e-24
## heart_rate_resting_bpm    2.311447  2  3.148297e-01
## steps_that_day            6.508261  2  3.861439e-02
## caffeine_mg_before_bed    4.589947  2  1.007641e-01
## bmi                      23.346064  2  8.520531e-06

Interpretasi: Jika p-value uji Omnibus < 0.05, asumsi proportional odds dilanggar → pertimbangkan model partial proportional odds atau multinomial logistik.

2. Multikolinearitas (VIF)

VIF untuk POLR menggunakan prediktor yang sama, sehingga hasil identik dengan tabel VIF pada bagian LDA di atas — tidak perlu dihitung ulang.

kable(data.frame(Variabel = names(vif_val),
                 VIF      = round(vif_val, 3),
                 Status   = ifelse(vif_val > 10, "âš  Tinggi",
                             ifelse(vif_val > 5, "Sedang", "✓ Aman"))),
      caption = "VIF – Prediktor POLR (sama dengan LDA)")
VIF – Prediktor POLR (sama dengan LDA)
Variabel VIF Status
sleep_duration_hrs sleep_duration_hrs 1.339 ✓ Aman
stress_score stress_score 1.426 ✓ Aman
rem_percentage rem_percentage 1.047 ✓ Aman
deep_sleep_percentage deep_sleep_percentage 1.049 ✓ Aman
sleep_latency_mins sleep_latency_mins 1.154 ✓ Aman
wake_episodes_per_night wake_episodes_per_night 1.101 ✓ Aman
heart_rate_resting_bpm heart_rate_resting_bpm 1.070 ✓ Aman
steps_that_day steps_that_day 1.092 ✓ Aman
caffeine_mg_before_bed caffeine_mg_before_bed 1.119 ✓ Aman
bmi bmi 1.063 ✓ Aman

Analisis Diskriminan (LDA)

Split Data

set.seed(123)
idx   <- createDataPartition(df$disorder_ord, p = .8, list = FALSE)
train <- df[idx, ]
test  <- df[-idx, ]
cat("Train:", nrow(train), "| Test:", nrow(test), "\n")
## Train: 80002 | Test: 19998

Fit LDA

lda_mod <- lda(
  disorder_fct ~ sleep_duration_hrs + stress_score + rem_percentage +
                  deep_sleep_percentage + sleep_latency_mins +
                  wake_episodes_per_night + heart_rate_resting_bpm +
                  steps_that_day + caffeine_mg_before_bed + bmi,
  data = train
)
lda_mod
## Call:
## lda(disorder_fct ~ sleep_duration_hrs + stress_score + rem_percentage + 
##     deep_sleep_percentage + sleep_latency_mins + wake_episodes_per_night + 
##     heart_rate_resting_bpm + steps_that_day + caffeine_mg_before_bed + 
##     bmi, data = train)
## 
## Prior probabilities of groups:
##    Healthy       Mild   Moderate     Severe 
## 0.54154896 0.33479163 0.08299793 0.04066148 
## 
## Group means:
##          sleep_duration_hrs stress_score rem_percentage deep_sleep_percentage
## Healthy            6.931118     5.032824       20.81180              20.61684
## Mild               6.056926     6.328943       19.87379              20.01497
## Moderate           5.414247     7.044654       19.04586              19.42367
## Severe             4.844608     7.470089       18.28783              18.99318
##          sleep_latency_mins wake_episodes_per_night heart_rate_resting_bpm
## Healthy            17.74576                2.763254               66.26283
## Mild               21.42675                3.856369               66.84185
## Moderate           23.94352                4.396235               67.01069
## Severe             26.58561                4.880725               67.47618
##          steps_that_day caffeine_mg_before_bed      bmi
## Healthy        7491.556               33.46105 25.36675
## Mild           7509.467               42.59147 27.01663
## Moderate       7425.834               47.87952 28.00367
## Severe         7341.761               59.86474 28.91233
## 
## Coefficients of linear discriminants:
##                                   LD1           LD2           LD3
## sleep_duration_hrs      -4.481923e-01 -5.847640e-01  0.0822635533
## stress_score             3.141970e-01 -5.758674e-01 -0.2262654184
## rem_percentage          -7.242110e-02 -1.169086e-01  0.0479184141
## deep_sleep_percentage   -1.159923e-02 -4.083165e-02  0.0687148523
## sleep_latency_mins       6.283195e-02  3.421146e-02  0.0158277163
## wake_episodes_per_night  2.246678e-01 -1.414664e-01  0.2591849767
## heart_rate_resting_bpm  -3.333400e-04 -1.377471e-03  0.0595406239
## steps_that_day           6.247955e-06 -1.693401e-05  0.0000410205
## caffeine_mg_before_bed   3.448319e-04  1.208402e-03  0.0080477557
## bmi                      8.378567e-02  7.434680e-03  0.0076181726
## 
## Proportion of trace:
##    LD1    LD2    LD3 
## 0.9940 0.0057 0.0003

Proporsi Variansi Antar-Kelompok

prop <- round(lda_mod$svd^2 / sum(lda_mod$svd^2) * 100, 2)
kable(data.frame(LD            = paste0("LD", seq_along(prop)),
                 `Proporsi (%)` = prop,
                 `Kumulatif (%)` = cumsum(prop)),
      caption = "Proporsi Variansi Antar-Kelompok")
Proporsi Variansi Antar-Kelompok
LD Proporsi…. Kumulatif….
LD1 99.40 99.40
LD2 0.57 99.97
LD3 0.03 100.00

Plot Skor Diskriminan

scores <- data.frame(predict(lda_mod, train)$x, Disorder = train$disorder_fct)

ggplot(scores, aes(LD1, LD2, colour = Disorder)) +
  geom_point(alpha = .4, size = 1.2) +
  stat_ellipse(level = .75, linewidth = .8) +
  labs(title = "Fungsi Diskriminan Linear (LD1 vs LD2)",
       x = "LD1", y = "LD2") +
  theme_minimal()

Evaluasi – Confusion Matrix & Akurasi

pred_lda <- predict(lda_mod, test)$class
cm_lda   <- confusionMatrix(pred_lda, test$disorder_fct)

# Heatmap confusion matrix LDA
cm_lda_df <- as.data.frame(cm_lda$table)
colnames(cm_lda_df) <- c("Prediksi", "Aktual", "N")

ggplot(cm_lda_df, aes(x = Aktual, y = Prediksi, fill = N)) +
  geom_tile(colour = "white", linewidth = 0.8) +
  geom_text(aes(label = N), size = 5, fontface = "bold", colour = "white") +
  scale_fill_gradient(low = "#d6e4f0", high = "#1a5276") +
  labs(title = "Confusion Matrix – LDA",
       x = "Kelas Aktual", y = "Kelas Prediksi", fill = "Jumlah") +
  theme_minimal(base_size = 13) +
  theme(panel.grid = element_blank(),
        axis.text  = element_text(size = 11))

cat("Akurasi LDA :", round(cm_lda$overall["Accuracy"] * 100, 2), "%\n")
## Akurasi LDA : 71.05 %
cat("Kappa       :", round(cm_lda$overall["Kappa"], 4), "\n")
## Kappa       : 0.4808

Regresi Logistik Ordinal (POLR)

Fit Model

polr_mod <- polr(
  disorder_ord ~ sleep_duration_hrs + stress_score + rem_percentage +
                  deep_sleep_percentage + sleep_latency_mins +
                  wake_episodes_per_night + heart_rate_resting_bpm +
                  steps_that_day + caffeine_mg_before_bed + bmi,
  data = train, Hess = TRUE
)
summary(polr_mod)
## Call:
## polr(formula = disorder_ord ~ sleep_duration_hrs + stress_score + 
##     rem_percentage + deep_sleep_percentage + sleep_latency_mins + 
##     wake_episodes_per_night + heart_rate_resting_bpm + steps_that_day + 
##     caffeine_mg_before_bed + bmi, data = train, Hess = TRUE)
## 
## Coefficients:
##                              Value Std. Error   t value
## sleep_duration_hrs      -7.936e-01  0.0070923 -111.8936
## stress_score             6.054e-01  0.0061687   98.1476
## rem_percentage          -1.140e-01  0.0022153  -51.4595
## deep_sleep_percentage   -1.464e-02  0.0017847   -8.2035
## sleep_latency_mins       1.049e-01  0.0012042   87.1476
## wake_episodes_per_night  3.555e-01  0.0045077   78.8615
## heart_rate_resting_bpm  -3.394e-04  0.0009394   -0.3613
## steps_that_day           8.453e-06        NaN       NaN
## caffeine_mg_before_bed   4.573e-04  0.0001189    3.8453
## bmi                      1.431e-01  0.0018327   78.0653
## 
## Intercepts:
##                 Value      Std. Error t value   
## Healthy|Mild        3.1825     0.0003 12245.2091
## Mild|Moderate       6.3691     0.0178   357.1747
## Moderate|Severe     8.1593     0.0261   313.1233
## 
## Residual Deviance: 109524.28 
## AIC: 109550.28

Odds Ratio & Uji Signifikansi

ctbl  <- coef(summary(polr_mod))
pval  <- pnorm(abs(ctbl[, "t value"]), lower.tail = FALSE) * 2
result <- data.frame(
  Koefisien = round(ctbl[, "Value"], 4),
  SE        = round(ctbl[, "Std. Error"], 4),
  OR        = round(exp(ctbl[, "Value"]), 4),
  p_value   = round(pval, 4),
  Sig       = ifelse(pval < .001, "***",
               ifelse(pval < .01,  "**",
                ifelse(pval < .05, "*", "")))
)
kable(result, caption = "Koefisien, Odds Ratio, dan p-value Model POLR")
Koefisien, Odds Ratio, dan p-value Model POLR
Koefisien SE OR p_value Sig
sleep_duration_hrs -0.7936 0.0071 0.4522 0.0000 ***
stress_score 0.6054 0.0062 1.8321 0.0000 ***
rem_percentage -0.1140 0.0022 0.8923 0.0000 ***
deep_sleep_percentage -0.0146 0.0018 0.9855 0.0000 ***
sleep_latency_mins 0.1049 0.0012 1.1107 0.0000 ***
wake_episodes_per_night 0.3555 0.0045 1.4269 0.0000 ***
heart_rate_resting_bpm -0.0003 0.0009 0.9997 0.7179
steps_that_day 0.0000 NaN 1.0000 NaN NA
caffeine_mg_before_bed 0.0005 0.0001 1.0005 0.0001 ***
bmi 0.1431 0.0018 1.1538 0.0000 ***
Healthy|Mild 3.1825 0.0003 24.1078 0.0000 ***
Mild|Moderate 6.3691 0.0178 583.5247 0.0000 ***
Moderate|Severe 8.1593 0.0261 3495.7304 0.0000 ***

Evaluasi Model

pred_polr <- predict(polr_mod, test)
cm_polr   <- confusionMatrix(pred_polr, test$disorder_ord)

# Heatmap confusion matrix POLR
cm_polr_df <- as.data.frame(cm_polr$table)
colnames(cm_polr_df) <- c("Prediksi", "Aktual", "N")

ggplot(cm_polr_df, aes(x = Aktual, y = Prediksi, fill = N)) +
  geom_tile(colour = "white", linewidth = 0.8) +
  geom_text(aes(label = N), size = 5, fontface = "bold", colour = "white") +
  scale_fill_gradient(low = "#d5f5e3", high = "#1e8449") +
  labs(title = "Confusion Matrix – POLR",
       x = "Kelas Aktual", y = "Kelas Prediksi", fill = "Jumlah") +
  theme_minimal(base_size = 13) +
  theme(panel.grid = element_blank(),
        axis.text  = element_text(size = 11))

cat("Akurasi POLR :", round(cm_polr$overall["Accuracy"] * 100, 2), "%\n")
## Akurasi POLR : 71.61 %
cat("Kappa        :", round(cm_polr$overall["Kappa"], 4), "\n")
## Kappa        : 0.4976

Visualisasi Prediksi vs Aktual

res_df <- data.frame(Aktual = test$disorder_ord, Prediksi = pred_polr)

ggplot(res_df, aes(x = Aktual, fill = Prediksi)) +
  geom_bar(position = "fill") +
  scale_fill_brewer(palette = "RdYlGn", direction = -1) +
  labs(title = "Proporsi Prediksi POLR per Kategori Aktual Sleep Disorder Risk",
       x = "Disorder Risk Aktual", y = "Proporsi", fill = "Prediksi") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 15, hjust = 1))


Rangkuman

kable(data.frame(
  Metode    = c("LDA","POLR"),
  Target    = "sleep_disorder_risk (4 kelas)",
  Prediktor = "10 variabel numerik (sama)",
  Akurasi   = c(paste0(round(cm_lda$overall["Accuracy"]*100,  2), "%"),
                paste0(round(cm_polr$overall["Accuracy"]*100, 2), "%")),
  Kappa     = c(round(cm_lda$overall["Kappa"],  4),
                round(cm_polr$overall["Kappa"], 4))
), caption = "Perbandingan Performa Model")
Perbandingan Performa Model
Metode Target Prediktor Akurasi Kappa
LDA sleep_disorder_risk (4 kelas) 10 variabel numerik (sama) 71.05% 0.4808
POLR sleep_disorder_risk (4 kelas) 10 variabel numerik (sama) 71.61% 0.4976

Interpretasi singkat: