pkgs <- c("MASS", "ggplot2", "caret", "knitr", "dplyr",
"ordinal", "car", "MVN", "biotools")
invisible(lapply(pkgs, function(p) {
if (!require(p, character.only = TRUE))
install.packages(p, repos = "https://cloud.r-project.org", quiet = TRUE)
library(p, character.only = TRUE)
}))
df <- read.csv("sleep_health_dataset.csv", stringsAsFactors = TRUE)
df$disorder_ord <- factor(df$sleep_disorder_risk,
levels = c("Healthy","Mild","Moderate","Severe"),
ordered = TRUE)
df$disorder_fct <- factor(df$sleep_disorder_risk,
levels = c("Healthy","Mild","Moderate","Severe"),
ordered = FALSE)
# Prediktor numerik (dipakai di kedua model)
pred_vars <- c("sleep_duration_hrs","stress_score","rem_percentage",
"deep_sleep_percentage","sleep_latency_mins",
"wake_episodes_per_night","heart_rate_resting_bpm",
"steps_that_day","caffeine_mg_before_bed","bmi")
kable(head(df[, c("age", pred_vars[1:4], "disorder_ord")], 6),
caption = "Pratampil Data (6 Baris)")
| age | sleep_duration_hrs | stress_score | rem_percentage | deep_sleep_percentage | disorder_ord |
|---|---|---|---|---|---|
| 29 | 6.19 | 4.4 | 22.5 | 19.3 | Healthy |
| 55 | 8.32 | 4.0 | 26.9 | 14.9 | Healthy |
| 42 | 3.74 | 7.8 | 20.2 | 16.2 | Severe |
| 37 | 6.79 | 4.9 | 17.7 | 17.7 | Healthy |
| 23 | 5.02 | 7.4 | 23.3 | 18.3 | Mild |
| 23 | 8.16 | 5.5 | 17.3 | 20.1 | Mild |
# Ringkasan numerik seluruh prediktor
desc <- do.call(rbind, lapply(pred_vars, function(v) {
x <- df[[v]]
data.frame(
Variabel = v,
N = length(x),
Mean = round(mean(x, na.rm = TRUE), 3),
SD = round(sd(x, na.rm = TRUE), 3),
Min = round(min(x, na.rm = TRUE), 3),
Q1 = round(quantile(x, .25, na.rm = TRUE), 3),
Median = round(median(x, na.rm = TRUE), 3),
Q3 = round(quantile(x, .75, na.rm = TRUE), 3),
Max = round(max(x, na.rm = TRUE), 3),
Skewness = round(moments::skewness(x, na.rm = TRUE), 3),
Kurtosis = round(moments::kurtosis(x, na.rm = TRUE), 3)
)
}))
kable(desc, caption = "Statistika Deskriptif – Variabel Prediktor",
row.names = FALSE)
| Variabel | N | Mean | SD | Min | Q1 | Median | Q3 | Max | Skewness | Kurtosis |
|---|---|---|---|---|---|---|---|---|---|---|
| sleep_duration_hrs | 1e+05 | 6.424 | 1.275 | 3 | 5.53 | 6.36 | 7.27 | 10.5 | 0.217 | 2.921 |
| stress_score | 1e+05 | 5.733 | 1.619 | 1 | 4.80 | 5.80 | 6.80 | 10.0 | -0.364 | 3.251 |
| rem_percentage | 1e+05 | 20.244 | 3.411 | 10 | 18.00 | 20.30 | 22.60 | 30.0 | -0.113 | 2.939 |
| deep_sleep_percentage | 1e+05 | 20.253 | 4.251 | 5 | 17.40 | 20.30 | 23.20 | 30.0 | -0.109 | 2.826 |
| sleep_latency_mins | 1e+05 | 19.837 | 7.584 | 1 | 14.00 | 19.00 | 25.00 | 58.0 | 0.242 | 2.924 |
| wake_episodes_per_night | 1e+05 | 3.347 | 1.921 | 0 | 2.00 | 3.00 | 5.00 | 8.0 | 0.455 | 2.691 |
| heart_rate_resting_bpm | 1e+05 | 66.570 | 7.228 | 45 | 62.00 | 67.00 | 71.00 | 99.0 | 0.006 | 2.965 |
| steps_that_day | 1e+05 | 7496.860 | 3460.424 | 500 | 5045.00 | 7442.00 | 9887.00 | 20000.0 | 0.135 | 2.669 |
| caffeine_mg_before_bed | 1e+05 | 38.850 | 69.396 | 0 | 0.00 | 0.00 | 80.00 | 400.0 | 2.384 | 9.665 |
| bmi | 1e+05 | 26.290 | 4.480 | 16 | 23.20 | 26.30 | 29.30 | 45.0 | 0.072 | 2.819 |
# Frekuensi & proporsi variabel target
freq_tbl <- df %>%
count(disorder_ord) %>%
mutate(Proporsi = paste0(round(n / sum(n) * 100, 1), "%")) %>%
rename(Kategori = disorder_ord, Frekuensi = n)
kable(freq_tbl, caption = "Distribusi Frekuensi – Sleep Disorder Risk")
| Kategori | Frekuensi | Proporsi |
|---|---|---|
| Healthy | 54156 | 54.2% |
| Mild | 33479 | 33.5% |
| Moderate | 8299 | 8.3% |
| Severe | 4066 | 4.1% |
# Statistika deskriptif prediktor per kelompok disorder
desc_grp <- df %>%
group_by(disorder_fct) %>%
summarise(across(all_of(pred_vars),
list(Mean = ~round(mean(.),2),
SD = ~round(sd(.),2)),
.names = "{.col}_{.fn}"),
.groups = "drop")
# Tampilkan transpose agar lebih mudah dibaca
desc_t <- as.data.frame(t(desc_grp[, -1]))
colnames(desc_t) <- levels(df$disorder_fct)
desc_t$Statistik <- rownames(desc_t)
rownames(desc_t) <- NULL
kable(desc_t[, c("Statistik", levels(df$disorder_fct))],
caption = "Mean & SD Prediktor per Kelompok Sleep Disorder Risk")
| Statistik | Healthy | Mild | Moderate | Severe |
|---|---|---|---|---|
| sleep_duration_hrs_Mean | 6.93 | 6.05 | 5.41 | 4.84 |
| sleep_duration_hrs_SD | 1.02 | 1.25 | 1.22 | 1.02 |
| stress_score_Mean | 5.04 | 6.32 | 7.05 | 7.48 |
| stress_score_SD | 1.48 | 1.34 | 1.22 | 1.16 |
| rem_percentage_Mean | 20.80 | 19.87 | 19.05 | 18.29 |
| rem_percentage_SD | 3.19 | 3.45 | 3.61 | 3.74 |
| deep_sleep_percentage_Mean | 20.62 | 20.00 | 19.44 | 19.03 |
| deep_sleep_percentage_SD | 4.17 | 4.27 | 4.30 | 4.38 |
| sleep_latency_mins_Mean | 17.74 | 21.38 | 23.94 | 26.62 |
| sleep_latency_mins_SD | 6.87 | 7.39 | 7.61 | 7.69 |
| wake_episodes_per_night_Mean | 2.75 | 3.86 | 4.40 | 4.89 |
| wake_episodes_per_night_SD | 1.73 | 1.87 | 1.87 | 1.80 |
| heart_rate_resting_bpm_Mean | 66.27 | 66.84 | 67.04 | 67.40 |
| heart_rate_resting_bpm_SD | 7.22 | 7.23 | 7.18 | 7.29 |
| steps_that_day_Mean | 7500.15 | 7517.39 | 7463.23 | 7352.69 |
| steps_that_day_SD | 3459.76 | 3461.84 | 3448.38 | 3479.47 |
| caffeine_mg_before_bed_Mean | 33.45 | 42.68 | 48.68 | 59.14 |
| caffeine_mg_before_bed_SD | 60.70 | 74.63 | 81.86 | 93.36 |
| bmi_Mean | 25.37 | 27.03 | 27.97 | 28.96 |
| bmi_SD | 4.15 | 4.58 | 4.59 | 4.46 |
ggplot(df, aes(x = disorder_fct, fill = disorder_fct)) +
geom_bar(show.legend = FALSE) +
geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.4) +
labs(title = "Distribusi Sleep Disorder Risk", x = NULL, y = "Frekuensi") +
theme_minimal()
ggplot(df, aes(x = disorder_fct, y = stress_score, fill = disorder_fct)) +
geom_boxplot(show.legend = FALSE, alpha = .7) +
labs(title = "Stress Score per Kategori Sleep Disorder Risk",
x = NULL, y = "Stress Score") +
theme_minimal()
ggplot(df, aes(x = sleep_duration_hrs, y = stress_score, colour = disorder_fct)) +
geom_point(alpha = .3, size = 1) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "Sleep Duration vs Stress Score per Disorder Risk",
x = "Sleep Duration (hrs)", y = "Stress Score",
colour = "Disorder Risk") +
theme_minimal()
LDA mensyaratkan tiga asumsi utama: (1) normalitas multivariat, (2) homogenitas matriks kovarians, dan (3) tidak ada multikolinearitas ekstrem.
# Uji normalitas univariat per variabel (Shapiro-Wilk, n dibatasi 500)
set.seed(1)
sub <- df[sample(nrow(df), 500), pred_vars]
sw_res <- lapply(pred_vars, function(v) {
t <- shapiro.test(sub[[v]])
data.frame(Variabel = v,
Statistik = round(t$statistic, 4),
p_value = round(t$p.value, 4),
Normal = ifelse(t$p.value > 0.05, "Ya", "Tidak"))
})
kable(do.call(rbind, sw_res),
caption = "Uji Normalitas Univariat – Shapiro-Wilk (n = 500)")
| Variabel | Statistik | p_value | Normal | |
|---|---|---|---|---|
| W | sleep_duration_hrs | 0.9934 | 0.0273 | Tidak |
| W1 | stress_score | 0.9861 | 0.0001 | Tidak |
| W2 | rem_percentage | 0.9973 | 0.5765 | Ya |
| W3 | deep_sleep_percentage | 0.9956 | 0.1724 | Ya |
| W4 | sleep_latency_mins | 0.9911 | 0.0042 | Tidak |
| W5 | wake_episodes_per_night | 0.9551 | 0.0000 | Tidak |
| W6 | heart_rate_resting_bpm | 0.9971 | 0.5380 | Ya |
| W7 | steps_that_day | 0.9921 | 0.0096 | Tidak |
| W8 | caffeine_mg_before_bed | 0.5758 | 0.0000 | Tidak |
| W9 | bmi | 0.9936 | 0.0320 | Tidak |
# Uji Mardia manual (skewness & kurtosis multivariat) via moments
if (!require("moments", quietly = TRUE))
install.packages("moments", repos = "https://cloud.r-project.org", quiet = TRUE)
library(moments)
X <- as.matrix(scale(sub))
n <- nrow(X); p <- ncol(X)
S <- cov(X)
D <- X %*% solve(S) %*% t(X)
# Mardia skewness
b1p <- mean(D^3) / 6
chi2 <- n * b1p
df_m <- p * (p + 1) * (p + 2) / 6
p_sk <- pchisq(chi2, df_m, lower.tail = FALSE)
# Mardia kurtosis
b2p <- mean(diag(D^2))
z_ku <- (b2p - p*(p+2)) / sqrt(8*p*(p+2)/n)
p_ku <- 2 * pnorm(abs(z_ku), lower.tail = FALSE)
kable(data.frame(
Uji = c("Mardia Skewness","Mardia Kurtosis"),
Statistik = round(c(chi2, z_ku), 4),
p_value = round(c(p_sk, p_ku), 4),
Kesimpulan = ifelse(c(p_sk, p_ku) > 0.05,
"Normal Multivariat", "Tidak Normal Multivariat")
), caption = "Uji Normalitas Multivariat – Mardia (manual)")
| Uji | Statistik | p_value | Kesimpulan |
|---|---|---|---|
| Mardia Skewness | 899.7327 | 0 | Tidak Normal Multivariat |
| Mardia Kurtosis | 6.6593 | 0 | Tidak Normal Multivariat |
Catatan: Jika asumsi normalitas multivariat dilanggar, LDA masih relatif robust pada ukuran sampel besar (n ≥ 100 per kelompok). Alternatif: QDA atau metode non-parametrik.
X <- as.matrix(df[, pred_vars])
grp <- df$disorder_fct
boxm_res <- boxM(X, grp)
print(boxm_res)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: X
## Chi-Sq (approx.) = 11021, df = 165, p-value < 2.2e-16
cat("\nKesimpulan: p-value =", round(boxm_res$p.value, 4),
ifelse(boxm_res$p.value < 0.05,
"→ Matriks kovarians TIDAK homogen (tolak H0).",
"→ Matriks kovarians homogen (gagal tolak H0)."), "\n")
##
## Kesimpulan: p-value = 0 → Matriks kovarians TIDAK homogen (tolak H0).
Catatan: Box’s M sangat sensitif pada sampel besar; p-value kecil belum tentu berarti perbedaan praktis. Jika matriks tidak homogen, pertimbangkan QDA sebagai alternatif.
# Gunakan regresi linear bantu (target numerik dummy) untuk hitung VIF
lm_aux <- lm(as.numeric(disorder_fct) ~ ., data = df[, c("disorder_fct", pred_vars)])
vif_val <- vif(lm_aux)
kable(data.frame(Variabel = names(vif_val),
VIF = round(vif_val, 3),
Status = ifelse(vif_val > 10, "âš Tinggi",
ifelse(vif_val > 5, "Sedang", "✓ Aman"))),
caption = "Variance Inflation Factor (VIF) – Prediktor LDA")
| Variabel | VIF | Status | |
|---|---|---|---|
| sleep_duration_hrs | sleep_duration_hrs | 1.339 | ✓ Aman |
| stress_score | stress_score | 1.426 | ✓ Aman |
| rem_percentage | rem_percentage | 1.047 | ✓ Aman |
| deep_sleep_percentage | deep_sleep_percentage | 1.049 | ✓ Aman |
| sleep_latency_mins | sleep_latency_mins | 1.154 | ✓ Aman |
| wake_episodes_per_night | wake_episodes_per_night | 1.101 | ✓ Aman |
| heart_rate_resting_bpm | heart_rate_resting_bpm | 1.070 | ✓ Aman |
| steps_that_day | steps_that_day | 1.092 | ✓ Aman |
| caffeine_mg_before_bed | caffeine_mg_before_bed | 1.119 | ✓ Aman |
| bmi | bmi | 1.063 | ✓ Aman |
Acuan: VIF < 5 = aman, 5–10 = perlu perhatian, > 10 = multikolinearitas serius.
POLR mensyaratkan dua asumsi utama: (1) proportional odds (paralel lines) dan (2) tidak ada multikolinearitas ekstrem.
# Fit model POLR sementara untuk uji Brant
# Paket brant diperlukan
if (!require("brant", quietly = TRUE))
install.packages("brant", repos = "https://cloud.r-project.org", quiet = TRUE)
library(brant)
polr_tmp <- polr(
disorder_ord ~ sleep_duration_hrs + stress_score + rem_percentage +
deep_sleep_percentage + sleep_latency_mins +
wake_episodes_per_night + heart_rate_resting_bpm +
steps_that_day + caffeine_mg_before_bed + bmi,
data = df, Hess = TRUE
)
brant_res <- brant(polr_tmp)
## ------------------------------------------------------------
## Test for X2 df probability
## ------------------------------------------------------------
## Omnibus 811.61 20 0
## sleep_duration_hrs 557.93 2 0
## stress_score 37.63 2 0
## rem_percentage 21.99 2 0
## deep_sleep_percentage 2.03 2 0.36
## sleep_latency_mins 54.35 2 0
## wake_episodes_per_night 108.76 2 0
## heart_rate_resting_bpm 2.31 2 0.31
## steps_that_day 6.51 2 0.04
## caffeine_mg_before_bed 4.59 2 0.1
## bmi 23.35 2 0
## ------------------------------------------------------------
##
## H0: Parallel Regression Assumption holds
print(brant_res)
## X2 df probability
## Omnibus 811.606567 20 4.860332e-159
## sleep_duration_hrs 557.934361 2 7.016054e-122
## stress_score 37.628043 2 6.747989e-09
## rem_percentage 21.987225 2 1.680873e-05
## deep_sleep_percentage 2.031046 2 3.622130e-01
## sleep_latency_mins 54.352045 2 1.576171e-12
## wake_episodes_per_night 108.760784 2 2.414882e-24
## heart_rate_resting_bpm 2.311447 2 3.148297e-01
## steps_that_day 6.508261 2 3.861439e-02
## caffeine_mg_before_bed 4.589947 2 1.007641e-01
## bmi 23.346064 2 8.520531e-06
Interpretasi: Jika p-value uji Omnibus < 0.05, asumsi proportional odds dilanggar → pertimbangkan model partial proportional odds atau multinomial logistik.
VIF untuk POLR menggunakan prediktor yang sama, sehingga hasil identik dengan tabel VIF pada bagian LDA di atas — tidak perlu dihitung ulang.
kable(data.frame(Variabel = names(vif_val),
VIF = round(vif_val, 3),
Status = ifelse(vif_val > 10, "âš Tinggi",
ifelse(vif_val > 5, "Sedang", "✓ Aman"))),
caption = "VIF – Prediktor POLR (sama dengan LDA)")
| Variabel | VIF | Status | |
|---|---|---|---|
| sleep_duration_hrs | sleep_duration_hrs | 1.339 | ✓ Aman |
| stress_score | stress_score | 1.426 | ✓ Aman |
| rem_percentage | rem_percentage | 1.047 | ✓ Aman |
| deep_sleep_percentage | deep_sleep_percentage | 1.049 | ✓ Aman |
| sleep_latency_mins | sleep_latency_mins | 1.154 | ✓ Aman |
| wake_episodes_per_night | wake_episodes_per_night | 1.101 | ✓ Aman |
| heart_rate_resting_bpm | heart_rate_resting_bpm | 1.070 | ✓ Aman |
| steps_that_day | steps_that_day | 1.092 | ✓ Aman |
| caffeine_mg_before_bed | caffeine_mg_before_bed | 1.119 | ✓ Aman |
| bmi | bmi | 1.063 | ✓ Aman |
set.seed(123)
idx <- createDataPartition(df$disorder_ord, p = .8, list = FALSE)
train <- df[idx, ]
test <- df[-idx, ]
cat("Train:", nrow(train), "| Test:", nrow(test), "\n")
## Train: 80002 | Test: 19998
lda_mod <- lda(
disorder_fct ~ sleep_duration_hrs + stress_score + rem_percentage +
deep_sleep_percentage + sleep_latency_mins +
wake_episodes_per_night + heart_rate_resting_bpm +
steps_that_day + caffeine_mg_before_bed + bmi,
data = train
)
lda_mod
## Call:
## lda(disorder_fct ~ sleep_duration_hrs + stress_score + rem_percentage +
## deep_sleep_percentage + sleep_latency_mins + wake_episodes_per_night +
## heart_rate_resting_bpm + steps_that_day + caffeine_mg_before_bed +
## bmi, data = train)
##
## Prior probabilities of groups:
## Healthy Mild Moderate Severe
## 0.54154896 0.33479163 0.08299793 0.04066148
##
## Group means:
## sleep_duration_hrs stress_score rem_percentage deep_sleep_percentage
## Healthy 6.931118 5.032824 20.81180 20.61684
## Mild 6.056926 6.328943 19.87379 20.01497
## Moderate 5.414247 7.044654 19.04586 19.42367
## Severe 4.844608 7.470089 18.28783 18.99318
## sleep_latency_mins wake_episodes_per_night heart_rate_resting_bpm
## Healthy 17.74576 2.763254 66.26283
## Mild 21.42675 3.856369 66.84185
## Moderate 23.94352 4.396235 67.01069
## Severe 26.58561 4.880725 67.47618
## steps_that_day caffeine_mg_before_bed bmi
## Healthy 7491.556 33.46105 25.36675
## Mild 7509.467 42.59147 27.01663
## Moderate 7425.834 47.87952 28.00367
## Severe 7341.761 59.86474 28.91233
##
## Coefficients of linear discriminants:
## LD1 LD2 LD3
## sleep_duration_hrs -4.481923e-01 -5.847640e-01 0.0822635533
## stress_score 3.141970e-01 -5.758674e-01 -0.2262654184
## rem_percentage -7.242110e-02 -1.169086e-01 0.0479184141
## deep_sleep_percentage -1.159923e-02 -4.083165e-02 0.0687148523
## sleep_latency_mins 6.283195e-02 3.421146e-02 0.0158277163
## wake_episodes_per_night 2.246678e-01 -1.414664e-01 0.2591849767
## heart_rate_resting_bpm -3.333400e-04 -1.377471e-03 0.0595406239
## steps_that_day 6.247955e-06 -1.693401e-05 0.0000410205
## caffeine_mg_before_bed 3.448319e-04 1.208402e-03 0.0080477557
## bmi 8.378567e-02 7.434680e-03 0.0076181726
##
## Proportion of trace:
## LD1 LD2 LD3
## 0.9940 0.0057 0.0003
prop <- round(lda_mod$svd^2 / sum(lda_mod$svd^2) * 100, 2)
kable(data.frame(LD = paste0("LD", seq_along(prop)),
`Proporsi (%)` = prop,
`Kumulatif (%)` = cumsum(prop)),
caption = "Proporsi Variansi Antar-Kelompok")
| LD | Proporsi…. | Kumulatif…. |
|---|---|---|
| LD1 | 99.40 | 99.40 |
| LD2 | 0.57 | 99.97 |
| LD3 | 0.03 | 100.00 |
scores <- data.frame(predict(lda_mod, train)$x, Disorder = train$disorder_fct)
ggplot(scores, aes(LD1, LD2, colour = Disorder)) +
geom_point(alpha = .4, size = 1.2) +
stat_ellipse(level = .75, linewidth = .8) +
labs(title = "Fungsi Diskriminan Linear (LD1 vs LD2)",
x = "LD1", y = "LD2") +
theme_minimal()
pred_lda <- predict(lda_mod, test)$class
cm_lda <- confusionMatrix(pred_lda, test$disorder_fct)
# Heatmap confusion matrix LDA
cm_lda_df <- as.data.frame(cm_lda$table)
colnames(cm_lda_df) <- c("Prediksi", "Aktual", "N")
ggplot(cm_lda_df, aes(x = Aktual, y = Prediksi, fill = N)) +
geom_tile(colour = "white", linewidth = 0.8) +
geom_text(aes(label = N), size = 5, fontface = "bold", colour = "white") +
scale_fill_gradient(low = "#d6e4f0", high = "#1a5276") +
labs(title = "Confusion Matrix – LDA",
x = "Kelas Aktual", y = "Kelas Prediksi", fill = "Jumlah") +
theme_minimal(base_size = 13) +
theme(panel.grid = element_blank(),
axis.text = element_text(size = 11))
cat("Akurasi LDA :", round(cm_lda$overall["Accuracy"] * 100, 2), "%\n")
## Akurasi LDA : 71.05 %
cat("Kappa :", round(cm_lda$overall["Kappa"], 4), "\n")
## Kappa : 0.4808
polr_mod <- polr(
disorder_ord ~ sleep_duration_hrs + stress_score + rem_percentage +
deep_sleep_percentage + sleep_latency_mins +
wake_episodes_per_night + heart_rate_resting_bpm +
steps_that_day + caffeine_mg_before_bed + bmi,
data = train, Hess = TRUE
)
summary(polr_mod)
## Call:
## polr(formula = disorder_ord ~ sleep_duration_hrs + stress_score +
## rem_percentage + deep_sleep_percentage + sleep_latency_mins +
## wake_episodes_per_night + heart_rate_resting_bpm + steps_that_day +
## caffeine_mg_before_bed + bmi, data = train, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## sleep_duration_hrs -7.936e-01 0.0070923 -111.8936
## stress_score 6.054e-01 0.0061687 98.1476
## rem_percentage -1.140e-01 0.0022153 -51.4595
## deep_sleep_percentage -1.464e-02 0.0017847 -8.2035
## sleep_latency_mins 1.049e-01 0.0012042 87.1476
## wake_episodes_per_night 3.555e-01 0.0045077 78.8615
## heart_rate_resting_bpm -3.394e-04 0.0009394 -0.3613
## steps_that_day 8.453e-06 NaN NaN
## caffeine_mg_before_bed 4.573e-04 0.0001189 3.8453
## bmi 1.431e-01 0.0018327 78.0653
##
## Intercepts:
## Value Std. Error t value
## Healthy|Mild 3.1825 0.0003 12245.2091
## Mild|Moderate 6.3691 0.0178 357.1747
## Moderate|Severe 8.1593 0.0261 313.1233
##
## Residual Deviance: 109524.28
## AIC: 109550.28
ctbl <- coef(summary(polr_mod))
pval <- pnorm(abs(ctbl[, "t value"]), lower.tail = FALSE) * 2
result <- data.frame(
Koefisien = round(ctbl[, "Value"], 4),
SE = round(ctbl[, "Std. Error"], 4),
OR = round(exp(ctbl[, "Value"]), 4),
p_value = round(pval, 4),
Sig = ifelse(pval < .001, "***",
ifelse(pval < .01, "**",
ifelse(pval < .05, "*", "")))
)
kable(result, caption = "Koefisien, Odds Ratio, dan p-value Model POLR")
| Koefisien | SE | OR | p_value | Sig | |
|---|---|---|---|---|---|
| sleep_duration_hrs | -0.7936 | 0.0071 | 0.4522 | 0.0000 | *** |
| stress_score | 0.6054 | 0.0062 | 1.8321 | 0.0000 | *** |
| rem_percentage | -0.1140 | 0.0022 | 0.8923 | 0.0000 | *** |
| deep_sleep_percentage | -0.0146 | 0.0018 | 0.9855 | 0.0000 | *** |
| sleep_latency_mins | 0.1049 | 0.0012 | 1.1107 | 0.0000 | *** |
| wake_episodes_per_night | 0.3555 | 0.0045 | 1.4269 | 0.0000 | *** |
| heart_rate_resting_bpm | -0.0003 | 0.0009 | 0.9997 | 0.7179 | |
| steps_that_day | 0.0000 | NaN | 1.0000 | NaN | NA |
| caffeine_mg_before_bed | 0.0005 | 0.0001 | 1.0005 | 0.0001 | *** |
| bmi | 0.1431 | 0.0018 | 1.1538 | 0.0000 | *** |
| Healthy|Mild | 3.1825 | 0.0003 | 24.1078 | 0.0000 | *** |
| Mild|Moderate | 6.3691 | 0.0178 | 583.5247 | 0.0000 | *** |
| Moderate|Severe | 8.1593 | 0.0261 | 3495.7304 | 0.0000 | *** |
pred_polr <- predict(polr_mod, test)
cm_polr <- confusionMatrix(pred_polr, test$disorder_ord)
# Heatmap confusion matrix POLR
cm_polr_df <- as.data.frame(cm_polr$table)
colnames(cm_polr_df) <- c("Prediksi", "Aktual", "N")
ggplot(cm_polr_df, aes(x = Aktual, y = Prediksi, fill = N)) +
geom_tile(colour = "white", linewidth = 0.8) +
geom_text(aes(label = N), size = 5, fontface = "bold", colour = "white") +
scale_fill_gradient(low = "#d5f5e3", high = "#1e8449") +
labs(title = "Confusion Matrix – POLR",
x = "Kelas Aktual", y = "Kelas Prediksi", fill = "Jumlah") +
theme_minimal(base_size = 13) +
theme(panel.grid = element_blank(),
axis.text = element_text(size = 11))
cat("Akurasi POLR :", round(cm_polr$overall["Accuracy"] * 100, 2), "%\n")
## Akurasi POLR : 71.61 %
cat("Kappa :", round(cm_polr$overall["Kappa"], 4), "\n")
## Kappa : 0.4976
res_df <- data.frame(Aktual = test$disorder_ord, Prediksi = pred_polr)
ggplot(res_df, aes(x = Aktual, fill = Prediksi)) +
geom_bar(position = "fill") +
scale_fill_brewer(palette = "RdYlGn", direction = -1) +
labs(title = "Proporsi Prediksi POLR per Kategori Aktual Sleep Disorder Risk",
x = "Disorder Risk Aktual", y = "Proporsi", fill = "Prediksi") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 15, hjust = 1))
kable(data.frame(
Metode = c("LDA","POLR"),
Target = "sleep_disorder_risk (4 kelas)",
Prediktor = "10 variabel numerik (sama)",
Akurasi = c(paste0(round(cm_lda$overall["Accuracy"]*100, 2), "%"),
paste0(round(cm_polr$overall["Accuracy"]*100, 2), "%")),
Kappa = c(round(cm_lda$overall["Kappa"], 4),
round(cm_polr$overall["Kappa"], 4))
), caption = "Perbandingan Performa Model")
| Metode | Target | Prediktor | Akurasi | Kappa |
|---|---|---|---|---|
| LDA | sleep_disorder_risk (4 kelas) | 10 variabel numerik (sama) | 71.05% | 0.4808 |
| POLR | sleep_disorder_risk (4 kelas) | 10 variabel numerik (sama) | 71.61% | 0.4976 |
Interpretasi singkat:
LDA memisahkan kelompok secara nominal
berdasarkan kombinasi linear prediktor. Variabel dengan kontribusi
terbesar pada LD1 umumnya adalah stress_score,
sleep_latency_mins, dan
rem_percentage.
POLR memanfaatkan informasi urutan kategori
(Healthy < Mild < Moderate < Severe) sehingga lebih efisien
secara parameter. OR > 1 pada stress_score dan
wake_episodes_per_night menunjukkan keduanya meningkatkan
risiko ke kategori yang lebih parah; sebaliknya
sleep_duration_hrs bersifat protektif (OR < 1).