1. Persiapan Dataset

1.1 Instalasi & Pemuatan Paket

library(MASS)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## Warning: package 'caret' was built under R version 4.5.3
## Loading required package: lattice
library(brant)
## Warning: package 'brant' was built under R version 4.5.3
library(tidyr)
library(scales)
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

1.2 Import Dataset

df <- read.csv("C:/Users/ASUS/Downloads/Sleep_health_and_lifestyle_dataset.csv", stringsAsFactors = FALSE)
cat("Dimensi dataset:", nrow(df), "baris x", ncol(df), "kolom\n")
## Dimensi dataset: 374 baris x 13 kolom

2. Pemeriksaan Struktur Data

str(df)
## 'data.frame':    374 obs. of  13 variables:
##  $ Person.ID              : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Gender                 : chr  "Male" "Male" "Male" "Male" ...
##  $ Age                    : int  27 28 28 28 28 28 29 29 29 29 ...
##  $ Occupation             : chr  "Software Engineer" "Doctor" "Doctor" "Sales Representative" ...
##  $ Sleep.Duration         : num  6.1 6.2 6.2 5.9 5.9 5.9 6.3 7.8 7.8 7.8 ...
##  $ Quality.of.Sleep       : int  6 6 6 4 4 4 6 7 7 7 ...
##  $ Physical.Activity.Level: int  42 60 60 30 30 30 40 75 75 75 ...
##  $ Stress.Level           : int  6 8 8 8 8 8 7 6 6 6 ...
##  $ BMI.Category           : chr  "Overweight" "Normal" "Normal" "Obese" ...
##  $ Blood.Pressure         : chr  "126/83" "125/80" "125/80" "140/90" ...
##  $ Heart.Rate             : int  77 75 75 85 85 85 82 70 70 70 ...
##  $ Daily.Steps            : int  4200 10000 10000 3000 3000 3000 3500 8000 8000 8000 ...
##  $ Sleep.Disorder         : chr  "None" "None" "None" "Sleep Apnea" ...
kable(head(df, 6), caption = "Enam Baris Pertama Dataset") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed"), full_width = FALSE)
Enam Baris Pertama Dataset
Person.ID Gender Age Occupation Sleep.Duration Quality.of.Sleep Physical.Activity.Level Stress.Level BMI.Category Blood.Pressure Heart.Rate Daily.Steps Sleep.Disorder
1 Male 27 Software Engineer 6.1 6 42 6 Overweight 126/83 77 4200 None
2 Male 28 Doctor 6.2 6 60 8 Normal 125/80 75 10000 None
3 Male 28 Doctor 6.2 6 60 8 Normal 125/80 75 10000 None
4 Male 28 Sales Representative 5.9 4 30 8 Obese 140/90 85 3000 Sleep Apnea
5 Male 28 Sales Representative 5.9 4 30 8 Obese 140/90 85 3000 Sleep Apnea
6 Male 28 Software Engineer 5.9 4 30 8 Obese 140/90 85 3000 Insomnia

3. Ringkasan Statistik Deskriptif

summary(df)
##    Person.ID         Gender               Age         Occupation       
##  Min.   :  1.00   Length:374         Min.   :27.00   Length:374        
##  1st Qu.: 94.25   Class :character   1st Qu.:35.25   Class :character  
##  Median :187.50   Mode  :character   Median :43.00   Mode  :character  
##  Mean   :187.50                      Mean   :42.18                     
##  3rd Qu.:280.75                      3rd Qu.:50.00                     
##  Max.   :374.00                      Max.   :59.00                     
##  Sleep.Duration  Quality.of.Sleep Physical.Activity.Level  Stress.Level  
##  Min.   :5.800   Min.   :4.000    Min.   :30.00           Min.   :3.000  
##  1st Qu.:6.400   1st Qu.:6.000    1st Qu.:45.00           1st Qu.:4.000  
##  Median :7.200   Median :7.000    Median :60.00           Median :5.000  
##  Mean   :7.132   Mean   :7.313    Mean   :59.17           Mean   :5.385  
##  3rd Qu.:7.800   3rd Qu.:8.000    3rd Qu.:75.00           3rd Qu.:7.000  
##  Max.   :8.500   Max.   :9.000    Max.   :90.00           Max.   :8.000  
##  BMI.Category       Blood.Pressure       Heart.Rate     Daily.Steps   
##  Length:374         Length:374         Min.   :65.00   Min.   : 3000  
##  Class :character   Class :character   1st Qu.:68.00   1st Qu.: 5600  
##  Mode  :character   Mode  :character   Median :70.00   Median : 7000  
##                                        Mean   :70.17   Mean   : 6817  
##                                        3rd Qu.:72.00   3rd Qu.: 8000  
##                                        Max.   :86.00   Max.   :10000  
##  Sleep.Disorder    
##  Length:374        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
num_cols <- df %>% select(where(is.numeric))
ringkasan_df <- data.frame(
  Variabel = names(num_cols),
  Min      = sapply(num_cols, min, na.rm = TRUE),
  Q1       = sapply(num_cols, quantile, 0.25, na.rm = TRUE),
  Median   = sapply(num_cols, median, na.rm = TRUE),
  Mean     = round(sapply(num_cols, mean, na.rm = TRUE), 2),
  Q3       = sapply(num_cols, quantile, 0.75, na.rm = TRUE),
  Max      = sapply(num_cols, max, na.rm = TRUE)
)
kable(ringkasan_df, caption = "Ringkasan Statistik Variabel Numerik", row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Ringkasan Statistik Variabel Numerik
Variabel Min Q1 Median Mean Q3 Max
Person.ID 1.0 94.25 187.5 187.50 280.75 374.0
Age 27.0 35.25 43.0 42.18 50.00 59.0
Sleep.Duration 5.8 6.40 7.2 7.13 7.80 8.5
Quality.of.Sleep 4.0 6.00 7.0 7.31 8.00 9.0
Physical.Activity.Level 30.0 45.00 60.0 59.17 75.00 90.0
Stress.Level 3.0 4.00 5.0 5.39 7.00 8.0
Heart.Rate 65.0 68.00 70.0 70.17 72.00 86.0
Daily.Steps 3000.0 5600.00 7000.0 6816.84 8000.00 10000.0

4. Pemeriksaan Nilai Hilang

missing_df <- data.frame(
  Kolom          = names(df),
  Jumlah_Missing = sapply(df, function(x) sum(is.na(x))),
  Persentase     = round(sapply(df, function(x) mean(is.na(x)) * 100), 2)
)
kable(missing_df, caption = "Nilai Hilang per Kolom", row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  row_spec(which(missing_df$Jumlah_Missing > 0), background = "#fff3cd")
Nilai Hilang per Kolom
Kolom Jumlah_Missing Persentase
Person.ID 0 0
Gender 0 0
Age 0 0
Occupation 0 0
Sleep.Duration 0 0
Quality.of.Sleep 0 0
Physical.Activity.Level 0 0
Stress.Level 0 0
BMI.Category 0 0
Blood.Pressure 0 0
Heart.Rate 0 0
Daily.Steps 0 0
Sleep.Disorder 0 0
ggplot(missing_df, aes(x = reorder(Kolom, Jumlah_Missing),
                        y = Jumlah_Missing, fill = Jumlah_Missing > 0)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  scale_fill_manual(values = c("steelblue","tomato")) +
  labs(title = "Jumlah Nilai Hilang per Kolom", x = "Kolom", y = "Jumlah Missing") +
  theme_minimal(base_size = 12)


5. Distribusi Variabel Target

5.1 Pembuatan Variabel Target Ordinal

Variabel Quality.of.Sleep (skala 4–9) dikonversi menjadi 3 kategori ordinal: - Rendah: skor ≤ 5 | Sedang: skor 6–7 | Tinggi: skor ≥ 8

df <- df %>%
  mutate(
    Kualitas_Tidur = case_when(
      Quality.of.Sleep <= 5 ~ "Rendah",
      Quality.of.Sleep <= 7 ~ "Sedang",
      TRUE                  ~ "Tinggi"
    ),
    Kualitas_Tidur = factor(Kualitas_Tidur,
                            levels  = c("Rendah","Sedang","Tinggi"),
                            ordered = TRUE)
  )

cat("Distribusi Kualitas Tidur:\n"); print(table(df$Kualitas_Tidur))
## Distribusi Kualitas Tidur:
## 
## Rendah Sedang Tinggi 
##     12    182    180
cat("\nProporsi (%):\n"); print(round(prop.table(table(df$Kualitas_Tidur)) * 100, 2))
## 
## Proporsi (%):
## 
## Rendah Sedang Tinggi 
##   3.21  48.66  48.13

5.2 Visualisasi Distribusi Target

freq_target <- as.data.frame(table(df$Kualitas_Tidur)) %>%
  rename(Kategori = Var1, Frekuensi = Freq) %>%
  mutate(Persen = round(Frekuensi / sum(Frekuensi) * 100, 1))

ggplot(freq_target, aes(x = Kategori, y = Frekuensi, fill = Kategori)) +
  geom_col(width = 0.6, show.legend = FALSE) +
  geom_text(aes(label = paste0(Frekuensi, "\n(", Persen, "%)")), vjust = -0.3, size = 4.5) +
  scale_fill_manual(values = c("Rendah"="#e74c3c","Sedang"="#f39c12","Tinggi"="#2ecc71")) +
  labs(title    = "Distribusi Kualitas Tidur (Variabel Target Ordinal)",
       subtitle = "Sleep Health & Lifestyle Dataset",
       x = "Kategori Kualitas Tidur", y = "Frekuensi") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))


6. Visualisasi Distribusi Prediktor

6.1 Variabel Numerik

numerik_long <- df %>%
  select(Age, Sleep.Duration, Physical.Activity.Level,
         Stress.Level, Heart.Rate, Daily.Steps) %>%
  pivot_longer(everything(), names_to = "Variabel", values_to = "Nilai")

ggplot(numerik_long, aes(x = Nilai, fill = Variabel)) +
  geom_histogram(bins = 20, color = "white", alpha = 0.85, show.legend = FALSE) +
  facet_wrap(~ Variabel, scales = "free", ncol = 3) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Distribusi Variabel Numerik Prediktor", x = "Nilai", y = "Frekuensi") +
  theme_minimal(base_size = 11) +
  theme(plot.title = element_text(face = "bold"))

6.2 Variabel Kategorik vs Target

p1 <- ggplot(df, aes(x = BMI.Category, fill = Kualitas_Tidur)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_manual(values = c("Rendah"="#e74c3c","Sedang"="#f39c12","Tinggi"="#2ecc71")) +
  labs(title = "BMI vs Kualitas Tidur", x = "BMI Category",
       y = "Proporsi", fill = "Kualitas Tidur") +
  theme_minimal(base_size = 11) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1))

p2 <- ggplot(df, aes(x = Gender, fill = Kualitas_Tidur)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_manual(values = c("Rendah"="#e74c3c","Sedang"="#f39c12","Tinggi"="#2ecc71")) +
  labs(title = "Gender vs Kualitas Tidur", x = "Gender",
       y = "Proporsi", fill = "Kualitas Tidur") +
  theme_minimal(base_size = 11)

grid.arrange(p1, p2, ncol = 2)


7. Membangun Model Regresi Logistik Ordinal

7.1 Validasi Kolom & Persiapan Data

kolom_penting <- c("Age","Sleep.Duration","Physical.Activity.Level",
                   "Stress.Level","Heart.Rate","Daily.Steps",
                   "BMI.Category","Gender","Kualitas_Tidur")

cat("Pengecekan kolom penting:\n")
## Pengecekan kolom penting:
for (kol in kolom_penting) {
  ada <- kol %in% names(df)
  cat(sprintf("  %-35s : %s\n", kol, ifelse(ada, "Ada", "Tidak Ada")))
}
##   Age                                 : Ada
##   Sleep.Duration                      : Ada
##   Physical.Activity.Level             : Ada
##   Stress.Level                        : Ada
##   Heart.Rate                          : Ada
##   Daily.Steps                         : Ada
##   BMI.Category                        : Ada
##   Gender                              : Ada
##   Kualitas_Tidur                      : Ada
df_model <- df %>%
  select(all_of(kolom_penting)) %>%
  mutate(BMI.Category = as.factor(BMI.Category), Gender = as.factor(Gender)) %>%
  drop_na()

cat("\nJumlah observasi untuk pemodelan:", nrow(df_model), "\n")
## 
## Jumlah observasi untuk pemodelan: 374

7.2 Split Data 80:20

set.seed(42)
idx_train  <- createDataPartition(df_model$Kualitas_Tidur, p = 0.8, list = FALSE)
data_train <- df_model[ idx_train, ]
data_test  <- df_model[-idx_train, ]

cat("Training:", nrow(data_train), "| Testing:", nrow(data_test), "\n")
## Training: 300 | Testing: 74

7.3 Scaling & Fit Model polr()

Catatan pemilihan prediktor: Berdasarkan pemeriksaan tabel silang, variabel Sleep.Duration dan Stress.Level mengalami perfect separation terhadap variabel target — artinya nilai-nilai tertentu dari kedua variabel ini secara sempurna memisahkan kategori Kualitas Tidur tanpa tumpang tindih. Kondisi ini menyebabkan koefisien divergen ke tak hingga sehingga algoritma optimasi polr() gagal konvergen. Oleh karena itu, model dibangun menggunakan 4 prediktor yang stabil: Age, Physical.Activity.Level, Heart.Rate, dan Daily.Steps. BMI.Category juga dikeluarkan karena quasi-complete separation (hanya 1 observasi pada kombinasi Tidak_Normal + Sedang).

cols_scale <- c("Age", "Sleep.Duration", "Physical.Activity.Level",
                "Stress.Level", "Heart.Rate", "Daily.Steps")

# Hitung mean & SD dari data training
means <- sapply(data_train[, cols_scale], mean, na.rm = TRUE)
sds   <- sapply(data_train[, cols_scale], sd,   na.rm = TRUE)

cat("=== SDs ===\n"); print(sds)
## === SDs ===
##                     Age          Sleep.Duration Physical.Activity.Level 
##               8.6087217               0.7948976              20.5125516 
##            Stress.Level              Heart.Rate             Daily.Steps 
##               1.7689884               4.2942120            1639.4276335
# Hapus kolom dengan SD = 0
cols_scale <- cols_scale[sds > 0]
means      <- means[cols_scale]
sds        <- sds[cols_scale]

# Scale manual
data_train_scaled <- data_train
data_test_scaled  <- data_test

for (col in cols_scale) {
  data_train_scaled[[col]] <- (data_train[[col]] - means[col]) / sds[col]
  data_test_scaled[[col]]  <- (data_test[[col]]  - means[col]) / sds[col]
}

# Buang baris dengan nilai tidak finite
data_train_scaled <- data_train_scaled %>%
  filter(if_all(where(is.numeric), ~ is.finite(.x)))
data_test_scaled <- data_test_scaled %>%
  filter(if_all(where(is.numeric), ~ is.finite(.x)))

cat("Train rows setelah cleaning:", nrow(data_train_scaled), "\n")
## Train rows setelah cleaning: 300
cat("Test rows setelah cleaning:",  nrow(data_test_scaled),  "\n")
## Test rows setelah cleaning: 74
# Pastikan faktor tidak memiliki level kosong
data_train_scaled$BMI.Category <- droplevels(data_train_scaled$BMI.Category)
data_train_scaled$Gender       <- droplevels(data_train_scaled$Gender)
data_test_scaled$BMI.Category  <- droplevels(data_test_scaled$BMI.Category)
data_test_scaled$Gender        <- droplevels(data_test_scaled$Gender)

# Fit model dengan 4 prediktor stabil
# Sleep.Duration & Stress.Level dikeluarkan karena perfect separation
# BMI.Category dikeluarkan karena quasi-complete separation
model_olr <- polr(
  Kualitas_Tidur ~ Age + Physical.Activity.Level + Heart.Rate + Daily.Steps,
  data   = data_train_scaled,
  Hess   = TRUE,
  method = "logistic"
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(model_olr)
## Call:
## polr(formula = Kualitas_Tidur ~ Age + Physical.Activity.Level + 
##     Heart.Rate + Daily.Steps, data = data_train_scaled, Hess = TRUE, 
##     method = "logistic")
## 
## Coefficients:
##                           Value Std. Error t value
## Age                      0.5726     0.1747   3.277
## Physical.Activity.Level  2.0255     0.3228   6.276
## Heart.Rate              -2.2097     0.2383  -9.274
## Daily.Steps             -1.0344     0.2767  -3.739
## 
## Intercepts:
##               Value   Std. Error t value
## Rendah|Sedang -6.3409  0.6909    -9.1781
## Sedang|Tinggi  0.3889  0.1764     2.2040
## 
## Residual Deviance: 283.6826 
## AIC: 295.6826

8. Uji Signifikansi Koefisien (p-value)

ctable <- coef(summary(model_olr))
p_val  <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
hasil_koef <- cbind(ctable, `p-value` = round(p_val, 4))

kable(as.data.frame(hasil_koef), caption = "Koefisien Model dengan p-value", digits = 4) %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  row_spec(which(p_val < 0.05), background = "#d4edda")
Koefisien Model dengan p-value
Value Std. Error t value p-value
Age 0.5726 0.1747 3.2775 0.0010
Physical.Activity.Level 2.0255 0.3228 6.2757 0.0000
Heart.Rate -2.2097 0.2383 -9.2741 0.0000
Daily.Steps -1.0344 0.2767 -3.7388 0.0002
Rendah&#124;Sedang -6.3409 0.6909 -9.1781 0.0000
Sedang&#124;Tinggi 0.3889 0.1764 2.2040 0.0275
# Pisahkan koefisien prediktor dari intercept
all_coef <- coef(summary(model_olr))
# Ambil hanya baris prediktor (bukan intercept Rendah|Sedang & Sedang|Tinggi)
pred_names <- names(coef(model_olr))  # hanya 4 prediktor

coef_vals <- coef(model_olr)                          # 4 nilai
se_vals   <- sqrt(diag(vcov(model_olr))[pred_names])  # 4 SE yang sesuai

or_wald <- data.frame(
  OR       = round(exp(coef_vals), 4),
  CI_Lower = round(exp(coef_vals - 1.96 * se_vals), 4),
  CI_Upper = round(exp(coef_vals + 1.96 * se_vals), 4)
)

kable(or_wald, caption = "Odds Ratio & CI 95% (Metode Wald)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Odds Ratio & CI 95% (Metode Wald)
OR CI_Lower CI_Upper
Age 1.7728 1.2588 2.4967
Physical.Activity.Level 7.5801 4.0266 14.2698
Heart.Rate 0.1097 0.0688 0.1750
Daily.Steps 0.3555 0.2067 0.6113

9. Uji Asumsi Proportional Odds (Brant Test)

# H0: asumsi proportional odds terpenuhi (p-value > 0.05 = OK)
tryCatch(
  print(brant(model_olr)),
  error   = function(e) cat("Brant test error:", conditionMessage(e), "\n"),
  warning = function(w) cat("Brant test warning:", conditionMessage(w), "\n")
)
## Brant test warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

10. Prediksi & Evaluasi Model

# Gunakan data_test_scaled
pred_kelas <- predict(model_olr, newdata = data_test_scaled, type = "class")
pred_prob  <- predict(model_olr, newdata = data_test_scaled, type = "probs")

hasil_pred <- data.frame(
  Aktual      = data_test_scaled$Kualitas_Tidur,
  Prediksi    = pred_kelas,
  Prob_Rendah = round(pred_prob[, "Rendah"], 3),
  Prob_Sedang = round(pred_prob[, "Sedang"], 3),
  Prob_Tinggi = round(pred_prob[, "Tinggi"], 3)
)

kable(head(hasil_pred, 10), caption = "10 Hasil Prediksi Pertama", row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
10 Hasil Prediksi Pertama
Aktual Prediksi Prob_Rendah Prob_Sedang Prob_Tinggi
Sedang Sedang 0.002 0.584 0.414
Sedang Sedang 0.126 0.866 0.008
Sedang Sedang 0.126 0.866 0.008
Sedang Sedang 0.126 0.866 0.008
Sedang Sedang 0.002 0.568 0.430
Sedang Sedang 0.002 0.568 0.430
Rendah Sedang 0.301 0.696 0.003
Sedang Sedang 0.050 0.928 0.022
Sedang Sedang 0.001 0.552 0.447
Sedang Sedang 0.001 0.535 0.463
cm <- confusionMatrix(pred_kelas, data_test_scaled$Kualitas_Tidur)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Rendah Sedang Tinggi
##     Rendah      0      0      0
##     Sedang      2     30      7
##     Tinggi      0      6     29
## 
## Overall Statistics
##                                           
##                Accuracy : 0.7973          
##                  95% CI : (0.6878, 0.8819)
##     No Information Rate : 0.4865          
##     P-Value [Acc > NIR] : 3.721e-08       
##                                           
##                   Kappa : 0.6053          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: Rendah Class: Sedang Class: Tinggi
## Sensitivity                0.00000        0.8333        0.8056
## Specificity                1.00000        0.7632        0.8421
## Pos Pred Value                 NaN        0.7692        0.8286
## Neg Pred Value             0.97297        0.8286        0.8205
## Prevalence                 0.02703        0.4865        0.4865
## Detection Rate             0.00000        0.4054        0.3919
## Detection Prevalence       0.00000        0.5270        0.4730
## Balanced Accuracy          0.50000        0.7982        0.8238

11. Visualisasi Confusion Matrix

cm_tabel <- as.data.frame(cm$table) %>%
  rename(Prediksi = Prediction, Aktual = Reference, Frekuensi = Freq)

ggplot(cm_tabel, aes(x = Aktual, y = Prediksi, fill = Frekuensi)) +
  geom_tile(color = "white", linewidth = 0.8) +
  geom_text(aes(label = Frekuensi), size = 7, fontface = "bold", color = "white") +
  scale_fill_gradient(low = "#aed6f1", high = "#1a5276") +
  labs(title    = "Confusion Matrix - Regresi Logistik Ordinal",
       subtitle = "Prediksi vs Aktual pada Data Testing",
       x = "Kategori Aktual", y = "Kategori Prediksi", fill = "Frekuensi") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))


12. Akurasi & Metrik Evaluasi

akurasi <- cm$overall["Accuracy"]
kappa   <- cm$overall["Kappa"]
cat(sprintf("Akurasi Model : %.4f (%.2f%%)\n", akurasi, akurasi * 100))
## Akurasi Model : 0.7973 (79.73%)
cat(sprintf("Cohen's Kappa : %.4f\n", kappa))
## Cohen's Kappa : 0.6053
metrik_kelas <- as.data.frame(cm$byClass) %>%
  mutate(across(where(is.numeric), ~ round(.x, 4)))

kable(metrik_kelas, caption = "Metrik Evaluasi per Kelas") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Metrik Evaluasi per Kelas
Sensitivity Specificity Pos Pred Value Neg Pred Value Precision Recall F1 Prevalence Detection Rate Detection Prevalence Balanced Accuracy
Class: Rendah 0.0000 1.0000 NaN 0.9730 NA 0.0000 NA 0.0270 0.0000 0.000 0.5000
Class: Sedang 0.8333 0.7632 0.7692 0.8286 0.7692 0.8333 0.8000 0.4865 0.4054 0.527 0.7982
Class: Tinggi 0.8056 0.8421 0.8286 0.8205 0.8286 0.8056 0.8169 0.4865 0.3919 0.473 0.8238
metrik_plot <- data.frame(
  Kelas       = rownames(cm$byClass),
  Sensitivity = cm$byClass[, "Sensitivity"],
  Specificity = cm$byClass[, "Specificity"],
  F1_Score    = cm$byClass[, "F1"]
) %>% pivot_longer(-Kelas, names_to = "Metrik", values_to = "Nilai")

ggplot(metrik_plot, aes(x = Kelas, y = Nilai, fill = Metrik)) +
  geom_col(position = "dodge", width = 0.65) +
  geom_text(aes(label = round(Nilai, 2)),
            position = position_dodge(0.65), vjust = -0.4, size = 3.5) +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(limits = c(0, 1.1), labels = percent_format()) +
  labs(title    = "Metrik Evaluasi per Kelas",
       subtitle = "Sensitivity, Specificity, dan F1-Score",
       x = "Kelas", y = "Nilai", fill = "Metrik") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_col()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).


13. Kesimpulan

## === RINGKASAN HASIL ANALISIS ===
## Dataset  : Sleep Health & Lifestyle Dataset
## Target   : Kualitas Tidur (Ordinal: Rendah < Sedang < Tinggi)
## Metode   : Proportional Odds Logistic Regression (POLR)
## Paket R  : MASS::polr()
## 
## Prediktor dalam Model (4 variabel):
##   Age, Physical.Activity.Level, Heart.Rate, Daily.Steps
## 
## Prediktor Dikeluarkan:
##   Sleep.Duration  -> perfect separation
##   Stress.Level    -> perfect separation
##   BMI.Category    -> quasi-complete separation
## 
## Ukuran Data
##   Training : 300 observasi (80%)
##   Testing  : 74 observasi (20%)
## 
## Hasil Evaluasi
##   Akurasi       : 79.73%
##   Cohen's Kappa : 0.6053

Dataset: Sleep Health and Lifestyle - Kaggle