Dataset yang digunakan bersumber dari Kaggle dan berisi informasi gaya hidup dan kesehatan individu, seperti aktivitas fisik, tekanan darah, serta gangguan tidur. Tujuan analisis ini adalah mengklasifikasikan jenis gangguan tidur berdasarkan faktor gaya hidup.
library(DT)
## Warning: package 'DT' was built under R version 4.4.3
# Load the sleep health and lifestyle dataset and show it as an interactive
# table.
df <- read.csv("Sleep_health_and_lifestyle_dataset.csv")
datatable(df)

# Flag the categorical (factor or character) columns. vapply() always returns
# a logical vector, so the column subsetting below is type-stable.
cat_cols <- vapply(
  df,
  function(x) is.factor(x) || is.character(x),
  logical(1)
)

# Print the frequency table of every categorical column.
for (col_name in names(df)[cat_cols]) {
  cat("Kolom:", col_name, "\n")
  print(table(df[[col_name]]))
  cat("\n---------------------\n\n")
}
## Kolom: Gender
##
## Female Male
## 185 189
##
## ---------------------
##
## Kolom: Occupation
##
## Accountant Doctor Engineer
## 37 71 63
## Lawyer Manager Nurse
## 47 1 73
## Sales Representative Salesperson Scientist
## 2 32 4
## Software Engineer Teacher
## 4 40
##
## ---------------------
##
## Kolom: BMI.Category
##
## Normal Normal Weight Obese Overweight
## 195 21 10 148
##
## ---------------------
##
## Kolom: Blood.Pressure
##
## 115/75 115/78 117/76 118/75 118/76 119/77 120/80 121/79 122/80 125/80 125/82
## 32 2 2 2 1 2 45 1 1 65 4
## 126/83 128/84 128/85 129/84 130/85 130/86 131/86 132/87 135/88 135/90 139/91
## 2 2 3 2 99 2 2 3 2 27 2
## 140/90 140/95 142/92
## 4 65 2
##
## ---------------------
##
## Kolom: Sleep.Disorder
##
## Insomnia None Sleep Apnea
## 77 219 78
##
## ---------------------
# Count the missing values in each column and print the totals.
missing_counts <- vapply(
  df,
  function(column) sum(is.na(column)),
  numeric(1)
)
print(missing_counts)
## Person.ID Gender Age
## 0 0 0
## Occupation Sleep.Duration Quality.of.Sleep
## 0 0 0
## Physical.Activity.Level Stress.Level BMI.Category
## 0 0 0
## Blood.Pressure Heart.Rate Daily.Steps
## 0 0 0
## Sleep.Disorder
## 0
Person ID
# Drop the Person.ID column from the data frame.
df$Person.ID <- NULL
Pada kolom BMI Category terdapat nilai yang tidak konsisten, yaitu Normal dan Normal Weight. Nilai Normal Weight diubah menjadi Normal agar konsisten.
# Merge the "Normal Weight" label into "Normal" so the category is consistent.
is_normal_weight <- df$BMI.Category == "Normal Weight"
df$BMI.Category[is_normal_weight] <- "Normal"
Blood Pressure
menjadi
BP_Systolic
dan BP_Diastolic
Tipe data pada kolom Blood Pressure merupakan kategorikal, berupa pasangan angka yang merepresentasikan tekanan darah sistolik dan diastolik. Pada data baris pertama, yaitu 126/83, nilai 126 adalah tekanan darah sistolik dan nilai 83 adalah tekanan darah diastolik. Nilai ini kemudian dipecah menjadi kolom BP_Systolic dan BP_Diastolic agar bertipe data numerik.
# Split each "systolic/diastolic" reading on the slash.
bp_split <- strsplit(as.character(df$Blood.Pressure), "/")
# Pull the i-th piece of every reading and convert it to a number.
bp_part <- function(i) {
  as.numeric(vapply(bp_split, function(pieces) pieces[i], character(1)))
}
df$BP_Systolic <- bp_part(1)
df$BP_Diastolic <- bp_part(2)
df$Blood.Pressure <- NULL

# Move the target column Sleep.Disorder to the last position.
sleep_col <- df$Sleep.Disorder
df <- df[names(df) != "Sleep.Disorder"]
df$Sleep.Disorder <- sleep_col
numeric_cols <- c(
  "Age", "Sleep.Duration", "Physical.Activity.Level", "Heart.Rate",
  "Daily.Steps", "BP_Systolic", "BP_Diastolic"
)
# Lay the boxplots out on a two-row grid, one panel per numeric column.
par(mfrow = c(2, ceiling(length(numeric_cols) / 2)))
invisible(lapply(numeric_cols, function(column) {
  boxplot(df[[column]], main = column, col = "skyblue", horizontal = TRUE)
}))
# Go back to a single-panel layout for later plots.
par(mfrow = c(1, 1))
# Return the positions of the values in `x` whose standardized score (as
# computed by scale()) is more than 3 in absolute value.
detect_outliers <- function(x) {
  standardized <- abs(as.vector(scale(x)))
  is_extreme <- !is.na(standardized) & standardized > 3
  seq_along(standardized)[is_extreme]
}
# Run the z-score check on every numeric column, then report how many
# outlying positions each column has.
outlier_index <- lapply(df[numeric_cols], detect_outliers)
for (var in names(outlier_index)) {
  cat(var, ":", length(outlier_index[[var]]), "outlier\n")
}
## Age : 0 outlier
## Sleep.Duration : 0 outlier
## Physical.Activity.Level : 0 outlier
## Heart.Rate : 9 outlier
## Daily.Steps : 0 outlier
## BP_Systolic : 0 outlier
## BP_Diastolic : 0 outlier
Outlier diimputasi menggunakan nilai batas bawah atau batas atas
# Cap the values of `x` at the IQR fences: anything below Q1 - 1.5 * IQR is
# raised to that lower fence and anything above Q3 + 1.5 * IQR is lowered to
# the upper fence. Quartiles ignore missing values; NA entries stay NA.
replace_outlier_with_bound <- function(x) {
  quartiles <- unname(quantile(x, c(0.25, 0.75), na.rm = TRUE))
  spread <- quartiles[2] - quartiles[1]
  lower_fence <- quartiles[1] - 1.5 * spread
  upper_fence <- quartiles[2] + 1.5 * spread
  pmin(pmax(x, lower_fence), upper_fence)
}
numeric_cols <- c(
  "Age", "Sleep.Duration", "Physical.Activity.Level", "Heart.Rate",
  "Daily.Steps", "BP_Systolic", "BP_Diastolic"
)
# Cap every numeric column at its IQR fences.
for (column in numeric_cols) {
  df[[column]] <- replace_outlier_with_bound(df[[column]])
}
# Pastikan library sudah terpasang
if (!require(car)) install.packages("car")
## Loading required package: car
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
library(car)
# Work on a copy of the numeric columns.
numeric_data <- df[numeric_cols]

# Box-Cox needs strictly positive values: shift any column that contains a
# value <= 0 so that its minimum becomes slightly positive.
for (col in names(numeric_data)) {
  values <- numeric_data[[col]]
  if (any(values <= 0)) {
    shift <- abs(min(values, na.rm = TRUE)) + 0.01
    numeric_data[[col]] <- values + shift
  }
}

# Estimate the Box-Cox powers for all columns.
boxcox_model <- powerTransform(numeric_data)

# One lambda per column, looked up by column name.
lambdas <- boxcox_model$lambda

# Box-Cox transform of one column: log when lambda is (numerically) zero,
# the power form otherwise.
box_cox <- function(values, lambda) {
  if (abs(lambda) < 1e-6) {
    log(values)
  } else {
    (values^lambda - 1) / lambda
  }
}
numeric_data[] <- Map(box_cox, numeric_data, lambdas[names(numeric_data)])

# Standardize the transformed columns.
numeric_data <- as.data.frame(scale(numeric_data))

# Write the result back into df.
df[numeric_cols] <- numeric_data
Beberapa kolom kategorikal perlu diubah menjadi representasi numerik agar bisa digunakan dalam model. One Hot Encoding dilakukan pada kolom Gender, Occupation, dan BMI Category, dengan baseline yang dijadikan referensi sebagai berikut:
- Gender: Female
- Occupation: Software Engineer
- BMI Category: Normal
# Set the factor levels so that the first level of each column is the
# reference category: Female, Software Engineer, Normal.
df$Gender <- factor(df$Gender, levels = c("Female", "Male"))
occupations <- sort(unique(df$Occupation))
df$Occupation <- factor(
  df$Occupation,
  levels = c(
    "Software Engineer",
    occupations[occupations != "Software Engineer"]
  )
)
df$BMI.Category <- factor(
  df$BMI.Category,
  levels = c("Normal", "Overweight", "Obese")
)

# Build the dummy columns for one factor column of df: drop the intercept
# column, rename "<column><level>" to "<column>_<level>", and strip
# parentheses and spaces from the names.
one_hot <- function(column) {
  design <- model.matrix(reformulate(column), data = df)[, -1, drop = FALSE]
  colnames(design) <- gsub(column, paste0(column, "_"), colnames(design))
  colnames(design) <- gsub("[() ]", "", colnames(design))
  design
}
gender_dummies <- one_hot("Gender")
occupation_dummies <- one_hot("Occupation")
bmi_dummies <- one_hot("BMI.Category")

# Replace the original categorical columns with their dummy columns.
df$Gender <- NULL
df$Occupation <- NULL
df$BMI.Category <- NULL
df <- cbind(df, gender_dummies, occupation_dummies, bmi_dummies)
# Preferred column order; the dummy columns are picked up by name prefix.
column_names <- names(df)
new_order <- c(
  "Age",
  "Gender_Male",
  column_names[grepl("^Occupation_", column_names)],
  column_names[grepl("^BMI.Category_", column_names)],
  "Sleep.Duration",
  "Quality.of.Sleep",
  "Physical.Activity.Level",
  "Stress.Level",
  "Heart.Rate",
  "Daily.Steps",
  "BP_Systolic",
  "BP_Diastolic",
  "Sleep.Disorder"
)
# Keep the listed columns that exist (in the listed order), followed by any
# column that was not listed.
cols_existing <- names(df)
cols_to_keep <- unique(new_order[new_order %in% cols_existing])
cols_leftover <- unique(cols_existing[!cols_existing %in% cols_to_keep])
df <- df[, c(cols_to_keep, cols_leftover)]
Kolom Quality of Sleep dan Stress Level berisi rating berskala 1-10, yang menunjukkan bahwa kedua variabel tersebut merupakan numerik ordinal. Maka dari itu perlu dilakukan ordered factorization agar model dapat memahami urutan atau hierarki nilai dalam variabel tersebut dan tidak memperlakukannya sebagai variabel numerik biasa.
# Encode the two rating columns as ordered factors on the 1-10 scale, then
# drop any rating that never occurs in the data. An unused level would still
# get its own polynomial contrast column in a model formula, and such columns
# cannot be estimated from the data (rank-deficient design).
df$Quality.of.Sleep <- droplevels(
  factor(
    df$Quality.of.Sleep,
    ordered = TRUE,
    levels = as.character(1:10)
  )
)
df$Stress.Level <- droplevels(
  factor(
    df$Stress.Level,
    ordered = TRUE,
    levels = as.character(1:10)
  )
)
Sleep Disorder
Label kelas Sleep Disorder
dikonversi dari bentuk string
ke dalam tipe data faktor (unordered) agar algoritma klasifikasi seperti
Analisis Diskriminan Linear (LDA) dan Regresi Multinomial dapat
mengenali bahwa target merupakan kelas diskrit (nominal), bukan data
numerik berurutan (ordinal).
# Turn the target into an unordered factor with a fixed level order, then
# show the class counts.
disorder_levels <- c("None", "Sleep Apnea", "Insomnia")
df$Sleep.Disorder <- factor(df$Sleep.Disorder, levels = disorder_levels)
table(df$Sleep.Disorder)
##
## None Sleep Apnea Insomnia
## 219 78 77
# Bar chart of the class counts of the target variable.
disorder_counts <- table(df$Sleep.Disorder)
barplot(
  disorder_counts,
  col = "steelblue",
  main = "Distribusi Sleep Disorder"
)
library(corrplot)
## corrplot 0.95 loaded
# Keep the numeric columns, leaving out the one-hot encoded dummy columns.
exclude_prefixes <- c("Gender_", "Occupation_", "BMI.Category_")
dummy_pattern <- paste0("^(", paste(exclude_prefixes, collapse = "|"), ")")
is_numeric_col <- vapply(df, is.numeric, logical(1))
numeric_df <- df[is_numeric_col]
numeric_df <- numeric_df[, !grepl(dummy_pattern, names(numeric_df))]
# Correlation plot of the remaining numeric columns.
corrplot::corrplot(
  cor(numeric_df),
  method = "color",
  tl.cex = 1,
  addCoef.col = "black",
  number.cex = 0.7
)
Berdasarkan hasil visualisasi korelasi, ditemukan bahwa BP_Systolic dan BP_Diastolic memiliki korelasi tinggi (>0.9). Untuk menghindari masalah multikolinearitas, variabel BP_Diastolic dihapus dari dataset.
# Remove BP_Diastolic and show the resulting table.
df <- df[names(df) != "BP_Diastolic"]
datatable(df)
if (!require(MVN)) install.packages("MVN")
## Loading required package: MVN
## Warning: package 'MVN' was built under R version 4.4.3
library(MVN)
# Numeric predictors that remain after BP_Diastolic was dropped.
numeric_cols <- c(
  "Age", "Sleep.Duration", "Physical.Activity.Level", "Heart.Rate",
  "Daily.Steps", "BP_Systolic"
)
# Henze-Zirkler multivariate test, Anderson-Darling univariate tests, and a
# multivariate Q-Q plot.
mvn(
  data = df[numeric_cols],
  mvnTest = "hz",
  univariateTest = "AD",
  multivariatePlot = "qq"
)
## $multivariateNormality
## Test HZ p value MVN
## 1 Henze-Zirkler 20.3467 0 NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Anderson-Darling Age 4.1338 <0.001 NO
## 2 Anderson-Darling Sleep.Duration 8.6259 <0.001 NO
## 3 Anderson-Darling Physical.Activity.Level 11.3200 <0.001 NO
## 4 Anderson-Darling Heart.Rate 8.4324 <0.001 NO
## 5 Anderson-Darling Daily.Steps 8.5924 <0.001 NO
## 6 Anderson-Darling BP_Systolic 12.4378 <0.001 NO
##
## $Descriptives
## n Mean Std.Dev Median Min Max
## Age 374 1.733384e-16 1 0.08369132 -1.722641 1.960770
## Sleep.Duration 374 2.304759e-15 1 0.19518123 -1.908123 1.519860
## Physical.Activity.Level 374 -4.539444e-18 1 0.04658604 -1.408226 1.472851
## Heart.Rate 374 -9.124020e-13 1 0.07757837 -1.496826 2.054074
## Daily.Steps 374 -2.800718e-16 1 0.20909022 -3.042675 1.714637
## BP_Systolic 374 1.715243e-16 1 0.05736587 -1.486531 2.005498
## 25th 75th Skew Kurtosis
## Age -0.8016912 0.8975289 0.28635006 -0.90704215
## Sleep.Duration -0.8922614 0.8622981 -0.19592632 -1.30632929
## Physical.Activity.Level -0.6761419 0.7624358 0.05880317 -1.27502654
## Heart.Rate -0.5152214 0.6268908 0.18999836 -0.64324618
## Daily.Steps -0.6902665 0.7627501 -0.34217946 -0.07328643
## BP_Systolic -0.5512504 0.7782190 0.37610587 -0.90691183
Hasil ini menunjukkan bahwa p-value<0.05 maka tolak H0, artinya populasi tidak berdistribusi normal multivariat, sehingga asumsi normal multivariat tidak terpenuhi
library(biotools)
## Warning: package 'biotools' was built under R version 4.4.3
## Loading required package: MASS
## ---
## biotools version 4.3
# Box's M test on the numeric predictors, grouped by Sleep.Disorder.
# The call is kept verbatim because the printed result echoes the data
# expression.
box_m_result <- boxM(df[, numeric_cols], grouping = df$Sleep.Disorder)
print(box_m_result)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: df[, numeric_cols]
## Chi-Sq (approx.) = 645.43, df = 42, p-value < 2.2e-16
Hasil ini menunjukkan bahwa p-value<0.05 maka tolak H0, artinya varians tidak homogen, sehingga asumsi homogenitas varians tidak terpenuhi
if (!require(MASS)) install.packages("MASS")
if (!require(caret)) install.packages("caret")
## Loading required package: caret
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: lattice
library(MASS)
library(caret)
# Fix the RNG seed so the train/test partition is reproducible.
set.seed(123)
# Partition on the target with p = 0.8; the selected rows form the training
# set and all remaining rows form the test set.
train_index <- createDataPartition(df$Sleep.Disorder, p = 0.8, list = FALSE)
train_data <- df[train_index, ]
test_data <- df[-train_index, ]
# Fit LDA on the training rows using only the columns in numeric_cols as
# predictors, then print the fitted model.
lda_model <- lda(Sleep.Disorder ~ ., data = train_data[, c(numeric_cols, "Sleep.Disorder")])
print(lda_model)
## Call:
## lda(Sleep.Disorder ~ ., data = train_data[, c(numeric_cols, "Sleep.Disorder")])
##
## Prior probabilities of groups:
## None Sleep Apnea Insomnia
## 0.5847176 0.2093023 0.2059801
##
## Group means:
## Age Sleep.Duration Physical.Activity.Level Heart.Rate
## None -0.3515528 0.2797449 -0.08764367 -0.2565066
## Sleep Apnea 0.9300082 -0.1180294 0.76747289 0.6739888
## Insomnia 0.1407434 -0.6447329 -0.58068690 0.0114702
## Daily.Steps BP_Systolic
## None 0.001669081 -0.5858744
## Sleep Apnea 0.308105957 1.3568792
## Insomnia -0.526112582 0.3655702
##
## Coefficients of linear discriminants:
## LD1 LD2
## Age 0.4865472 0.14918642
## Sleep.Duration -0.2180813 -1.17156328
## Physical.Activity.Level 0.1822814 -0.00365290
## Heart.Rate 0.4131441 -0.78088041
## Daily.Steps -0.1577286 -0.58318167
## BP_Systolic 1.0857644 -0.02340594
##
## Proportion of trace:
## LD1 LD2
## 0.8571 0.1429
# Classify the test rows with the fitted LDA model.
lda_pred <- predict(lda_model, newdata = test_data[, numeric_cols])
# Cross-tabulate the predicted classes against the observed classes.
conf_matrix <- confusionMatrix(
  data = lda_pred$class,
  reference = test_data$Sleep.Disorder
)
print(conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction None Sleep Apnea Insomnia
## None 41 3 1
## Sleep Apnea 0 11 1
## Insomnia 2 1 13
##
## Overall Statistics
##
## Accuracy : 0.8904
## 95% CI : (0.7954, 0.9515)
## No Information Rate : 0.589
## P-Value [Acc > NIR] : 1.509e-08
##
## Kappa : 0.8036
##
## Mcnemar's Test P-Value : 0.343
##
## Statistics by Class:
##
## Class: None Class: Sleep Apnea Class: Insomnia
## Sensitivity 0.9535 0.7333 0.8667
## Specificity 0.8667 0.9828 0.9483
## Pos Pred Value 0.9111 0.9167 0.8125
## Neg Pred Value 0.9286 0.9344 0.9649
## Prevalence 0.5890 0.2055 0.2055
## Detection Rate 0.5616 0.1507 0.1781
## Detection Prevalence 0.6164 0.1644 0.2192
## Balanced Accuracy 0.9101 0.8580 0.9075
Untuk mengetahui variabel mana yang berpengaruh terhadap gangguan tidur (Sleep Disorder), salah satu caranya adalah dengan melihat plot antara fungsi diskriminan.
# Plot the discriminant scores, colored by the observed class of each
# training row.
class_colors <- as.integer(train_data$Sleep.Disorder)
plot(lda_model, col = class_colors)
Beberapa asumsi yang harus dipenuhi dalam regresi multinomial antara lain:
Tidak membutuhkan hubungan linear antara variabel independen dengan variabel dependen
Variabel independen tidak memerlukan asumsi multivariate normality
Asumsi homoskedastisitas tidak diperlukan
Variabel independen tidak harus memiliki keragaman yang sama antar kelompok variabel
Tidak ada outlier
Berdasarkan asumsi-asumsi tersebut, data penelitian kami telah memenuhi asumsi yang diperlukan karena outlier sudah ditangani pada tahap preprocessing.
set.seed(42)
# Reserve 80% of the rows (rounded down) for training.
n_rows <- nrow(df)
sample_size <- floor(0.8 * n_rows)
# Draw the training row numbers at random, without replacement.
train_indices <- sample.int(n_rows, size = sample_size)
# The drawn rows train the model; the remaining rows test it.
train_data <- df[train_indices, ]
test_data <- df[-train_indices, ]
# Pastikan package nnet ada dan di-load
if (!require(nnet)) install.packages("nnet")
## Loading required package: nnet
library(nnet)
# Fit the multinomial model of Sleep.Disorder on the predictors listed in the
# formula, using the training rows. The formula is kept verbatim because the
# printed summary echoes the call.
model <- multinom(Sleep.Disorder ~ Age + Gender_Male + Occupation_Accountant + Occupation_Doctor + Occupation_Engineer +
Occupation_Lawyer + Occupation_Manager + Occupation_Nurse + Occupation_SalesRepresentative + Occupation_Salesperson +
Occupation_Scientist + Occupation_Teacher + BMI.Category_Overweight + BMI.Category_Obese + Sleep.Duration +
Quality.of.Sleep + Physical.Activity.Level + Stress.Level + Heart.Rate + Daily.Steps + BP_Systolic,
data = train_data)
## # weights: 117 (76 variable)
## initial value 328.485074
## iter 10 value 104.661088
## iter 20 value 68.841189
## iter 30 value 63.557038
## iter 40 value 60.635043
## iter 50 value 59.510889
## iter 60 value 59.249919
## iter 70 value 59.132934
## iter 80 value 59.115343
## iter 90 value 59.115054
## iter 90 value 59.115053
## iter 90 value 59.115053
## final value 59.115053
## converged
# Inspect the fitted model summary (optional).
summary(model)
## Warning in sqrt(diag(vc)): NaNs produced
## Call:
## multinom(formula = Sleep.Disorder ~ Age + Gender_Male + Occupation_Accountant +
## Occupation_Doctor + Occupation_Engineer + Occupation_Lawyer +
## Occupation_Manager + Occupation_Nurse + Occupation_SalesRepresentative +
## Occupation_Salesperson + Occupation_Scientist + Occupation_Teacher +
## BMI.Category_Overweight + BMI.Category_Obese + Sleep.Duration +
## Quality.of.Sleep + Physical.Activity.Level + Stress.Level +
## Heart.Rate + Daily.Steps + BP_Systolic, data = train_data)
##
## Coefficients:
## (Intercept) Age Gender_Male Occupation_Accountant
## Sleep Apnea -100.84644 -3.927069 -132.88398 52.9148
## Insomnia -57.02461 -6.381880 -41.44134 461.1708
## Occupation_Doctor Occupation_Engineer Occupation_Lawyer
## Sleep Apnea 267.26067 66.24596 -85.42297
## Insomnia 29.50959 156.89062 -140.86315
## Occupation_Manager Occupation_Nurse Occupation_SalesRepresentative
## Sleep Apnea -77.35975 106.8939 209.5922
## Insomnia -12.49881 -250.4093 -217.3117
## Occupation_Salesperson Occupation_Scientist Occupation_Teacher
## Sleep Apnea -23.33829 113.83429 223.27613
## Insomnia -213.15430 -64.77421 -34.41632
## BMI.Category_Overweight BMI.Category_Obese Sleep.Duration
## Sleep Apnea -10.39870 709.9247 -4.354337
## Insomnia -31.56327 586.6587 -6.894041
## Quality.of.Sleep.L Quality.of.Sleep.Q Quality.of.Sleep.C
## Sleep Apnea 29.269386 151.51119 109.85432
## Insomnia 8.415312 52.58519 48.02113
## Quality.of.Sleep^4 Quality.of.Sleep^5 Quality.of.Sleep^6
## Sleep Apnea -157.41378 -312.6801 -156.4284
## Insomnia -38.19799 -149.0627 -108.6535
## Quality.of.Sleep^7 Quality.of.Sleep^8 Quality.of.Sleep^9
## Sleep Apnea 34.68666 118.1819 228.20499
## Insomnia 116.58519 202.4417 -13.83074
## Physical.Activity.Level Stress.Level.L Stress.Level.Q
## Sleep Apnea -75.54054 134.1100 -79.633580
## Insomnia -139.17620 128.8959 -8.270823
## Stress.Level.C Stress.Level^4 Stress.Level^5 Stress.Level^6
## Sleep Apnea -199.0620 240.34769 63.95358 -210.02249
## Insomnia -209.6911 35.18554 126.57351 22.78636
## Stress.Level^7 Stress.Level^8 Stress.Level^9 Heart.Rate Daily.Steps
## Sleep Apnea 205.92936 -251.3072 -170.5654 -64.90433 28.75899
## Insomnia 92.10471 -239.6872 -182.0984 -68.09300 57.13493
## BP_Systolic
## Sleep Apnea 121.2995
## Insomnia 318.7089
##
## Std. Errors:
## (Intercept) Age Gender_Male Occupation_Accountant
## Sleep Apnea 67.993315 4.599652 22.475908 9.424010e-08
## Insomnia 8.118577 4.500010 5.884281 6.172147e+01
## Occupation_Doctor Occupation_Engineer Occupation_Lawyer
## Sleep Apnea 6.56349 16.4439 2.307771e+01
## Insomnia 5.71907 208.2383 1.364926e-05
## Occupation_Manager Occupation_Nurse Occupation_SalesRepresentative
## Sleep Apnea NaN 95.66309 NaN
## Insomnia 0 57.52108 1.429659e-54
## Occupation_Salesperson Occupation_Scientist Occupation_Teacher
## Sleep Apnea 68.43896 9.718002e-03 5.219744
## Insomnia 195.71910 9.124395e-43 14.366241
## BMI.Category_Overweight BMI.Category_Obese Sleep.Duration
## Sleep Apnea 59.91293 5.719083 3.663646
## Insomnia 101.04106 5.719083 4.172055
## Quality.of.Sleep.L Quality.of.Sleep.Q Quality.of.Sleep.C
## Sleep Apnea 3.883225 10.86726 6.264944
## Insomnia 38.902459 21.16316 73.921615
## Quality.of.Sleep^4 Quality.of.Sleep^5 Quality.of.Sleep^6
## Sleep Apnea 4.399947 36.03447 1.383468
## Insomnia 67.422052 83.45394 127.986260
## Quality.of.Sleep^7 Quality.of.Sleep^8 Quality.of.Sleep^9
## Sleep Apnea 63.28848 22.82821 74.41253
## Insomnia 56.65431 219.71209 153.48402
## Physical.Activity.Level Stress.Level.L Stress.Level.Q
## Sleep Apnea 42.43417 3.302262 17.07228
## Insomnia 42.36635 1.143929 12.15587
## Stress.Level.C Stress.Level^4 Stress.Level^5 Stress.Level^6
## Sleep Apnea 5.2684216 5.510153 2.330783 0.8402538
## Insomnia 0.7000193 37.668661 1.440946 51.9717283
## Stress.Level^7 Stress.Level^8 Stress.Level^9 Heart.Rate Daily.Steps
## Sleep Apnea 7.368896 9.551349 27.74399 113.2132 82.17048
## Insomnia 5.582918 29.387418 67.05145 293.6038 96.95923
## BP_Systolic
## Sleep Apnea 2.189933
## Insomnia 55.083901
##
## Residual Deviance: 118.2301
## AIC: 238.2301
# Predicted Sleep.Disorder class for every row of the test set.
mlr_pred <- predict(model, newdata = test_data, type = "class")
# Cross-tabulate the predicted classes against the observed classes.
conf_matrix <- confusionMatrix(
  data = mlr_pred,
  reference = test_data$Sleep.Disorder
)
print(conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction None Sleep Apnea Insomnia
## None 44 1 4
## Sleep Apnea 0 8 0
## Insomnia 3 2 13
##
## Overall Statistics
##
## Accuracy : 0.8667
## 95% CI : (0.7684, 0.9342)
## No Information Rate : 0.6267
## P-Value [Acc > NIR] : 3.708e-06
##
## Kappa : 0.7439
##
## Mcnemar's Test P-Value : 0.3701
##
## Statistics by Class:
##
## Class: None Class: Sleep Apnea Class: Insomnia
## Sensitivity 0.9362 0.7273 0.7647
## Specificity 0.8214 1.0000 0.9138
## Pos Pred Value 0.8980 1.0000 0.7222
## Neg Pred Value 0.8846 0.9552 0.9298
## Prevalence 0.6267 0.1467 0.2267
## Detection Rate 0.5867 0.1067 0.1733
## Detection Prevalence 0.6533 0.1067 0.2400
## Balanced Accuracy 0.8788 0.8636 0.8392
# Predicted class probabilities for the test set.
pred_probs <- predict(model, newdata = test_data, type = "probs")
# Konversi jadi data frame + long format
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Reshape the predicted probabilities to long format: one row per
# (observation, class) pair. seq_len() stays correct for an empty table.
prob_df <- as.data.frame(pred_probs)
prob_df$Observation <- seq_len(nrow(prob_df))
prob_df_long <- pivot_longer(
  prob_df,
  cols = -Observation,
  names_to = "Class",
  values_to = "Probability"
)
# Plot one probability line per class. Line thickness is set with
# `linewidth`; `size` is deprecated for lines since ggplot2 3.4.0.
library(ggplot2)
ggplot(prob_df_long, aes(x = Observation, y = Probability, color = Class)) +
  geom_line(linewidth = 1) +
  theme_minimal() +
  labs(
    title = "Multinomial Logistic Regression: Probability Plot",
    x = "Observasi",
    y = "Probabilitas",
    color = "Kelas Prediksi"
  ) +
  theme(legend.position = "top")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.