Dataset yang digunakan bersumber dari Kaggle dan berisi informasi gaya hidup dan kesehatan individu, seperti aktivitas fisik, tekanan darah, serta gangguan tidur. Tujuan analisis ini adalah mengklasifikasi jenis gangguan tidur berdasarkan faktor gaya hidup.
library(DT)
## Warning: package 'DT' was built under R version 4.4.3
df <- read.csv("Sleep_health_and_lifestyle_dataset.csv")
datatable(df)
cat_cols <- sapply(df, function(x) is.factor(x) || is.character(x))
for (col_name in names(df)[cat_cols]) {
cat("Kolom:", col_name, "\n")
print(table(df[[col_name]]))
cat("\n---------------------\n\n")
}
## Kolom: Gender
##
## Female Male
## 185 189
##
## ---------------------
##
## Kolom: Occupation
##
## Accountant Doctor Engineer
## 37 71 63
## Lawyer Manager Nurse
## 47 1 73
## Sales Representative Salesperson Scientist
## 2 32 4
## Software Engineer Teacher
## 4 40
##
## ---------------------
##
## Kolom: BMI.Category
##
## Normal Normal Weight Obese Overweight
## 195 21 10 148
##
## ---------------------
##
## Kolom: Blood.Pressure
##
## 115/75 115/78 117/76 118/75 118/76 119/77 120/80 121/79 122/80 125/80 125/82
## 32 2 2 2 1 2 45 1 1 65 4
## 126/83 128/84 128/85 129/84 130/85 130/86 131/86 132/87 135/88 135/90 139/91
## 2 2 3 2 99 2 2 3 2 27 2
## 140/90 140/95 142/92
## 4 65 2
##
## ---------------------
##
## Kolom: Sleep.Disorder
##
## Insomnia None Sleep Apnea
## 77 219 78
##
## ---------------------
missing_counts <- colSums(is.na(df))
print(missing_counts)
## Person.ID Gender Age
## 0 0 0
## Occupation Sleep.Duration Quality.of.Sleep
## 0 0 0
## Physical.Activity.Level Stress.Level BMI.Category
## 0 0 0
## Blood.Pressure Heart.Rate Daily.Steps
## 0 0 0
## Sleep.Disorder
## 0
Person ID
)df$Person.ID <- NULL
Pada kolom BMI Category
terdapat nilai yang tidak
konsisten, yaitu Normal
dan Normal Weight
.
Nilai Normal Weight
diubah menjadi Normal
agar
konsisten.
df$BMI.Category <- ifelse(df$BMI.Category == "Normal Weight", "Normal", df$BMI.Category)
Blood Pressure
menjadi
BP_Systolic
dan BP_Diastolic
Tipe data pada kolom Blood Pressure
merupakan
kategorikal yang dimana berupa angka untuk merepresentasikan tekanan
darah dalam sistolik dan diastolik. Pada data baris pertama yaitu
126/83
, dimana nilai 126 adalah nilai tekanan darah
sistolik dan nilai 83 adalah nilai tekanan darah diastolik. Nilai ini
kemudian akan dipecah menjadi kolom BP_Systolic
dan
BP_Diastolic
agar menjadi tipe data numerik.
bp_split <- strsplit(as.character(df$Blood.Pressure), "/")
df$BP_Systolic <- as.numeric(sapply(bp_split, `[`, 1))
df$BP_Diastolic <- as.numeric(sapply(bp_split, `[`, 2))
df$Blood.Pressure <- NULL
sleep_col <- df$Sleep.Disorder
df$Sleep.Disorder <- NULL
df$Sleep.Disorder <- sleep_col
numeric_cols <- c("Age", "Sleep.Duration", "Physical.Activity.Level", "Heart.Rate", "Daily.Steps", "BP_Systolic", "BP_Diastolic")
par(mfrow = c(2, ceiling(length(numeric_cols) / 2)))
for (col in numeric_cols) {
boxplot(df[[col]], main = col, col = "skyblue", horizontal = TRUE)
}
par(mfrow = c(1,1))
detect_outliers <- function(x) {
z <- scale(x)
which(abs(z) > 3)
}
outlier_index <- list()
for (var in numeric_cols) {
outlier_index[[var]] <- detect_outliers(df[[var]])
cat(var, ":", length(outlier_index[[var]]), "outlier\n")
}
## Age : 0 outlier
## Sleep.Duration : 0 outlier
## Physical.Activity.Level : 0 outlier
## Heart.Rate : 9 outlier
## Daily.Steps : 0 outlier
## BP_Systolic : 0 outlier
## BP_Diastolic : 0 outlier
Outlier diimputasi menggunakan nilai batas bawah atau batas atas
replace_outlier_with_bound <- function(x) {
Q1 <- quantile(x, 0.25, na.rm = TRUE)
Q3 <- quantile(x, 0.75, na.rm = TRUE)
IQR_val <- Q3 - Q1
lower_bound <- Q1 - 1.5 * IQR_val
upper_bound <- Q3 + 1.5 * IQR_val
x[x < lower_bound] <- lower_bound
x[x > upper_bound] <- upper_bound
return(x)
}
numeric_cols <- c("Age", "Sleep.Duration", "Physical.Activity.Level", "Heart.Rate", "Daily.Steps", "BP_Systolic", "BP_Diastolic")
df[numeric_cols] <- lapply(df[numeric_cols], replace_outlier_with_bound)
Beberapa kolom kategorikal perlu diubah menjadi representasi numerik
agar bisa digunakan dalam model. One Hot Encoding dilakukan pada kolom
Gender
, Occupation
, dan
BMI Category
, dengan baseline yang dijadikan referensi
sebagai berikut:
Gender
:Female
Occupation
: Software Engineer
BMI Category
: Normal
df$Gender <- factor(df$Gender, levels = c("Female", "Male"))
df$Occupation <- factor(df$Occupation, levels = c("Software Engineer", sort(unique(df$Occupation))[sort(unique(df$Occupation)) != "Software Engineer"]))
df$BMI.Category <- factor(df$BMI.Category, levels = c("Normal", "Overweight", "Obese"))
gender_dummies <- model.matrix(~ Gender, data = df)[, -1, drop = FALSE]
occupation_dummies <- model.matrix(~ Occupation, data = df)[, -1, drop = FALSE]
bmi_dummies <- model.matrix(~ BMI.Category, data = df)[, -1, drop = FALSE]
colnames(gender_dummies) <- gsub("Gender", "Gender_", colnames(gender_dummies))
colnames(occupation_dummies) <- gsub("Occupation", "Occupation_", colnames(occupation_dummies))
colnames(bmi_dummies) <- gsub("BMI.Category", "BMI.Category_", colnames(bmi_dummies))
colnames(gender_dummies) <- gsub("[() ]", "", colnames(gender_dummies))
colnames(occupation_dummies) <- gsub("[() ]", "", colnames(occupation_dummies))
colnames(bmi_dummies) <- gsub("[() ]", "", colnames(bmi_dummies))
df$Gender <- NULL
df$Occupation <- NULL
df$BMI.Category <- NULL
# Gabung ke df utama
df <- cbind(df, gender_dummies, occupation_dummies, bmi_dummies)
# atur urutan kolom
new_order <- c(
"Age",
"Gender_Male",
grep("^Occupation_", names(df), value = TRUE),
grep("^BMI.Category_", names(df), value = TRUE),
"Sleep.Duration",
"Quality.of.Sleep",
"Physical.Activity.Level",
"Stress.Level",
"Heart.Rate",
"Daily.Steps",
"BP_Systolic",
"BP_Diastolic",
"Sleep.Disorder"
)
cols_existing <- names(df)
cols_to_keep <- intersect(new_order, cols_existing)
cols_leftover <- setdiff(cols_existing, cols_to_keep)
df <- df[, c(cols_to_keep, cols_leftover)]
Kolom Quality of Sleep
dan Stress Level
menunjukan rating skala 1-10 yang menunjukan bahwa variabel tersebut
merupakan numerik ordinal. Maka dari itu perlu dilakukan ordered
factorization agar model dapat memahami urutan atau hierarki nilai dalam
variabel tersebut dan tidak memperlakukannya sebagai variabel numerik
biasa.
df$Quality.of.Sleep <- factor(df$Quality.of.Sleep,
ordered = TRUE,
levels = as.character(1:10))
df$Stress.Level <- factor(df$Stress.Level,
ordered = TRUE,
levels = as.character(1:10))
Sleep Disorder
Label kelas Sleep Disorder
dikonversi dari bentuk string
ke dalam tipe data faktor (unordered) agar algoritma klasifikasi seperti
Analisis Diskriminan Linear (LDA) dan Regresi Multinomial dapat
mengenali bahwa target merupakan kelas diskrit (nominal), bukan data
numerik berurutan (ordinal).
df$Sleep.Disorder <- factor(df$Sleep.Disorder, levels = c("None", "Sleep Apnea", "Insomnia"))
table(df$Sleep.Disorder)
##
## None Sleep Apnea Insomnia
## 219 78 77
barplot(table(df$Sleep.Disorder), col = "steelblue", main = "Distribusi Sleep Disorder")
library(corrplot)
## corrplot 0.95 loaded
# Ambil kolom numerik dan exclude kolom hasil one-hot encoding kategori
exclude_prefixes <- c("Gender_", "Occupation_", "BMI.Category_")
numeric_df <- df[sapply(df, is.numeric)]
numeric_df <- numeric_df[ , !grepl(paste0("^(", paste(exclude_prefixes, collapse = "|"), ")"), names(numeric_df))]
# Plot korelasi
corrplot::corrplot(cor(numeric_df), method = "color", tl.cex = 1, addCoef.col = "black", number.cex = 0.7)
Berdasarkan hasil visualisasi korelasi, ditemukan bahwa
BP_Systolic
dan BP_Diastolic
memiliki korelasi
tinggi (>0.9). Untuk menghindari masalah multikolinearitas, variabel
BP_Diastolic
dihapus dari dataset.
df$BP_Diastolic <- NULL
# Data tanpa transformasi dan standarisasi
datatable(df)
if (!require(MVN)) install.packages("MVN")
## Loading required package: MVN
## Warning: package 'MVN' was built under R version 4.4.3
library(MVN)
numeric_cols <- c("Age", "Sleep.Duration", "Physical.Activity.Level", "Heart.Rate", "Daily.Steps", "BP_Systolic")
mvn(df[numeric_cols], mvnTest = "hz", univariateTest = "AD", multivariatePlot = "qq")
## $multivariateNormality
## Test HZ p value MVN
## 1 Henze-Zirkler 20.83933 0 NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Anderson-Darling Age 3.9372 <0.001 NO
## 2 Anderson-Darling Sleep.Duration 7.3040 <0.001 NO
## 3 Anderson-Darling Physical.Activity.Level 11.3729 <0.001 NO
## 4 Anderson-Darling Heart.Rate 9.1373 <0.001 NO
## 5 Anderson-Darling Daily.Steps 8.9070 <0.001 NO
## 6 Anderson-Darling BP_Systolic 9.0151 <0.001 NO
##
## $Descriptives
## n Mean Std.Dev Median Min Max
## Age 374 42.184492 8.6731335 43.0 27.0 59.0
## Sleep.Duration 374 7.132086 0.7956567 7.2 5.8 8.5
## Physical.Activity.Level 374 59.171123 20.8308037 60.0 30.0 90.0
## Heart.Rate 374 69.965241 3.5672581 70.0 65.0 78.0
## Daily.Steps 374 6816.844920 1617.9156791 7000.0 3000.0 10000.0
## BP_Systolic 374 128.553476 7.7481176 130.0 115.0 142.0
## 25th 75th Skew Kurtosis
## Age 35.25 50.0 0.25516254 -0.9248041
## Sleep.Duration 6.40 7.8 0.03725369 -1.2945281
## Physical.Activity.Level 45.00 75.0 0.07389048 -1.2744695
## Heart.Rate 68.00 72.0 0.46555442 -0.3878657
## Daily.Steps 5600.00 8000.0 0.17684985 -0.4186421
## BP_Systolic 125.00 135.0 -0.03538326 -0.9088701
Hasil ini menunjukkan bahwa p-value<0.05 maka tolak H0, artinya populasi tidak berdistribusi normal multivariat, sehingga asumsi normal multivariat tidak terpenuhi
library(biotools)
## Warning: package 'biotools' was built under R version 4.4.3
## Loading required package: MASS
## ---
## biotools version 4.3
box_m_result <- boxM(df[, numeric_cols], grouping = df$Sleep.Disorder)
print(box_m_result)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: df[, numeric_cols]
## Chi-Sq (approx.) = 641.8, df = 42, p-value < 2.2e-16
Hasil ini menunjukkan bahwa p-value<0.05 maka tolak H0, artinya varians tidak homogen, sehingga asumsi homogenitas varians tidak terpenuhi
if (!require(MASS)) install.packages("MASS")
if (!require(caret)) install.packages("caret")
## Loading required package: caret
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: lattice
library(MASS)
library(caret)
set.seed(123)
train_index <- createDataPartition(df$Sleep.Disorder, p = 0.8, list = FALSE)
train_data <- df[train_index, ]
test_data <- df[-train_index, ]
lda_model <- lda(Sleep.Disorder ~ ., data = train_data[, c(numeric_cols, "Sleep.Disorder")])
print(lda_model)
## Call:
## lda(Sleep.Disorder ~ ., data = train_data[, c(numeric_cols, "Sleep.Disorder")])
##
## Prior probabilities of groups:
## None Sleep Apnea Insomnia
## 0.5847176 0.2093023 0.2059801
##
## Group means:
## Age Sleep.Duration Physical.Activity.Level Heart.Rate
## None 39.13068 7.351136 57.35227 68.99432
## Sleep Apnea 50.19048 7.085714 75.19048 72.46032
## Insomnia 43.46774 6.587097 47.01613 70.09677
## Daily.Steps BP_Systolic
## None 6782.386 124.0852
## Sleep Apnea 7493.651 138.1746
## Insomnia 5909.677 131.9677
##
## Coefficients of linear discriminants:
## LD1 LD2
## Age 0.0793187208 0.0150350896
## Sleep.Duration -0.5219865251 -1.5768313817
## Physical.Activity.Level 0.0182108838 0.0080401232
## Heart.Rate 0.1123632574 -0.2390564312
## Daily.Steps -0.0001783665 -0.0004843472
## BP_Systolic 0.1060692040 0.0011056403
##
## Proportion of trace:
## LD1 LD2
## 0.8076 0.1924
lda_pred <- predict(lda_model, newdata = test_data[, numeric_cols])
# Confusion matrix
conf_matrix <- confusionMatrix(as.factor(lda_pred$class), as.factor(test_data$Sleep.Disorder))
print(conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction None Sleep Apnea Insomnia
## None 41 3 1
## Sleep Apnea 0 11 1
## Insomnia 2 1 13
##
## Overall Statistics
##
## Accuracy : 0.8904
## 95% CI : (0.7954, 0.9515)
## No Information Rate : 0.589
## P-Value [Acc > NIR] : 1.509e-08
##
## Kappa : 0.8036
##
## Mcnemar's Test P-Value : 0.343
##
## Statistics by Class:
##
## Class: None Class: Sleep Apnea Class: Insomnia
## Sensitivity 0.9535 0.7333 0.8667
## Specificity 0.8667 0.9828 0.9483
## Pos Pred Value 0.9111 0.9167 0.8125
## Neg Pred Value 0.9286 0.9344 0.9649
## Prevalence 0.5890 0.2055 0.2055
## Detection Rate 0.5616 0.1507 0.1781
## Detection Prevalence 0.6164 0.1644 0.2192
## Balanced Accuracy 0.9101 0.8580 0.9075
Untuk mengetahui variabel mana yang berpengaruh terhadap gangguan tidur (Sleep Disorder), salah satu caranya adalah dengan melihat plot antara fungsi diskriminan.
plot(lda_model, col = as.integer(train_data[, c(numeric_cols, "Sleep.Disorder")]$Sleep.Disorder))
library(nnet)
library(ggplot2)
# Fit multinomial model
model <- multinom(Sleep.Disorder ~ Age + Sleep.Duration + Physical.Activity.Level + Heart.Rate + Daily.Steps + BP_Systolic,
data = df)
## # weights: 24 (14 variable)
## initial value 410.880996
## iter 10 value 321.301220
## iter 20 value 173.550134
## iter 30 value 156.485192
## iter 40 value 156.384860
## iter 50 value 153.722950
## final value 153.115710
## converged
# Prediksi probabilitas tiap kelas
probs <- predict(model, df, type = "probs")
# Ambil kelas baseline: "None"
# Logit kelas "Sleep Apnea" vs "None"
logit_SleepApnea <- log(probs[, "Sleep Apnea"] / probs[, "None"])
library(ggplot2)
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.4.3
##
## Attaching package: 'patchwork'
## The following object is masked from 'package:MASS':
##
## area
# List predictor yang mau dicek
predictors <- c("Age", "Sleep.Duration", "Physical.Activity.Level", "Heart.Rate", "Daily.Steps", "BP_Systolic")
# Probabilitas & logit udah ada dari sebelumnya
probs <- predict(model, df, type = "probs")
logit_SleepApnea <- log(probs[, "Sleep Apnea"] / probs[, "None"])
# Buat plot function biar gak ribet
plot_logit <- function(var) {
ggplot(df, aes_string(x = var, y = "logit_SleepApnea")) +
geom_point(alpha = 0.5) +
geom_smooth(method = "loess", se = TRUE, color = "blue") +
ggtitle(paste(var, "vs Sleep Apnea vs None")) +
theme_minimal()
}
# Generate semua plot
plots <- lapply(predictors, plot_logit)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Gabung jadi 2 kolom, baris otomatis
wrap_plots(plots, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Ouliers pada dataset sudah ditangani di preprocessing dengan menggunakan teknik imputasi berdasarkan batas atas atau batas bawah.
numeric_cols <- c("Age", "Sleep.Duration", "Physical.Activity.Level", "Heart.Rate", "Daily.Steps", "BP_Systolic")
detect_outliers <- function(x) {
z <- scale(x)
which(abs(z) > 3)
}
outlier_index <- list()
for (var in numeric_cols) {
outlier_index[[var]] <- detect_outliers(df[[var]])
cat(var, ":", length(outlier_index[[var]]), "outlier\n")
}
## Age : 0 outlier
## Sleep.Duration : 0 outlier
## Physical.Activity.Level : 0 outlier
## Heart.Rate : 0 outlier
## Daily.Steps : 0 outlier
## BP_Systolic : 0 outlier
df_person_id <- read.csv("Sleep_health_and_lifestyle_dataset.csv")
if("Person.ID" %in% names(df_person_id)) {
if(any(duplicated(df_person_id$Person.ID))) {
warning("Terdapat pengamatan berulang pada 'Person.ID'; asumsi independensi mungkin dilanggar.")
} else {
message("Tidak ditemukan duplikasi Person.ID; asumsi independensi kemungkinan terpenuhi.")
}
} else {
message("Tidak ada kolom Person.ID; diasumsikan pengamatan independen (tidak berulang per individu).")
}
## Tidak ditemukan duplikasi Person.ID; asumsi independensi kemungkinan terpenuhi.
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
vif_model <- lm(as.numeric(Sleep.Disorder) ~ Age + Gender_Male +
Occupation_Accountant + Occupation_Doctor + Occupation_Engineer +
Occupation_Lawyer + Occupation_Manager + Occupation_Nurse +
Occupation_SalesRepresentative + Occupation_Salesperson +
Occupation_Scientist + Occupation_Teacher +
BMI.Category_Overweight + BMI.Category_Obese +
Sleep.Duration + Quality.of.Sleep + Physical.Activity.Level +
Stress.Level + Heart.Rate + Daily.Steps + BP_Systolic, data = df)
# Ambil raw VIF
vif_raw <- vif(vif_model)
# Cek apakah output berupa matrix (GVIF), atau numeric biasa
if (is.matrix(vif_raw)) {
# Kalau GVIF, ambil GVIF^(1/(2*Df))
vif_values <- vif_raw[, "GVIF^(1/(2*Df))"]
} else {
# Kalau numeric, langsung pakai
vif_values <- vif_raw
}
# Print nilai VIF aja
print(vif_values)
## Age Gender_Male
## 4.944794 4.840664
## Occupation_Accountant Occupation_Doctor
## 4.168559 5.592044
## Occupation_Engineer Occupation_Lawyer
## 4.906945 4.028862
## Occupation_Manager Occupation_Nurse
## 1.271881 5.622822
## Occupation_SalesRepresentative Occupation_Salesperson
## 1.514146 4.056264
## Occupation_Scientist Occupation_Teacher
## 1.866671 4.116111
## BMI.Category_Overweight BMI.Category_Obese
## 5.184698 2.543089
## Sleep.Duration Quality.of.Sleep
## 5.980456 3.105311
## Physical.Activity.Level Stress.Level
## 3.703195 3.285491
## Heart.Rate Daily.Steps
## 3.975559 3.105926
## BP_Systolic
## 4.036231
# Interpretasi cepat
if(any(vif_values > 10)) {
cat("Terdeteksi multikolinearitas tinggi pada variabel:\n")
print(names(vif_values)[vif_values > 10])
} else {
cat("Tidak ditemukan multikolinearitas yang signifikan (semua VIF < 10).\n")
}
## Tidak ditemukan multikolinearitas yang signifikan (semua VIF < 10).
set.seed(42)
# Hitung 80% data buat training
sample_size <- floor(0.8 * nrow(df))
# Ambil indeks acak buat training
train_indices <- sample(seq_len(nrow(df)), size = sample_size)
# Pisah data training dan testing
train_data <- df[train_indices, ]
test_data <- df[-train_indices, ]
# Pastikan package nnet ada dan di-load
if (!require(nnet)) install.packages("nnet")
library(nnet)
# Bangun model multinomial
model <- multinom(Sleep.Disorder ~ Age + Gender_Male + Occupation_Accountant + Occupation_Doctor + Occupation_Engineer +
Occupation_Lawyer + Occupation_Manager + Occupation_Nurse + Occupation_SalesRepresentative + Occupation_Salesperson +
Occupation_Scientist + Occupation_Teacher + BMI.Category_Overweight + BMI.Category_Obese + Sleep.Duration +
Quality.of.Sleep + Physical.Activity.Level + Stress.Level + Heart.Rate + Daily.Steps + BP_Systolic,
data = train_data)
## # weights: 117 (76 variable)
## initial value 328.485074
## iter 10 value 184.429572
## iter 20 value 74.320998
## iter 30 value 63.148808
## iter 40 value 60.356073
## iter 50 value 59.676355
## iter 60 value 59.345493
## iter 70 value 59.285682
## iter 80 value 59.263849
## iter 90 value 59.261439
## final value 59.261426
## converged
# Cek ringkasan model (optional)
summary(model)
## Warning in sqrt(diag(vc)): NaNs produced
## Call:
## multinom(formula = Sleep.Disorder ~ Age + Gender_Male + Occupation_Accountant +
## Occupation_Doctor + Occupation_Engineer + Occupation_Lawyer +
## Occupation_Manager + Occupation_Nurse + Occupation_SalesRepresentative +
## Occupation_Salesperson + Occupation_Scientist + Occupation_Teacher +
## BMI.Category_Overweight + BMI.Category_Obese + Sleep.Duration +
## Quality.of.Sleep + Physical.Activity.Level + Stress.Level +
## Heart.Rate + Daily.Steps + BP_Systolic, data = train_data)
##
## Coefficients:
## (Intercept) Age Gender_Male Occupation_Accountant
## Sleep Apnea -35.20144 -0.4199811 -67.41520 -13.62271
## Insomnia -59.41418 -0.7209510 -83.11212 166.89993
## Occupation_Doctor Occupation_Engineer Occupation_Lawyer
## Sleep Apnea 113.59479 43.61202 -55.5854
## Insomnia 28.45637 110.21874 -173.9318
## Occupation_Manager Occupation_Nurse Occupation_SalesRepresentative
## Sleep Apnea -140.34399 83.59415 194.9785
## Insomnia -44.08542 -96.04647 -190.0224
## Occupation_Salesperson Occupation_Scientist Occupation_Teacher
## Sleep Apnea 7.439885 94.50775 147.679674
## Insomnia -146.633989 -28.92903 5.979174
## BMI.Category_Overweight BMI.Category_Obese Sleep.Duration
## Sleep Apnea 45.66677 443.2594 -4.031231
## Insomnia 53.32273 603.0971 -9.142610
## Quality.of.Sleep.L Quality.of.Sleep.Q Quality.of.Sleep.C
## Sleep Apnea 62.19214 127.787647 30.08441
## Insomnia -61.23879 -8.748068 95.56824
## Quality.of.Sleep^4 Quality.of.Sleep^5 Quality.of.Sleep^6
## Sleep Apnea -158.98632 -219.39281 -96.184425
## Insomnia 27.62727 -89.44627 5.491923
## Quality.of.Sleep^7 Quality.of.Sleep^8 Quality.of.Sleep^9
## Sleep Apnea 46.89133 81.76422 46.11610
## Insomnia 120.76570 30.47316 -31.03488
## Physical.Activity.Level Stress.Level.L Stress.Level.Q
## Sleep Apnea -2.551326 135.19718 -38.22392
## Insomnia -2.716705 92.55149 13.01444
## Stress.Level.C Stress.Level^4 Stress.Level^5 Stress.Level^6
## Sleep Apnea -203.6065 108.61064 76.89827 -94.19763
## Insomnia -160.1567 -10.01589 124.09150 44.10445
## Stress.Level^7 Stress.Level^8 Stress.Level^9 Heart.Rate Daily.Steps
## Sleep Apnea 176.05206 -110.9379 -101.0232 -8.901945 0.009035325
## Insomnia 16.55903 -140.8788 -168.8393 -15.807688 0.041340174
## BP_Systolic
## Sleep Apnea 5.459353
## Insomnia 8.895157
##
## Std. Errors:
## (Intercept) Age Gender_Male Occupation_Accountant
## Sleep Apnea 0.000815138 0.09064010 0.023095152 NaN
## Insomnia 0.001344494 0.09700026 0.009317922 0.007875283
## Occupation_Doctor Occupation_Engineer Occupation_Lawyer
## Sleep Apnea 1.396148e-03 0.008931239 2.330945e-02
## Insomnia 1.518267e-08 0.012522093 4.315654e-10
## Occupation_Manager Occupation_Nurse Occupation_SalesRepresentative
## Sleep Apnea 4.568149e-17 0.012218070 NaN
## Insomnia 7.261756e-114 0.004609442 8.134512e-104
## Occupation_Salesperson Occupation_Scientist Occupation_Teacher
## Sleep Apnea 0.010317102 2.039666e-17 0.01275244
## Insomnia 0.007766877 2.246845e-33 0.01461220
## BMI.Category_Overweight BMI.Category_Obese Sleep.Duration
## Sleep Apnea 0.02059691 4.316202e-10 0.03502367
## Insomnia 0.01440654 4.316202e-10 0.03082168
## Quality.of.Sleep.L Quality.of.Sleep.Q Quality.of.Sleep.C
## Sleep Apnea 0.005849946 0.007469515 0.005235265
## Insomnia 0.003539252 0.005155964 0.002094266
## Quality.of.Sleep^4 Quality.of.Sleep^5 Quality.of.Sleep^6
## Sleep Apnea 0.016136507 0.006908266 0.013615546
## Insomnia 0.008168043 0.009244142 0.006276364
## Quality.of.Sleep^7 Quality.of.Sleep^8 Quality.of.Sleep^9
## Sleep Apnea 0.016648434 0.002219380 0.010522608
## Insomnia 0.006368962 0.009777585 0.008959951
## Physical.Activity.Level Stress.Level.L Stress.Level.Q
## Sleep Apnea 0.03107878 0.006466150 0.004593091
## Insomnia 0.05704026 0.003495401 0.001140171
## Stress.Level.C Stress.Level^4 Stress.Level^5 Stress.Level^6
## Sleep Apnea 0.010386975 0.01260520 0.006788702 0.01873156
## Insomnia 0.006080048 0.00234763 0.006844985 0.00178138
## Stress.Level^7 Stress.Level^8 Stress.Level^9 Heart.Rate
## Sleep Apnea 0.001911183 0.017231947 0.021841580 0.1269392
## Insomnia 0.007678944 0.009152071 0.009588321 0.1010560
## Daily.Steps BP_Systolic
## Sleep Apnea 0.0004308384 0.08750845
## Insomnia 0.0008581895 0.06872888
##
## Residual Deviance: 118.5229
## AIC: 238.5229
# Prediksi kelas Sleep.Disorder di test data
mlr_pred <- predict(model, newdata = test_data)
# Confusion matrix
library(caret)
conf_matrix <- confusionMatrix(mlr_pred, test_data$Sleep.Disorder)
print(conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction None Sleep Apnea Insomnia
## None 45 1 4
## Sleep Apnea 0 8 0
## Insomnia 2 2 13
##
## Overall Statistics
##
## Accuracy : 0.88
## 95% CI : (0.7844, 0.9436)
## No Information Rate : 0.6267
## P-Value [Acc > NIR] : 9.101e-07
##
## Kappa : 0.7671
##
## Mcnemar's Test P-Value : 0.2998
##
## Statistics by Class:
##
## Class: None Class: Sleep Apnea Class: Insomnia
## Sensitivity 0.9574 0.7273 0.7647
## Specificity 0.8214 1.0000 0.9310
## Pos Pred Value 0.9000 1.0000 0.7647
## Neg Pred Value 0.9200 0.9552 0.9310
## Prevalence 0.6267 0.1467 0.2267
## Detection Rate 0.6000 0.1067 0.1733
## Detection Prevalence 0.6667 0.1067 0.2267
## Balanced Accuracy 0.8894 0.8636 0.8479