Analisis ini bertujuan untuk memodelkan jumlah dokter yang dikunjungi oleh lansia di Amerika Serikat menggunakan Regresi Logistik Multinomial. Dataset yang digunakan adalah National Poll on Healthy Aging (NPHA) yang berisi informasi kesehatan lansia.
Variabel Respon (Y): Number of Doctors Visited
Variabel Prediktor: kondisi kesehatan fisik, mental, gigi, status pekerjaan, gangguan tidur, ras, dan jenis kelamin.
# Hapus tanda '#' di bawah ini kalau belum install package-nya
# install.packages(c("nnet", "car", "DescTools", "tidyverse", "caret", "knitr", "kableExtra", "rcompanion"))
library(nnet)
library(car)
library(DescTools)
library(tidyverse)
library(caret)
library(knitr)
library(kableExtra)
library(rcompanion)## Jumlah baris: 714
## Jumlah kolom: 15
Kolom Age tidak informatif karena seluruh nilainya sama (= 2), sehingga dihapus.
## Kolom setelah Age dihapus: 14 kolom
colnames(df) <- c(
"Y", # Number of Doctors Visited (VARIABEL RESPON)
"PhysHealth", # Physical Health
"MentHealth", # Mental Health
"DentHealth", # Dental Health
"Employment", # Status pekerjaan
"Stress", # Stress mengganggu tidur
"Medication", # Obat mengganggu tidur
"Pain", # Nyeri mengganggu tidur
"Bathroom", # Kebutuhan kamar mandi mengganggu tidur
"Unknown", # Tidak diketahui mengganggu tidur
"TroubleSleep", # Masalah tidur
"SleepMed", # Obat tidur resep
"Race", # Ras
"Gender" # Jenis kelamin
)
cat("Rename kolom berhasil.\n")## Rename kolom berhasil.
Nilai -1 menandakan responden menolak menjawab (missing), sehingga perlu dihapus.
## Jumlah nilai -1 sebelum cleaning: 20
## Jumlah observasi setelah cleaning: 696
df$Y <- factor(df$Y, levels = c(1, 2, 3),
labels = c("Sedikit", "Sedang", "Banyak"))
df$PhysHealth <- factor(df$PhysHealth)
df$MentHealth <- factor(df$MentHealth)
df$DentHealth <- factor(df$DentHealth)
df$Employment <- factor(df$Employment)
df$Stress <- factor(df$Stress)
df$Medication <- factor(df$Medication)
df$Pain <- factor(df$Pain)
df$Bathroom <- factor(df$Bathroom)
df$Unknown <- factor(df$Unknown)
df$TroubleSleep <- factor(df$TroubleSleep)
df$SleepMed <- factor(df$SleepMed)
df$Race <- factor(df$Race)
df$Gender <- factor(df$Gender)
str(df)## 'data.frame': 696 obs. of 14 variables:
## $ Y : Factor w/ 3 levels "Sedikit","Sedang",..: 3 2 3 1 3 2 3 2 2 1 ...
## $ PhysHealth : Factor w/ 5 levels "1","2","3","4",..: 4 4 3 3 3 3 4 3 3 2 ...
## $ MentHealth : Factor w/ 5 levels "1","2","3","4",..: 3 2 2 2 3 2 1 2 1 1 ...
## $ DentHealth : Factor w/ 6 levels "1","2","3","4",..: 3 3 3 3 3 4 1 6 2 3 ...
## $ Employment : Factor w/ 4 levels "1","2","3","4": 3 3 3 3 3 3 3 3 3 1 ...
## $ Stress : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 2 1 1 ...
## $ Medication : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ Pain : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 2 1 ...
## $ Bathroom : Factor w/ 2 levels "0","1": 1 2 1 2 1 2 2 1 2 1 ...
## $ Unknown : Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 1 1 2 ...
## $ TroubleSleep: Factor w/ 3 levels "1","2","3": 2 3 3 3 2 3 2 3 3 3 ...
## $ SleepMed : Factor w/ 3 levels "1","2","3": 3 3 3 3 3 3 1 3 3 3 ...
## $ Race : Factor w/ 5 levels "1","2","3","4",..: 1 1 4 4 1 1 1 1 1 1 ...
## $ Gender : Factor w/ 2 levels "1","2": 2 1 1 2 2 1 1 1 2 1 ...
tabel_Y <- table(df$Y)
persen_Y <- round(prop.table(tabel_Y) * 100, 2)
tabel_respon <- data.frame(
Kategori = names(tabel_Y),
Frekuensi = as.numeric(tabel_Y),
Persentase = paste0(formatC(as.numeric(persen_Y), format = "f", digits = 2), "%")
)
tabel_respon %>%
kable(caption = "Tabel 1. Distribusi Variabel Respon (Y)") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Kategori | Frekuensi | Persentase |
|---|---|---|
| Sedikit | 126 | 18.10% |
| Sedang | 363 | 52.16% |
| Banyak | 207 | 29.74% |
barplot(tabel_Y,
main = "Distribusi Jumlah Dokter yang Dikunjungi",
xlab = "Kategori",
ylab = "Frekuensi",
col = c("steelblue", "orange", "tomato"),
names.arg = c("Sedikit (1)", "Sedang (2)", "Banyak (3)"))Hipotesis:
prediktor <- c("PhysHealth", "MentHealth", "DentHealth", "Employment",
"Stress", "Medication", "Pain", "Bathroom", "Unknown",
"TroubleSleep", "SleepMed", "Race", "Gender")
hasil_chisq <- data.frame(
Variabel = prediktor,
ChiSquare = NA,
Pvalue = NA,
Keputusan = NA
)
for (i in seq_along(prediktor)) {
var <- prediktor[i]
tbl <- table(df[[var]], df$Y)
uji <- chisq.test(tbl)
hasil_chisq$ChiSquare[i] <- round(uji$statistic, 4)
hasil_chisq$Pvalue[i] <- round(uji$p.value, 4)
hasil_chisq$Keputusan[i] <- ifelse(uji$p.value < 0.05, "Tolak H0", "Gagal Tolak H0")
}
hasil_chisq %>%
kable(caption = "Tabel 2. Hasil Uji Independensi Chi-Square") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
row_spec(which(hasil_chisq$Pvalue < 0.05), background = "#d4edda")| Variabel | ChiSquare | Pvalue | Keputusan |
|---|---|---|---|
| PhysHealth | 22.5502 | 0.0040 | Tolak H0 |
| MentHealth | 6.5532 | 0.5855 | Gagal Tolak H0 |
| DentHealth | 11.2035 | 0.3419 | Gagal Tolak H0 |
| Employment | 14.5235 | 0.0243 | Tolak H0 |
| Stress | 3.6019 | 0.1651 | Gagal Tolak H0 |
| Medication | 10.8690 | 0.0044 | Tolak H0 |
| Pain | 4.2467 | 0.1196 | Gagal Tolak H0 |
| Bathroom | 2.4595 | 0.2924 | Gagal Tolak H0 |
| Unknown | 0.4441 | 0.8009 | Gagal Tolak H0 |
| TroubleSleep | 5.5666 | 0.2339 | Gagal Tolak H0 |
| SleepMed | 16.9255 | 0.0020 | Tolak H0 |
| Race | 19.9840 | 0.0104 | Tolak H0 |
| Gender | 1.2798 | 0.5274 | Gagal Tolak H0 |
var_signifikan <- hasil_chisq$Variabel[hasil_chisq$Pvalue < 0.05]
cat("\nVariabel signifikan:", paste(var_signifikan, collapse = ", "), "\n")##
## Variabel signifikan: PhysHealth, Employment, Medication, SleepMed, Race
Model dibentuk menggunakan variabel yang signifikan dari uji Chi-Square dengan kategori referensi Y = Sedikit.
df$Y <- relevel(df$Y, ref = "Sedikit")
formula_model <- as.formula(paste("Y ~", paste(var_signifikan, collapse = " + ")))
cat("Formula model:\n")## Formula model:
## Y ~ PhysHealth + Employment + Medication + SleepMed + Race
## # weights: 48 (30 variable)
## initial value 764.634153
## iter 10 value 670.574394
## iter 20 value 663.823762
## iter 30 value 663.526216
## iter 40 value 663.471194
## final value 663.471116
## converged
## Call:
## multinom(formula = formula_model, data = df, maxit = 200)
##
## Coefficients:
## (Intercept) PhysHealth2 PhysHealth3 PhysHealth4 PhysHealth5 Employment2
## Sedang 2.188072 0.5034784 0.3932622 0.743801 0.5331025 -0.1037948
## Banyak 1.500083 0.3168909 0.7492710 1.290832 0.9367056 0.3444506
## Employment3 Employment4 Medication1 SleepMed2 SleepMed3 Race2
## Sedang 0.1047031 -1.5693460 1.029053 -1.254507 -1.690097 0.2453823
## Banyak 0.6834303 0.4958809 1.759296 -1.794207 -2.373488 -0.3951672
## Race3 Race4 Race5
## Sedang -1.2152522 -0.5540754 13.71819
## Banyak -0.3087676 -1.4578191 13.33296
##
## Std. Errors:
## (Intercept) PhysHealth2 PhysHealth3 PhysHealth4 PhysHealth5 Employment2
## Sedang 1.178804 0.4393285 0.4394547 0.5117139 0.8087895 0.5027683
## Banyak 1.242803 0.5434102 0.5365307 0.5976350 0.8769164 0.6261171
## Employment3 Employment4 Medication1 SleepMed2 SleepMed3 Race2
## Sedang 0.3862213 0.8523014 0.7669523 1.191086 1.056960 0.4241660
## Banyak 0.4978889 0.7925190 0.7673019 1.196659 1.048583 0.4993017
## Race3 Race4 Race5
## Sedang 0.6111763 0.3822433 0.2569032
## Banyak 0.5720616 0.5237545 0.2569031
##
## Residual Deviance: 1326.942
## AIC: 1386.942
Hipotesis:
## # weights: 6 (2 variable)
## initial value 764.634153
## final value 702.650827
## converged
lrt <- deviance(model_null) - deviance(model)
df_lrt <- attr(logLik(model), "df") - attr(logLik(model_null), "df")
p_value_lrt <- pchisq(lrt, df = df_lrt, lower.tail = FALSE)
hasil_serentak <- data.frame(
Statistik = c("G² (Chi-Square)", "Derajat Bebas", "P-value", "Keputusan"),
Nilai = c(round(lrt, 4), df_lrt, round(p_value_lrt, 4),
ifelse(p_value_lrt < 0.05, "Tolak H0 - Model Signifikan", "Gagal Tolak H0"))
)
hasil_serentak %>%
kable(caption = "Tabel 3. Hasil Uji Serentak (Likelihood Ratio Test)") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Statistik | Nilai |
|---|---|
| G² (Chi-Square) | 78.3594 |
| Derajat Bebas | 28 |
| P-value | 0 |
| Keputusan | Tolak H0 - Model Signifikan |
Hipotesis:
z <- summary(model)$coefficients / summary(model)$standard.errors
p_value_wald <- (1 - pnorm(abs(z))) * 2
cat("=== Nilai Wald (Z) ===\n")## === Nilai Wald (Z) ===
## (Intercept) PhysHealth2 PhysHealth3 PhysHealth4 PhysHealth5 Employment2
## Sedang 1.8562 1.1460 0.8949 1.4535 0.6591 -0.2064
## Banyak 1.2070 0.5832 1.3965 2.1599 1.0682 0.5501
## Employment3 Employment4 Medication1 SleepMed2 SleepMed3 Race2 Race3
## Sedang 0.2711 -1.8413 1.3417 -1.0532 -1.5990 0.5785 -1.9884
## Banyak 1.3727 0.6257 2.2928 -1.4993 -2.2635 -0.7914 -0.5397
## Race4 Race5
## Sedang -1.4495 53.3983
## Banyak -2.7834 51.8988
##
## === P-value ===
## (Intercept) PhysHealth2 PhysHealth3 PhysHealth4 PhysHealth5 Employment2
## Sedang 0.0634 0.2518 0.3708 0.1461 0.5098 0.8364
## Banyak 0.2274 0.5598 0.1626 0.0308 0.2854 0.5822
## Employment3 Employment4 Medication1 SleepMed2 SleepMed3 Race2 Race3
## Sedang 0.7863 0.0656 0.1797 0.2922 0.1098 0.5629 0.0468
## Banyak 0.1699 0.5315 0.0219 0.1338 0.0236 0.4287 0.5894
## Race4 Race5
## Sedang 0.1472 0
## Banyak 0.0054 0
## $Models
##
## Model: NA
## Null: NA
##
## $Pseudo.R.squared.for.model.vs.null
## Pseudo.R.squared
## McFadden NA
## Cox and Snell (ML) NA
## Nagelkerke (Cragg and Uhler) NA
##
## $Likelihood.ratio.test
## Df.diff LogLik.diff Chisq p.value
## NA NA NA NA
##
## $Number.of.observations
##
## Model: NA
## Null: NA
##
## $Messages
## [1] "This function will work with lm, gls, lme, lmer, glmer, glm, negbin, zeroinfl, glmmTMB, nls, clm, clmm, and vglm"
##
## $Warnings
## [1] "None"
r2 <- hasil_nagelkerke$Pseudo.R.squared.for.model.vs.null[3]
cat("\nNagelkerke R²:", round(r2, 4), "\n")##
## Nagelkerke R²: NA
## Model mampu menjelaskan NA % variasi variabel respon
Odds Ratio = exp(β). Interpretasi:
odds_ratio <- exp(coef(model))
odds_ratio %>%
round(4) %>%
kable(caption = "Tabel 4. Odds Ratio (exp(β))") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE)| (Intercept) | PhysHealth2 | PhysHealth3 | PhysHealth4 | PhysHealth5 | Employment2 | Employment3 | Employment4 | Medication1 | SleepMed2 | SleepMed3 | Race2 | Race3 | Race4 | Race5 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Sedang | 8.9180 | 1.6545 | 1.4818 | 2.1039 | 1.7042 | 0.9014 | 1.1104 | 0.2082 | 2.7984 | 0.2852 | 0.1845 | 1.2781 | 0.2966 | 0.5746 | 907263.1 |
| Banyak | 4.4821 | 1.3729 | 2.1155 | 3.6358 | 2.5516 | 1.4112 | 1.9807 | 1.6419 | 5.8083 | 0.1663 | 0.0932 | 0.6736 | 0.7344 | 0.2327 | 617207.8 |
pred <- predict(model, newdata = df)
conf_matrix <- table(Prediksi = pred, Aktual = df$Y)
conf_matrix %>%
kable(caption = "Tabel 5. Confusion Matrix") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Sedikit | Sedang | Banyak | |
|---|---|---|---|
| Sedikit | 5 | 4 | 4 |
| Sedang | 113 | 336 | 167 |
| Banyak | 8 | 23 | 36 |
##
## Akurasi Model: 54.17 %
## Ringkasan Hasil Analisis:
## - Total observasi : 696
## - Variabel signifikan : PhysHealth, Employment, Medication, SleepMed, Race
## - Kategori referensi : Sedikit (Y=1)
## - Model logit : 2 model (Sedang vs Sedikit, Banyak vs Sedikit)
## - Akurasi model : 54.17 %
Kelas 2024C | Dosen Pengampu: Ulfa Siti Nuraini, S.Stat., M.Stat