Analisis ini bertujuan untuk memodelkan jumlah dokter yang dikunjungi oleh lansia menggunakan Regresi Logistik Multinomial. Dataset yang digunakan adalah National Poll on Healthy Aging (NPHA) yang berisi informasi kesehatan lansia di Amerika Serikat.
Variabel Respon (Y): Number of Doctors Visited
- 1 = Sedikit
- 2 = Sedang
- 3 = Banyak
Variabel Prediktor: kondisi kesehatan fisik, mental, gigi, status pekerjaan, gangguan tidur, ras, dan jenis kelamin.
# install.packages("nnet")
# install.packages("car")
# install.packages("DescTools")
# install.packages("tidyverse")
# install.packages("caret")
# install.packages("knitr")
# install.packages("kableExtra")
library(nnet)
library(car)
library(DescTools)
library(tidyverse)
library(caret)
library(knitr)
library(kableExtra)
# Load the NPHA doctor-visit survey file from the working directory.
df <- read.csv(file = "NPHA-doctor-visits.csv")

# Report the overall shape first, then the row and column counts separately.
cat("Dimensi data:", c(nrow(df), ncol(df)), "\n")
## Dimensi data: 714 15
cat("Jumlah baris:", dim(df)[1], "\n")
## Jumlah baris: 714
cat("Jumlah kolom:", dim(df)[2], "\n")
## Jumlah kolom: 15

# List the raw column headers as read from the file.
print(names(df))
## [1] "Number.of.Doctors.Visited"
## [2] "Age"
## [3] "Phyiscal.Health"
## [4] "Mental.Health"
## [5] "Dental.Health"
## [6] "Employment"
## [7] "Stress.Keeps.Patient.from.Sleeping"
## [8] "Medication.Keeps.Patient.from.Sleeping"
## [9] "Pain.Keeps.Patient.from.Sleeping"
## [10] "Bathroom.Needs.Keeps.Patient.from.Sleeping"
## [11] "Uknown.Keeps.Patient.from.Sleeping"
## [12] "Trouble.Sleeping"
## [13] "Prescription.Sleep.Medication"
## [14] "Race"
## [15] "Gender"
Kolom Age tidak informatif karena seluruh nilainya sama (= 2), sehingga dihapus.
# Drop the Age column by name rather than by position, so a change in the
# CSV column order cannot silently remove a different variable.
df <- df[, names(df) != "Age", drop = FALSE]
cat("Kolom setelah Age dihapus:", ncol(df), "kolom\n")
## Kolom setelah Age dihapus: 14 kolom
# Replace the raw survey headers with short analysis names. Names are
# assigned by position, so the order below must match the column order of df.
df <- setNames(
  df,
  c(
    "Y",            # Number of Doctors Visited (response variable)
    "PhysHealth",   # Physical health
    "MentHealth",   # Mental health
    "DentHealth",   # Dental health
    "Employment",   # Employment status
    "Stress",       # Stress keeps patient from sleeping
    "Medication",   # Medication keeps patient from sleeping
    "Pain",         # Pain keeps patient from sleeping
    "Bathroom",     # Bathroom needs keep patient from sleeping
    "Unknown",      # Unknown cause keeps patient from sleeping
    "TroubleSleep", # Trouble sleeping
    "SleepMed",     # Prescription sleep medication
    "Race",         # Race
    "Gender"        # Gender
  )
)

# Show the new headers.
cat("Nama kolom setelah rename:\n")
## Nama kolom setelah rename:
print(names(df))
## [1] "Y" "PhysHealth" "MentHealth" "DentHealth" "Employment"
## [6] "Stress" "Medication" "Pain" "Bathroom" "Unknown"
## [11] "TroubleSleep" "SleepMed" "Race" "Gender"
Nilai -1 menandakan responden menolak menjawab (missing), sehingga setiap baris yang memuat nilai -1 dihapus.
# Logical matrix marking every cell equal to -1. NA cells are explicitly
# treated as "not -1": otherwise an NA would propagate into the count and
# into the row filter, where an NA index injects an all-NA row into df.
sel_minus1 <- !is.na(df) & df == -1
cat("Jumlah nilai -1 sebelum cleaning:", sum(sel_minus1), "\n")
## Jumlah nilai -1 sebelum cleaning: 20

# Keep only the rows that contain no -1 in any column.
df <- df[rowSums(sel_minus1) == 0, , drop = FALSE]
cat("Dimensi data setelah cleaning:", dim(df), "\n")
## Dimensi data setelah cleaning: 696 14
cat("Jumlah observasi:", nrow(df), "\n")
## Jumlah observasi: 696
# Response variable: codes 1/2/3 become the labelled categories
# Sedikit / Sedang / Banyak, in that level order.
df$Y <- factor(
  df$Y,
  levels = c(1, 2, 3),
  labels = c("Sedikit", "Sedang", "Banyak")
)

# Categorical predictors: convert each listed column to a factor using its
# observed values as levels.
kolom_faktor <- c(
  "PhysHealth", "MentHealth", "DentHealth", "Employment",
  "Stress", "Medication", "Pain", "Bathroom", "Unknown",
  "TroubleSleep", "SleepMed", "Race", "Gender"
)
df[kolom_faktor] <- lapply(df[kolom_faktor], factor)

# Inspect the final structure.
str(df)
## 'data.frame': 696 obs. of 14 variables:
## $ Y : Factor w/ 3 levels "Sedikit","Sedang",..: 3 2 3 1 3 2 3 2 2 1 ...
## $ PhysHealth : Factor w/ 5 levels "1","2","3","4",..: 4 4 3 3 3 3 4 3 3 2 ...
## $ MentHealth : Factor w/ 5 levels "1","2","3","4",..: 3 2 2 2 3 2 1 2 1 1 ...
## $ DentHealth : Factor w/ 6 levels "1","2","3","4",..: 3 3 3 3 3 4 1 6 2 3 ...
## $ Employment : Factor w/ 4 levels "1","2","3","4": 3 3 3 3 3 3 3 3 3 1 ...
## $ Stress : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 2 1 1 ...
## $ Medication : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ Pain : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 2 1 ...
## $ Bathroom : Factor w/ 2 levels "0","1": 1 2 1 2 1 2 2 1 2 1 ...
## $ Unknown : Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 1 1 2 ...
## $ TroubleSleep: Factor w/ 3 levels "1","2","3": 2 3 3 3 2 3 2 3 3 3 ...
## $ SleepMed : Factor w/ 3 levels "1","2","3": 3 3 3 3 3 3 1 3 3 3 ...
## $ Race : Factor w/ 5 levels "1","2","3","4",..: 1 1 4 4 1 1 1 1 1 1 ...
## $ Gender : Factor w/ 2 levels "1","2": 2 1 1 2 2 1 1 1 2 1 ...
# Frequency and percentage of each response category.
tabel_Y <- table(df$Y)
persen_Y <- round(prop.table(tabel_Y) * 100, 2)
tabel_respon <- data.frame(
  Kategori = names(tabel_Y),
  Frekuensi = as.numeric(tabel_Y),
  Persentase = paste0(as.numeric(persen_Y), "%")
)

# Render the distribution as a styled table.
tabel_respon %>%
  kable(caption = "Distribusi Variabel Respon (Y)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

# Bar chart of the same frequencies, one colour per category.
barplot(
  tabel_Y,
  main = "Distribusi Jumlah Dokter yang Dikunjungi",
  xlab = "Kategori",
  ylab = "Frekuensi",
  col = c("steelblue", "orange", "tomato"),
  names.arg = c("Sedikit (1)", "Sedang (2)", "Banyak (3)")
)
Hipotesis:
- H₀: Tidak ada hubungan antara prediktor dengan Y
- H₁: Ada hubungan antara prediktor dengan Y
- Keputusan: Tolak H₀ jika p-value < 0.05
# Predictors to screen against the response Y.
prediktor <- c(
  "PhysHealth", "MentHealth", "DentHealth", "Employment",
  "Stress", "Medication", "Pain", "Bathroom", "Unknown",
  "TroubleSleep", "SleepMed", "Race", "Gender"
)

# Run one chi-square test of independence per predictor on its
# contingency table with Y.
daftar_uji <- lapply(prediktor, function(nama) {
  chisq.test(table(df[[nama]], df$Y))
})

# Pull the statistic and the unrounded p-value out of each test result.
nilai_chi <- vapply(daftar_uji, function(u) unname(u$statistic), numeric(1))
nilai_p <- vapply(daftar_uji, function(u) u$p.value, numeric(1))

# Summary table: rounded statistic and p-value, plus the decision at the
# 0.05 level, which is taken from the unrounded p-value.
hasil_chisq <- data.frame(
  Variabel = prediktor,
  ChiSquare = round(nilai_chi, 4),
  Pvalue = round(nilai_p, 4),
  Keputusan = ifelse(nilai_p < 0.05, "Tolak H0", "Gagal Tolak H0")
)
print(hasil_chisq)
## Variabel ChiSquare Pvalue Keputusan
## 1 PhysHealth 22.5502 0.0040 Tolak H0
## 2 MentHealth 6.5532 0.5855 Gagal Tolak H0
## 3 DentHealth 11.2035 0.3419 Gagal Tolak H0
## 4 Employment 14.5235 0.0243 Tolak H0
## 5 Stress 3.6019 0.1651 Gagal Tolak H0
## 6 Medication 10.8690 0.0044 Tolak H0
## 7 Pain 4.2467 0.1196 Gagal Tolak H0
## 8 Bathroom 2.4595 0.2924 Gagal Tolak H0
## 9 Unknown 0.4441 0.8009 Gagal Tolak H0
## 10 TroubleSleep 5.5666 0.2339 Gagal Tolak H0
## 11 SleepMed 16.9255 0.0020 Tolak H0
## 12 Race 19.9840 0.0104 Tolak H0
## 13 Gender 1.2798 0.5274 Gagal Tolak H0
# Significant variables. Select on the decision column instead of the
# rounded Pvalue column: Keputusan was derived from the unrounded p-value,
# so a p-value just below 0.05 that rounds to 0.05 would be marked
# "Tolak H0" in the table yet dropped by a `Pvalue < 0.05` filter.
var_signifikan <- hasil_chisq$Variabel[hasil_chisq$Keputusan == "Tolak H0"]
cat("\nVariabel signifikan:", paste(var_signifikan, collapse = ", "), "\n")
##
## Variabel signifikan: PhysHealth, Employment, Medication, SleepMed, Race