# Multinomial Logistic Regression
# Install packages
# install.packages("nnet")
# install.packages("ucimlrepo")
# Load packages
library(nnet)
library(ucimlrepo)
## Warning: package 'ucimlrepo' was built under R version 4.4.1
# Multinomial Logistic Regression
library(nnet)
# Load kembali
obesity_data <- read.csv("~/Documents/S1 Aida/Semester 2 2025/STIK4362 Analisis Data Kategorik/Tugas 2/ObesityDataSet_raw_and_data_sinthetic.csv")
# Lihat 6 baris pertama
head(obesity_data)
## Gender Age Height Weight family_history_with_overweight FAVC FCVC NCP
## 1 Female 21 1.62 64.0 yes no 2 3
## 2 Female 21 1.52 56.0 yes no 3 3
## 3 Male 23 1.80 77.0 yes no 2 3
## 4 Male 27 1.80 87.0 no no 3 3
## 5 Male 22 1.78 89.8 no no 2 1
## 6 Male 29 1.62 53.0 no yes 2 3
## CAEC SMOKE CH2O SCC FAF TUE CALC MTRANS
## 1 Sometimes no 2 no 0 1 no Public_Transportation
## 2 Sometimes yes 3 yes 3 0 Sometimes Public_Transportation
## 3 Sometimes no 2 no 2 1 Frequently Public_Transportation
## 4 Sometimes no 2 no 2 0 Frequently Walking
## 5 Sometimes no 2 no 0 0 Sometimes Public_Transportation
## 6 Sometimes no 2 no 0 0 Sometimes Automobile
## NObeyesdad
## 1 Normal_Weight
## 2 Normal_Weight
## 3 Normal_Weight
## 4 Overweight_Level_I
## 5 Overweight_Level_II
## 6 Normal_Weight
# Ubah target menjadi faktor nominal (multinomial)
obesity_data$NObeyesdad <- factor(obesity_data$NObeyesdad)
levels(obesity_data$NObeyesdad)
## [1] "Insufficient_Weight" "Normal_Weight" "Obesity_Type_I"
## [4] "Obesity_Type_II" "Obesity_Type_III" "Overweight_Level_I"
## [7] "Overweight_Level_II"
# Cek variabel yang bermasalah (jika ada)
problem_vars <- names(which(sapply(obesity_data, function(x) length(unique(x)) < 2)))
problem_vars
## character(0)
head(obesity_data)
## Gender Age Height Weight family_history_with_overweight FAVC FCVC NCP
## 1 Female 21 1.62 64.0 yes no 2 3
## 2 Female 21 1.52 56.0 yes no 3 3
## 3 Male 23 1.80 77.0 yes no 2 3
## 4 Male 27 1.80 87.0 no no 3 3
## 5 Male 22 1.78 89.8 no no 2 1
## 6 Male 29 1.62 53.0 no yes 2 3
## CAEC SMOKE CH2O SCC FAF TUE CALC MTRANS
## 1 Sometimes no 2 no 0 1 no Public_Transportation
## 2 Sometimes yes 3 yes 3 0 Sometimes Public_Transportation
## 3 Sometimes no 2 no 2 1 Frequently Public_Transportation
## 4 Sometimes no 2 no 2 0 Frequently Walking
## 5 Sometimes no 2 no 0 0 Sometimes Public_Transportation
## 6 Sometimes no 2 no 0 0 Sometimes Automobile
## NObeyesdad
## 1 Normal_Weight
## 2 Normal_Weight
## 3 Normal_Weight
## 4 Overweight_Level_I
## 5 Overweight_Level_II
## 6 Normal_Weight
# Define X variables (beberapa kategorikal harus difaktorkan)
obesity_data$Gender <- factor(obesity_data$Gender)
obesity_data$FAVC <- factor(obesity_data$FAVC)
obesity_data$Age <- as.numeric(obesity_data$Age)
obesity_data$Weight <- as.numeric(obesity_data$Weight)
obesity_data$CAEC <- factor(obesity_data$CAEC)
levels(obesity_data$Gender)
## [1] "Female" "Male"
levels(obesity_data$FAVC)
## [1] "no" "yes"
levels(obesity_data$CAEC)
## [1] "Always" "Frequently" "no" "Sometimes"
# Multinomial logistic regression Using multinom
model_mul <- multinom(NObeyesdad ~ Gender + Weight + FAVC + Age + CAEC,
data = obesity_data)
## # weights: 63 (48 variable)
## initial value 4107.816325
## iter 10 value 3878.637204
## iter 20 value 2187.218470
## iter 30 value 1689.213946
## iter 40 value 1243.072066
## iter 50 value 1192.362846
## iter 60 value 1174.898832
## iter 70 value 1170.663917
## iter 80 value 1169.639516
## iter 90 value 1168.653376
## iter 100 value 1168.079757
## final value 1168.079757
## stopped after 100 iterations
# Summary of model
summary(model_mul)
## Call:
## multinom(formula = NObeyesdad ~ Gender + Weight + FAVC + Age +
## CAEC, data = obesity_data)
##
## Coefficients:
## (Intercept) GenderMale Weight FAVCyes Age
## Normal_Weight -15.85475 -2.903833 0.3593537 -0.9404953 0.09528993
## Obesity_Type_I -74.04415 -11.615202 1.1030183 -1.2788254 0.18357115
## Obesity_Type_II -103.71274 -11.296107 1.3183863 -0.8235578 0.32504625
## Obesity_Type_III -140.78921 -34.860692 1.7199104 -1.1488116 0.32948047
## Overweight_Level_I -37.27957 -6.429666 0.6381488 0.5081753 0.10863023
## Overweight_Level_II -50.73985 -6.962977 0.8011605 -2.2965782 0.19106307
## CAECFrequently CAECno CAECSometimes
## Normal_Weight -4.450959 -3.4806716 -3.56687305
## Obesity_Type_I -5.366421 0.1612546 -0.04692189
## Obesity_Type_II -3.594695 1.1781689 2.22783607
## Obesity_Type_III -2.008939 13.6185212 3.04464754
## Overweight_Level_I -3.956591 1.6779495 -0.56797082
## Overweight_Level_II -3.170271 0.2644646 0.77222312
##
## Std. Errors:
## (Intercept) GenderMale Weight FAVCyes Age
## Normal_Weight 1.5565252 0.3962635 0.03035098 0.3529192 0.04134082
## Obesity_Type_I 1.5254576 0.6104703 0.03540896 0.6713518 0.04724586
## Obesity_Type_II 2.1863195 1.0119013 0.03661424 1.1102770 0.05106199
## Obesity_Type_III 0.3017596 1.7482096 0.04382323 0.4028078 0.11650705
## Overweight_Level_I 1.6191294 0.5300536 0.03384563 0.4839264 0.04570086
## Overweight_Level_II 1.6093100 0.5499429 0.03421123 0.5073307 0.04640003
## CAECFrequently CAECno CAECSometimes
## Normal_Weight 1.0115244 1.282190 0.9674804
## Obesity_Type_I 1.4551093 1.323655 1.2038099
## Obesity_Type_II 2.1412354 1.306965 1.5665825
## Obesity_Type_III 0.3569354 1.537263 2.2118558
## Overweight_Level_I 1.1880666 1.379769 1.0763519
## Overweight_Level_II 1.2859180 1.777602 1.1537295
##
## Residual Deviance: 2336.16
## AIC: 2432.16
# Extract coefficients
coefs <- summary(model_mul)$coefficients
std_err <- summary(model_mul)$standard.errors
# Generate p-value (Wald test)
z_values <- coefs / std_err
p_values <- 2 * (1 - pnorm(abs(z_values)))
# Gabungkan ke dalam tabel
coefs_table <- cbind(coefs, "p value" = p_values)
coefs_table
## (Intercept) GenderMale Weight FAVCyes Age
## Normal_Weight -15.85475 -2.903833 0.3593537 -0.9404953 0.09528993
## Obesity_Type_I -74.04415 -11.615202 1.1030183 -1.2788254 0.18357115
## Obesity_Type_II -103.71274 -11.296107 1.3183863 -0.8235578 0.32504625
## Obesity_Type_III -140.78921 -34.860692 1.7199104 -1.1488116 0.32948047
## Overweight_Level_I -37.27957 -6.429666 0.6381488 0.5081753 0.10863023
## Overweight_Level_II -50.73985 -6.962977 0.8011605 -2.2965782 0.19106307
## CAECFrequently CAECno CAECSometimes (Intercept)
## Normal_Weight -4.450959 -3.4806716 -3.56687305 0
## Obesity_Type_I -5.366421 0.1612546 -0.04692189 0
## Obesity_Type_II -3.594695 1.1781689 2.22783607 0
## Obesity_Type_III -2.008939 13.6185212 3.04464754 0
## Overweight_Level_I -3.956591 1.6779495 -0.56797082 0
## Overweight_Level_II -3.170271 0.2644646 0.77222312 0
## GenderMale Weight FAVCyes Age
## Normal_Weight 2.335909e-13 0 7.701063e-03 2.116748e-02
## Obesity_Type_I 0.000000e+00 0 5.679942e-02 1.021431e-04
## Obesity_Type_II 0.000000e+00 0 4.582335e-01 1.943776e-10
## Obesity_Type_III 0.000000e+00 0 4.344383e-03 4.684166e-03
## Overweight_Level_I 0.000000e+00 0 2.936682e-01 1.745484e-02
## Overweight_Level_II 0.000000e+00 0 5.988716e-06 3.826129e-05
## CAECFrequently CAECno CAECSometimes
## Normal_Weight 1.081270e-05 0.006635003 0.0002271229
## Obesity_Type_I 2.260370e-04 0.903037460 0.9689080696
## Obesity_Type_II 9.319207e-02 0.367346893 0.1549973850
## Obesity_Type_III 1.819954e-08 0.000000000 0.1686629761
## Overweight_Level_I 8.675949e-04 0.223943562 0.5977205280
## Overweight_Level_II 1.368696e-02 0.881730345 0.5032864407
# Odds Ratio
OR <- exp(coefs)
OR
## (Intercept) GenderMale Weight FAVCyes Age
## Normal_Weight 1.301283e-07 5.481273e-02 1.432403 0.3904344 1.099978
## Obesity_Type_I 6.966789e-33 9.027794e-06 3.013247 0.2783641 1.201500
## Obesity_Type_II 9.080876e-46 1.242119e-05 3.737385 0.4388675 1.384095
## Obesity_Type_III 7.178294e-62 7.247590e-16 5.584028 0.3170133 1.390246
## Overweight_Level_I 6.451901e-17 1.612990e-03 1.892973 1.6622553 1.114750
## Overweight_Level_II 9.203714e-23 9.462757e-04 2.228125 0.1006025 1.210536
## CAECFrequently CAECno CAECSometimes
## Normal_Weight 0.011667378 3.078673e-02 0.02824403
## Obesity_Type_I 0.004670817 1.174984e+00 0.95416192
## Obesity_Type_II 0.027469073 3.248421e+00 9.27976360
## Obesity_Type_III 0.134130868 8.211993e+05 21.00262725
## Overweight_Level_I 0.019128202 5.354565e+00 0.56667415
## Overweight_Level_II 0.041992212 1.302733e+00 2.16457302
# Model null (tanpa prediktor)
model_null <- multinom(NObeyesdad ~ 1, data = obesity_data, trace = FALSE)
# Likelihood ratio test manual
loglik_full <- logLik(model_mul)
loglik_null <- logLik(model_null)
# Hitung G
G <- -2 * (loglik_null - loglik_full)
G
## 'log Lik.' 5865.469 (df=6)
# Derajat bebas
df_lr <- attr(loglik_full, "df") - attr(loglik_null, "df")
# p-value LRT
p_lr <- 1 - pchisq(G, df_lr)
p_lr
## 'log Lik.' 0 (df=6)
# Bila ingin via anova
anova(model_null, model_mul)
## Model Resid. df Resid. Dev Test Df
## 1 1 12660 8201.628 NA
## 2 Gender + Weight + FAVC + Age + CAEC 12618 2336.160 1 vs 2 42
## LR stat. Pr(Chi)
## 1 NA NA
## 2 5865.469 0
# --- Wald Test untuk seluruh koefisien model_multinom ---
# Ambil koefisien dan standard errors
coefs <- summary(model_mul)$coefficients
std_err <- summary(model_mul)$standard.errors
# Hitung z-statistic
z_values <- coefs / std_err
# Hitung p-value
p_values <- 2 * (1 - pnorm(abs(z_values)))
# Buat interpretasi otomatis
signif_label <- ifelse(p_values <= 0.05, "Signifikan (p <= 0.05)", "Tidak Signifikan (p > 0.05)")
# Bulatkan semua nilai numerik menjadi 3 angka di belakang koma
coefs_round <- round(coefs, 3)
std_err_round <- round(std_err, 3)
z_values_round <- round(z_values, 3)
p_values_round <- round(p_values, 3)
# Gabungkan ke dalam tabel
wald_table <- cbind(coefs_round, "p-value" = p_values_round, Interpretasi = signif_label)
# Cetak tabel
wald_table
## (Intercept) GenderMale Weight FAVCyes Age
## Normal_Weight "-15.855" "-2.904" "0.359" "-0.94" "0.095"
## Obesity_Type_I "-74.044" "-11.615" "1.103" "-1.279" "0.184"
## Obesity_Type_II "-103.713" "-11.296" "1.318" "-0.824" "0.325"
## Obesity_Type_III "-140.789" "-34.861" "1.72" "-1.149" "0.329"
## Overweight_Level_I "-37.28" "-6.43" "0.638" "0.508" "0.109"
## Overweight_Level_II "-50.74" "-6.963" "0.801" "-2.297" "0.191"
## CAECFrequently CAECno CAECSometimes (Intercept)
## Normal_Weight "-4.451" "-3.481" "-3.567" "0"
## Obesity_Type_I "-5.366" "0.161" "-0.047" "0"
## Obesity_Type_II "-3.595" "1.178" "2.228" "0"
## Obesity_Type_III "-2.009" "13.619" "3.045" "0"
## Overweight_Level_I "-3.957" "1.678" "-0.568" "0"
## Overweight_Level_II "-3.17" "0.264" "0.772" "0"
## GenderMale Weight FAVCyes Age CAECFrequently CAECno
## Normal_Weight "0" "0" "0.008" "0.021" "0" "0.007"
## Obesity_Type_I "0" "0" "0.057" "0" "0" "0.903"
## Obesity_Type_II "0" "0" "0.458" "0" "0.093" "0.367"
## Obesity_Type_III "0" "0" "0.004" "0.005" "0" "0"
## Overweight_Level_I "0" "0" "0.294" "0.017" "0.001" "0.224"
## Overweight_Level_II "0" "0" "0" "0" "0.014" "0.882"
## CAECSometimes (Intercept)
## Normal_Weight "0" "Signifikan (p <= 0.05)"
## Obesity_Type_I "0.969" "Signifikan (p <= 0.05)"
## Obesity_Type_II "0.155" "Signifikan (p <= 0.05)"
## Obesity_Type_III "0.169" "Signifikan (p <= 0.05)"
## Overweight_Level_I "0.598" "Signifikan (p <= 0.05)"
## Overweight_Level_II "0.503" "Signifikan (p <= 0.05)"
## GenderMale Weight
## Normal_Weight "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Obesity_Type_I "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Obesity_Type_II "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Obesity_Type_III "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Overweight_Level_I "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Overweight_Level_II "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## FAVCyes Age
## Normal_Weight "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Obesity_Type_I "Tidak Signifikan (p > 0.05)" "Signifikan (p <= 0.05)"
## Obesity_Type_II "Tidak Signifikan (p > 0.05)" "Signifikan (p <= 0.05)"
## Obesity_Type_III "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Overweight_Level_I "Tidak Signifikan (p > 0.05)" "Signifikan (p <= 0.05)"
## Overweight_Level_II "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## CAECFrequently CAECno
## Normal_Weight "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Obesity_Type_I "Signifikan (p <= 0.05)" "Tidak Signifikan (p > 0.05)"
## Obesity_Type_II "Tidak Signifikan (p > 0.05)" "Tidak Signifikan (p > 0.05)"
## Obesity_Type_III "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Overweight_Level_I "Signifikan (p <= 0.05)" "Tidak Signifikan (p > 0.05)"
## Overweight_Level_II "Signifikan (p <= 0.05)" "Tidak Signifikan (p > 0.05)"
## CAECSometimes
## Normal_Weight "Signifikan (p <= 0.05)"
## Obesity_Type_I "Tidak Signifikan (p > 0.05)"
## Obesity_Type_II "Tidak Signifikan (p > 0.05)"
## Obesity_Type_III "Tidak Signifikan (p > 0.05)"
## Overweight_Level_I "Tidak Signifikan (p > 0.05)"
## Overweight_Level_II "Tidak Signifikan (p > 0.05)"
library(ResourceSelection)
## ResourceSelection 0.3-6 2023-06-27
prob_pred <- predict(model_mul, newdata = obesity_data, type = "probs")
head(prob_pred) # probabilitas prediksi untuk setiap kelas
## Insufficient_Weight Normal_Weight Obesity_Type_I Obesity_Type_II
## 1 1.500139e-03 0.396891519 2.143946e-03 5.135969e-08
## 2 5.730588e-02 0.855494295 1.205046e-05 5.154032e-11
## 3 7.738641e-05 0.145112061 2.433485e-03 1.749979e-06
## 4 1.497370e-08 0.001494709 6.055309e-02 6.607532e-04
## 5 3.807196e-09 0.000645500 1.349243e-01 1.326406e-03
## 6 8.102988e-01 0.188785103 6.797223e-11 1.024973e-15
## Obesity_Type_III Overweight_Level_I Overweight_Level_II
## 1 1.458797e-12 0.2931759403 3.062884e-01
## 2 5.894973e-17 0.0679266433 1.926113e-02
## 3 5.410327e-19 0.1214861897 7.308891e-01
## 4 1.152736e-14 0.0214459981 9.158454e-01
## 5 6.966292e-14 0.0189120915 8.441917e-01
## 6 1.534930e-32 0.0009053199 1.080828e-05
# Target
y <- obesity_data$NObeyesdad
# Loop untuk setiap kelas
hl_results <- list()
classes <- levels(y)
for (cls in classes) {
# Buat biner: 1 jika kelas ini, 0 jika bukan
y_bin <- ifelse(y == cls, 1, 0)
prob_cls <- prob_pred[, cls]
# Hosmer-Lemeshow test
hl <- hoslem.test(y_bin, prob_cls, g = 10) # g = jumlah grup
hl_results[[cls]] <- hl
print(paste("Hosmer-Lemeshow Test for class:", cls))
print(hl)
}
## [1] "Hosmer-Lemeshow Test for class: Insufficient_Weight"
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: y_bin, prob_cls
## X-squared = 12.881, df = 8, p-value = 0.116
##
## [1] "Hosmer-Lemeshow Test for class: Normal_Weight"
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: y_bin, prob_cls
## X-squared = 12.792, df = 8, p-value = 0.1192
##
## [1] "Hosmer-Lemeshow Test for class: Obesity_Type_I"
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: y_bin, prob_cls
## X-squared = 14.115, df = 8, p-value = 0.07883
##
## [1] "Hosmer-Lemeshow Test for class: Obesity_Type_II"
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: y_bin, prob_cls
## X-squared = 6.8535, df = 8, p-value = 0.5525
##
## [1] "Hosmer-Lemeshow Test for class: Obesity_Type_III"
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: y_bin, prob_cls
## X-squared = 0.025853, df = 8, p-value = 1
##
## [1] "Hosmer-Lemeshow Test for class: Overweight_Level_I"
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: y_bin, prob_cls
## X-squared = 13.275, df = 8, p-value = 0.1027
##
## [1] "Hosmer-Lemeshow Test for class: Overweight_Level_II"
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: y_bin, prob_cls
## X-squared = 9.7703, df = 8, p-value = 0.2815
# Interpretasi sederhana
# Loop untuk cetak interpretasi tiap kelas
for (cls in names(hl_results)) {
hl <- hl_results[[cls]]
cat("\n===========================\n")
cat("Kelas:", cls, "\n")
cat("p-value Hosmer-Lemeshow:", round(hl$p.value, 5), "\n")
if (hl$p.value > 0.05) {
cat("Interpretasi: Model fit untuk kelas", cls, "(tidak ada bukti penolakan H0)\n")
} else {
cat("Interpretasi: Model TIDAK fit untuk kelas", cls, "(ada bukti model kurang sesuai)\n")
}
}
##
## ===========================
## Kelas: Insufficient_Weight
## p-value Hosmer-Lemeshow: 0.116
## Interpretasi: Model fit untuk kelas Insufficient_Weight (tidak ada bukti penolakan H0)
##
## ===========================
## Kelas: Normal_Weight
## p-value Hosmer-Lemeshow: 0.1192
## Interpretasi: Model fit untuk kelas Normal_Weight (tidak ada bukti penolakan H0)
##
## ===========================
## Kelas: Obesity_Type_I
## p-value Hosmer-Lemeshow: 0.07883
## Interpretasi: Model fit untuk kelas Obesity_Type_I (tidak ada bukti penolakan H0)
##
## ===========================
## Kelas: Obesity_Type_II
## p-value Hosmer-Lemeshow: 0.55252
## Interpretasi: Model fit untuk kelas Obesity_Type_II (tidak ada bukti penolakan H0)
##
## ===========================
## Kelas: Obesity_Type_III
## p-value Hosmer-Lemeshow: 1
## Interpretasi: Model fit untuk kelas Obesity_Type_III (tidak ada bukti penolakan H0)
##
## ===========================
## Kelas: Overweight_Level_I
## p-value Hosmer-Lemeshow: 0.10274
## Interpretasi: Model fit untuk kelas Overweight_Level_I (tidak ada bukti penolakan H0)
##
## ===========================
## Kelas: Overweight_Level_II
## p-value Hosmer-Lemeshow: 0.28152
## Interpretasi: Model fit untuk kelas Overweight_Level_II (tidak ada bukti penolakan H0)
# Opsional: ringkasan semua p-value
hl_summary <- sapply(hl_results, function(x) x$p.value)
hl_summary
## Insufficient_Weight Normal_Weight Obesity_Type_I Obesity_Type_II
## 0.11599976 0.11919787 0.07882662 0.55251932
## Obesity_Type_III Overweight_Level_I Overweight_Level_II
## 1.00000000 0.10273600 0.28151686