# Multinomial Logistic Regression
# Install packages
# install.packages("nnet")
# install.packages("ucimlrepo")

# Load packages
library(nnet)
library(ucimlrepo)
## Warning: package 'ucimlrepo' was built under R version 4.4.1
# Multinomial Logistic Regression
library(nnet)

# Load kembali
obesity_data <- read.csv("~/Documents/S1 Aida/Semester 2 2025/STIK4362 Analisis Data Kategorik/Tugas 2/ObesityDataSet_raw_and_data_sinthetic.csv")

# Lihat 6 baris pertama
head(obesity_data)
##   Gender Age Height Weight family_history_with_overweight FAVC FCVC NCP
## 1 Female  21   1.62   64.0                            yes   no    2   3
## 2 Female  21   1.52   56.0                            yes   no    3   3
## 3   Male  23   1.80   77.0                            yes   no    2   3
## 4   Male  27   1.80   87.0                             no   no    3   3
## 5   Male  22   1.78   89.8                             no   no    2   1
## 6   Male  29   1.62   53.0                             no  yes    2   3
##        CAEC SMOKE CH2O SCC FAF TUE       CALC                MTRANS
## 1 Sometimes    no    2  no   0   1         no Public_Transportation
## 2 Sometimes   yes    3 yes   3   0  Sometimes Public_Transportation
## 3 Sometimes    no    2  no   2   1 Frequently Public_Transportation
## 4 Sometimes    no    2  no   2   0 Frequently               Walking
## 5 Sometimes    no    2  no   0   0  Sometimes Public_Transportation
## 6 Sometimes    no    2  no   0   0  Sometimes            Automobile
##            NObeyesdad
## 1       Normal_Weight
## 2       Normal_Weight
## 3       Normal_Weight
## 4  Overweight_Level_I
## 5 Overweight_Level_II
## 6       Normal_Weight
# Ubah target menjadi faktor nominal (multinomial)
obesity_data$NObeyesdad <- factor(obesity_data$NObeyesdad)
levels(obesity_data$NObeyesdad)
## [1] "Insufficient_Weight" "Normal_Weight"       "Obesity_Type_I"     
## [4] "Obesity_Type_II"     "Obesity_Type_III"    "Overweight_Level_I" 
## [7] "Overweight_Level_II"
# Cek variabel yang bermasalah (jika ada)
problem_vars <- names(which(sapply(obesity_data, function(x) length(unique(x)) < 2)))
problem_vars
## character(0)
head(obesity_data)
##   Gender Age Height Weight family_history_with_overweight FAVC FCVC NCP
## 1 Female  21   1.62   64.0                            yes   no    2   3
## 2 Female  21   1.52   56.0                            yes   no    3   3
## 3   Male  23   1.80   77.0                            yes   no    2   3
## 4   Male  27   1.80   87.0                             no   no    3   3
## 5   Male  22   1.78   89.8                             no   no    2   1
## 6   Male  29   1.62   53.0                             no  yes    2   3
##        CAEC SMOKE CH2O SCC FAF TUE       CALC                MTRANS
## 1 Sometimes    no    2  no   0   1         no Public_Transportation
## 2 Sometimes   yes    3 yes   3   0  Sometimes Public_Transportation
## 3 Sometimes    no    2  no   2   1 Frequently Public_Transportation
## 4 Sometimes    no    2  no   2   0 Frequently               Walking
## 5 Sometimes    no    2  no   0   0  Sometimes Public_Transportation
## 6 Sometimes    no    2  no   0   0  Sometimes            Automobile
##            NObeyesdad
## 1       Normal_Weight
## 2       Normal_Weight
## 3       Normal_Weight
## 4  Overweight_Level_I
## 5 Overweight_Level_II
## 6       Normal_Weight
# Define X variables (beberapa kategorikal harus difaktorkan)
obesity_data$Gender <- factor(obesity_data$Gender)
obesity_data$FAVC <- factor(obesity_data$FAVC)
obesity_data$Age <- as.numeric(obesity_data$Age)
obesity_data$Weight <- as.numeric(obesity_data$Weight)
obesity_data$CAEC <- factor(obesity_data$CAEC)
levels(obesity_data$Gender)
## [1] "Female" "Male"
levels(obesity_data$FAVC)
## [1] "no"  "yes"
levels(obesity_data$CAEC)
## [1] "Always"     "Frequently" "no"         "Sometimes"
# Multinomial logistic regression Using multinom
model_mul <- multinom(NObeyesdad ~ Gender + Weight + FAVC + Age + CAEC,
                      data = obesity_data)
## # weights:  63 (48 variable)
## initial  value 4107.816325 
## iter  10 value 3878.637204
## iter  20 value 2187.218470
## iter  30 value 1689.213946
## iter  40 value 1243.072066
## iter  50 value 1192.362846
## iter  60 value 1174.898832
## iter  70 value 1170.663917
## iter  80 value 1169.639516
## iter  90 value 1168.653376
## iter 100 value 1168.079757
## final  value 1168.079757 
## stopped after 100 iterations
# Summary of model
summary(model_mul)
## Call:
## multinom(formula = NObeyesdad ~ Gender + Weight + FAVC + Age + 
##     CAEC, data = obesity_data)
## 
## Coefficients:
##                     (Intercept) GenderMale    Weight    FAVCyes        Age
## Normal_Weight         -15.85475  -2.903833 0.3593537 -0.9404953 0.09528993
## Obesity_Type_I        -74.04415 -11.615202 1.1030183 -1.2788254 0.18357115
## Obesity_Type_II      -103.71274 -11.296107 1.3183863 -0.8235578 0.32504625
## Obesity_Type_III     -140.78921 -34.860692 1.7199104 -1.1488116 0.32948047
## Overweight_Level_I    -37.27957  -6.429666 0.6381488  0.5081753 0.10863023
## Overweight_Level_II   -50.73985  -6.962977 0.8011605 -2.2965782 0.19106307
##                     CAECFrequently     CAECno CAECSometimes
## Normal_Weight            -4.450959 -3.4806716   -3.56687305
## Obesity_Type_I           -5.366421  0.1612546   -0.04692189
## Obesity_Type_II          -3.594695  1.1781689    2.22783607
## Obesity_Type_III         -2.008939 13.6185212    3.04464754
## Overweight_Level_I       -3.956591  1.6779495   -0.56797082
## Overweight_Level_II      -3.170271  0.2644646    0.77222312
## 
## Std. Errors:
##                     (Intercept) GenderMale     Weight   FAVCyes        Age
## Normal_Weight         1.5565252  0.3962635 0.03035098 0.3529192 0.04134082
## Obesity_Type_I        1.5254576  0.6104703 0.03540896 0.6713518 0.04724586
## Obesity_Type_II       2.1863195  1.0119013 0.03661424 1.1102770 0.05106199
## Obesity_Type_III      0.3017596  1.7482096 0.04382323 0.4028078 0.11650705
## Overweight_Level_I    1.6191294  0.5300536 0.03384563 0.4839264 0.04570086
## Overweight_Level_II   1.6093100  0.5499429 0.03421123 0.5073307 0.04640003
##                     CAECFrequently   CAECno CAECSometimes
## Normal_Weight            1.0115244 1.282190     0.9674804
## Obesity_Type_I           1.4551093 1.323655     1.2038099
## Obesity_Type_II          2.1412354 1.306965     1.5665825
## Obesity_Type_III         0.3569354 1.537263     2.2118558
## Overweight_Level_I       1.1880666 1.379769     1.0763519
## Overweight_Level_II      1.2859180 1.777602     1.1537295
## 
## Residual Deviance: 2336.16 
## AIC: 2432.16
# Extract coefficients
coefs <- summary(model_mul)$coefficients
std_err <- summary(model_mul)$standard.errors

# Generate p-value (Wald test)
z_values <- coefs / std_err
p_values <- 2 * (1 - pnorm(abs(z_values)))

# Gabungkan ke dalam tabel
coefs_table <- cbind(coefs, "p value" = p_values)
coefs_table
##                     (Intercept) GenderMale    Weight    FAVCyes        Age
## Normal_Weight         -15.85475  -2.903833 0.3593537 -0.9404953 0.09528993
## Obesity_Type_I        -74.04415 -11.615202 1.1030183 -1.2788254 0.18357115
## Obesity_Type_II      -103.71274 -11.296107 1.3183863 -0.8235578 0.32504625
## Obesity_Type_III     -140.78921 -34.860692 1.7199104 -1.1488116 0.32948047
## Overweight_Level_I    -37.27957  -6.429666 0.6381488  0.5081753 0.10863023
## Overweight_Level_II   -50.73985  -6.962977 0.8011605 -2.2965782 0.19106307
##                     CAECFrequently     CAECno CAECSometimes (Intercept)
## Normal_Weight            -4.450959 -3.4806716   -3.56687305           0
## Obesity_Type_I           -5.366421  0.1612546   -0.04692189           0
## Obesity_Type_II          -3.594695  1.1781689    2.22783607           0
## Obesity_Type_III         -2.008939 13.6185212    3.04464754           0
## Overweight_Level_I       -3.956591  1.6779495   -0.56797082           0
## Overweight_Level_II      -3.170271  0.2644646    0.77222312           0
##                       GenderMale Weight      FAVCyes          Age
## Normal_Weight       2.335909e-13      0 7.701063e-03 2.116748e-02
## Obesity_Type_I      0.000000e+00      0 5.679942e-02 1.021431e-04
## Obesity_Type_II     0.000000e+00      0 4.582335e-01 1.943776e-10
## Obesity_Type_III    0.000000e+00      0 4.344383e-03 4.684166e-03
## Overweight_Level_I  0.000000e+00      0 2.936682e-01 1.745484e-02
## Overweight_Level_II 0.000000e+00      0 5.988716e-06 3.826129e-05
##                     CAECFrequently      CAECno CAECSometimes
## Normal_Weight         1.081270e-05 0.006635003  0.0002271229
## Obesity_Type_I        2.260370e-04 0.903037460  0.9689080696
## Obesity_Type_II       9.319207e-02 0.367346893  0.1549973850
## Obesity_Type_III      1.819954e-08 0.000000000  0.1686629761
## Overweight_Level_I    8.675949e-04 0.223943562  0.5977205280
## Overweight_Level_II   1.368696e-02 0.881730345  0.5032864407
# Odds Ratio
OR <- exp(coefs)
OR
##                      (Intercept)   GenderMale   Weight   FAVCyes      Age
## Normal_Weight       1.301283e-07 5.481273e-02 1.432403 0.3904344 1.099978
## Obesity_Type_I      6.966789e-33 9.027794e-06 3.013247 0.2783641 1.201500
## Obesity_Type_II     9.080876e-46 1.242119e-05 3.737385 0.4388675 1.384095
## Obesity_Type_III    7.178294e-62 7.247590e-16 5.584028 0.3170133 1.390246
## Overweight_Level_I  6.451901e-17 1.612990e-03 1.892973 1.6622553 1.114750
## Overweight_Level_II 9.203714e-23 9.462757e-04 2.228125 0.1006025 1.210536
##                     CAECFrequently       CAECno CAECSometimes
## Normal_Weight          0.011667378 3.078673e-02    0.02824403
## Obesity_Type_I         0.004670817 1.174984e+00    0.95416192
## Obesity_Type_II        0.027469073 3.248421e+00    9.27976360
## Obesity_Type_III       0.134130868 8.211993e+05   21.00262725
## Overweight_Level_I     0.019128202 5.354565e+00    0.56667415
## Overweight_Level_II    0.041992212 1.302733e+00    2.16457302
# Model null (tanpa prediktor)
model_null <- multinom(NObeyesdad ~ 1, data = obesity_data, trace = FALSE)

# Likelihood ratio test manual
loglik_full <- logLik(model_mul)
loglik_null <- logLik(model_null)

# Hitung G
G <- -2 * (loglik_null - loglik_full)
G
## 'log Lik.' 5865.469 (df=6)
# Derajat bebas
df_lr <- attr(loglik_full, "df") - attr(loglik_null, "df")

# p-value LRT
p_lr <- 1 - pchisq(G, df_lr)
p_lr
## 'log Lik.' 0 (df=6)
# Bila ingin via anova
anova(model_null, model_mul)
##                                 Model Resid. df Resid. Dev   Test    Df
## 1                                   1     12660   8201.628           NA
## 2 Gender + Weight + FAVC + Age + CAEC     12618   2336.160 1 vs 2    42
##   LR stat. Pr(Chi)
## 1       NA      NA
## 2 5865.469       0
# --- Wald Test untuk seluruh koefisien model_multinom ---

# Ambil koefisien dan standard errors
coefs <- summary(model_mul)$coefficients
std_err <- summary(model_mul)$standard.errors

# Hitung z-statistic
z_values <- coefs / std_err

# Hitung p-value
p_values <- 2 * (1 - pnorm(abs(z_values)))

# Buat interpretasi otomatis
signif_label <- ifelse(p_values <= 0.05, "Signifikan (p <= 0.05)", "Tidak Signifikan (p > 0.05)")

# Bulatkan semua nilai numerik menjadi 3 angka di belakang koma
coefs_round <- round(coefs, 3)
std_err_round <- round(std_err, 3)
z_values_round <- round(z_values, 3)
p_values_round <- round(p_values, 3)

# Gabungkan ke dalam tabel
wald_table <- cbind(coefs_round, "p-value" = p_values_round, Interpretasi = signif_label)

# Cetak tabel
wald_table
##                     (Intercept) GenderMale Weight  FAVCyes  Age    
## Normal_Weight       "-15.855"   "-2.904"   "0.359" "-0.94"  "0.095"
## Obesity_Type_I      "-74.044"   "-11.615"  "1.103" "-1.279" "0.184"
## Obesity_Type_II     "-103.713"  "-11.296"  "1.318" "-0.824" "0.325"
## Obesity_Type_III    "-140.789"  "-34.861"  "1.72"  "-1.149" "0.329"
## Overweight_Level_I  "-37.28"    "-6.43"    "0.638" "0.508"  "0.109"
## Overweight_Level_II "-50.74"    "-6.963"   "0.801" "-2.297" "0.191"
##                     CAECFrequently CAECno   CAECSometimes (Intercept)
## Normal_Weight       "-4.451"       "-3.481" "-3.567"      "0"        
## Obesity_Type_I      "-5.366"       "0.161"  "-0.047"      "0"        
## Obesity_Type_II     "-3.595"       "1.178"  "2.228"       "0"        
## Obesity_Type_III    "-2.009"       "13.619" "3.045"       "0"        
## Overweight_Level_I  "-3.957"       "1.678"  "-0.568"      "0"        
## Overweight_Level_II "-3.17"        "0.264"  "0.772"       "0"        
##                     GenderMale Weight FAVCyes Age     CAECFrequently CAECno 
## Normal_Weight       "0"        "0"    "0.008" "0.021" "0"            "0.007"
## Obesity_Type_I      "0"        "0"    "0.057" "0"     "0"            "0.903"
## Obesity_Type_II     "0"        "0"    "0.458" "0"     "0.093"        "0.367"
## Obesity_Type_III    "0"        "0"    "0.004" "0.005" "0"            "0"    
## Overweight_Level_I  "0"        "0"    "0.294" "0.017" "0.001"        "0.224"
## Overweight_Level_II "0"        "0"    "0"     "0"     "0.014"        "0.882"
##                     CAECSometimes (Intercept)             
## Normal_Weight       "0"           "Signifikan (p <= 0.05)"
## Obesity_Type_I      "0.969"       "Signifikan (p <= 0.05)"
## Obesity_Type_II     "0.155"       "Signifikan (p <= 0.05)"
## Obesity_Type_III    "0.169"       "Signifikan (p <= 0.05)"
## Overweight_Level_I  "0.598"       "Signifikan (p <= 0.05)"
## Overweight_Level_II "0.503"       "Signifikan (p <= 0.05)"
##                     GenderMale               Weight                  
## Normal_Weight       "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Obesity_Type_I      "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Obesity_Type_II     "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Obesity_Type_III    "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Overweight_Level_I  "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
## Overweight_Level_II "Signifikan (p <= 0.05)" "Signifikan (p <= 0.05)"
##                     FAVCyes                       Age                     
## Normal_Weight       "Signifikan (p <= 0.05)"      "Signifikan (p <= 0.05)"
## Obesity_Type_I      "Tidak Signifikan (p > 0.05)" "Signifikan (p <= 0.05)"
## Obesity_Type_II     "Tidak Signifikan (p > 0.05)" "Signifikan (p <= 0.05)"
## Obesity_Type_III    "Signifikan (p <= 0.05)"      "Signifikan (p <= 0.05)"
## Overweight_Level_I  "Tidak Signifikan (p > 0.05)" "Signifikan (p <= 0.05)"
## Overweight_Level_II "Signifikan (p <= 0.05)"      "Signifikan (p <= 0.05)"
##                     CAECFrequently                CAECno                       
## Normal_Weight       "Signifikan (p <= 0.05)"      "Signifikan (p <= 0.05)"     
## Obesity_Type_I      "Signifikan (p <= 0.05)"      "Tidak Signifikan (p > 0.05)"
## Obesity_Type_II     "Tidak Signifikan (p > 0.05)" "Tidak Signifikan (p > 0.05)"
## Obesity_Type_III    "Signifikan (p <= 0.05)"      "Signifikan (p <= 0.05)"     
## Overweight_Level_I  "Signifikan (p <= 0.05)"      "Tidak Signifikan (p > 0.05)"
## Overweight_Level_II "Signifikan (p <= 0.05)"      "Tidak Signifikan (p > 0.05)"
##                     CAECSometimes                
## Normal_Weight       "Signifikan (p <= 0.05)"     
## Obesity_Type_I      "Tidak Signifikan (p > 0.05)"
## Obesity_Type_II     "Tidak Signifikan (p > 0.05)"
## Obesity_Type_III    "Tidak Signifikan (p > 0.05)"
## Overweight_Level_I  "Tidak Signifikan (p > 0.05)"
## Overweight_Level_II "Tidak Signifikan (p > 0.05)"
library(ResourceSelection)
## ResourceSelection 0.3-6   2023-06-27
prob_pred <- predict(model_mul, newdata = obesity_data, type = "probs")
head(prob_pred)  # probabilitas prediksi untuk setiap kelas
##   Insufficient_Weight Normal_Weight Obesity_Type_I Obesity_Type_II
## 1        1.500139e-03   0.396891519   2.143946e-03    5.135969e-08
## 2        5.730588e-02   0.855494295   1.205046e-05    5.154032e-11
## 3        7.738641e-05   0.145112061   2.433485e-03    1.749979e-06
## 4        1.497370e-08   0.001494709   6.055309e-02    6.607532e-04
## 5        3.807196e-09   0.000645500   1.349243e-01    1.326406e-03
## 6        8.102988e-01   0.188785103   6.797223e-11    1.024973e-15
##   Obesity_Type_III Overweight_Level_I Overweight_Level_II
## 1     1.458797e-12       0.2931759403        3.062884e-01
## 2     5.894973e-17       0.0679266433        1.926113e-02
## 3     5.410327e-19       0.1214861897        7.308891e-01
## 4     1.152736e-14       0.0214459981        9.158454e-01
## 5     6.966292e-14       0.0189120915        8.441917e-01
## 6     1.534930e-32       0.0009053199        1.080828e-05
# Target
y <- obesity_data$NObeyesdad

# Loop untuk setiap kelas
hl_results <- list()
classes <- levels(y)

for (cls in classes) {
  # Buat biner: 1 jika kelas ini, 0 jika bukan
  y_bin <- ifelse(y == cls, 1, 0)
  prob_cls <- prob_pred[, cls]
  
  # Hosmer-Lemeshow test
  hl <- hoslem.test(y_bin, prob_cls, g = 10)  # g = jumlah grup
  hl_results[[cls]] <- hl
  print(paste("Hosmer-Lemeshow Test for class:", cls))
  print(hl)
}
## [1] "Hosmer-Lemeshow Test for class: Insufficient_Weight"
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  y_bin, prob_cls
## X-squared = 12.881, df = 8, p-value = 0.116
## 
## [1] "Hosmer-Lemeshow Test for class: Normal_Weight"
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  y_bin, prob_cls
## X-squared = 12.792, df = 8, p-value = 0.1192
## 
## [1] "Hosmer-Lemeshow Test for class: Obesity_Type_I"
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  y_bin, prob_cls
## X-squared = 14.115, df = 8, p-value = 0.07883
## 
## [1] "Hosmer-Lemeshow Test for class: Obesity_Type_II"
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  y_bin, prob_cls
## X-squared = 6.8535, df = 8, p-value = 0.5525
## 
## [1] "Hosmer-Lemeshow Test for class: Obesity_Type_III"
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  y_bin, prob_cls
## X-squared = 0.025853, df = 8, p-value = 1
## 
## [1] "Hosmer-Lemeshow Test for class: Overweight_Level_I"
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  y_bin, prob_cls
## X-squared = 13.275, df = 8, p-value = 0.1027
## 
## [1] "Hosmer-Lemeshow Test for class: Overweight_Level_II"
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  y_bin, prob_cls
## X-squared = 9.7703, df = 8, p-value = 0.2815
  # Interpretasi sederhana
  # Loop untuk cetak interpretasi tiap kelas
for (cls in names(hl_results)) {
  hl <- hl_results[[cls]]
  
  cat("\n===========================\n")
  cat("Kelas:", cls, "\n")
  cat("p-value Hosmer-Lemeshow:", round(hl$p.value, 5), "\n")
  
  if (hl$p.value > 0.05) {
    cat("Interpretasi: Model fit untuk kelas", cls, "(tidak ada bukti penolakan H0)\n")
  } else {
    cat("Interpretasi: Model TIDAK fit untuk kelas", cls, "(ada bukti model kurang sesuai)\n")
  }
}
## 
## ===========================
## Kelas: Insufficient_Weight 
## p-value Hosmer-Lemeshow: 0.116 
## Interpretasi: Model fit untuk kelas Insufficient_Weight (tidak ada bukti penolakan H0)
## 
## ===========================
## Kelas: Normal_Weight 
## p-value Hosmer-Lemeshow: 0.1192 
## Interpretasi: Model fit untuk kelas Normal_Weight (tidak ada bukti penolakan H0)
## 
## ===========================
## Kelas: Obesity_Type_I 
## p-value Hosmer-Lemeshow: 0.07883 
## Interpretasi: Model fit untuk kelas Obesity_Type_I (tidak ada bukti penolakan H0)
## 
## ===========================
## Kelas: Obesity_Type_II 
## p-value Hosmer-Lemeshow: 0.55252 
## Interpretasi: Model fit untuk kelas Obesity_Type_II (tidak ada bukti penolakan H0)
## 
## ===========================
## Kelas: Obesity_Type_III 
## p-value Hosmer-Lemeshow: 1 
## Interpretasi: Model fit untuk kelas Obesity_Type_III (tidak ada bukti penolakan H0)
## 
## ===========================
## Kelas: Overweight_Level_I 
## p-value Hosmer-Lemeshow: 0.10274 
## Interpretasi: Model fit untuk kelas Overweight_Level_I (tidak ada bukti penolakan H0)
## 
## ===========================
## Kelas: Overweight_Level_II 
## p-value Hosmer-Lemeshow: 0.28152 
## Interpretasi: Model fit untuk kelas Overweight_Level_II (tidak ada bukti penolakan H0)
# Opsional: ringkasan semua p-value
hl_summary <- sapply(hl_results, function(x) x$p.value)
hl_summary
## Insufficient_Weight       Normal_Weight      Obesity_Type_I     Obesity_Type_II 
##          0.11599976          0.11919787          0.07882662          0.55251932 
##    Obesity_Type_III  Overweight_Level_I Overweight_Level_II 
##          1.00000000          0.10273600          0.28151686