1. Library & Data

library(MASS)        # lda()
library(nnet)        # multinom()
library(tidyverse)   # manipulasi & visualisasi
library(kableExtra)  # tabel rapi
library(MVN)         # uji normalitas multivariat
library(biotools)    # Box's M Test
library(caret)       # confusionMatrix()
# Load the raw penguin measurements; text columns are read in as factors.
df <- read.csv(file = "penguins_size.csv", stringsAsFactors = TRUE)

# Cleaning: keep the response and the four numeric predictors, then drop
# every row that has a missing value in any of the kept columns.
kolom_analisis <- c("species", "culmen_length_mm", "culmen_depth_mm",
                    "flipper_length_mm", "body_mass_g")
df_clean <- na.omit(df[, kolom_analisis])

# Class distribution
table(df_clean[["species"]])
## 
##    Adelie Chinstrap    Gentoo 
##       151        68       123

2. Statistika Deskriptif

# Per-species descriptive statistics: sample size plus mean and standard
# deviation (both rounded to 2 decimals) of each predictor, rendered as a
# table with one grouped header per variable.
rerata_2 <- function(x) round(mean(x), 2)
simpangan_2 <- function(x) round(sd(x), 2)

ringkasan_spesies <- df_clean %>%
  group_by(species) %>%
  summarise(
    n = n(),
    # Output order is (Mean, SD) per column, in the column order given here.
    across(
      c(culmen_length_mm, culmen_depth_mm, flipper_length_mm, body_mass_g),
      list(Mean = rerata_2, SD = simpangan_2)
    )
  )

ringkasan_spesies %>%
  kable(
    caption = "Statistika Deskriptif per Spesies",
    col.names = c("Spesies", "n", rep(c("Mean", "SD"), times = 4))
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  add_header_above(c(
    " " = 2,
    "Culmen Length (mm)" = 2,
    "Culmen Depth (mm)" = 2,
    "Flipper Length (mm)" = 2,
    "Body Mass (g)" = 2
  ))
Statistika Deskriptif per Spesies
Culmen Length (mm)
Culmen Depth (mm)
Flipper Length (mm)
Body Mass (g)
Spesies n Mean SD Mean SD Mean SD Mean SD
Adelie 151 38.79 2.66 18.35 1.22 189.95 6.54 3700.66 458.57
Chinstrap 68 48.83 3.34 18.42 1.14 195.82 7.13 3733.09 384.34
Gentoo 123 47.50 3.08 14.98 0.98 217.19 6.48 5076.02 504.12
# Species distribution: pie chart labelled with the count and the percentage
# share (one decimal) of each species.
jumlah_spesies <- count(df_clean, species)
jumlah_spesies$pct <- round(jumlah_spesies$n / sum(jumlah_spesies$n) * 100, 1)
jumlah_spesies$label <- paste0(
  jumlah_spesies$species, "\n", jumlah_spesies$n, " (", jumlah_spesies$pct, "%)"
)

ggplot(jumlah_spesies, aes(x = "", y = n, fill = species)) +
  geom_col(width = 1, color = "white") +
  # Polar transform of a single stacked bar turns it into a pie.
  coord_polar(theta = "y") +
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),
    size = 3.5
  ) +
  scale_fill_manual(values = c("#F4A460", "#6495ED", "#90EE90")) +
  labs(title = "Distribusi Spesies Penguin", fill = "Spesies") +
  theme_void() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 13))

# Boxplots of every predictor by species, one facet per predictor with its
# own y-axis scale.
label_panel <- c(
  culmen_length_mm  = "Culmen Length (mm)",
  culmen_depth_mm   = "Culmen Depth (mm)",
  flipper_length_mm = "Flipper Length (mm)",
  body_mass_g       = "Body Mass (g)"
)

# Long format: one row per (observation, predictor) pair.
data_panjang <- pivot_longer(
  df_clean,
  cols = -species,
  names_to = "Variabel",
  values_to = "Nilai"
)

ggplot(data_panjang, aes(x = species, y = Nilai, fill = species)) +
  geom_boxplot(alpha = 0.7, outlier.color = "red", outlier.size = 1.5) +
  facet_wrap(
    ~Variabel,
    scales = "free_y",
    ncol = 2,
    labeller = as_labeller(label_panel)
  ) +
  scale_fill_manual(values = c("#F4A460", "#6495ED", "#90EE90")) +
  labs(
    title = "Distribusi Variabel Prediktor per Spesies",
    x = "Spesies", y = "Nilai", fill = "Spesies"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  )


3. Uji Asumsi

3.1 Normalitas Multivariat (Mardia’s Test)

LDA mengasumsikan data pada setiap kelompok berdistribusi normal multivariat.

# Predictor-only view of the cleaned data (the four numeric measurements).
prediktor <- df_clean %>%
  select(culmen_length_mm, culmen_depth_mm, flipper_length_mm, body_mass_g)

# Handle differences between MVN versions: the test-selection argument is
# looked up from the installed function's formals instead of being assumed.
mvn_args <- names(formals(MVN::mvn))

# Run Mardia's multivariate normality test on a numeric data frame `x`.
# Older MVN releases call the argument `mvnTest`; newer releases appear to
# call it `mvn_test` (TODO confirm against the installed version). Without
# this second branch the fallback would silently run MVN's default test,
# which is not necessarily Mardia's.
uji_mardia <- function(x) {
  if ("mvnTest" %in% mvn_args) {
    mvn(data = x, mvnTest = "mardia")
  } else if ("mvn_test" %in% mvn_args) {
    mvn(data = x, mvn_test = "mardia")
  } else {
    mvn(data = x)
  }
}

# Pooled result, kept under its original name.
mvn_result <- uji_mardia(prediktor)

# LDA assumes multivariate normality within each group, so the test is also
# run separately per species (named list, one result per species level).
mvn_per_spesies <- lapply(split(prediktor, df_clean$species), uji_mardia)
# Chi-square Q-Q plot (visual companion to the multivariate normality test):
# squared Mahalanobis distances of the pooled predictors are plotted against
# chi-square quantiles with p degrees of freedom.
ukuran <- dim(prediktor)
n  <- ukuran[1]
p  <- ukuran[2]
mu <- colMeans(prediktor)
S  <- cov(prediktor)
d2 <- mahalanobis(prediktor, center = mu, cov = S)
q_teoritis <- qchisq(ppoints(n), df = p)

qqplot(
  x = q_teoritis,
  y = sort(d2),
  main = "Chi-Square Q-Q Plot (Normalitas Multivariat)",
  xlab = "Kuantil Chi-Square Teoritis",
  ylab = "Jarak Mahalanobis (Data)",
  pch = 19, col = "#4472C4", cex = 0.8
)
# Identity line: points close to it indicate agreement with the chi-square
# reference distribution.
abline(a = 0, b = 1, col = "red", lwd = 2, lty = 2)
legend(
  "topleft",
  legend = "Garis Referensi",
  col = "red", lwd = 2, lty = 2, bty = "n"
)

Jika titik-titik mendekati garis referensi, asumsi normalitas multivariat terpenuhi.

3.2 Homogenitas Matriks Kovarians (Box’s M Test)

# Box's M test for equality of the species' covariance matrices, computed on
# the four predictors of the cleaned data with species as grouping factor.
# The argument expressions are kept verbatim because the printed "data:" line
# echoes the `data` argument as written.
boxm_result <- boxM(
  data     = df_clean[, c("culmen_length_mm", "culmen_depth_mm",
                           "flipper_length_mm", "body_mass_g")],
  grouping = df_clean$species
)
print(boxm_result)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  df_clean[, c("culmen_length_mm", "culmen_depth_mm", "flipper_length_mm",     "body_mass_g")]
## Chi-Sq (approx.) = 76.795, df = 20, p-value = 1.365e-08

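Jika p-value < 0.05, matriks kovarians antarkelompok tidak homogen. Pada hasil di atas p-value = 1.365e-08 < 0.05, sehingga asumsi homogenitas matriks kovarians tidak terpenuhi; LDA tetap cukup robust untuk sampel besar, namun QDA dapat dipertimbangkan sebagai alternatif.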


4. Split Data

# Stratified 80/20 train/test split on species; the seed is fixed first so
# the partition is reproducible.
set.seed(42)
idx <- createDataPartition(y = df_clean$species, p = 0.8, list = FALSE)
train <- df_clean[idx, ]
test  <- df_clean[-idx, ]

# Report the resulting sample sizes.
cat(sprintf("Training: %d | Testing: %d", nrow(train), nrow(test)))
## Training: 275 | Testing: 67

5. Linear Discriminant Analysis (LDA)

5.1 Pembentukan Fungsi Diskriminan

# Fit the linear discriminant model for species on the four predictors using
# the training data. The prior is set explicitly to the class proportions
# observed in `train`. The call is kept verbatim because print() echoes it.
lda_model <- lda(species ~ culmen_length_mm + culmen_depth_mm +
                            flipper_length_mm + body_mass_g,
                 data  = train,
                 prior = as.vector(prop.table(table(train$species))))
print(lda_model)
## Call:
## lda(species ~ culmen_length_mm + culmen_depth_mm + flipper_length_mm + 
##     body_mass_g, data = train, prior = as.vector(prop.table(table(train$species))))
## 
## Prior probabilities of groups:
##    Adelie Chinstrap    Gentoo 
##      0.44      0.20      0.36 
## 
## Group means:
##           culmen_length_mm culmen_depth_mm flipper_length_mm body_mass_g
## Adelie            38.79752        18.39174          190.3471    3704.132
## Chinstrap         48.80909        18.36182          195.4182    3721.364
## Gentoo            47.66061        14.99091          217.9293    5101.263
## 
## Coefficients of linear discriminants:
##                            LD1          LD2
## culmen_length_mm  -0.086609377 -0.419396547
## culmen_depth_mm    1.011226772 -0.005156790
## flipper_length_mm -0.089996132  0.013219110
## body_mass_g       -0.001281395  0.001769146
## 
## Proportion of trace:
##    LD1    LD2 
## 0.8659 0.1341

5.2 Koefisien Fungsi Diskriminan

# Table of the linear discriminant coefficients (one row per predictor, one
# column per discriminant function), shown with 4 decimals.
koef_diskriminan <- rownames_to_column(
  as.data.frame(lda_model$scaling),
  var = "Variabel"
)

kable(
  koef_diskriminan,
  caption = "Koefisien Fungsi Diskriminan Linear",
  digits = 4
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Koefisien Fungsi Diskriminan Linear
Variabel LD1 LD2
culmen_length_mm -0.0866 -0.4194
culmen_depth_mm 1.0112 -0.0052
flipper_length_mm -0.0900 0.0132
body_mass_g -0.0013 0.0018

Dengan g = 3 kelas dan p = 4 variabel prediktor, LDA menghasilkan s = min(p, g − 1) = min(4, 2) = 2 fungsi diskriminan (LD1 dan LD2).

5.3 Proporsi Variansi yang Dijelaskan

# Proportion of between-group separation captured by each discriminant
# function (in percent); identical to lda's "Proportion of trace".
prop_var <- lda_model$svd^2 / sum(lda_model$svd^2) * 100

# lda_model$svd^2 are the canonical F-statistics (ratio of between- to
# within-group mean squares), not eigenvalues. With the prior equal to the
# training class proportions (as in the lda() call), the eigenvalue of
# W^-1 B for each function is F * (g - 1) / (N - g).
n_kelas <- length(lda_model$prior)
n_obs   <- lda_model$N
nilai_eigen <- lda_model$svd^2 * (n_kelas - 1) / (n_obs - n_kelas)

data.frame(
  `Fungsi Diskriminan` = paste0("LD", seq_along(prop_var)),
  Eigenvalue           = round(nilai_eigen, 4),
  `Proporsi (%)`       = round(prop_var, 2),
  `Kumulatif (%)`      = round(cumsum(prop_var), 2),
  # Keep the non-syntactic headers ("Proporsi (%)") instead of letting
  # data.frame() mangle them into "Proporsi...." in the rendered table.
  check.names = FALSE
) %>%
  kable(caption = "Proporsi Variansi yang Dijelaskan Setiap Fungsi Diskriminan") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Proporsi Variansi yang Dijelaskan Setiap Fungsi Diskriminan
Fungsi.Diskriminan Eigenvalue Proporsi…. Kumulatif….
LD1 2088.3517 86.59 86.59
LD2 323.5356 13.41 100.00

5.4 Visualisasi LDA

# Discriminant scores of the training observations.
lda_pred_train <- predict(lda_model, newdata = train)

# One row per training observation: its LD1/LD2 scores and true species.
skor_ld <- lda_pred_train$x
lda_plot_df <- data.frame(
  LD1     = skor_ld[, 1],
  LD2     = skor_ld[, 2],
  species = train$species
)

# The same three colours are used for point outlines and ellipse fills.
warna_spesies <- c("#E07B39", "#4472C4", "#5BAD5B")

# Scatter plot in discriminant space with a 95% ellipse per species; axis
# labels carry the share of separation of each function.
ggplot(lda_plot_df, aes(x = LD1, y = LD2, color = species, shape = species)) +
  geom_point(size = 2.5, alpha = 0.8) +
  stat_ellipse(
    aes(fill = species),
    geom = "polygon", alpha = 0.1, level = 0.95
  ) +
  scale_color_manual(values = warna_spesies) +
  scale_fill_manual(values = warna_spesies) +
  labs(
    title    = "Plot Fungsi Diskriminan Linear",
    subtitle = "Pemisahan Tiga Spesies Penguin",
    x        = paste0("LD1 (", round(prop_var[1], 1), "%)"),
    y        = paste0("LD2 (", round(prop_var[2], 1), "%)"),
    color = "Spesies", shape = "Spesies", fill = "Spesies"
  ) +
  theme_bw() +
  theme(
    plot.title    = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  )

5.5 Evaluasi Model LDA

# Predictions: full prediction object for the test set, predicted classes
# only for the training set.
lda_pred_test <- predict(lda_model, newdata = test)
lda_pred_train_class <- predict(lda_model, newdata = train)[["class"]]

# Confusion matrix of predicted vs. actual species on the test set
cm_lda <- confusionMatrix(
  data      = lda_pred_test[["class"]],
  reference = test[["species"]]
)
print(cm_lda)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Adelie Chinstrap Gentoo
##   Adelie        30         1      0
##   Chinstrap      0        12      0
##   Gentoo         0         0     24
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9851          
##                  95% CI : (0.9196, 0.9996)
##     No Information Rate : 0.4478          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9763          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: Adelie Class: Chinstrap Class: Gentoo
## Sensitivity                 1.0000           0.9231        1.0000
## Specificity                 0.9730           1.0000        1.0000
## Pos Pred Value              0.9677           1.0000        1.0000
## Neg Pred Value              1.0000           0.9818        1.0000
## Prevalence                  0.4478           0.1940        0.3582
## Detection Rate              0.4478           0.1791        0.3582
## Detection Prevalence        0.4627           0.1791        0.3582
## Balanced Accuracy           0.9865           0.9615        1.0000
# Tidy confusion-matrix table: predictions in rows, actual species in columns
tabel_cm_lda <- as.data.frame(cm_lda$table)
tabel_cm_lda <- pivot_wider(
  tabel_cm_lda,
  names_from = Reference,
  values_from = Freq
)
tabel_cm_lda <- rename(tabel_cm_lda, Prediksi = Prediction)

kable(
  tabel_cm_lda,
  caption = "Confusion Matrix — Linear Discriminant Analysis"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  add_header_above(c(" " = 1, "Aktual" = 3))
Confusion Matrix — Linear Discriminant Analysis
Aktual
Prediksi Adelie Chinstrap Gentoo
Adelie 30 1 0
Chinstrap 0 12 0
Gentoo 0 0 24
# APER (apparent error rate) of the LDA model on training and test data:
# number of misclassified observations divided by the sample size.
n_salah_train <- sum(lda_pred_train_class != train$species)
n_salah_test  <- sum(lda_pred_test$class  != test$species)
aper_train    <- n_salah_train / nrow(train)
aper_test     <- n_salah_test  / nrow(test)

data.frame(
  Data          = c("Training", "Testing"),
  `N Total`     = c(nrow(train), nrow(test)),
  `N Salah`     = c(n_salah_train, n_salah_test),
  `APER (%)`    = round(c(aper_train, aper_test) * 100, 2),
  `Akurasi (%)` = round((1 - c(aper_train, aper_test)) * 100, 2),
  # Without this, data.frame() rewrites the headers to "N.Total",
  # "APER....", "Akurasi...." and the table shows the mangled names.
  check.names = FALSE
) %>%
  kable(caption = "APER dan Akurasi Model LDA") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
APER dan Akurasi Model LDA
Data N.Total N.Salah APER…. Akurasi….
Training 275 3 1.09 98.91
Testing 67 1 1.49 98.51

APER yang kecil menunjukkan fungsi diskriminan memiliki kemampuan klasifikasi yang baik.


6. Regresi Logistik Multinomial

6.1 Pembentukan Model

# Reference category = Gentoo (second most frequent class, often used as the
# baseline). Both train and test are releveled so their factor level order
# stays identical for later prediction and confusion-matrix steps.
train$species <- relevel(train$species, ref = "Gentoo")
test$species  <- relevel(test$species,  ref = "Gentoo")

# Multinomial logistic regression of species on the four predictors.
# trace = FALSE silences the optimiser log; Hess = TRUE stores the Hessian,
# which summary() uses for the standard errors.
# NOTE(review): the reported fit has residual deviance close to 0 and zero
# training errors, which suggests (quasi-)complete separation; coefficient
# magnitudes and standard errors may therefore be unreliable -- confirm.
multinom_model <- multinom(
  species ~ culmen_length_mm + culmen_depth_mm +
            flipper_length_mm + body_mass_g,
  data  = train,
  trace = FALSE,
  Hess  = TRUE
)
summary(multinom_model)
## Call:
## multinom(formula = species ~ culmen_length_mm + culmen_depth_mm + 
##     flipper_length_mm + body_mass_g, data = train, Hess = TRUE, 
##     trace = FALSE)
## 
## Coefficients:
##           (Intercept) culmen_length_mm culmen_depth_mm flipper_length_mm
## Adelie       28.10100        -19.61744       36.333602           2.41380
## Chinstrap   -21.02514         25.72737        1.335902          -1.96246
##           body_mass_g
## Adelie     -0.0673793
## Chinstrap  -0.1910147
## 
## Std. Errors:
##           (Intercept) culmen_length_mm culmen_depth_mm flipper_length_mm
## Adelie      0.8292441         14.10153        20.24099          5.101465
## Chinstrap   0.9970848         15.28610        35.34625          5.513135
##           body_mass_g
## Adelie      0.1515416
## Chinstrap   0.1491054
## 
## Residual Deviance: 0.0007645983 
## AIC: 20.00076

6.2 Estimasi Parameter

# Extract the coefficient and standard-error matrices (one row per
# non-reference species, one column per term) from a single model summary.
ringkasan_multinom <- summary(multinom_model)
koef <- ringkasan_multinom$coefficients
se   <- ringkasan_multinom$standard.errors

# Coefficient table with 4 decimals; row names become the "Model" column.
tabel_koef <- rownames_to_column(as.data.frame(koef), var = "Model")

kable(
  tabel_koef,
  caption = "Estimasi Koefisien (β) Model Regresi Logistik Multinomial",
  digits = 4
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE)
Estimasi Koefisien (β) Model Regresi Logistik Multinomial
Model (Intercept) culmen_length_mm culmen_depth_mm flipper_length_mm body_mass_g
Adelie 28.1010 -19.6174 36.3336 2.4138 -0.0674
Chinstrap -21.0251 25.7274 1.3359 -1.9625 -0.1910

Kategori referensi adalah Gentoo. Model Logit 1 membandingkan Adelie vs Gentoo, Model Logit 2 membandingkan Chinstrap vs Gentoo.

6.3 Uji Signifikansi Parameter

6.3.1 Uji Serentak (Likelihood Ratio Test / G²)

# Overall (simultaneous) test: fit the intercept-only model on the same
# training data and compare it with the full model via a likelihood ratio
# test using the chi-square reference distribution.
multinom_null <- multinom(species ~ 1, data = train, trace = FALSE)
lrt <- anova(multinom_null, multinom_model, test = "Chisq")
print(lrt)
## Likelihood ratio tests of Multinomial Models
## 
## Response: species
##                                                                  Model
## 1                                                                    1
## 2 culmen_length_mm + culmen_depth_mm + flipper_length_mm + body_mass_g
##   Resid. df   Resid. Dev   Test    Df LR stat. Pr(Chi)
## 1       548 5.780024e+02                              
## 2       540 7.645983e-04 1 vs 2     8 578.0016       0

Tolak H₀ jika p-value < 0.05 → minimal satu variabel prediktor berpengaruh signifikan terhadap model.

6.3.2 Uji Parsial (Wald Test)

# Wald z statistics and two-sided p-values for every coefficient (matrices
# with the same row/column names as `koef`).
z_val <- koef / se
# Upper-tail probability is requested directly: 1 - pnorm(|z|) loses
# precision through cancellation and collapses to exactly 0 for large |z|.
p_val <- 2 * pnorm(abs(z_val), lower.tail = FALSE)

# Logit 1: Adelie vs Gentoo
data.frame(
  Variabel = colnames(koef),
  Beta     = round(koef["Adelie", ], 4),
  SE       = round(se["Adelie", ], 4),
  Wald_Z   = round(z_val["Adelie", ], 4),
  P_value  = round(p_val["Adelie", ], 4),
  # Significance stars, checked from the strictest threshold downwards.
  Sig      = ifelse(p_val["Adelie", ] < 0.001, "***",
             ifelse(p_val["Adelie", ] < 0.01,  "**",
             ifelse(p_val["Adelie", ] < 0.05,  "*", "ns")))
) %>%
  kable(caption = "Uji Parsial (Wald) — Model Logit 1: Adelie vs Gentoo",
        row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  footnote(general = "*** p<0.001  ** p<0.01  * p<0.05  ns = tidak signifikan")
Uji Parsial (Wald) — Model Logit 1: Adelie vs Gentoo
Variabel Beta SE Wald_Z P_value Sig
(Intercept) 28.1010 0.8292 33.8875 0.0000 ***
culmen_length_mm -19.6174 14.1015 -1.3912 0.1642 ns
culmen_depth_mm 36.3336 20.2410 1.7951 0.0726 ns
flipper_length_mm 2.4138 5.1015 0.4732 0.6361 ns
body_mass_g -0.0674 0.1515 -0.4446 0.6566 ns
Note:
*** p<0.001 ** p<0.01 * p<0.05 ns = tidak signifikan
# Logit 2: Chinstrap vs Gentoo
# Wald table for the Chinstrap row of the coefficient matrices.
p_chinstrap <- p_val["Chinstrap", ]

# Significance stars: left-closed bins reproduce the strict "<" thresholds
# (p < 0.001 -> ***, p < 0.01 -> **, p < 0.05 -> *, otherwise ns).
bintang_chinstrap <- as.character(cut(
  p_chinstrap,
  breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),
  labels = c("***", "**", "*", "ns"),
  right  = FALSE
))

tabel_wald_chinstrap <- data.frame(
  Variabel = colnames(koef),
  Beta     = round(koef["Chinstrap", ], 4),
  SE       = round(se["Chinstrap", ], 4),
  Wald_Z   = round(z_val["Chinstrap", ], 4),
  P_value  = round(p_chinstrap, 4),
  Sig      = bintang_chinstrap
)

kable(
  tabel_wald_chinstrap,
  caption = "Uji Parsial (Wald) — Model Logit 2: Chinstrap vs Gentoo",
  row.names = FALSE
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  footnote(general = "*** p<0.001  ** p<0.01  * p<0.05  ns = tidak signifikan")
Uji Parsial (Wald) — Model Logit 2: Chinstrap vs Gentoo
Variabel Beta SE Wald_Z P_value Sig
(Intercept) -21.0251 0.9971 -21.0866 0.0000 ***
culmen_length_mm 25.7274 15.2861 1.6831 0.0924 ns
culmen_depth_mm 1.3359 35.3462 0.0378 0.9699 ns
flipper_length_mm -1.9625 5.5131 -0.3560 0.7219 ns
body_mass_g -0.1910 0.1491 -1.2811 0.2002 ns
Note:
*** p<0.001 ** p<0.01 * p<0.05 ns = tidak signifikan

6.4 Persamaan Model

# Coefficients rounded to 4 decimals for display in the equations.
b <- round(koef, 4)

# Build the text of one fitted logit equation against the Gentoo baseline.
#   kelas: name of the non-reference species shown in the numerator
#   beta : named coefficient vector for that species (intercept + 4 slopes)
# Each slope is written as " + 1.2345*Xk" or " - 1.2345*Xk", so a negative
# coefficient no longer renders as "+ -1.2345*Xk".
tulis_persamaan_logit <- function(kelas, beta) {
  nama_slope <- c("culmen_length_mm", "culmen_depth_mm",
                  "flipper_length_mm", "body_mass_g")
  slope <- beta[nama_slope]
  suku <- sprintf(" %s %.4f*X%d",
                  ifelse(slope < 0, "-", "+"), abs(slope), seq_along(slope))
  sprintf("ln(P(%s)/P(Gentoo)) = %.4f%s\n",
          kelas, beta[["(Intercept)"]], paste(suku, collapse = ""))
}

cat("**Model Logit 1 (Adelie vs Gentoo):**\n")
## **Model Logit 1 (Adelie vs Gentoo):**
cat(tulis_persamaan_logit("Adelie", b["Adelie", ]))
## ln(P(Adelie)/P(Gentoo)) = 28.1010 - 19.6174*X1 + 36.3336*X2 + 2.4138*X3 - 0.0674*X4
cat("\n**Model Logit 2 (Chinstrap vs Gentoo):**\n")
## 
## **Model Logit 2 (Chinstrap vs Gentoo):**
cat(tulis_persamaan_logit("Chinstrap", b["Chinstrap", ]))
## ln(P(Chinstrap)/P(Gentoo)) = -21.0251 + 25.7274*X1 + 1.3359*X2 - 1.9625*X3 - 0.1910*X4
cat("\nDi mana: X1=culmen_length, X2=culmen_depth, X3=flipper_length, X4=body_mass\n")
## 
## Di mana: X1=culmen_length, X2=culmen_depth, X3=flipper_length, X4=body_mass

6.5 Odds Ratio

# Odds ratios = exp(coefficient), relative to the Gentoo baseline.
# Four significant digits are kept instead of four decimals: rounding to a
# fixed number of decimals turned very small odds ratios into exactly 0,
# which an odds ratio can never be.
or_adelie    <- signif(exp(koef["Adelie", ]), 4)
or_chinstrap <- signif(exp(koef["Chinstrap", ]), 4)

# Compact text rendering so tiny and huge values stay readable side by side.
format_or <- function(x) formatC(x, format = "g", digits = 4)

# Note: the "(Intercept)" row is exp(intercept), i.e. the baseline odds when
# all predictors are 0, not an odds ratio of a predictor.
data.frame(
  Variabel        = colnames(koef),
  OR_Adelie       = format_or(or_adelie),
  OR_Chinstrap    = format_or(or_chinstrap)
) %>%
  kable(caption = "Odds Ratio — Model Regresi Logistik Multinomial",
        col.names = c("Variabel", "OR (Adelie vs Gentoo)", "OR (Chinstrap vs Gentoo)"),
        row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  footnote(general = "OR > 1: hubungan positif; OR < 1: hubungan negatif terhadap kategori tersebut dibanding Gentoo")
Odds Ratio — Model Regresi Logistik Multinomial
Variabel OR (Adelie vs Gentoo) OR (Chinstrap vs Gentoo)
(Intercept) 1.599955e+12 0.000000e+00
culmen_length_mm 0.000000e+00 1.490229e+11
culmen_depth_mm 6.018423e+15 3.803400e+00
flipper_length_mm 1.117630e+01 1.405000e-01
body_mass_g 9.348000e-01 8.261000e-01
Note:
OR > 1: hubungan positif; OR < 1: hubungan negatif terhadap kategori tersebut dibanding Gentoo

6.6 Evaluasi Model Multinomial

# Predicted species for the test set and the resulting confusion matrix
# (predictions vs. actual species).
multinom_pred <- predict(multinom_model, newdata = test, type = "class")
cm_multi <- confusionMatrix(
  data      = multinom_pred,
  reference = test[["species"]]
)
print(cm_multi)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Gentoo Adelie Chinstrap
##   Gentoo        24      1         0
##   Adelie         0     28         1
##   Chinstrap      0      1        12
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9552          
##                  95% CI : (0.8747, 0.9907)
##     No Information Rate : 0.4478          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9295          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: Gentoo Class: Adelie Class: Chinstrap
## Sensitivity                 1.0000        0.9333           0.9231
## Specificity                 0.9767        0.9730           0.9815
## Pos Pred Value              0.9600        0.9655           0.9231
## Neg Pred Value              1.0000        0.9474           0.9815
## Prevalence                  0.3582        0.4478           0.1940
## Detection Rate              0.3582        0.4179           0.1791
## Detection Prevalence        0.3731        0.4328           0.1940
## Balanced Accuracy           0.9884        0.9532           0.9523
# Tidy confusion-matrix table: predictions in rows, actual species in columns
tabel_cm_multi <- as.data.frame(cm_multi$table)
tabel_cm_multi <- pivot_wider(
  tabel_cm_multi,
  names_from = Reference,
  values_from = Freq
)
tabel_cm_multi <- rename(tabel_cm_multi, Prediksi = Prediction)

kable(
  tabel_cm_multi,
  caption = "Confusion Matrix — Regresi Logistik Multinomial"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  add_header_above(c(" " = 1, "Aktual" = 3))
Confusion Matrix — Regresi Logistik Multinomial
Aktual
Prediksi Gentoo Adelie Chinstrap
Gentoo 24 1 0
Adelie 0 28 1
Chinstrap 0 1 12
# APER (apparent error rate) of the multinomial model on training and test
# data: number of misclassified observations divided by the sample size.
multinom_pred_train <- predict(multinom_model, newdata = train, type = "class")
n_salah_multi_train <- sum(multinom_pred_train != train$species)
n_salah_multi_test  <- sum(multinom_pred       != test$species)
aper_multi_train    <- n_salah_multi_train / nrow(train)
aper_multi_test     <- n_salah_multi_test  / nrow(test)

data.frame(
  Data          = c("Training", "Testing"),
  `N Total`     = c(nrow(train), nrow(test)),
  `N Salah`     = c(n_salah_multi_train, n_salah_multi_test),
  `APER (%)`    = round(c(aper_multi_train, aper_multi_test) * 100, 2),
  `Akurasi (%)` = round((1 - c(aper_multi_train, aper_multi_test)) * 100, 2),
  # Without this, data.frame() rewrites the headers to "N.Total",
  # "APER....", "Akurasi...." and the table shows the mangled names.
  check.names = FALSE
) %>%
  kable(caption = "APER dan Akurasi Model Regresi Logistik Multinomial") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
APER dan Akurasi Model Regresi Logistik Multinomial
Data N.Total N.Salah APER…. Akurasi….
Training 275 0 0.00 100.00
Testing 67 3 4.48 95.52

7. Perbandingan LDA vs Regresi Logistik Multinomial

# Side-by-side comparison of the two classifiers: qualitative aspects plus
# the test-set error rate and accuracy computed earlier.

# Turn a proportion into a percentage string rounded to 2 decimals.
sebagai_persen <- function(proporsi) paste0(round(proporsi * 100, 2), "%")

aspek <- c("Metode", "Asumsi Utama", "Variabel Dependen",
           "APER Testing (%)", "Akurasi Testing (%)", "Kelebihan")

kolom_lda <- c(
  "Linear Discriminant Analysis",
  "Normalitas multivariat & homogenitas kovarians",
  "Nominal ≥ 2 kategori",
  sebagai_persen(aper_test),
  sebagai_persen(1 - aper_test),
  "Interpretasi geometris jelas, visualisasi mudah"
)

kolom_multinomial <- c(
  "Regresi Logistik Multinomial",
  "Tidak memerlukan normalitas prediktor",
  "Nominal ≥ 3 kategori",
  sebagai_persen(aper_multi_test),
  sebagai_persen(1 - aper_multi_test),
  "Lebih fleksibel, menghasilkan odds ratio"
)

tabel_perbandingan <- data.frame(
  Aspek       = aspek,
  LDA         = kolom_lda,
  Multinomial = kolom_multinomial
)

kable(
  tabel_perbandingan,
  caption = "Perbandingan LDA dan Regresi Logistik Multinomial",
  col.names = c("Aspek", "LDA", "Multinomial Logistik")
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE)
Perbandingan LDA dan Regresi Logistik Multinomial
Aspek LDA Multinomial Logistik
Metode Linear Discriminant Analysis Regresi Logistik Multinomial
Asumsi Utama Normalitas multivariat & homogenitas kovarians Tidak memerlukan normalitas prediktor
Variabel Dependen Nominal ≥ 2 kategori Nominal ≥ 3 kategori
APER Testing (%) 1.49% 4.48%
Akurasi Testing (%) 98.51% 95.52%
Kelebihan Interpretasi geometris jelas, visualisasi mudah Lebih fleksibel, menghasilkan odds ratio

8. Kesimpulan

# Print the closing summary: share of separation per discriminant function
# and the error/accuracy percentages of both models.
# The template is assembled line by line; the trailing empty element yields
# the final newline and the empty element in the middle the blank line.
templat_kesimpulan <- paste(
  c(
    "LDA Multinomial:",
    "  - Menghasilkan 2 fungsi diskriminan (LD1: %.2f%%, LD2: %.2f%%)",
    "  - APER Training: %.2f%% | APER Testing: %.2f%%",
    "  - Akurasi Testing: %.2f%%",
    "",
    "Regresi Logistik Multinomial:",
    "  - Kategori referensi: Gentoo",
    "  - Menghasilkan 2 persamaan logit (Adelie vs Gentoo, Chinstrap vs Gentoo)",
    "  - APER Training: %.2f%% | APER Testing: %.2f%%",
    "  - Akurasi Testing: %.2f%%",
    ""
  ),
  collapse = "\n"
)

# Values in the order in which the template consumes them.
nilai_kesimpulan <- list(
  prop_var[1], prop_var[2],
  aper_train * 100, aper_test * 100, (1 - aper_test) * 100,
  aper_multi_train * 100, aper_multi_test * 100, (1 - aper_multi_test) * 100
)

cat(do.call(sprintf, c(list(templat_kesimpulan), nilai_kesimpulan)))
## LDA Multinomial:
##   - Menghasilkan 2 fungsi diskriminan (LD1: 86.59%, LD2: 13.41%)
##   - APER Training: 1.09% | APER Testing: 1.49%
##   - Akurasi Testing: 98.51%
## 
## Regresi Logistik Multinomial:
##   - Kategori referensi: Gentoo
##   - Menghasilkan 2 persamaan logit (Adelie vs Gentoo, Chinstrap vs Gentoo)
##   - APER Training: 0.00% | APER Testing: 4.48%
##   - Akurasi Testing: 95.52%