0.1 Variabel Penelitian

Variabel yang digunakan dalam penelitian ini meliputi:

  • Variabel Independen (Fitur CBC):

- HGB (Hemoglobin)

- PLT (Platelet/Trombosit)

- WBC (White Blood Cell/Leukosit)

- RBC (Red Blood Cell/Eritrosit)

- MCV (Mean Corpuscular Volume)

- MCH (Mean Corpuscular Hemoglobin)

- MCHC (Mean Corpuscular Hemoglobin Concentration)

  • Variabel Dependen: Diagnosis (9 kategori diagnosis, termasuk kategori Healthy)

1 Loading Data dan Library

Pada tahap ini, kita akan memuat semua library yang diperlukan dan membaca dataset CBC yang akan dianalisis.

# Install the required packages only when they are not already available, so
# that re-running the script does not download and reinstall them every time.
# The value stored for each package is passed as the `dependencies` argument:
# TRUE for caret (as before), NA (the install.packages() default) otherwise.
pkg_dependencies <- list(
  caret = TRUE,
  patchwork = NA,
  car = NA,
  kableExtra = NA,
  energy = NA,
  future = NA,
  biotools = NA
)
for (pkg in names(pkg_dependencies)) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = pkg_dependencies[[pkg]])
  }
}
library(readr)
library(caret)
library(MASS)
library(dplyr)
library(energy)
library(biotools)
library(dplyr)
library(ggplot2)
library(nnet)
library(patchwork)
library(car)
library(nnet)
library(dplyr)
library(broom)      
library(kableExtra) 
library(knitr)    
library(ggplot2)
# Load the raw CBC dataset ----
# Read the CSV from the working directory, then show the column specification
# that readr used when parsing it.
diagnosed_cbc_data_v4 <- readr::read_csv(file = "diagnosed_cbc_data_v4.csv")
readr::spec(diagnosed_cbc_data_v4)
## cols(
##   WBC = col_double(),
##   LYMp = col_double(),
##   NEUTp = col_double(),
##   LYMn = col_double(),
##   NEUTn = col_double(),
##   RBC = col_double(),
##   HGB = col_double(),
##   HCT = col_double(),
##   MCV = col_double(),
##   MCH = col_double(),
##   MCHC = col_double(),
##   PLT = col_double(),
##   PDW = col_double(),
##   PCT = col_double(),
##   Diagnosis = col_character()
## )
# Keep the seven CBC features used in this analysis plus the diagnosis label,
# then print per-column summary statistics.
selected_columns <- c(
  "HGB", "PLT", "WBC", "RBC", "MCV", "MCH", "MCHC",
  "Diagnosis"
)
data_use <- diagnosed_cbc_data_v4[, selected_columns]
summary(data_use)
##       HGB              PLT           WBC              RBC        
##  Min.   :-10.00   Min.   : 10   Min.   : 0.800   Min.   : 1.360  
##  1st Qu.: 10.80   1st Qu.:157   1st Qu.: 6.000   1st Qu.: 4.190  
##  Median : 12.30   Median :213   Median : 7.400   Median : 4.600  
##  Mean   : 12.18   Mean   :230   Mean   : 7.863   Mean   : 4.708  
##  3rd Qu.: 13.50   3rd Qu.:293   3rd Qu.: 8.680   3rd Qu.: 5.100  
##  Max.   : 87.10   Max.   :660   Max.   :45.700   Max.   :90.800  
##       MCV              MCH               MCHC        Diagnosis        
##  Min.   :-79.30   Min.   :  10.90   Min.   :11.50   Length:1281       
##  1st Qu.: 81.20   1st Qu.:  25.50   1st Qu.:30.60   Class :character  
##  Median : 86.60   Median :  27.80   Median :32.00   Mode  :character  
##  Mean   : 85.79   Mean   :  32.08   Mean   :31.74                     
##  3rd Qu.: 90.20   3rd Qu.:  29.60   3rd Qu.:32.90                     
##  Max.   :990.00   Max.   :3117.00   Max.   :92.80
# List the distinct diagnosis labels present in the data.
unique(data_use[["Diagnosis"]])
## [1] "Normocytic hypochromic anemia"  "Iron deficiency anemia"        
## [3] "Other microcytic anemia"        "Leukemia"                      
## [5] "Healthy"                        "Thrombocytopenia"              
## [7] "Normocytic normochromic anemia" "Leukemia with thrombocytopenia"
## [9] "Macrocytic anemia"

Dataset yang digunakan berisi data Complete Blood Count (CBC) dengan berbagai parameter pemeriksaan darah. Kita memilih 7 parameter utama CBC sebagai fitur untuk klasifikasi diagnosis.

2 Pre-processing Data

Pre-processing data merupakan tahap krusial untuk memastikan kualitas data sebelum dilakukan analisis. Tahapan yang dilakukan meliputi handling outliers dan standarisasi data.

2.1 Handling Outliers dengan Mahalanobis Distance

Outliers dideteksi menggunakan Mahalanobis Distance, yang mengukur jarak multivariat antara setiap observasi dengan pusat data. Metode ini efektif untuk mendeteksi outliers dalam data multivariat.

# Outlier handling ----
# Flag multivariate outliers among the seven CBC features by comparing each
# row's Mahalanobis distance (as returned by mahalanobis()) with a chi-square
# cutoff, then drop the flagged rows.
fitur <- data_use[, c("HGB", "PLT", "WBC", "RBC", "MCV", "MCH", "MCHC")]

# Centre and covariance are estimated from the full, uncleaned feature table.
fitur_center <- colMeans(fitur, na.rm = TRUE)
fitur_cov <- cov(fitur, use = "complete.obs")
mahal_dist <- mahalanobis(fitur, center = fitur_center, cov = fitur_cov)

# Cutoff: 95% quantile of the chi-square distribution with one degree of
# freedom per feature.
p <- ncol(fitur)
cutoff <- qchisq(0.95, df = p)
outliers <- mahal_dist > cutoff

# Remove the flagged rows from the features and the labels alike, then
# recombine them into one table.
keep_rows <- !outliers
data_fitur <- fitur[keep_rows, ]
data_diagnosis <- data_use$Diagnosis[keep_rows]
data_clean <- cbind(data_fitur, Diagnosis = data_diagnosis)

# QQ plot of the observed distances against theoretical chi-square quantiles,
# with the y = x reference line drawn in red.
theoretical_quantiles <- qchisq(ppoints(length(mahal_dist)), df = p)
qqplot(
  theoretical_quantiles,
  mahal_dist,
  main = "QQ Plot Mahalanobis vs Chi-Square",
  xlab = "Theoretical Chi-Square Quantiles",
  ylab = "Mahalanobis Distance"
)
abline(a = 0, b = 1, col = "red", lwd = 2)

summary(data_clean)
##       HGB             PLT             WBC              RBC       
##  Min.   : 0.40   Min.   : 10.0   Min.   : 0.800   Min.   :1.360  
##  1st Qu.:10.80   1st Qu.:158.0   1st Qu.: 6.000   1st Qu.:4.200  
##  Median :12.30   Median :214.0   Median : 7.400   Median :4.600  
##  Mean   :12.08   Mean   :229.8   Mean   : 7.573   Mean   :4.587  
##  3rd Qu.:13.50   3rd Qu.:294.0   3rd Qu.: 8.600   3rd Qu.:5.100  
##  Max.   :19.60   Max.   :534.0   Max.   :20.300   Max.   :6.900  
##       MCV              MCH              MCHC        Diagnosis        
##  Min.   : 44.90   Min.   : 14.20   Min.   :22.90   Length:1241       
##  1st Qu.: 81.40   1st Qu.: 25.60   1st Qu.:30.60   Class :character  
##  Median : 86.70   Median : 27.80   Median :32.00   Mode  :character  
##  Mean   : 85.47   Mean   : 27.77   Mean   :31.68                     
##  3rd Qu.: 90.20   3rd Qu.: 29.60   3rd Qu.:32.90                     
##  Max.   :124.10   Max.   :275.00   Max.   :42.00

QQ Plot di atas menunjukkan perbandingan antara Mahalanobis Distance dengan distribusi Chi-Square teoretis. Titik-titik yang menyimpang jauh dari garis merah mengindikasikan outliers.

2.1.1 Visualisasi Distribusi Data Setelah Handling Outliers

### Feature distributions after outlier handling
# Draw one kernel density plot per feature on a 3 x 4 panel grid.
par(mfrow = c(3, 4))
for (feature_name in names(data_fitur)) {
  feature_density <- density(data_fitur[[feature_name]], na.rm = TRUE)
  plot(
    feature_density,
    main = paste("Density of", feature_name),
    xlab = feature_name
  )
}

2.2 Standarisasi Data

Standarisasi dilakukan untuk menyamakan skala semua variabel, mengingat parameter CBC memiliki satuan dan rentang nilai yang berbeda-beda.

## Standardisation
# scale() with its default arguments centres and rescales every feature
# column; the result is converted back from a matrix to a data frame.
data_scaled <- as.data.frame(scale(data_fitur))

# Re-attach the diagnosis labels of the cleaned data to the scaled features.
data_diagnosis <- data_clean[["Diagnosis"]]
data_fix <- cbind(data_scaled, Diagnosis = data_diagnosis)

# Density plot of every standardised feature on a 3 x 4 panel grid.
par(mfrow = c(3, 4))
for (feature_name in names(data_scaled)) {
  scaled_density <- density(data_scaled[[feature_name]], na.rm = TRUE)
  plot(
    scaled_density,
    main = paste("Density of", feature_name),
    xlab = feature_name
  )
}

summary(data_fix)
##       HGB               PLT              WBC                RBC          
##  Min.   :-5.5707   Min.   :-2.426   Min.   :-2.68461   Min.   :-4.52274  
##  1st Qu.:-0.6095   1st Qu.:-0.792   1st Qu.:-0.62341   1st Qu.:-0.54229  
##  Median : 0.1061   Median :-0.174   Median :-0.06847   Median : 0.01834  
##  Mean   : 0.0000   Mean   : 0.000   Mean   : 0.00000   Mean   : 0.00000  
##  3rd Qu.: 0.6785   3rd Qu.: 0.709   3rd Qu.: 0.40719   3rd Qu.: 0.71913  
##  Max.   : 3.5884   Max.   : 3.358   Max.   : 5.04488   Max.   : 3.24195  
##       MCV               MCH                 MCHC          Diagnosis        
##  Min.   :-4.9212   Min.   :-1.326177   Min.   :-4.6086   Length:1241       
##  1st Qu.:-0.4939   1st Qu.:-0.211707   1st Qu.:-0.5671   Class :character  
##  Median : 0.1489   Median : 0.003366   Median : 0.1677   Mode  :character  
##  Mean   : 0.0000   Mean   : 0.000000   Mean   : 0.0000                     
##  3rd Qu.: 0.5735   3rd Qu.: 0.179335   3rd Qu.: 0.6401                     
##  Max.   : 4.6854   Max.   :24.169772   Max.   : 5.4163

2.3 Menyimpan Data Pre-processing

# Save the pre-processed data (scaled features + diagnosis) as CSV ----
readr::write_csv(data_fix, "data_fix_uas.csv")

3 Uji Asumsi

Sebelum melakukan analisis diskriminan dan regresi logistik multinomial, perlu dilakukan pengujian asumsi-asumsi yang harus dipenuhi.

3.1 Loading Data Pre-processed

# Assumption checks ----
# Read the pre-processed data back from disk and show how readr parsed it.
library(readr)
data_fix_uas <- readr::read_csv(file = "data_fix_uas.csv")
readr::spec(data_fix_uas)
## cols(
##   HGB = col_double(),
##   PLT = col_double(),
##   WBC = col_double(),
##   RBC = col_double(),
##   MCV = col_double(),
##   MCH = col_double(),
##   MCHC = col_double(),
##   Diagnosis = col_character()
## )
# Feature-only view of the pre-processed data (diagnosis column excluded).
fiturnya <- data_fix_uas[
  ,
  c("HGB", "PLT", "WBC", "RBC", "MCV", "MCH", "MCHC")
]

3.2 Asumsi untuk Analisis Diskriminan

3.2.1 Uji Normalitas Multivariat

Analisis diskriminan mengasumsikan data mengikuti distribusi normal multivariat. Kita menggunakan Energy Test untuk menguji normalitas multivariat.

### Multivariate normality
# The energy test obtains its p-value from R randomly generated replicates,
# so fix the RNG seed to make the reported result reproducible across runs.
set.seed(123)
mvnorm.etest(fiturnya, R = 1000)
## 
##  Energy test of multivariate normality: estimated parameters
## 
## data:  x, sample size 1241, dimension 7, replicates 1000
## E-statistic = 96.118, p-value < 2.2e-16
# Univariate normality: run the Shapiro-Wilk test on each of the seven
# standardised CBC features in turn. The lines starting with `##` are the
# console output recorded for the call directly above them.
shapiro.test(fiturnya$HGB)
## 
##  Shapiro-Wilk normality test
## 
## data:  fiturnya$HGB
## W = 0.96509, p-value < 2.2e-16
shapiro.test(fiturnya$PLT)
## 
##  Shapiro-Wilk normality test
## 
## data:  fiturnya$PLT
## W = 0.97438, p-value = 4.953e-14
shapiro.test(fiturnya$WBC)
## 
##  Shapiro-Wilk normality test
## 
## data:  fiturnya$WBC
## W = 0.93961, p-value < 2.2e-16
shapiro.test(fiturnya$RBC)
## 
##  Shapiro-Wilk normality test
## 
## data:  fiturnya$RBC
## W = 0.98092, p-value = 1.005e-11
shapiro.test(fiturnya$MCV)
## 
##  Shapiro-Wilk normality test
## 
## data:  fiturnya$MCV
## W = 0.94745, p-value < 2.2e-16
shapiro.test(fiturnya$MCH)
## 
##  Shapiro-Wilk normality test
## 
## data:  fiturnya$MCH
## W = 0.22993, p-value < 2.2e-16
shapiro.test(fiturnya$MCHC)
## 
##  Shapiro-Wilk normality test
## 
## data:  fiturnya$MCHC
## W = 0.91836, p-value < 2.2e-16

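Energy Test menguji hipotesis nol bahwa data mengikuti distribusi normal multivariat. Selain itu, Shapiro-Wilk test digunakan untuk menguji normalitas univariat masing-masing variabel. Karena seluruh p-value di atas jauh lebih kecil dari 0,05, hipotesis nol ditolak: asumsi normalitas multivariat maupun univariat tidak terpenuhi pada data ini.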

3.2.2 Uji Homogenitas Matriks Kovarian

Box’s M test digunakan untuk menguji homogenitas matriks kovarian antar grup diagnosa.

### Homogeneity of covariance matrices
# Box's M test on the seven features, using the diagnosis column as the
# grouping variable.
boxM(fiturnya, data_fix_uas$Diagnosis)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  fiturnya
## Chi-Sq (approx.) = 5322.7, df = 224, p-value < 2.2e-16

3.3 Asumsi untuk Multinomial Logistic Regression

3.3.1 Uji Linearitas

Untuk MLR, kita perlu memeriksa linearitas hubungan antara variabel independen dan log odds dari setiap kategori outcome.

### Linearity check

# Treat the diagnosis label as a factor so its levels define the outcome
# classes of the multinomial model.
data_fix_uas$Diagnosis <- as.factor(data_fix_uas$Diagnosis)

# With the default maxit = 100 the optimiser stopped at the iteration limit
# ("stopped after 100 iterations") instead of converging, so allow more
# iterations and warn explicitly if the limit is still reached.
model <- multinom(
  Diagnosis ~ HGB + PLT + WBC + RBC + MCV + MCH + MCHC,
  data = data_fix_uas,
  maxit = 1000
)
if (model$convergence != 0) {
  warning(
    "multinom() reached maxit without converging; estimates may be unstable.",
    call. = FALSE
  )
}
## # weights:  81 (64 variable)
## initial  value 2726.755700 
## iter  10 value 740.690928
## iter  20 value 657.125878
## iter  30 value 549.199780
## iter  40 value 392.500790
## iter  50 value 338.874398
## iter  60 value 322.688846
## iter  70 value 320.145209
## iter  80 value 319.377374
## iter  90 value 319.171672
## iter 100 value 319.084221
## final  value 319.084221 
## stopped after 100 iterations
# Lookup table: diagnosis class -> name of the column that stores the fitted
# probability of that class.
class_labels <- c(
  "Iron deficiency anemia" = "prob_IDA",
  "Normocytic hypochromic anemia" = "prob_NHA",
  "Other microcytic anemia" = "prob_OMA",
  "Leukemia" = "prob_L",
  "Healthy" = "prob_H",
  "Thrombocytopenia" = "prob_T",
  "Normocytic normochromic anemia" = "prob_NMA",
  "Leukemia with thrombocytopenia" = "prob_LWT",
  "Macrocytic anemia" = "prob_MA"
)

# Fitted class probabilities (one column per diagnosis class), copied into
# data_fix_uas column by column in the order of the lookup table.
probs <- predict(model, type = "probs")
for (class_name in names(class_labels)) {
  data_fix_uas[[class_labels[[class_name]]]] <- probs[, class_name]
}

# Container that the plotting loops fill with one panel per class.
plot_list <- list()

3.3.1.1 Visualisasi Linearitas untuk HGB

#### HGB
# One scatter panel per diagnosis class: fitted class probability against HGB.
for (label in names(class_labels)) {
  prob_col <- class_labels[[label]]

  # aes_string() is deprecated, so the columns are looked up through the
  # .data pronoun instead. `!!` injects the current column name immediately:
  # aes() is evaluated lazily, and without it every stored panel would use
  # whatever prob_col holds once the loop has finished.
  p <- ggplot(
    data_fix_uas,
    aes(x = .data[["HGB"]], y = .data[[!!prob_col]])
  ) +
    geom_point(alpha = 0.4, color = "darkgreen") +
    labs(
      title = label,
      x = "HGB",
      y = "Predicted Probability"
    ) +
    theme_minimal(base_size = 9)

  plot_list[[label]] <- p
}

# Arrange all panels in a grid with five columns.
wrap_plots(plot_list, ncol = 5)

3.3.1.2 Visualisasi Linearitas untuk PLT

#### PLT
# One scatter panel per diagnosis class: fitted class probability against PLT.
for (label in names(class_labels)) {
  prob_col <- class_labels[[label]]

  # aes_string() is deprecated, so the columns are looked up through the
  # .data pronoun instead. `!!` injects the current column name immediately:
  # aes() is evaluated lazily, and without it every stored panel would use
  # whatever prob_col holds once the loop has finished.
  p <- ggplot(
    data_fix_uas,
    aes(x = .data[["PLT"]], y = .data[[!!prob_col]])
  ) +
    geom_point(alpha = 0.4, color = "darkgreen") +
    labs(
      title = label,
      x = "PLT",
      y = "Predicted Probability"
    ) +
    theme_minimal(base_size = 9)

  plot_list[[label]] <- p
}

# Arrange all panels in a grid with five columns.
wrap_plots(plot_list, ncol = 5)

3.3.1.3 Visualisasi Linearitas untuk WBC

#### WBC
# One scatter panel per diagnosis class: fitted class probability against WBC.
for (label in names(class_labels)) {
  prob_col <- class_labels[[label]]

  # aes_string() is deprecated, so the columns are looked up through the
  # .data pronoun instead. `!!` injects the current column name immediately:
  # aes() is evaluated lazily, and without it every stored panel would use
  # whatever prob_col holds once the loop has finished.
  p <- ggplot(
    data_fix_uas,
    aes(x = .data[["WBC"]], y = .data[[!!prob_col]])
  ) +
    geom_point(alpha = 0.4, color = "darkgreen") +
    labs(
      title = label,
      x = "WBC",
      y = "Predicted Probability"
    ) +
    theme_minimal(base_size = 9)

  plot_list[[label]] <- p
}

# Arrange all panels in a grid with five columns.
wrap_plots(plot_list, ncol = 5)

3.3.1.4 Visualisasi Linearitas untuk RBC

#### RBC
# One scatter panel per diagnosis class: fitted class probability against RBC.
for (label in names(class_labels)) {
  prob_col <- class_labels[[label]]

  # aes_string() is deprecated, so the columns are looked up through the
  # .data pronoun instead. `!!` injects the current column name immediately:
  # aes() is evaluated lazily, and without it every stored panel would use
  # whatever prob_col holds once the loop has finished.
  p <- ggplot(
    data_fix_uas,
    aes(x = .data[["RBC"]], y = .data[[!!prob_col]])
  ) +
    geom_point(alpha = 0.4, color = "darkgreen") +
    labs(
      title = label,
      x = "RBC",
      y = "Predicted Probability"
    ) +
    theme_minimal(base_size = 9)

  plot_list[[label]] <- p
}

# Arrange all panels in a grid with five columns.
wrap_plots(plot_list, ncol = 5)

3.3.1.5 Visualisasi Linearitas untuk MCV

#### MCV
# One scatter panel per diagnosis class: fitted class probability against MCV.
for (label in names(class_labels)) {
  prob_col <- class_labels[[label]]

  # aes_string() is deprecated, so the columns are looked up through the
  # .data pronoun instead. `!!` injects the current column name immediately:
  # aes() is evaluated lazily, and without it every stored panel would use
  # whatever prob_col holds once the loop has finished.
  p <- ggplot(
    data_fix_uas,
    aes(x = .data[["MCV"]], y = .data[[!!prob_col]])
  ) +
    geom_point(alpha = 0.4, color = "darkgreen") +
    labs(
      title = label,
      x = "MCV",
      y = "Predicted Probability"
    ) +
    theme_minimal(base_size = 9)

  plot_list[[label]] <- p
}

# Arrange all panels in a grid with five columns.
wrap_plots(plot_list, ncol = 5)

3.3.1.6 Visualisasi Linearitas untuk MCH

#### MCH
# One scatter panel per diagnosis class: fitted class probability against MCH.
for (label in names(class_labels)) {
  prob_col <- class_labels[[label]]

  # aes_string() is deprecated, so the columns are looked up through the
  # .data pronoun instead. `!!` injects the current column name immediately:
  # aes() is evaluated lazily, and without it every stored panel would use
  # whatever prob_col holds once the loop has finished.
  p <- ggplot(
    data_fix_uas,
    aes(x = .data[["MCH"]], y = .data[[!!prob_col]])
  ) +
    geom_point(alpha = 0.4, color = "darkgreen") +
    labs(
      title = label,
      x = "MCH",
      y = "Predicted Probability"
    ) +
    theme_minimal(base_size = 9)

  plot_list[[label]] <- p
}

# Arrange all panels in a grid with five columns.
wrap_plots(plot_list, ncol = 5)

3.3.1.7 Visualisasi Linearitas untuk MCHC

#### MCHC
# One scatter panel per diagnosis class: fitted class probability against MCHC.
for (label in names(class_labels)) {
  prob_col <- class_labels[[label]]

  # aes_string() is deprecated, so the columns are looked up through the
  # .data pronoun instead. `!!` injects the current column name immediately:
  # aes() is evaluated lazily, and without it every stored panel would use
  # whatever prob_col holds once the loop has finished.
  p <- ggplot(
    data_fix_uas,
    aes(x = .data[["MCHC"]], y = .data[[!!prob_col]])
  ) +
    geom_point(alpha = 0.4, color = "darkgreen") +
    labs(
      title = label,
      x = "MCHC",
      y = "Predicted Probability"
    ) +
    theme_minimal(base_size = 9)

  plot_list[[label]] <- p
}

# Arrange all panels in a grid with five columns.
wrap_plots(plot_list, ncol = 5)

3.3.2 Deteksi Outliers dengan Boxplot

### Outlier check
# One boxplot per standardised feature on a 3 x 4 panel grid.
par(mfrow = c(3, 4))
for (feature_name in colnames(fiturnya)) {
  boxplot(
    fiturnya[[feature_name]],
    main = paste("Boxplot of", feature_name)
  )
}

3.3.3 Uji Independensi Observasi

### Independence check
# Store the diagnosis as numeric factor codes, then plot the autocorrelation
# of that sequence in row order.
diagnosis_codes <- as.numeric(as.factor(data_fix_uas[["Diagnosis"]]))
data_fix_uas[["DiagnosisNum"]] <- diagnosis_codes
acf(
  data_fix_uas[["DiagnosisNum"]],
  main = "Autocorrelation of Diagnosis (numeric)"
)

Autocorrelation Function (ACF) plot digunakan untuk memeriksa independensi antar observasi. Nilai ACF yang mendekati 0 pada lag > 0 mengindikasikan tidak ada autokorelasi.

3.3.4 Uji Multikolinearitas

Variance Inflation Factor (VIF) digunakan untuk mendeteksi multikolinearitas antar variabel independen. VIF > 10 mengindikasikan multikolinearitas yang serius.

### Multicollinearity check
# Each step fits a linear model with one CBC feature as the response and the
# remaining six as predictors, then calls vif() on that model (presumably
# car::vif, since car is attached earlier in this file). As the recorded
# output shows, every call reports one value per right-hand-side predictor,
# i.e. six values, and the response variable itself gets no VIF in that call.
# The lines starting with `##` are the console output recorded for the call
# directly above them.

# HGB as response.
lm_model_hgb <- lm(HGB ~ PLT + WBC + RBC + MCV + MCH + MCHC, data = data_fix_uas)
vif(lm_model_hgb)
##      PLT      WBC      RBC      MCV      MCH     MCHC 
## 1.282583 1.087971 1.119157 1.335031 1.105043 1.227623
# PLT as response.
lm_model_plt <- lm(PLT ~ HGB + WBC + RBC + MCV + MCH + MCHC, data = data_fix_uas)
vif(lm_model_plt)
##      HGB      WBC      RBC      MCV      MCH     MCHC 
## 5.229722 1.017716 3.575398 1.954192 1.105003 1.666713
# WBC as response.
lm_model_wbc <- lm(WBC ~ PLT + HGB + RBC + MCV + MCH + MCHC, data = data_fix_uas)
vif(lm_model_wbc)
##      PLT      HGB      RBC      MCV      MCH     MCHC 
## 1.280932 5.583540 4.142533 2.224100 1.104541 1.698546
# RBC as response.
lm_model_rbc <- lm(RBC ~ PLT + WBC + HGB + MCV + MCH + MCHC, data = data_fix_uas)
vif(lm_model_rbc)
##      PLT      WBC      HGB      MCV      MCH     MCHC 
## 1.184181 1.090085 1.511394 1.474325 1.105357 1.390199
# MCV as response.
lm_model_mcv <- lm(MCV ~ PLT + WBC + RBC + HGB + MCH + MCHC, data = data_fix_uas)
vif(lm_model_mcv)
##      PLT      WBC      RBC      HGB      MCH     MCHC 
## 1.203960 1.088679 2.742486 3.353739 1.075094 1.682064
# MCH as response.
lm_model_mch <- lm(MCH ~ PLT + WBC + RBC + MCV + HGB + MCHC, data = data_fix_uas)
vif(lm_model_mch)
##      PLT      WBC      RBC      MCV      HGB     MCHC 
## 1.372513 1.090023 4.145356 2.167478 5.596611 1.668663
# MCHC as response.
lm_model_mchc <- lm(MCHC ~ PLT + WBC + RBC + MCV + MCH + HGB, data = data_fix_uas)
vif(lm_model_mchc)
##      PLT      WBC      RBC      MCV      MCH      HGB 
## 1.346610 1.090333 3.391288 2.205865 1.085418 4.044262

4 Pemodelan

4.1 Linear Discriminant Analysis (LDA)

LDA adalah metode klasifikasi yang mencari kombinasi linear dari fitur yang memaksimalkan pemisahan antar kelas.

## Discriminant analysis

# Re-read the pre-processed file so the data holds only the seven features
# and Diagnosis again (the prob_* and DiagnosisNum columns added during the
# assumption checks are dropped), then count the observations per class.
data_fix_uas <- readr::read_csv(file = "data_fix_uas.csv")
table(data_fix_uas[["Diagnosis"]])
## 
##                        Healthy         Iron deficiency anemia 
##                            335                            181 
##                       Leukemia Leukemia with thrombocytopenia 
##                             39                             11 
##              Macrocytic anemia  Normocytic hypochromic anemia 
##                             18                            268 
## Normocytic normochromic anemia        Other microcytic anemia 
##                            265                             53 
##               Thrombocytopenia 
##                             71
# Fit a linear discriminant model of Diagnosis on every other column of
# data_fix_uas, then print the fitted object (priors, group means,
# discriminant coefficients and proportion of trace, as recorded below).
lda_model <- lda(Diagnosis ~ ., data = data_fix_uas)
print(lda_model)
## Call:
## lda(Diagnosis ~ ., data = data_fix_uas)
## 
## Prior probabilities of groups:
##                        Healthy         Iron deficiency anemia 
##                     0.26994359                     0.14585012 
##                       Leukemia Leukemia with thrombocytopenia 
##                     0.03142627                     0.00886382 
##              Macrocytic anemia  Normocytic hypochromic anemia 
##                     0.01450443                     0.21595488 
## Normocytic normochromic anemia        Other microcytic anemia 
##                     0.21353747                     0.04270749 
##               Thrombocytopenia 
##                     0.05721193 
## 
## Group means:
##                                       HGB        PLT         WBC         RBC
## Healthy                         0.9103344  0.7480709  0.05926766  0.89325508
## Iron deficiency anemia         -1.1424139 -0.3006542  0.02216826 -0.04771061
## Leukemia                        1.0943881  0.1254524  1.97513854  0.71050077
## Leukemia with thrombocytopenia  1.2162612 -1.3057518  1.30661726  0.90005564
## Macrocytic anemia              -1.0467863 -0.1353314 -0.62561400 -2.26777373
## Normocytic hypochromic anemia  -0.5272640 -0.3791166 -0.02969389 -0.57659347
## Normocytic normochromic anemia -0.1018577  0.1099184 -0.13590457 -0.48876233
## Other microcytic anemia        -0.8202075 -0.4420758 -0.23675061 -0.78108205
## Thrombocytopenia                1.0755925 -1.2447022 -0.66885644  0.53593480
##                                        MCV         MCH       MCHC
## Healthy                         0.60577381  0.17808022  0.4025371
## Iron deficiency anemia         -1.55023641 -0.62840467 -1.2439369
## Leukemia                        0.15952038  0.03770761  0.1434565
## Leukemia with thrombocytopenia  0.24046823  0.01758580  0.1819954
## Macrocytic anemia               2.70017450  0.60024851 -0.1909748
## Normocytic hypochromic anemia   0.02909973 -0.06674428 -0.5575295
## Normocytic normochromic anemia  0.21763531  0.22809359  0.6163488
## Other microcytic anemia        -1.26974243 -0.14401255  0.6004445
## Thrombocytopenia                0.31004563  0.09424208  0.5690895
## 
## Coefficients of linear discriminants:
##              LD1         LD2         LD3          LD4         LD5        LD6
## HGB  -0.74363381 -0.39903890 -0.02034195  1.212340268  0.51135564  2.0130536
## PLT  -0.09639994 -0.04005166  0.57971947 -0.901767293  0.41210603  0.6360359
## WBC   0.13850601 -0.25915576 -0.38757262  0.330806340  0.90346454 -0.4192133
## RBC  -0.27082217 -0.80460827 -0.37107187 -0.855641657 -0.79770641 -1.5979721
## MCV  -1.09761866  0.84339812 -0.85115765 -0.530391602 -0.28736043 -0.8579140
## MCH  -0.08327803  0.05053576  0.06768092  0.007641023  0.10798663  0.1137145
## MCHC -0.36461524 -0.04021414  1.10864909  0.122286635 -0.08307077 -0.8920187
##             LD7
## HGB   0.2218894
## PLT   0.1003626
## WBC  -0.0238291
## RBC  -0.2518877
## MCV   0.1370443
## MCH  -1.0342708
## MCHC  0.1297507
## 
## Proportion of trace:
##    LD1    LD2    LD3    LD4    LD5    LD6    LD7 
## 0.5993 0.1726 0.1012 0.0852 0.0396 0.0021 0.0001
# Accuracy of the LDA model on the same rows it was fitted on: the share of
# observations whose predicted class equals the recorded diagnosis.
actual <- data_fix_uas[["Diagnosis"]]
pred <- predict(lda_model)[["class"]]
akurasi <- mean(pred == actual)
cat("Akurasi:", round(100 * akurasi, 2), "%\n")
## Akurasi: 81.55 %

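Model LDA menghasilkan fungsi diskriminan yang memisahkan kategori diagnosis berdasarkan kombinasi linear dari parameter CBC. Perlu dicatat bahwa akurasi 81,55% di atas dihitung pada data yang sama dengan data yang digunakan untuk membangun model (resubstitusi), sehingga cenderung lebih optimistis dibandingkan akurasi pada data baru.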

4.2 Multinomial Logistic Regression (MLR)

MLR adalah perluasan dari regresi logistik untuk outcome dengan lebih dari dua kategori.

## Multinomial Logistic Regression

# Fit Diagnosis on every other column of data_fix_uas. With the default
# maxit = 100 the optimiser stopped at the iteration limit ("stopped after
# 100 iterations") instead of converging, so allow more iterations and warn
# explicitly if the limit is still reached.
mlr_model <- multinom(Diagnosis ~ ., data = data_fix_uas, maxit = 1000)
if (mlr_model$convergence != 0) {
  warning(
    "multinom() reached maxit without converging; estimates may be unstable.",
    call. = FALSE
  )
}
## # weights:  81 (64 variable)
## initial  value 2726.755700 
## iter  10 value 740.690928
## iter  20 value 657.125878
## iter  30 value 549.199780
## iter  40 value 392.500790
## iter  50 value 338.874398
## iter  60 value 322.688846
## iter  70 value 320.145209
## iter  80 value 319.377374
## iter  90 value 319.171672
## iter 100 value 319.084221
## final  value 319.084221 
## stopped after 100 iterations
# Print the coefficient and standard-error tables of the multinomial model.
print(summary(mlr_model))
## Call:
## multinom(formula = Diagnosis ~ ., data = data_fix_uas)
## 
## Coefficients:
##                                (Intercept)         HGB         PLT        WBC
## Iron deficiency anemia           13.005144 -55.2221168  -3.4055955  2.9127787
## Leukemia                         -7.419724   0.2015158   0.4505091  8.5785618
## Leukemia with thrombocytopenia  -21.863825   1.0893960 -13.8919632  6.5045355
## Macrocytic anemia                14.400552 -55.5724329  -1.5376843  0.6483029
## Normocytic hypochromic anemia    21.743726 -55.1026902  -3.4693530  2.5660324
## Normocytic normochromic anemia   21.609758 -54.8720902  -1.9817465  2.3975380
## Other microcytic anemia          13.887330 -55.0138759  -3.5497297  2.2403073
## Thrombocytopenia                -13.957538   1.5332077 -13.1007342 -0.4069878
##                                       RBC        MCV         MCH       MCHC
## Iron deficiency anemia         -3.3510202  -5.320957 -20.2102832 -3.3842272
## Leukemia                       -0.8553774  -2.850774  -0.1148362  0.2867024
## Leukemia with thrombocytopenia  0.4323400   3.167952 -20.6850393  2.7425162
## Macrocytic anemia              -3.0619407   2.654945   5.8563190 -4.6197260
## Normocytic hypochromic anemia  -3.4473124  -2.682358   5.9818122 -6.5970097
## Normocytic normochromic anemia -3.0863902  -1.810473   6.2051163  1.6130402
## Other microcytic anemia        -3.6148712 -11.335171   3.4068374  1.9323235
## Thrombocytopenia               -0.9093644   2.243692 -10.2328656  1.5300415
## 
## Std. Errors:
##                                (Intercept)      HGB       PLT       WBC
## Iron deficiency anemia            2.577876 5.928742 0.6911825 0.6696204
## Leukemia                          1.698924 1.996968 0.5699311 1.1358446
## Leukemia with thrombocytopenia    4.937015 2.538351 2.6967112 1.6677771
## Macrocytic anemia                 3.236223 7.296607 1.0114487 1.1490552
## Normocytic hypochromic anemia     2.410877 5.918962 0.6591104 0.6335449
## Normocytic normochromic anemia    2.369581 5.890294 0.6463903 0.6407417
## Other microcytic anemia           2.496802 5.920130 0.8254676 0.6962921
## Thrombocytopenia                  2.266390 2.410623 1.8635111 0.3991859
##                                     RBC       MCV        MCH      MCHC
## Iron deficiency anemia         1.133765 3.1403400 12.2238033 2.0083858
## Leukemia                       1.280478 1.0929227  0.6389328 0.7024687
## Leukemia with thrombocytopenia 1.880618 9.6244815 37.0388563 6.2087090
## Macrocytic anemia              4.802673 2.7312055 10.8374412 2.2530717
## Normocytic hypochromic anemia  1.120961 0.8221749  0.8665384 0.8066260
## Normocytic normochromic anemia 1.097145 0.8005210  0.8634502 0.5401769
## Other microcytic anemia        1.166462 1.2408430  1.1001979 0.6402067
## Thrombocytopenia               2.254204 2.1611542 10.2199131 1.2737297
## 
## Residual Deviance: 638.1684 
## AIC: 766.1684
# Coefficient table (log-odds scale) with confidence intervals, rendered as a
# styled kable table.
kable_styling(
  kable(tidy(mlr_model, conf.int = TRUE)),
  bootstrap_options = "basic",
  full_width = FALSE
)
y.level term estimate std.error statistic p.value conf.low conf.high
Iron deficiency anemia (Intercept) 13.0051444 2.5778755 5.0449078 0.0000005 7.9526013 18.0576876
Iron deficiency anemia HGB -55.2221168 5.9287424 -9.3143053 0.0000000 -66.8422383 -43.6019952
Iron deficiency anemia PLT -3.4055955 0.6911825 -4.9272014 0.0000008 -4.7602884 -2.0509026
Iron deficiency anemia WBC 2.9127787 0.6696204 4.3498953 0.0000136 1.6003468 4.2252106
Iron deficiency anemia RBC -3.3510202 1.1337651 -2.9556566 0.0031200 -5.5731589 -1.1288815
Iron deficiency anemia MCV -5.3209566 3.1403400 -1.6943887 0.0901915 -11.4759100 0.8339968
Iron deficiency anemia MCH -20.2102832 12.2238033 -1.6533547 0.0982587 -44.1684975 3.7479311
Iron deficiency anemia MCHC -3.3842272 2.0083858 -1.6850484 0.0919792 -7.3205910 0.5521365
Leukemia (Intercept) -7.4197236 1.6989240 -4.3673076 0.0000126 -10.7495535 -4.0898938
Leukemia HGB 0.2015158 1.9969680 0.1009109 0.9196212 -3.7124696 4.1155012
Leukemia PLT 0.4505091 0.5699311 0.7904624 0.4292578 -0.6665353 1.5675535
Leukemia WBC 8.5785618 1.1358446 7.5525843 0.0000000 6.3523473 10.8047762
Leukemia RBC -0.8553774 1.2804777 -0.6680143 0.5041245 -3.3650677 1.6543129
Leukemia MCV -2.8507735 1.0929227 -2.6083944 0.0090968 -4.9928627 -0.7086843
Leukemia MCH -0.1148362 0.6389328 -0.1797312 0.8573636 -1.3671215 1.1374491
Leukemia MCHC 0.2867024 0.7024687 0.4081355 0.6831742 -1.0901109 1.6635158
Leukemia with thrombocytopenia (Intercept) -21.8638254 4.9370154 -4.4285512 0.0000095 -31.5401978 -12.1874529
Leukemia with thrombocytopenia HGB 1.0893960 2.5383514 0.4291746 0.6677962 -3.8856814 6.0644733
Leukemia with thrombocytopenia PLT -13.8919632 2.6967112 -5.1514463 0.0000003 -19.1774201 -8.6065063
Leukemia with thrombocytopenia WBC 6.5045355 1.6677771 3.9001227 0.0000961 3.2357524 9.7733185
Leukemia with thrombocytopenia RBC 0.4323400 1.8806182 0.2298925 0.8181753 -3.2536039 4.1182840
Leukemia with thrombocytopenia MCV 3.1679523 9.6244815 0.3291556 0.7420381 -15.6956847 22.0315894
Leukemia with thrombocytopenia MCH -20.6850393 37.0388563 -0.5584686 0.5765244 -93.2798636 51.9097851
Leukemia with thrombocytopenia MCHC 2.7425162 6.2087090 0.4417209 0.6586912 -9.4263297 14.9113622
Macrocytic anemia (Intercept) 14.4005516 3.2362227 4.4498024 0.0000086 8.0576717 20.7434315
Macrocytic anemia HGB -55.5724329 7.2966073 -7.6162017 0.0000000 -69.8735205 -41.2713454
Macrocytic anemia PLT -1.5376843 1.0114487 -1.5202790 0.1284409 -3.5200873 0.4447188
Macrocytic anemia WBC 0.6483029 1.1490552 0.5642052 0.5726145 -1.6038039 2.9004097
Macrocytic anemia RBC -3.0619407 4.8026726 -0.6375493 0.5237671 -12.4750060 6.3511245
Macrocytic anemia MCV 2.6549455 2.7312055 0.9720782 0.3310116 -2.6981190 8.0080099
Macrocytic anemia MCH 5.8563190 10.8374412 0.5403784 0.5889361 -15.3846755 27.0973135
Macrocytic anemia MCHC -4.6197260 2.2530717 -2.0504124 0.0403242 -9.0356653 -0.2037867
Normocytic hypochromic anemia (Intercept) 21.7437261 2.4108775 9.0190092 0.0000000 17.0184931 26.4689592
Normocytic hypochromic anemia HGB -55.1026902 5.9189618 -9.3095196 0.0000000 -66.7036421 -43.5017384
Normocytic hypochromic anemia PLT -3.4693530 0.6591104 -5.2636902 0.0000001 -4.7611856 -2.1775203
Normocytic hypochromic anemia WBC 2.5660324 0.6335449 4.0502774 0.0000512 1.3243073 3.8077575
Normocytic hypochromic anemia RBC -3.4473124 1.1209612 -3.0753182 0.0021028 -5.6443560 -1.2502688
Normocytic hypochromic anemia MCV -2.6823584 0.8221749 -3.2625157 0.0011043 -4.2937915 -1.0709252
Normocytic hypochromic anemia MCH 5.9818122 0.8665384 6.9031128 0.0000000 4.2834282 7.6801962
Normocytic hypochromic anemia MCHC -6.5970097 0.8066260 -8.1785231 0.0000000 -8.1779677 -5.0160517
Normocytic normochromic anemia (Intercept) 21.6097584 2.3695811 9.1196535 0.0000000 16.9654649 26.2540520
Normocytic normochromic anemia HGB -54.8720902 5.8902936 -9.3156800 0.0000000 -66.4168535 -43.3273270
Normocytic normochromic anemia PLT -1.9817465 0.6463903 -3.0658669 0.0021704 -3.2486481 -0.7148449
Normocytic normochromic anemia WBC 2.3975380 0.6407417 3.7418165 0.0001827 1.1417073 3.6533688
Normocytic normochromic anemia RBC -3.0863902 1.0971446 -2.8131115 0.0049065 -5.2367542 -0.9360262
Normocytic normochromic anemia MCV -1.8104731 0.8005210 -2.2616185 0.0237210 -3.3794654 -0.2414807
Normocytic normochromic anemia MCH 6.2051163 0.8634502 7.1864203 0.0000000 4.5127850 7.8974477
Normocytic normochromic anemia MCHC 1.6130402 0.5401769 2.9861334 0.0028253 0.5543130 2.6717674
Other microcytic anemia (Intercept) 13.8873304 2.4968019 5.5620473 0.0000000 8.9936885 18.7809722
Other microcytic anemia HGB -55.0138759 5.9201296 -9.2926811 0.0000000 -66.6171167 -43.4106352
Other microcytic anemia PLT -3.5497297 0.8254676 -4.3002652 0.0000171 -5.1676166 -1.9318429
Other microcytic anemia WBC 2.2403073 0.6962921 3.2174820 0.0012932 0.8755999 3.6050148
Other microcytic anemia RBC -3.6148712 1.1664618 -3.0990053 0.0019417 -5.9010943 -1.3286481
Other microcytic anemia MCV -11.3351707 1.2408430 -9.1350562 0.0000000 -13.7671784 -8.9031631
Other microcytic anemia MCH 3.4068374 1.1001979 3.0965679 0.0019578 1.2504892 5.5631856
Other microcytic anemia MCHC 1.9323235 0.6402067 3.0182805 0.0025421 0.6775414 3.1871057
Thrombocytopenia (Intercept) -13.9575376 2.2663896 -6.1584900 0.0000000 -18.3995797 -9.5154956
Thrombocytopenia HGB 1.5332077 2.4106235 0.6360212 0.5247626 -3.1915275 6.2579428
Thrombocytopenia PLT -13.1007342 1.8635111 -7.0301349 0.0000000 -16.7531487 -9.4483196
Thrombocytopenia WBC -0.4069878 0.3991859 -1.0195445 0.3079445 -1.1893777 0.3754022
Thrombocytopenia RBC -0.9093644 2.2542035 -0.4034083 0.6866479 -5.3275222 3.5087933
Thrombocytopenia MCV 2.2436917 2.1611542 1.0381914 0.2991809 -1.9920926 6.4794761
Thrombocytopenia MCH -10.2328656 10.2199131 -1.0012674 0.3166976 -30.2635273 9.7977960
Thrombocytopenia MCHC 1.5300415 1.2737297 1.2012294 0.2296622 -0.9664227 4.0265058
# Export the tidy coefficient table (with confidence intervals) to CSV.
output_tidy <- mlr_model %>%
  tidy(conf.int = TRUE)
write.csv(
  output_tidy,
  file = "Interpret for the relative log odds.csv",
  row.names = FALSE
)

# Exponentiated model coefficients.
mlr_model %>%
  coef() %>%
  exp()
##                                 (Intercept)          HGB          PLT
## Iron deficiency anemia         4.446952e+05 1.040733e-24 3.318705e-02
## Leukemia                       5.993148e-04 1.223256e+00 1.569111e+00
## Leukemia with thrombocytopenia 3.196401e-10 2.972478e+00 9.263968e-07
## Macrocytic anemia              1.795065e+06 7.331605e-25 2.148781e-01
## Normocytic hypochromic anemia  2.774472e+09 1.172751e-24 3.113717e-02
## Normocytic normochromic anemia 2.426604e+09 1.476910e-24 1.378283e-01
## Other microcytic anemia        1.074462e+06 1.281673e-24 2.873240e-02
## Thrombocytopenia               8.675978e-07 4.633014e+00 2.043730e-06
##                                         WBC        RBC          MCV
## Iron deficiency anemia           18.4078775 0.03504858 4.888076e-03
## Leukemia                       5316.4538372 0.42512272 5.779959e-02
## Leukemia with thrombocytopenia  668.1652059 1.54085897 2.375878e+01
## Macrocytic anemia                 1.9122927 0.04679679 1.422421e+01
## Normocytic hypochromic anemia    13.0140875 0.03183107 6.840165e-02
## Normocytic normochromic anemia   10.9960712 0.04566650 1.635767e-01
## Other microcytic anemia           9.3962185 0.02692039 1.194532e-05
## Thrombocytopenia                  0.6656523 0.40278014 9.428073e+00
##                                         MCH         MCHC
## Iron deficiency anemia         1.670266e-09  0.033903832
## Leukemia                       8.915122e-01  1.332027773
## Leukemia with thrombocytopenia 1.038967e-09 15.526002922
## Macrocytic anemia              3.494355e+02  0.009855496
## Normocytic hypochromic anemia  3.961576e+02  0.001364442
## Normocytic normochromic anemia 4.952766e+02  5.018043834
## Other microcytic anemia        3.016968e+01  6.905536861
## Thrombocytopenia               3.596855e-05  4.618368547
# Alternative: let tidy() exponentiate the estimates and confidence limits,
# then render the result as a styled table.
mlr_model %>%
  tidy(conf.int = TRUE, exponentiate = TRUE) %>%
  kable() %>%
  kable_styling(bootstrap_options = "basic", full_width = FALSE)
y.level term estimate std.error statistic p.value conf.low conf.high
Iron deficiency anemia (Intercept) 4.446952e+05 2.5778755 5.0449078 0.0000005 2.842961e+03 6.955912e+07
Iron deficiency anemia HGB 0.000000e+00 5.9287424 -9.3143053 0.0000000 0.000000e+00 0.000000e+00
Iron deficiency anemia PLT 3.318710e-02 0.6911825 -4.9272014 0.0000008 8.563100e-03 1.286188e-01
Iron deficiency anemia WBC 1.840788e+01 0.6696204 4.3498953 0.0000136 4.954750e+00 6.838890e+01
Iron deficiency anemia RBC 3.504860e-02 1.1337651 -2.9556566 0.0031200 3.798500e-03 3.233948e-01
Iron deficiency anemia MCV 4.888100e-03 3.1403400 -1.6943887 0.0901915 1.040000e-05 2.302503e+00
Iron deficiency anemia MCH 0.000000e+00 12.2238033 -1.6533547 0.0982587 0.000000e+00 4.243320e+01
Iron deficiency anemia MCHC 3.390380e-02 2.0083858 -1.6850484 0.0919792 6.618000e-04 1.736960e+00
Leukemia (Intercept) 5.993000e-04 1.6989240 -4.3673076 0.0000126 2.150000e-05 1.674100e-02
Leukemia HGB 1.223256e+00 1.9969680 0.1009109 0.9196212 2.441710e-02 6.128292e+01
Leukemia PLT 1.569111e+00 0.5699311 0.7904624 0.4292578 5.134846e-01 4.794903e+00
Leukemia WBC 5.316454e+03 1.1358446 7.5525843 0.0000000 5.738381e+02 4.925550e+04
Leukemia RBC 4.251227e-01 1.2804777 -0.6680143 0.5041245 3.455970e-02 5.229485e+00
Leukemia MCV 5.779960e-02 1.0929227 -2.6083944 0.0090968 6.786200e-03 4.922915e-01
Leukemia MCH 8.915122e-01 0.6389328 -0.1797312 0.8573636 2.548395e-01 3.118803e+00
Leukemia MCHC 1.332028e+00 0.7024687 0.4081355 0.6831742 3.361792e-01 5.277834e+00
Leukemia with thrombocytopenia (Intercept) 0.000000e+00 4.9370154 -4.4285512 0.0000095 0.000000e+00 5.100000e-06
Leukemia with thrombocytopenia HGB 2.972478e+00 2.5383514 0.4291746 0.6677962 2.053380e-02 4.302960e+02
Leukemia with thrombocytopenia PLT 9.000000e-07 2.6967112 -5.1514463 0.0000003 0.000000e+00 1.829000e-04
Leukemia with thrombocytopenia WBC 6.681652e+02 1.6677771 3.9001227 0.0000961 2.542549e+01 1.755894e+04
Leukemia with thrombocytopenia RBC 1.540859e+00 1.8806182 0.2298925 0.8181753 3.863470e-02 6.145370e+01
Leukemia with thrombocytopenia MCV 2.375878e+01 9.6244815 0.3291556 0.7420381 2.000000e-07 3.699966e+09
Leukemia with thrombocytopenia MCH 0.000000e+00 37.0388563 -0.5584686 0.5765244 0.000000e+00 3.500525e+22
Leukemia with thrombocytopenia MCHC 1.552600e+01 6.2087090 0.4417209 0.6586912 8.060000e-05 2.991729e+06
Macrocytic anemia (Intercept) 1.795065e+06 3.2362227 4.4498024 0.0000086 3.157929e+03 1.020370e+09
Macrocytic anemia HGB 0.000000e+00 7.2966073 -7.6162017 0.0000000 0.000000e+00 0.000000e+00
Macrocytic anemia PLT 2.148781e-01 1.0114487 -1.5202790 0.1284409 2.959690e-02 1.560051e+00
Macrocytic anemia WBC 1.912293e+00 1.1490552 0.5642052 0.5726145 2.011300e-01 1.818159e+01
Macrocytic anemia RBC 4.679680e-02 4.8026726 -0.6375493 0.5237671 3.800000e-06 5.731369e+02
Macrocytic anemia MCV 1.422421e+01 2.7312055 0.9720782 0.3310116 6.733200e-02 3.004931e+03
Macrocytic anemia MCH 3.494355e+02 10.8374412 0.5403784 0.5889361 2.000000e-07 5.864267e+11
Macrocytic anemia MCHC 9.855500e-03 2.2530717 -2.0504124 0.0403242 1.191000e-04 8.156364e-01
Normocytic hypochromic anemia (Intercept) 2.774472e+09 2.4108775 9.0190092 0.0000000 2.460581e+07 3.128405e+11
Normocytic hypochromic anemia HGB 0.000000e+00 5.9189618 -9.3095196 0.0000000 0.000000e+00 0.000000e+00
Normocytic hypochromic anemia PLT 3.113720e-02 0.6591104 -5.2636902 0.0000001 8.555500e-03 1.133222e-01
Normocytic hypochromic anemia WBC 1.301409e+01 0.6335449 4.0502774 0.0000512 3.759580e+00 4.504930e+01
Normocytic hypochromic anemia RBC 3.183110e-02 1.1209612 -3.0753182 0.0021028 3.537400e-03 2.864278e-01
Normocytic hypochromic anemia MCV 6.840160e-02 0.8221749 -3.2625157 0.0011043 1.365310e-02 3.426913e-01
Normocytic hypochromic anemia MCH 3.961576e+02 0.8665384 6.9031128 0.0000000 7.248852e+01 2.165045e+03
Normocytic hypochromic anemia MCHC 1.364400e-03 0.8066260 -8.1785231 0.0000000 2.808000e-04 6.630700e-03
Normocytic normochromic anemia (Intercept) 2.426604e+09 2.3695811 9.1196535 0.0000000 2.333500e+07 2.523422e+11
Normocytic normochromic anemia HGB 0.000000e+00 5.8902936 -9.3156800 0.0000000 0.000000e+00 0.000000e+00
Normocytic normochromic anemia PLT 1.378283e-01 0.6463903 -3.0658669 0.0021704 3.882670e-02 4.892680e-01
Normocytic normochromic anemia WBC 1.099607e+01 0.6407417 3.7418165 0.0001827 3.132111e+00 3.860450e+01
Normocytic normochromic anemia RBC 4.566650e-02 1.0971446 -2.8131115 0.0049065 5.317500e-03 3.921832e-01
Normocytic normochromic anemia MCV 1.635767e-01 0.8005210 -2.2616185 0.0237210 3.406570e-02 7.854639e-01
Normocytic normochromic anemia MCH 4.952766e+02 0.8634502 7.1864203 0.0000000 9.117538e+01 2.690407e+03
Normocytic normochromic anemia MCHC 5.018044e+00 0.5401769 2.9861334 0.0028253 1.740745e+00 1.446551e+01
Other microcytic anemia (Intercept) 1.074462e+06 2.4968019 5.5620473 0.0000000 8.052103e+03 1.433747e+08
Other microcytic anemia HGB 0.000000e+00 5.9201296 -9.2926811 0.0000000 0.000000e+00 0.000000e+00
Other microcytic anemia PLT 2.873240e-02 0.8254676 -4.3002652 0.0000171 5.698100e-03 1.448810e-01
Other microcytic anemia WBC 9.396218e+00 0.6962921 3.2174820 0.0012932 2.400315e+00 3.678223e+01
Other microcytic anemia RBC 2.692040e-02 1.1664618 -3.0990053 0.0019417 2.736400e-03 2.648350e-01
Other microcytic anemia MCV 1.190000e-05 1.2408430 -9.1350562 0.0000000 1.000000e-06 1.360000e-04
Other microcytic anemia MCH 3.016968e+01 1.1001979 3.0965679 0.0019578 3.492051e+00 2.606519e+02
Other microcytic anemia MCHC 6.905537e+00 0.6402067 3.0182805 0.0025421 1.969031e+00 2.421823e+01
Thrombocytopenia (Intercept) 9.000000e-07 2.2663896 -6.1584900 0.0000000 0.000000e+00 7.370000e-05
Thrombocytopenia HGB 4.633014e+00 2.4106235 0.6360212 0.5247626 4.110900e-02 5.221437e+02
Thrombocytopenia PLT 2.000000e-06 1.8635111 -7.0301349 0.0000000 1.000000e-07 7.880000e-05
Thrombocytopenia WBC 6.656523e-01 0.3991859 -1.0195445 0.3079445 3.044106e-01 1.455577e+00
Thrombocytopenia RBC 4.027801e-01 2.2542035 -0.4034083 0.6866479 4.856100e-03 3.340793e+01
Thrombocytopenia MCV 9.428073e+00 2.1611542 1.0381914 0.2991809 1.364097e-01 6.516295e+02
Thrombocytopenia MCH 3.600000e-05 10.2199131 -1.0012674 0.3166976 0.000000e+00 1.799404e+04
Thrombocytopenia MCHC 4.618368e+00 1.2737297 1.2012294 0.2296622 3.804415e-01 5.606467e+01
# Predict the most likely diagnosis class for every row of data_fix_uas.
# The predict method for multinom models receives its data through `newdata`.
# The former `data =` argument is not a formal of that method, so it was
# silently absorbed by `...` and the fitted observations were predicted instead.
mlr_pred <- predict(mlr_model, newdata = data_fix_uas)
# NOTE(review): assumes data_clean has the same rows, in the same order, as
# data_fix_uas -- confirm against where both objects are created.
data_clean$MLR_Predicted <- mlr_pred

# Confusion matrix: rows are predicted classes, columns are recorded diagnoses.
conf_matrix <- table(Predicted = mlr_pred,
                     Actual = data_fix_uas$Diagnosis)
print(conf_matrix)
##                                 Actual
## Predicted                        Healthy Iron deficiency anemia Leukemia
##   Healthy                            329                      0        1
##   Iron deficiency anemia               1                    173        2
##   Leukemia                             0                      2       33
##   Leukemia with thrombocytopenia       0                      0        0
##   Macrocytic anemia                    0                      0        1
##   Normocytic hypochromic anemia        3                      5        0
##   Normocytic normochromic anemia       1                      1        1
##   Other microcytic anemia              0                      0        1
##   Thrombocytopenia                     1                      0        0
##                                 Actual
## Predicted                        Leukemia with thrombocytopenia
##   Healthy                                                     0
##   Iron deficiency anemia                                      0
##   Leukemia                                                    2
##   Leukemia with thrombocytopenia                              9
##   Macrocytic anemia                                           0
##   Normocytic hypochromic anemia                               0
##   Normocytic normochromic anemia                              0
##   Other microcytic anemia                                     0
##   Thrombocytopenia                                            0
##                                 Actual
## Predicted                        Macrocytic anemia
##   Healthy                                        0
##   Iron deficiency anemia                         0
##   Leukemia                                       0
##   Leukemia with thrombocytopenia                 0
##   Macrocytic anemia                             13
##   Normocytic hypochromic anemia                  2
##   Normocytic normochromic anemia                 3
##   Other microcytic anemia                        0
##   Thrombocytopenia                               0
##                                 Actual
## Predicted                        Normocytic hypochromic anemia
##   Healthy                                                    1
##   Iron deficiency anemia                                     2
##   Leukemia                                                   0
##   Leukemia with thrombocytopenia                             0
##   Macrocytic anemia                                          2
##   Normocytic hypochromic anemia                            234
##   Normocytic normochromic anemia                            15
##   Other microcytic anemia                                   14
##   Thrombocytopenia                                           0
##                                 Actual
## Predicted                        Normocytic normochromic anemia
##   Healthy                                                     0
##   Iron deficiency anemia                                      0
##   Leukemia                                                    0
##   Leukemia with thrombocytopenia                              1
##   Macrocytic anemia                                           0
##   Normocytic hypochromic anemia                               3
##   Normocytic normochromic anemia                            259
##   Other microcytic anemia                                     1
##   Thrombocytopenia                                            1
##                                 Actual
## Predicted                        Other microcytic anemia Thrombocytopenia
##   Healthy                                              0                5
##   Iron deficiency anemia                               1                0
##   Leukemia                                             1                0
##   Leukemia with thrombocytopenia                       0                0
##   Macrocytic anemia                                    0                0
##   Normocytic hypochromic anemia                        0                0
##   Normocytic normochromic anemia                       1                1
##   Other microcytic anemia                             49                0
##   Thrombocytopenia                                     1               65
# Share of rows in data_fix_uas whose MLR prediction equals the recorded
# diagnosis, reported as a percentage rounded to two decimals.
accuracy <- mean(data_fix_uas$Diagnosis == mlr_pred)
cat(
  "\nAkurasi klasifikasi MLR:",
  round(100 * accuracy, digits = 2),
  "%\n"
)
## 
## Akurasi klasifikasi MLR: 93.8 %
# Pair each MLR prediction with the recorded diagnosis in one data frame.
# stringsAsFactors = FALSE keeps Diagnosis in its existing type, as the
# original column assignment did.
mlr_scores <- data.frame(
  mlr_pred = mlr_pred,
  Diagnosis = data_fix_uas$Diagnosis,
  stringsAsFactors = FALSE
)

5 Visualisasi Hasil dengan Multi-Dimensional Scaling (MDS)

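MDS digunakan untuk memproyeksikan jarak Euclidean antarobservasi (dihitung dari variabel prediktor) ke dalam ruang 2 dimensi. Setiap titik kemudian diwarnai menurut kelas hasil prediksi, sehingga kita dapat melihat bagaimana model mengelompokkan observasi.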

5.1 MDS untuk Hasil LDA

# Mapping ----------------------------------------------------------------
# Discriminant analysis (LDA)

# Work on a copy of the data and attach the LDA predictions to it.
# `pred` is created earlier in the script.
lda_mapped <- data_fix_uas
lda_mapped[["LDA_Predicted"]] <- pred

# Keep every column except the recorded and the predicted label, then compute
# pairwise Euclidean distances between observations.
lda_mds <- lda_mapped[, setdiff(names(lda_mapped),
                                c("Diagnosis", "LDA_Predicted"))]
lda_matrix <- dist(lda_mds, method = "euclidean")

# Classical MDS down to two dimensions, with the predicted class attached.
lda_mds_result <- cmdscale(lda_matrix, k = 2)
mds_lda_df <- setNames(as.data.frame(lda_mds_result), c("Dim1", "Dim2"))
mds_lda_df$Predicted <- lda_mapped$LDA_Predicted

# Scatter plot of the two MDS dimensions, coloured by LDA prediction.
ggplot(mds_lda_df, aes(x = Dim1, y = Dim2, color = Predicted)) +
  geom_point(alpha = 0.7, size = 3) +
  theme_minimal() +
  theme(legend.title = element_text(size = 10)) +
  labs(
    title = "MDS Mapping Berdasarkan Hasil Klasifikasi LDA",
    x = "Dimensi 1",
    y = "Dimensi 2"
  )

5.2 MDS untuk Hasil MLR

# Multinomial logistic regression (MLR)

# Work on a copy of the data and attach the MLR predictions to it.
mlr_mapped <- data_fix_uas
mlr_mapped[["MLR_Predicted"]] <- mlr_pred

# Keep every column except the recorded and the predicted label, then compute
# pairwise Euclidean distances between observations.
mlr_mds <- mlr_mapped[, setdiff(names(mlr_mapped),
                                c("Diagnosis", "MLR_Predicted"))]
mlr_matrix <- dist(mlr_mds, method = "euclidean")

# Classical MDS down to two dimensions, with the predicted class attached.
mlr_mds_result <- cmdscale(mlr_matrix, k = 2)
mds_mlr_df <- setNames(as.data.frame(mlr_mds_result), c("Dim1", "Dim2"))
mds_mlr_df$Predicted <- mlr_mapped$MLR_Predicted

# Scatter plot of the two MDS dimensions, coloured by MLR prediction.
ggplot(mds_mlr_df, aes(x = Dim1, y = Dim2, color = Predicted)) +
  geom_point(alpha = 0.7, size = 3) +
  theme_minimal() +
  theme(legend.title = element_text(size = 10)) +
  labs(
    title = "MDS Mapping Berdasarkan Hasil Klasifikasi MLR",
    x = "Dimensi 1",
    y = "Dimensi 2"
  )