# 2. Import data
# Read the tailoring dataset from its CSV file into a data frame; the first
# line of the file is treated as the header row.
data_tailor <- read.csv(
  file = "D:/mbd euy/taylor/tailor.csv",
  header = TRUE
)

# 3. Data pre-processing
# Keep the target (GarmentType) together with the numeric variables that are
# used as predictors.
cols_to_use <- c("GarmentType", "Cost", "Chest", "Arm.Length", "Shoulder",
                 "Waist", "Hip", "Waist.to.Knee")
# Every selected column except the target is treated as a predictor.
predictor_cols <- setdiff(cols_to_use, "GarmentType")

# Select the relevant columns. The names contain dots because read.csv()
# replaces spaces in header names with dots automatically.
data_clean <- data_tailor[, cols_to_use]

# Rows without a garment label cannot be used for a supervised method, so drop
# them explicitly instead of letting them be turned into a bogus "0" class.
data_clean <- data_clean[!is.na(data_clean$GarmentType), , drop = FALSE]

# Replace NA with 0 in the predictors only: in this dataset an NA measurement
# means that the measurement does not apply to that garment type. The target
# column is deliberately left out of the replacement.
# NOTE(review): Cost is zero-filled as well to keep the previous behaviour;
# confirm that a missing Cost really should be treated as 0.
data_clean[predictor_cols] <- lapply(
  data_clean[predictor_cols],
  function(column) replace(column, is.na(column), 0)
)

# Convert the target to a factor so it is treated as a class label.
data_clean$GarmentType <- as.factor(data_clean$GarmentType)

# 4. Run Linear Discriminant Analysis (LDA)
# Fit the model with GarmentType as the response and every other column of
# data_clean as a predictor (the `.` on the right-hand side of the formula).
# NOTE(review): lda() is presumably MASS::lda, attached earlier in the file --
# confirm that the package is loaded before this point.
lda_model <- lda(GarmentType ~ ., data = data_clean)
# Print the fitted model; the output that follows is the result of this call.
print(lda_model)
## Call:
## lda(GarmentType ~ ., data = data_clean)
## 
## Prior probabilities of groups:
##    Dress   Jacket    Shirt     Suit Trousers 
##    0.198    0.227    0.188    0.194    0.193 
## 
## Group means:
##              Cost    Chest Arm.Length Shoulder    Waist      Hip Waist.to.Knee
## Dress    269.5000  0.00000    0.00000  0.00000 18.90404 19.06061      18.86869
## Jacket   272.8767 18.64758   35.49339 18.86344 18.92511  0.00000       0.00000
## Shirt    262.9628 19.26596   34.97340 19.57979  0.00000  0.00000       0.00000
## Suit     271.1495 18.92784   34.79897 18.73196 18.83505 19.28351      18.91753
## Trousers 270.7720  0.00000    0.00000  0.00000 18.88601 19.04663      18.97927
## 
## Coefficients of linear discriminants:
##                         LD1           LD2           LD3          LD4
## Cost           0.0001011944 -3.411493e-05  0.0001676834  0.001713635
## Chest         -0.1798369397  1.888646e-01 -0.0182783885  0.004324182
## Arm.Length    -0.0354721913  4.398623e-02  0.0022838239  0.002176552
## Shoulder      -0.2088886758  1.660956e-01 -0.0180043790 -0.005005847
## Waist          0.0614230668  9.459900e-02  0.3340510714 -0.001002088
## Hip            0.1762563283  1.911749e-01 -0.1057112579 -0.278703561
## Waist.to.Knee  0.1901932437  2.046286e-01 -0.0920065893  0.284222692
## 
## Proportion of trace:
##    LD1    LD2    LD3    LD4 
## 0.7763 0.1609 0.0628 0.0000
# 5. Compute discriminant scores for plotting
# Calling predict() without new data scores the observations the model was
# fitted on; the `x` component holds one column of scores per discriminant.
lda_pred <- predict(lda_model)

# Pair each observation's garment type with its scores on the first two
# linear discriminants, which are the axes of the scatter plot.
plot_data <- data.frame(
  GarmentType = data_clean[["GarmentType"]],
  LD1 = lda_pred[["x"]][, "LD1"],
  LD2 = lda_pred[["x"]][, "LD2"]
)

# 6. Build the visualisation
# Scatter plot of the first two discriminant scores, coloured by garment type,
# with one 95% ellipse drawn per garment type.
ggplot(
  data = plot_data,
  mapping = aes(x = LD1, y = LD2, colour = GarmentType)
) +
  geom_point(size = 2, alpha = 0.7) +
  stat_ellipse(level = 0.95) +
  # The legend tweak must come after theme_minimal(), which resets the theme.
  theme_minimal() +
  theme(legend.position = "bottom") +
  ggtitle("Plot Analisis Diskriminan (LDA) - Jenis Pakaian") +
  xlab("Linear Discriminant 1") +
  ylab("Linear Discriminant 2") +
  labs(colour = "Jenis Pakaian")