LDA dan OLR

Linear Discriminant Analysis (LDA) adalah sebuah metode statistik supervised yang digunakan untuk dua tujuan utama: klasifikasi dan reduksi dimensi. Sebagai metode klasifikasi, LDA bekerja dengan mencari satu set kombinasi linear dari variabel prediktor, yang dikenal sebagai fungsi diskriminan, yang mampu memaksimalkan pemisahan (separasi) antara dua atau lebih kelas atau kelompok yang telah ditentukan sebelumnya. Sedangkan Ordinal Logistic Regression (OLR), sering juga disebut sebagai Proportional Odds Model, adalah ekstensi dari regresi logistik biner yang dirancang khusus untuk memodelkan variabel dependen yang bersifat ordinal. Variabel ordinal adalah variabel kategorikal di mana terdapat urutan atau tingkatan yang jelas antar kategorinya (contoh: ‘rendah’, ‘sedang’, ‘tinggi’; atau ‘tidak setuju’, ‘netral’, ‘setuju’).

Import Dataset

# Install dan load package
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(MASS)
## Warning: package 'MASS' was built under R version 4.4.3
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
# Load dataset
library(readr)
data <- read_csv("D:/Tugas SMT 4/analisis multivariat/data.csv")
## Rows: 3338 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (20): GameID, LeagueIndex, Age, HoursPerWeek, TotalHours, APM, SelectByH...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Eksplorasi Data Analysis

# Cek jumlah baris dan kolom
dim(data)  # atau dim(data) jika sebelum preprocessing
## [1] 3338   20
# cek duplikat data
sum(duplicated(data))
## [1] 0
# Cek nama kolom
colnames(data)
##  [1] "GameID"               "LeagueIndex"          "Age"                 
##  [4] "HoursPerWeek"         "TotalHours"           "APM"                 
##  [7] "SelectByHotkeys"      "AssignToHotkeys"      "UniqueHotkeys"       
## [10] "MinimapAttacks"       "MinimapRightClicks"   "NumberOfPACs"        
## [13] "GapBetweenPACs"       "ActionLatency"        "ActionsInPAC"        
## [16] "TotalMapExplored"     "WorkersMade"          "UniqueUnitsMade"     
## [19] "ComplexUnitsMade"     "ComplexAbilitiesUsed"
# Cek missing values
colSums(is.na(data))
##               GameID          LeagueIndex                  Age 
##                    0                    0                    0 
##         HoursPerWeek           TotalHours                  APM 
##                    0                    0                    0 
##      SelectByHotkeys      AssignToHotkeys        UniqueHotkeys 
##                    0                    0                    0 
##       MinimapAttacks   MinimapRightClicks         NumberOfPACs 
##                    0                    0                    0 
##       GapBetweenPACs        ActionLatency         ActionsInPAC 
##                    0                    0                    0 
##     TotalMapExplored          WorkersMade      UniqueUnitsMade 
##                    0                    0                    0 
##     ComplexUnitsMade ComplexAbilitiesUsed 
##                    0                    0
# Statistik ringkasan untuk semua kolom
summary(data)
##      GameID      LeagueIndex         Age         HoursPerWeek   
##  Min.   :  52   Min.   :1.000   Min.   :16.00   Min.   :  0.00  
##  1st Qu.:2423   1st Qu.:3.000   1st Qu.:19.00   1st Qu.:  8.00  
##  Median :4788   Median :4.000   Median :21.00   Median : 12.00  
##  Mean   :4720   Mean   :4.121   Mean   :21.65   Mean   : 15.91  
##  3rd Qu.:6995   3rd Qu.:5.000   3rd Qu.:24.00   3rd Qu.: 20.00  
##  Max.   :9271   Max.   :7.000   Max.   :44.00   Max.   :168.00  
##    TotalHours             APM         SelectByHotkeys    AssignToHotkeys    
##  Min.   :      3.0   Min.   : 22.06   Min.   :0.000000   Min.   :0.0000000  
##  1st Qu.:    300.0   1st Qu.: 79.23   1st Qu.:0.001245   1st Qu.:0.0002017  
##  Median :    500.0   Median :107.07   Median :0.002445   Median :0.0003487  
##  Mean   :    960.4   Mean   :114.58   Mean   :0.004023   Mean   :0.0003641  
##  3rd Qu.:    800.0   3rd Qu.:140.16   3rd Qu.:0.004945   3rd Qu.:0.0004929  
##  Max.   :1000000.0   Max.   :389.83   Max.   :0.043088   Max.   :0.0016483  
##  UniqueHotkeys    MinimapAttacks      MinimapRightClicks   NumberOfPACs     
##  Min.   : 0.000   Min.   :0.000e+00   Min.   :0.0000000   Min.   :0.000679  
##  1st Qu.: 3.000   1st Qu.:0.000e+00   1st Qu.:0.0001388   1st Qu.:0.002743  
##  Median : 4.000   Median :3.864e-05   Median :0.0002784   Median :0.003376  
##  Mean   : 4.316   Mean   :9.378e-05   Mean   :0.0003802   Mean   :0.003433  
##  3rd Qu.: 6.000   3rd Qu.:1.134e-04   3rd Qu.:0.0005076   3rd Qu.:0.004003  
##  Max.   :10.000   Max.   :3.019e-03   Max.   :0.0036877   Max.   :0.007971  
##  GapBetweenPACs    ActionLatency     ActionsInPAC    TotalMapExplored
##  Min.   :  6.667   Min.   : 24.63   Min.   : 2.039   Min.   : 5.00   
##  1st Qu.: 29.327   1st Qu.: 50.89   1st Qu.: 4.262   1st Qu.:17.00   
##  Median : 37.059   Median : 61.30   Median : 5.087   Median :22.00   
##  Mean   : 40.714   Mean   : 64.21   Mean   : 5.267   Mean   :22.12   
##  3rd Qu.: 48.510   3rd Qu.: 74.03   3rd Qu.: 6.027   3rd Qu.:27.00   
##  Max.   :237.143   Max.   :176.37   Max.   :18.558   Max.   :58.00   
##   WorkersMade        UniqueUnitsMade  ComplexUnitsMade    ComplexAbilitiesUsed
##  Min.   :7.698e-05   Min.   : 2.000   Min.   :0.000e+00   Min.   :0.000e+00   
##  1st Qu.:6.818e-04   1st Qu.: 5.000   1st Qu.:0.000e+00   1st Qu.:0.000e+00   
##  Median :9.042e-04   Median : 6.000   Median :0.000e+00   Median :2.043e-05   
##  Mean   :1.031e-03   Mean   : 6.541   Mean   :5.998e-05   Mean   :1.419e-04   
##  3rd Qu.:1.258e-03   3rd Qu.: 8.000   3rd Qu.:8.742e-05   3rd Qu.:1.823e-04   
##  Max.   :5.149e-03   Max.   :13.000   Max.   :9.023e-04   Max.   :3.084e-03
# Bar chart LeagueIndex
library(ggplot2)

ggplot(data, aes(x = LeagueIndex)) +
  geom_bar(fill = "#2c7fb8") +
  labs(title = "Distribusi Tingkatan Liga", x = "LeagueIndex", y = "Jumlah")

# Loop histogram untuk semua variabel numerik
num_cols <- c('GameID','LeagueIndex','Age','HoursPerWeek','TotalHours','APM','SelectByHotkeys','AssignToHotkeys','UniqueHotkeys','MinimapAttacks','MinimapRightClicks','NumberOfPACs','GapBetweenPACs','ActionLatency','ActionsInPAC','TotalMapExplored','WorkersMade','UniqueUnitsMade','ComplexUnitsMade','ComplexAbilitiesUsed')

for (col in num_cols) {
  print(
    ggplot(data, aes_string(x = col)) +
      geom_histogram(bins = 30, fill = "#1a9641", color = "white") +
      labs(title = paste("Distribusi", col), x = col, y = "Frekuensi")
  ) # Add the closing parenthesis here
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Korelasi dan heatmap
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
cor_matrix <- cor(data[, num_cols])
corrplot(cor_matrix, method = "color", tl.cex = 0.8)

# cek outlier menggunakan boxplot untuk semua variabel numerik
for (col in num_cols) {
  print(
    ggplot(data, aes_string(y = col)) +
      geom_boxplot(fill = "#d7191c") +
      labs(title = paste("Boxplot", col), y = col)
  )
}

Preprocessing

# hapus kolom Game_Id
data <- subset(data, select = -GameID)
colnames(data)
##  [1] "LeagueIndex"          "Age"                  "HoursPerWeek"        
##  [4] "TotalHours"           "APM"                  "SelectByHotkeys"     
##  [7] "AssignToHotkeys"      "UniqueHotkeys"        "MinimapAttacks"      
## [10] "MinimapRightClicks"   "NumberOfPACs"         "GapBetweenPACs"      
## [13] "ActionLatency"        "ActionsInPAC"         "TotalMapExplored"    
## [16] "WorkersMade"          "UniqueUnitsMade"      "ComplexUnitsMade"    
## [19] "ComplexAbilitiesUsed"
# Handling Outlier 

Q1 <- quantile(data$MinimapRightClicks, 0.25)
Q3 <- quantile(data$MinimapRightClicks, 0.75)
IQR <- Q3 - Q1

lower_bound <- Q1 - 1.5 * IQR
upper_bound <- Q3 + 1.5 * IQR

data_clean <- subset(data, MinimapRightClicks >= lower_bound & MinimapRightClicks <= upper_bound)

boxplot(data_clean$MinimapRightClicks, col = "red", main = "Boxplot setelah Outlier Dihapus")

#handling outlier
data_clean <- subset(data_clean, TotalHours <= 25000)

boxplot(data_clean$TotalHours, col = "blue", main = "Boxplot TotalHours setelah Outlier > 25000 Dihapus")

# Menstandarkan variabel numerik
num_cols <- c('HoursPerWeek', 'TotalHours','SelectByHotkeys',
                   'AssignToHotkeys', 'UniqueHotkeys', 'MinimapAttacks',
                   'MinimapRightClicks', 'GapBetweenPACs',
                   'ActionLatency', 'ActionsInPAC', 'TotalMapExplored',
                   'WorkersMade', 'UniqueUnitsMade', 'ComplexUnitsMade',
                   'ComplexAbilitiesUsed')

# Create data_scale as a copy of the original data frame
data_scaled <- data

data_scaled[num_cols] <- scale(data[num_cols])
# Cek hasil
head(data_scaled)
## # A tibble: 6 × 19
##   LeagueIndex   Age HoursPerWeek TotalHours   APM SelectByHotkeys
##         <dbl> <dbl>        <dbl>      <dbl> <dbl>           <dbl>
## 1           5    27       -0.494     0.118  144.           -0.108
## 2           5    23       -0.494     0.233  129.           -0.152
## 3           4    30       -0.494    -0.0439  70.0          -0.618
## 4           3    19        0.342    -0.0324 108.           -0.633
## 5           3    32       -0.494    -0.0266 123.           -0.611
## 6           2    27       -0.828    -0.0514  44.5          -0.644
## # ℹ 13 more variables: AssignToHotkeys <dbl>, UniqueHotkeys <dbl>,
## #   MinimapAttacks <dbl>, MinimapRightClicks <dbl>, NumberOfPACs <dbl>,
## #   GapBetweenPACs <dbl>, ActionLatency <dbl>, ActionsInPAC <dbl>,
## #   TotalMapExplored <dbl>, WorkersMade <dbl>, UniqueUnitsMade <dbl>,
## #   ComplexUnitsMade <dbl>, ComplexAbilitiesUsed <dbl>
summary(data_scaled)
##   LeagueIndex         Age         HoursPerWeek       TotalHours      
##  Min.   :1.000   Min.   :16.00   Min.   :-1.3297   Min.   :-0.05528  
##  1st Qu.:3.000   1st Qu.:19.00   1st Qu.:-0.6611   1st Qu.:-0.03813  
##  Median :4.000   Median :21.00   Median :-0.3268   Median :-0.02659  
##  Mean   :4.121   Mean   :21.65   Mean   : 0.0000   Mean   : 0.00000  
##  3rd Qu.:5.000   3rd Qu.:24.00   3rd Qu.: 0.3419   3rd Qu.:-0.00926  
##  Max.   :7.000   Max.   :44.00   Max.   :12.7118   Max.   :57.68748  
##       APM         SelectByHotkeys   AssignToHotkeys    UniqueHotkeys    
##  Min.   : 22.06   Min.   :-0.8512   Min.   :-1.73386   Min.   :-1.8499  
##  1st Qu.: 79.23   1st Qu.:-0.5879   1st Qu.:-0.77334   1st Qu.:-0.5642  
##  Median :107.07   Median :-0.3339   Median :-0.07359   Median :-0.1356  
##  Mean   :114.58   Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.:140.16   3rd Qu.: 0.1950   3rd Qu.: 0.61285   3rd Qu.: 0.7216  
##  Max.   :389.83   Max.   : 8.2653   Max.   : 6.11437   Max.   : 2.4359  
##  MinimapAttacks    MinimapRightClicks  NumberOfPACs      GapBetweenPACs   
##  Min.   :-0.5899   Min.   :-1.0577    Min.   :0.000679   Min.   :-1.9961  
##  1st Qu.:-0.5899   1st Qu.:-0.6716    1st Qu.:0.002743   1st Qu.:-0.6676  
##  Median :-0.3468   Median :-0.2833    Median :0.003376   Median :-0.2143  
##  Mean   : 0.0000   Mean   : 0.0000    Mean   :0.003433   Mean   : 0.0000  
##  3rd Qu.: 0.1237   3rd Qu.: 0.3542    3rd Qu.:0.004003   3rd Qu.: 0.4571  
##  Max.   :18.4020   Max.   : 9.2003    Max.   :0.007971   Max.   :11.5159  
##  ActionLatency      ActionsInPAC     TotalMapExplored   WorkersMade     
##  Min.   :-2.0789   Min.   :-2.1512   Min.   :-2.3004   Min.   :-1.8334  
##  1st Qu.:-0.6998   1st Qu.:-0.6700   1st Qu.:-0.6877   1st Qu.:-0.6710  
##  Median :-0.1530   Median :-0.1199   Median :-0.0157   Median :-0.2436  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.5160   3rd Qu.: 0.5067   3rd Qu.: 0.6563   3rd Qu.: 0.4371  
##  Max.   : 5.8917   Max.   : 8.8572   Max.   : 4.8224   Max.   : 7.9142  
##  UniqueUnitsMade   ComplexUnitsMade  ComplexAbilitiesUsed
##  Min.   :-2.4427   Min.   :-0.5379   Min.   :-0.5343     
##  1st Qu.:-0.8289   1st Qu.:-0.5379   1st Qu.:-0.5343     
##  Median :-0.2910   Median :-0.5379   Median :-0.4574     
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000     
##  3rd Qu.: 0.7848   3rd Qu.: 0.2460   3rd Qu.: 0.1520     
##  Max.   : 3.4743   Max.   : 7.5535   Max.   :11.0740
head(data_scaled)
## # A tibble: 6 × 19
##   LeagueIndex   Age HoursPerWeek TotalHours   APM SelectByHotkeys
##         <dbl> <dbl>        <dbl>      <dbl> <dbl>           <dbl>
## 1           5    27       -0.494     0.118  144.           -0.108
## 2           5    23       -0.494     0.233  129.           -0.152
## 3           4    30       -0.494    -0.0439  70.0          -0.618
## 4           3    19        0.342    -0.0324 108.           -0.633
## 5           3    32       -0.494    -0.0266 123.           -0.611
## 6           2    27       -0.828    -0.0514  44.5          -0.644
## # ℹ 13 more variables: AssignToHotkeys <dbl>, UniqueHotkeys <dbl>,
## #   MinimapAttacks <dbl>, MinimapRightClicks <dbl>, NumberOfPACs <dbl>,
## #   GapBetweenPACs <dbl>, ActionLatency <dbl>, ActionsInPAC <dbl>,
## #   TotalMapExplored <dbl>, WorkersMade <dbl>, UniqueUnitsMade <dbl>,
## #   ComplexUnitsMade <dbl>, ComplexAbilitiesUsed <dbl>
summary(data_scaled)
##   LeagueIndex         Age         HoursPerWeek       TotalHours      
##  Min.   :1.000   Min.   :16.00   Min.   :-1.3297   Min.   :-0.05528  
##  1st Qu.:3.000   1st Qu.:19.00   1st Qu.:-0.6611   1st Qu.:-0.03813  
##  Median :4.000   Median :21.00   Median :-0.3268   Median :-0.02659  
##  Mean   :4.121   Mean   :21.65   Mean   : 0.0000   Mean   : 0.00000  
##  3rd Qu.:5.000   3rd Qu.:24.00   3rd Qu.: 0.3419   3rd Qu.:-0.00926  
##  Max.   :7.000   Max.   :44.00   Max.   :12.7118   Max.   :57.68748  
##       APM         SelectByHotkeys   AssignToHotkeys    UniqueHotkeys    
##  Min.   : 22.06   Min.   :-0.8512   Min.   :-1.73386   Min.   :-1.8499  
##  1st Qu.: 79.23   1st Qu.:-0.5879   1st Qu.:-0.77334   1st Qu.:-0.5642  
##  Median :107.07   Median :-0.3339   Median :-0.07359   Median :-0.1356  
##  Mean   :114.58   Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.:140.16   3rd Qu.: 0.1950   3rd Qu.: 0.61285   3rd Qu.: 0.7216  
##  Max.   :389.83   Max.   : 8.2653   Max.   : 6.11437   Max.   : 2.4359  
##  MinimapAttacks    MinimapRightClicks  NumberOfPACs      GapBetweenPACs   
##  Min.   :-0.5899   Min.   :-1.0577    Min.   :0.000679   Min.   :-1.9961  
##  1st Qu.:-0.5899   1st Qu.:-0.6716    1st Qu.:0.002743   1st Qu.:-0.6676  
##  Median :-0.3468   Median :-0.2833    Median :0.003376   Median :-0.2143  
##  Mean   : 0.0000   Mean   : 0.0000    Mean   :0.003433   Mean   : 0.0000  
##  3rd Qu.: 0.1237   3rd Qu.: 0.3542    3rd Qu.:0.004003   3rd Qu.: 0.4571  
##  Max.   :18.4020   Max.   : 9.2003    Max.   :0.007971   Max.   :11.5159  
##  ActionLatency      ActionsInPAC     TotalMapExplored   WorkersMade     
##  Min.   :-2.0789   Min.   :-2.1512   Min.   :-2.3004   Min.   :-1.8334  
##  1st Qu.:-0.6998   1st Qu.:-0.6700   1st Qu.:-0.6877   1st Qu.:-0.6710  
##  Median :-0.1530   Median :-0.1199   Median :-0.0157   Median :-0.2436  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.5160   3rd Qu.: 0.5067   3rd Qu.: 0.6563   3rd Qu.: 0.4371  
##  Max.   : 5.8917   Max.   : 8.8572   Max.   : 4.8224   Max.   : 7.9142  
##  UniqueUnitsMade   ComplexUnitsMade  ComplexAbilitiesUsed
##  Min.   :-2.4427   Min.   :-0.5379   Min.   :-0.5343     
##  1st Qu.:-0.8289   1st Qu.:-0.5379   1st Qu.:-0.5343     
##  Median :-0.2910   Median :-0.5379   Median :-0.4574     
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000     
##  3rd Qu.: 0.7848   3rd Qu.: 0.2460   3rd Qu.: 0.1520     
##  Max.   : 3.4743   Max.   : 7.5535   Max.   :11.0740
str(data_scaled)
## tibble [3,338 × 19] (S3: tbl_df/tbl/data.frame)
##  $ LeagueIndex         : num [1:3338] 5 5 4 3 3 2 1 7 4 4 ...
##  $ Age                 : num [1:3338] 27 23 30 19 32 27 21 17 20 18 ...
##  $ HoursPerWeek        : num [1:3338] -0.494 -0.494 -0.494 0.342 -0.494 ...
##  $ TotalHours          : num [1:3338] 0.1178 0.2333 -0.0439 -0.0324 -0.0266 ...
##  $ APM                 : num [1:3338] 144 129 70 108 123 ...
##  $ SelectByHotkeys     : num [1:3338] -0.108 -0.152 -0.618 -0.633 -0.611 ...
##  $ AssignToHotkeys     : num [1:3338] -0.688 -0.498 -0.136 -0.719 -0.175 ...
##  $ UniqueHotkeys       : num [1:3338] 1.15 -0.136 -0.136 -1.421 -0.993 ...
##  $ MinimapAttacks      : num [1:3338] 0.101 1.26 1.257 -0.255 -0.59 ...
##  $ MinimapRightClicks  : num [1:3338] 0.0336 0.1452 0.2258 0.4539 2.6379 ...
##  $ NumberOfPACs        : num [1:3338] 0.00485 0.00431 0.00293 0.00378 0.00237 ...
##  $ GapBetweenPACs      : num [1:3338] -0.472 -0.457 0.231 -0.674 -1.057 ...
##  $ ActionLatency       : num [1:3338] -1.226 -1.148 0.585 -0.55 -0.112 ...
##  $ ActionsInPAC        : num [1:3338] -0.344 -0.282 -0.816 -0.234 2.737 ...
##  $ TotalMapExplored    : num [1:3338] 0.7907 -0.0157 -0.0157 -0.4189 -0.9565 ...
##  $ WorkersMade         : num [1:3338] 0.703 0.312 -0.55 -1.162 0.276 ...
##  $ UniqueUnitsMade     : num [1:3338] -0.291 -0.829 -0.291 0.247 -1.367 ...
##  $ ComplexUnitsMade    : num [1:3338] -0.538 -0.538 -0.538 -0.538 -0.538 ...
##  $ ComplexAbilitiesUsed: num [1:3338] -0.534 0.247 0.176 0.91 -0.462 ...
prop.table(table(data_scaled$LeagueIndex))
## 
##          1          2          3          4          5          6          7 
## 0.05002996 0.10395446 0.16566806 0.24295986 0.24086279 0.18603954 0.01048532

#3Uji Asumsi

#LDA 1. Uji Multikolinearitas

# Pastikan dplyr sudah aktif
library(MASS)
# Pisahkan fitur numerik (selain LeagueIndex)
features <- data_scaled %>% dplyr::select(-LeagueIndex)

# Bagi data berdasarkan LeagueIndex
grouped_data <- split(data_scaled, data_scaled$LeagueIndex)

# Uji normalitas Shapiro-Wilk untuk tiap fitur per grup LeagueIndex
normality_results <- lapply(names(features), function(feat) {
  sapply(grouped_data, function(group) {
    # Pastikan kolom fitur ada dan tidak semua NA
    if (nrow(group) >= 3 && all(!is.na(group[[feat]]))) {
      tryCatch(
        shapiro.test(group[[feat]])$p.value,
        error = function(e) NA
      )
    } else {
      NA
    }
  })
})

# Ubah hasil ke data.frame
normality_results_df <- as.data.frame(do.call(rbind, normality_results))
rownames(normality_results_df) <- names(features)

# Tampilkan hasil
normality_results_df
##                                 1            2            3            4
## Age                  1.564557e-08 1.857704e-13 4.895264e-18 4.525719e-18
## HoursPerWeek         8.376279e-13 2.926029e-19 2.101601e-23 4.456902e-29
## TotalHours           1.145293e-13 1.645084e-18 1.452047e-38 2.135359e-46
## APM                  1.769937e-08 1.186078e-07 6.062001e-13 1.743495e-13
## SelectByHotkeys      5.168161e-20 1.737062e-24 3.876843e-31 2.230163e-36
## AssignToHotkeys      3.227129e-09 1.991833e-10 9.923346e-11 1.045519e-16
## UniqueHotkeys        3.667612e-05 1.298086e-08 9.136284e-12 5.467023e-12
## MinimapAttacks       3.968334e-19 2.334363e-29 6.704285e-36 5.448659e-36
## MinimapRightClicks   7.911731e-14 6.904780e-22 1.554658e-25 2.262078e-31
## NumberOfPACs         6.618546e-03 1.504687e-01 1.022977e-02 1.053482e-01
## GapBetweenPACs       6.058565e-10 8.187020e-11 5.428698e-12 1.457525e-13
## ActionLatency        1.646470e-05 2.258143e-10 8.977229e-11 2.034559e-14
## ActionsInPAC         7.587403e-09 1.852385e-16 1.475459e-13 8.107886e-21
## TotalMapExplored     5.882548e-07 6.726613e-03 1.652148e-08 2.913655e-12
## WorkersMade          6.280443e-09 2.563764e-15 7.368747e-21 2.449994e-24
## UniqueUnitsMade      3.530102e-04 1.799087e-08 1.158809e-09 7.256471e-12
## ComplexUnitsMade     1.167487e-23 2.850728e-32 2.113443e-35 1.405968e-38
## ComplexAbilitiesUsed 7.930522e-22 8.495354e-32 8.935050e-37 2.579207e-39
##                                 5            6            7
## Age                  2.035832e-15 1.472301e-11 5.791262e-02
## HoursPerWeek         3.141134e-27 1.019794e-27 4.682444e-03
## TotalHours           4.552724e-53 1.788244e-42 7.550364e-09
## APM                  4.098781e-19 1.012480e-10 2.025266e-01
## SelectByHotkeys      9.654193e-35 6.154131e-27 5.888732e-03
## AssignToHotkeys      1.322127e-07 9.953129e-10 1.487034e-01
## UniqueHotkeys        1.223919e-10 1.314679e-10 8.702914e-02
## MinimapAttacks       5.461243e-41 2.895374e-32 2.358348e-04
## MinimapRightClicks   1.887322e-27 5.392883e-25 2.658611e-02
## NumberOfPACs         6.848102e-02 4.220428e-05 3.940142e-01
## GapBetweenPACs       1.615802e-10 1.695354e-12 7.404780e-01
## ActionLatency        5.157857e-09 2.596012e-08 2.166930e-01
## ActionsInPAC         5.698484e-27 5.045023e-17 3.226406e-01
## TotalMapExplored     4.572844e-07 3.417213e-09 9.784375e-02
## WorkersMade          2.227548e-24 5.800788e-22 1.743396e-04
## UniqueUnitsMade      6.811580e-12 3.372936e-09 2.678919e-01
## ComplexUnitsMade     9.849624e-36 1.721935e-32 3.018311e-07
## ComplexAbilitiesUsed 2.132116e-37 1.223818e-33 9.687512e-10
  1. Uji box
library(biotools)
## Warning: package 'biotools' was built under R version 4.4.3
## ---
## biotools version 4.3
library(MASS)

boxM_result <- boxM(data_scaled[, -which(names(data_scaled) == "LeagueIndex")],
                    grouping = data_scaled$LeagueIndex)
boxM_result
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  data_scaled[, -which(names(data_scaled) == "LeagueIndex")]
## Chi-Sq (approx.) = 22901, df = 1026, p-value < 2.2e-16

#Olr Uji Multikonileritas

library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(dplyr)
# Fit model OLS sementara untuk cek VIF
model_ols <- lm(as.numeric(LeagueIndex) ~ ., data = data_scaled)
vif(model_ols)
##                  Age         HoursPerWeek           TotalHours 
##             1.125685             1.098924             1.008281 
##                  APM      SelectByHotkeys      AssignToHotkeys 
##            36.723621            12.258322             1.612387 
##        UniqueHotkeys       MinimapAttacks   MinimapRightClicks 
##             1.309584             1.136730             1.292985 
##         NumberOfPACs       GapBetweenPACs        ActionLatency 
##            13.724444             2.252419             5.332298 
##         ActionsInPAC     TotalMapExplored          WorkersMade 
##             8.026527             1.855805             1.307615 
##      UniqueUnitsMade     ComplexUnitsMade ComplexAbilitiesUsed 
##             1.673609             1.819523             1.674409
#Jika VIF > 5 atau 10, ada indikasi multikolinearitas

#Reduksi Dimensi PCA

# PCA tanpa variabel target
pca_result <- prcomp(data_scaled[, -which(names(data_scaled) == "LeagueIndex")], center = TRUE, scale. = TRUE)

# Ringkasan hasil PCA
summary(pca_result)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.1948 1.4365 1.2297 1.12599 1.02865 0.99614 0.99178
## Proportion of Variance 0.2676 0.1146 0.0840 0.07044 0.05878 0.05513 0.05465
## Cumulative Proportion  0.2676 0.3823 0.4663 0.53670 0.59548 0.65061 0.70525
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.91031 0.87131 0.85881 0.84404 0.77100 0.71518 0.64969
## Proportion of Variance 0.04604 0.04218 0.04098 0.03958 0.03302 0.02842 0.02345
## Cumulative Proportion  0.75129 0.79347 0.83444 0.87402 0.90704 0.93546 0.95891
##                           PC15    PC16    PC17    PC18
## Standard deviation     0.59764 0.50516 0.33377 0.12585
## Proportion of Variance 0.01984 0.01418 0.00619 0.00088
## Cumulative Proportion  0.97875 0.99293 0.99912 1.00000
# Proporsi varian kumulatif
cum_var <- cumsum(pca_result$sdev^2 / sum(pca_result$sdev^2))
plot(cum_var, type = "b", xlab = "Jumlah Komponen", ylab = "Proporsi Varian Kumulatif",
     main = "Scree Plot PCA", col = "blue", pch = 19)
abline(h = 0.9, col = "red", lty = 2) # misalnya threshold 90% varian

Factor Analysis

library(psych)
## Warning: package 'psych' was built under R version 4.4.3
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Menentukan jumlah faktor dengan scree plot
fa.parallel(data_scaled[, -which(names(data_scaled) == "LeagueIndex")], fa = "fa")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.

## Parallel analysis suggests that the number of factors =  7  and the number of components =  NA
# Lakukan FA dengan jumlah faktor misalnya 5
fa_result <- fa(data_scaled[, -which(names(data_scaled) == "LeagueIndex")], nfactors = 5, rotate = "varimax")
print(fa_result)
## Factor Analysis using method =  minres
## Call: fa(r = data_scaled[, -which(names(data_scaled) == "LeagueIndex")], 
##     nfactors = 5, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                        MR1   MR4   MR5   MR3   MR2     h2    u2 com
## Age                  -0.27 -0.12  0.10  0.02 -0.11 0.1050 0.895 2.1
## HoursPerWeek          0.16  0.20  0.00  0.06  0.07 0.0724 0.928 2.4
## TotalHours            0.02  0.08  0.00  0.00 -0.01 0.0076 0.992 1.1
## APM                   0.49  0.74  0.14  0.42  0.07 0.9865 0.014 2.5
## SelectByHotkeys       0.14  0.91  0.04  0.11  0.03 0.8684 0.132 1.1
## AssignToHotkeys       0.31  0.43  0.25  0.16  0.08 0.3710 0.629 2.9
## UniqueHotkeys         0.15  0.27  0.37  0.09  0.02 0.2411 0.759 2.4
## MinimapAttacks        0.05  0.10  0.23  0.27 -0.04 0.1437 0.856 2.4
## MinimapRightClicks    0.09  0.04  0.20  0.47  0.03 0.2670 0.733 1.5
## NumberOfPACs          0.76  0.28  0.50 -0.11  0.04 0.9196 0.080 2.1
## GapBetweenPACs       -0.59 -0.17 -0.09 -0.39  0.03 0.5388 0.461 2.0
## ActionLatency        -0.85 -0.25 -0.28 -0.19 -0.07 0.9025 0.097 1.5
## ActionsInPAC          0.00  0.10 -0.32  0.84  0.09 0.8313 0.169 1.4
## TotalMapExplored      0.12  0.04  0.71  0.03  0.20 0.5586 0.441 1.2
## WorkersMade           0.25  0.11  0.08  0.29  0.14 0.1838 0.816 3.0
## UniqueUnitsMade       0.02 -0.03  0.66  0.05  0.29 0.5261 0.474 1.4
## ComplexUnitsMade      0.09  0.01  0.19  0.06  0.87 0.8092 0.191 1.1
## ComplexAbilitiesUsed  0.09  0.02  0.16  0.05  0.65 0.4603 0.540 1.2
## 
##                        MR1  MR4  MR5  MR3  MR2
## SS loadings           2.24 1.90 1.77 1.51 1.37
## Proportion Var        0.12 0.11 0.10 0.08 0.08
## Cumulative Var        0.12 0.23 0.33 0.41 0.49
## Proportion Explained  0.25 0.22 0.20 0.17 0.16
## Cumulative Proportion 0.25 0.47 0.67 0.84 1.00
## 
## Mean item complexity =  1.8
## Test of the hypothesis that 5 factors are sufficient.
## 
## df null model =  153  with the objective function =  8.92 with Chi Square =  29690.4
## df of  the model are 73  and the objective function was  1 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic n.obs is  3338 with the empirical chi square  609.46  with prob <  1.3e-85 
## The total n.obs was  3338  with Likelihood Chi Square =  3311.6  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.77
## RMSEA index =  0.115  and the 90 % confidence intervals are  0.112 0.119
## BIC =  2719.35
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    MR1  MR4  MR5  MR3  MR2
## Correlation of (regression) scores with factors   0.94 0.97 0.85 0.93 0.90
## Multiple R square of scores with factors          0.88 0.94 0.73 0.86 0.81
## Minimum correlation of possible factor scores     0.76 0.88 0.46 0.73 0.62

Visualaisasi PCA

#2D Scatter Plot (2 Komponen)
# Ambil dua komponen utama
pca_df <- as.data.frame(pca_result$x[, 1:2])
pca_df$LeagueIndex <- data_scaled$LeagueIndex

# install.packages("ggplot2") jika belum
library(ggplot2)

ggplot(pca_df, aes(x = PC1, y = PC2, color = LeagueIndex)) +
  geom_point(alpha = 0.7) +
  labs(title = "Visualisasi PCA (2D)", x = "PC1", y = "PC2") +
  theme_minimal()

#3D Plot

#3D plot
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
pca_3d <- as.data.frame(pca_result$x[, 1:3])
pca_3d$LeagueIndex <- data_scaled$LeagueIndex

plot_ly(pca_3d, x = ~PC1, y = ~PC2, z = ~PC3, color = ~LeagueIndex, colors = "Set1", type = "scatter3d", mode = "markers") %>%
  layout(title = "Visualisasi PCA (3D)")

#Perceptual Mapping Multidimensional Scaling (MDS)

# Jarak antar observasi berdasarkan fitur numerik
dist_matrix <- dist(data_scaled[, -which(names(data_scaled) == "LeagueIndex")])

# MDS klasik (metric MDS) ke 2 dimensi
mds_result <- cmdscale(dist_matrix, k = 2)
mds_df <- as.data.frame(mds_result)
colnames(mds_df) <- c("Dim1", "Dim2")
mds_df$LeagueIndex <- data_scaled$LeagueIndex

# Visualisasi hasil MDS
library(ggplot2)
ggplot(mds_df, aes(x = Dim1, y = Dim2, color = LeagueIndex)) +
  geom_point(alpha = 0.7) +
  labs(title = "Perceptual Mapping - MDS", x = "Dimensi 1", y = "Dimensi 2") +
  theme_minimal()

Biplot PCA

# PCA telah dilakukan sebelumnya (prcomp)
biplot(pca_result, scale = 0, cex = 0.6, main = "Biplot dari PCA")

# Biplot dengan ggplot (lebih rapi)
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_pca_biplot(pca_result, label = "var", habillage = data_scaled$LeagueIndex,
                addEllipses = TRUE, repel = TRUE,
                col.var = "steelblue", col.ind = "gray30",
                title = "Biplot PCA dengan LeagueIndex")

#Pemodelan 1. LDA

library(MASS)
library(caret)

# Pastikan LeagueIndex adalah faktor
data_scaled$LeagueIndex <- as.factor(data_scaled$LeagueIndex)

# Bagi data (misalnya 70:30)
set.seed(123)
train_index <- createDataPartition(data_scaled$LeagueIndex, p = 0.7, list = FALSE)
train_data <- data_scaled[train_index, ]
test_data <- data_scaled[-train_index, ]

# LDA
lda_model <- lda(LeagueIndex ~ ., data = train_data)
lda_pred <- predict(lda_model, test_data)

# Pastikan faktor dengan level identik
ref_levels <- levels(data_scaled$LeagueIndex)
predicted <- factor(lda_pred$class, levels = ref_levels)
actual <- factor(test_data$LeagueIndex, levels = ref_levels)

# Confusion Matrix
confusionMatrix(predicted, actual)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3   4   5   6   7
##          1  25  17  20   3   0   0   0
##          2  10  16  11   5   0   0   0
##          3   7  31  37  20  10   1   0
##          4   8  37  79 137  68  28   0
##          5   0   2  15  61 108  71   0
##          6   0   1   3  16  51  77   7
##          7   0   0   0   1   4   9   3
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4034          
##                  95% CI : (0.3728, 0.4346)
##     No Information Rate : 0.2432          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.2513          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3 Class: 4 Class: 5 Class: 6
## Sensitivity           0.50000  0.15385  0.22424   0.5638   0.4481  0.41398
## Specificity           0.95785  0.97095  0.91727   0.7090   0.8034  0.90406
## Pos Pred Value        0.38462  0.38095  0.34906   0.3838   0.4202  0.49677
## Neg Pred Value        0.97323  0.90805  0.85666   0.8349   0.8208  0.87085
## Prevalence            0.05005  0.10410  0.16517   0.2432   0.2412  0.18619
## Detection Rate        0.02503  0.01602  0.03704   0.1371   0.1081  0.07708
## Detection Prevalence  0.06507  0.04204  0.10611   0.3574   0.2573  0.15516
## Balanced Accuracy     0.72893  0.56240  0.57075   0.6364   0.6258  0.65902
##                      Class: 7
## Sensitivity          0.300000
## Specificity          0.985844
## Pos Pred Value       0.176471
## Neg Pred Value       0.992872
## Prevalence           0.010010
## Detection Rate       0.003003
## Detection Prevalence 0.017017
## Balanced Accuracy    0.642922
  1. OLR
# Pastikan LeagueIndex adalah ordered factor
data_scaled$LeagueIndex <- factor(data_scaled$LeagueIndex, ordered = TRUE)

# Model ordinal logistic regression
library(MASS)
olr_model <- polr(LeagueIndex ~ ., data = train_data, Hess = TRUE)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Prediksi
olr_pred <- predict(olr_model, newdata = test_data)

# Evaluasi
confusionMatrix(olr_pred, test_data$LeagueIndex)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3   4   5   6   7
##          1  10   9   5   1   0   0   0
##          2  25  24  29   8   2   0   0
##          3  10  37  42  31   7   1   0
##          4   5  31  72 127  79  17   0
##          5   0   3  13  63 101  82   0
##          6   0   0   4  12  50  85  10
##          7   0   0   0   1   2   1   0
## 
## Overall Statistics
##                                          
##                Accuracy : 0.3894         
##                  95% CI : (0.359, 0.4204)
##     No Information Rate : 0.2432         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.2315         
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3 Class: 4 Class: 5 Class: 6
## Sensitivity           0.20000  0.23077  0.25455   0.5226   0.4191  0.45699
## Specificity           0.98419  0.92849  0.89688   0.7302   0.7876  0.90652
## Pos Pred Value        0.40000  0.27273  0.32812   0.3837   0.3855  0.52795
## Neg Pred Value        0.95893  0.91218  0.85878   0.8263   0.8100  0.87947
## Prevalence            0.05005  0.10410  0.16517   0.2432   0.2412  0.18619
## Detection Rate        0.01001  0.02402  0.04204   0.1271   0.1011  0.08509
## Detection Prevalence  0.02503  0.08809  0.12813   0.3313   0.2623  0.16116
## Balanced Accuracy     0.59210  0.57963  0.57571   0.6264   0.6033  0.68175
##                      Class: 7
## Sensitivity          0.000000
## Specificity          0.995956
## Pos Pred Value       0.000000
## Neg Pred Value       0.989950
## Prevalence           0.010010
## Detection Rate       0.000000
## Detection Prevalence 0.004004
## Balanced Accuracy    0.497978

#Evaluasi

# Asumsikan sudah ada lda_pred, olr_pred, dan test_data$LeagueIndex

# Samakan tipe data: ubah ke karakter agar bisa dibandingkan
lda_pred_char <- as.character(lda_pred$class)
olr_pred_char <- as.character(olr_pred)
true_char <- as.character(test_data$LeagueIndex)

# Hitung confusion matrix untuk LDA
conf_matrix_lda <- table(Predicted = lda_pred_char, Actual = true_char)
print(conf_matrix_lda)
##          Actual
## Predicted   1   2   3   4   5   6   7
##         1  25  17  20   3   0   0   0
##         2  10  16  11   5   0   0   0
##         3   7  31  37  20  10   1   0
##         4   8  37  79 137  68  28   0
##         5   0   2  15  61 108  71   0
##         6   0   1   3  16  51  77   7
##         7   0   0   0   1   4   9   3
# Hitung confusion matrix untuk OLR
conf_matrix_olr <- table(Predicted = olr_pred_char, Actual = true_char)
print(conf_matrix_olr)
##          Actual
## Predicted   1   2   3   4   5   6   7
##         1  10   9   5   1   0   0   0
##         2  25  24  29   8   2   0   0
##         3  10  37  42  31   7   1   0
##         4   5  31  72 127  79  17   0
##         5   0   3  13  63 101  82   0
##         6   0   0   4  12  50  85  10
##         7   0   0   0   1   2   1   0
# Fungsi hitung metrik evaluasi: akurasi, precision, recall (per kelas)
eval_metrics <- function(conf_matrix) {
  accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
  precision <- diag(conf_matrix) / rowSums(conf_matrix)
  recall <- diag(conf_matrix) / colSums(conf_matrix)
  list(accuracy = accuracy, precision = precision, recall = recall)
}

metrics_lda <- eval_metrics(conf_matrix_lda)
metrics_olr <- eval_metrics(conf_matrix_olr)

print("LDA Metrics:")
## [1] "LDA Metrics:"
print(metrics_lda)
## $accuracy
## [1] 0.4034034
## 
## $precision
##         1         2         3         4         5         6         7 
## 0.3846154 0.3809524 0.3490566 0.3837535 0.4202335 0.4967742 0.1764706 
## 
## $recall
##         1         2         3         4         5         6         7 
## 0.5000000 0.1538462 0.2242424 0.5637860 0.4481328 0.4139785 0.3000000
print("OLR Metrics:")
## [1] "OLR Metrics:"
print(metrics_olr)
## $accuracy
## [1] 0.3893894
## 
## $precision
##         1         2         3         4         5         6         7 
## 0.4000000 0.2727273 0.3281250 0.3836858 0.3854962 0.5279503 0.0000000 
## 
## $recall
##         1         2         3         4         5         6         7 
## 0.2000000 0.2307692 0.2545455 0.5226337 0.4190871 0.4569892 0.0000000
# Buat tabel perbandingan benar/tidak prediksi per model
lda_correct <- lda_pred_char == true_char
olr_correct <- olr_pred_char == true_char

# Contingency table untuk uji McNemar
comparison_table <- table(LDA_correct = lda_correct, OLR_correct = olr_correct)
print(comparison_table)
##            OLR_correct
## LDA_correct FALSE TRUE
##       FALSE   501   95
##       TRUE    109  294
# Uji McNemar untuk signifikansi perbedaan prediksi
mcnemar_result <- mcnemar.test(comparison_table)
print(mcnemar_result)
## 
##  McNemar's Chi-squared test with continuity correction
## 
## data:  comparison_table
## McNemar's chi-squared = 0.82843, df = 1, p-value = 0.3627
# Gabungkan metrik
metrics_df <- data.frame(
  Metric = rep(c("Accuracy", "Precision", "Recall"), each = length(metrics_lda$precision)),
  Model = rep(c("LDA", "OLR"), times = c(length(metrics_lda$precision) * 3, length(metrics_olr$precision) * 3)),
  Value = c(
    rep(metrics_lda$accuracy, length(metrics_lda$precision)),
    metrics_lda$precision,
    metrics_lda$recall,
    rep(metrics_olr$accuracy, length(metrics_olr$precision)),
    metrics_olr$precision,
    metrics_olr$recall
  ),
  Class = rep(names(metrics_lda$precision), times = 6)
)

# Plot barplot
ggplot(metrics_df, aes(x = Class, y = Value, fill = Model)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  facet_wrap(~ Metric, scales = "free_y") +
  labs(title = "Perbandingan Akurasi, Precision, dan Recall per Kelas",
       y = "Value", x = "Kelas") +
  theme_minimal()

accuracy_df <- data.frame(
  Model = c("LDA", "OLR"),
  Accuracy = c(metrics_lda$accuracy, metrics_olr$accuracy)
)

ggplot(accuracy_df, aes(x = Model, y = Accuracy, fill = Model)) +
  geom_bar(stat = "identity", width = 0.5) +
  ylim(0, 1) +
  labs(title = "Perbandingan Akurasi Model", y = "Akurasi", x = "") +
  theme_minimal()