Klasifikasi Status Alzheimer Menggunakan Regresi Logistik

Nama : Shafira Nabila Noer Poerwanto

NIM : 23031554011

Dosen Pengampu : Ike Fitriyaningsih, M.Si

Mata Kuliah : Analisis Multivariat

Universitas Negeri Surabaya

1. Import library

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.4.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.4     ✔ stringr   1.5.1
## ✔ purrr     1.0.4     ✔ tibble    3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(MASS)
## Warning: package 'MASS' was built under R version 4.4.3
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

2. Pre processing

Load data

df <- read.csv("C:/Users/shafi/OneDrive/Documents/Semester 4/Analisis Multivariat/Project uas/alzheimers_disease_data.csv")

Cek struktur data

str(df)
## 'data.frame':    2149 obs. of  35 variables:
##  $ PatientID                : int  4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 ...
##  $ Age                      : int  73 89 73 74 89 86 68 75 72 87 ...
##  $ Gender                   : int  0 0 0 1 0 1 0 0 1 0 ...
##  $ Ethnicity                : int  0 0 3 0 0 1 3 0 1 0 ...
##  $ EducationLevel           : int  2 0 1 1 0 1 2 1 0 0 ...
##  $ BMI                      : num  22.9 26.8 17.8 33.8 20.7 ...
##  $ Smoking                  : int  0 0 0 1 0 0 1 0 0 1 ...
##  $ AlcoholConsumption       : num  13.3 4.54 19.56 12.21 18.45 ...
##  $ PhysicalActivity         : num  6.33 7.62 7.84 8.43 6.31 ...
##  $ DietQuality              : num  1.347 0.519 1.826 7.436 0.795 ...
##  $ SleepQuality             : num  9.03 7.15 9.67 8.39 5.6 ...
##  $ FamilyHistoryAlzheimers  : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ CardiovascularDisease    : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Diabetes                 : int  1 0 0 0 0 1 0 0 0 0 ...
##  $ Depression               : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ HeadInjury               : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ Hypertension             : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ SystolicBP               : int  142 115 99 118 94 168 143 117 117 130 ...
##  $ DiastolicBP              : int  72 64 116 115 117 62 88 63 119 78 ...
##  $ CholesterolTotal         : num  242 231 284 160 238 ...
##  $ CholesterolLDL           : num  56.2 193.4 153.3 65.4 92.9 ...
##  $ CholesterolHDL           : num  33.7 79 69.8 68.5 56.9 ...
##  $ CholesterolTriglycerides : num  162.2 294.6 83.6 277.6 291.2 ...
##  $ MMSE                     : num  21.46 20.61 7.36 13.99 13.52 ...
##  $ FunctionalAssessment     : num  6.52 7.12 5.9 8.97 6.05 ...
##  $ MemoryComplaints         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BehavioralProblems       : int  0 0 0 1 0 0 0 0 1 1 ...
##  $ ADL                      : num  1.7259 2.5924 7.1195 6.4812 0.0147 ...
##  $ Confusion                : int  0 0 0 0 0 1 0 1 0 0 ...
##  $ Disorientation           : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ PersonalityChanges       : int  0 0 0 0 1 0 0 0 1 0 ...
##  $ DifficultyCompletingTasks: int  1 0 1 0 1 0 0 0 0 0 ...
##  $ Forgetfulness            : int  0 1 0 0 0 0 1 1 0 0 ...
##  $ Diagnosis                : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ DoctorInCharge           : chr  "XXXConfid" "XXXConfid" "XXXConfid" "XXXConfid" ...

Cek data duplikat dan missing value

cat("Jumlah duplikasi:", sum(duplicated(df)), "\n")
## Jumlah duplikasi: 0
print(colSums(is.na(df)))
##                 PatientID                       Age                    Gender 
##                         0                         0                         0 
##                 Ethnicity            EducationLevel                       BMI 
##                         0                         0                         0 
##                   Smoking        AlcoholConsumption          PhysicalActivity 
##                         0                         0                         0 
##               DietQuality              SleepQuality   FamilyHistoryAlzheimers 
##                         0                         0                         0 
##     CardiovascularDisease                  Diabetes                Depression 
##                         0                         0                         0 
##                HeadInjury              Hypertension                SystolicBP 
##                         0                         0                         0 
##               DiastolicBP          CholesterolTotal            CholesterolLDL 
##                         0                         0                         0 
##            CholesterolHDL  CholesterolTriglycerides                      MMSE 
##                         0                         0                         0 
##      FunctionalAssessment          MemoryComplaints        BehavioralProblems 
##                         0                         0                         0 
##                       ADL                 Confusion            Disorientation 
##                         0                         0                         0 
##        PersonalityChanges DifficultyCompletingTasks             Forgetfulness 
##                         0                         0                         0 
##                 Diagnosis            DoctorInCharge 
##                         0                         0

Drop kolom yang tidak perlu

df <- df %>% dplyr::select(-PatientID, -DoctorInCharge)
str(df)
## 'data.frame':    2149 obs. of  33 variables:
##  $ Age                      : int  73 89 73 74 89 86 68 75 72 87 ...
##  $ Gender                   : int  0 0 0 1 0 1 0 0 1 0 ...
##  $ Ethnicity                : int  0 0 3 0 0 1 3 0 1 0 ...
##  $ EducationLevel           : int  2 0 1 1 0 1 2 1 0 0 ...
##  $ BMI                      : num  22.9 26.8 17.8 33.8 20.7 ...
##  $ Smoking                  : int  0 0 0 1 0 0 1 0 0 1 ...
##  $ AlcoholConsumption       : num  13.3 4.54 19.56 12.21 18.45 ...
##  $ PhysicalActivity         : num  6.33 7.62 7.84 8.43 6.31 ...
##  $ DietQuality              : num  1.347 0.519 1.826 7.436 0.795 ...
##  $ SleepQuality             : num  9.03 7.15 9.67 8.39 5.6 ...
##  $ FamilyHistoryAlzheimers  : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ CardiovascularDisease    : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Diabetes                 : int  1 0 0 0 0 1 0 0 0 0 ...
##  $ Depression               : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ HeadInjury               : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ Hypertension             : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ SystolicBP               : int  142 115 99 118 94 168 143 117 117 130 ...
##  $ DiastolicBP              : int  72 64 116 115 117 62 88 63 119 78 ...
##  $ CholesterolTotal         : num  242 231 284 160 238 ...
##  $ CholesterolLDL           : num  56.2 193.4 153.3 65.4 92.9 ...
##  $ CholesterolHDL           : num  33.7 79 69.8 68.5 56.9 ...
##  $ CholesterolTriglycerides : num  162.2 294.6 83.6 277.6 291.2 ...
##  $ MMSE                     : num  21.46 20.61 7.36 13.99 13.52 ...
##  $ FunctionalAssessment     : num  6.52 7.12 5.9 8.97 6.05 ...
##  $ MemoryComplaints         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BehavioralProblems       : int  0 0 0 1 0 0 0 0 1 1 ...
##  $ ADL                      : num  1.7259 2.5924 7.1195 6.4812 0.0147 ...
##  $ Confusion                : int  0 0 0 0 0 1 0 1 0 0 ...
##  $ Disorientation           : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ PersonalityChanges       : int  0 0 0 0 1 0 0 0 1 0 ...
##  $ DifficultyCompletingTasks: int  1 0 1 0 1 0 0 0 0 0 ...
##  $ Forgetfulness            : int  0 1 0 0 0 0 1 1 0 0 ...
##  $ Diagnosis                : int  0 0 0 0 0 0 0 1 0 0 ...

Konversi diagnosis jadi faktor

df$Diagnosis <- as.factor(df$Diagnosis)
str(df)
## 'data.frame':    2149 obs. of  33 variables:
##  $ Age                      : int  73 89 73 74 89 86 68 75 72 87 ...
##  $ Gender                   : int  0 0 0 1 0 1 0 0 1 0 ...
##  $ Ethnicity                : int  0 0 3 0 0 1 3 0 1 0 ...
##  $ EducationLevel           : int  2 0 1 1 0 1 2 1 0 0 ...
##  $ BMI                      : num  22.9 26.8 17.8 33.8 20.7 ...
##  $ Smoking                  : int  0 0 0 1 0 0 1 0 0 1 ...
##  $ AlcoholConsumption       : num  13.3 4.54 19.56 12.21 18.45 ...
##  $ PhysicalActivity         : num  6.33 7.62 7.84 8.43 6.31 ...
##  $ DietQuality              : num  1.347 0.519 1.826 7.436 0.795 ...
##  $ SleepQuality             : num  9.03 7.15 9.67 8.39 5.6 ...
##  $ FamilyHistoryAlzheimers  : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ CardiovascularDisease    : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Diabetes                 : int  1 0 0 0 0 1 0 0 0 0 ...
##  $ Depression               : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ HeadInjury               : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ Hypertension             : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ SystolicBP               : int  142 115 99 118 94 168 143 117 117 130 ...
##  $ DiastolicBP              : int  72 64 116 115 117 62 88 63 119 78 ...
##  $ CholesterolTotal         : num  242 231 284 160 238 ...
##  $ CholesterolLDL           : num  56.2 193.4 153.3 65.4 92.9 ...
##  $ CholesterolHDL           : num  33.7 79 69.8 68.5 56.9 ...
##  $ CholesterolTriglycerides : num  162.2 294.6 83.6 277.6 291.2 ...
##  $ MMSE                     : num  21.46 20.61 7.36 13.99 13.52 ...
##  $ FunctionalAssessment     : num  6.52 7.12 5.9 8.97 6.05 ...
##  $ MemoryComplaints         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BehavioralProblems       : int  0 0 0 1 0 0 0 0 1 1 ...
##  $ ADL                      : num  1.7259 2.5924 7.1195 6.4812 0.0147 ...
##  $ Confusion                : int  0 0 0 0 0 1 0 1 0 0 ...
##  $ Disorientation           : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ PersonalityChanges       : int  0 0 0 0 1 0 0 0 1 0 ...
##  $ DifficultyCompletingTasks: int  1 0 1 0 1 0 0 0 0 0 ...
##  $ Forgetfulness            : int  0 1 0 0 0 0 1 1 0 0 ...
##  $ Diagnosis                : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...

Menampilkan 10 data

head(df, 10)
##    Age Gender Ethnicity EducationLevel      BMI Smoking AlcoholConsumption
## 1   73      0         0              2 22.92775       0         13.2972177
## 2   89      0         0              0 26.82768       0          4.5425238
## 3   73      0         3              1 17.79588       0         19.5550845
## 4   74      1         0              1 33.80082       1         12.2092655
## 5   89      0         0              0 20.71697       0         18.4543561
## 6   86      1         1              1 30.62689       0          4.1401438
## 7   68      0         3              2 38.38762       1          0.6460473
## 8   75      0         0              1 18.77601       0         13.7238257
## 9   72      1         1              0 27.83319       0         12.1678476
## 10  87      0         0              0 35.45630       1         16.0286882
##    PhysicalActivity DietQuality SleepQuality FamilyHistoryAlzheimers
## 1         6.3271125   1.3472143     9.025679                       0
## 2         7.6198845   0.5187671     7.151293                       0
## 3         7.8449878   1.8263347     9.673574                       1
## 4         8.4280014   7.4356041     8.392554                       0
## 5         6.3104607   0.7954975     5.597238                       0
## 6         0.2110616   1.5849220     7.261953                       0
## 7         9.2576949   5.8973879     5.477686                       0
## 8         4.6494507   8.3419032     4.213210                       0
## 9         1.5313598   6.7368820     5.748224                       0
## 10        6.4407727   8.0860191     7.551773                       0
##    CardiovascularDisease Diabetes Depression HeadInjury Hypertension SystolicBP
## 1                      0        1          1          0            0        142
## 2                      0        0          0          0            0        115
## 3                      0        0          0          0            0         99
## 4                      0        0          0          0            0        118
## 5                      0        0          0          0            0         94
## 6                      0        1          0          0            0        168
## 7                      0        0          0          1            0        143
## 8                      0        0          0          0            0        117
## 9                      0        0          0          0            1        117
## 10                     1        0          0          0            0        130
##    DiastolicBP CholesterolTotal CholesterolLDL CholesterolHDL
## 1           72         242.3668       56.15090       33.68256
## 2           64         231.1626      193.40800       79.02848
## 3          116         284.1819      153.32276       69.77229
## 4          115         159.5822       65.36664       68.45749
## 5          117         237.6022       92.86970       56.87430
## 6           62         280.7125      198.33463       79.08050
## 7           88         263.7341       52.47067       66.53337
## 8           63         151.3831       69.62351       77.34682
## 9          119         233.6058      144.04574       43.07589
## 10          78         281.6301      130.49758       74.29125
##    CholesterolTriglycerides      MMSE FunctionalAssessment MemoryComplaints
## 1                 162.18914 21.463532             6.518877                0
## 2                 294.63091 20.613267             7.118696                0
## 3                  83.63832  7.356249             5.895077                0
## 4                 277.57736 13.991127             8.965106                0
## 5                 291.19878 13.517609             6.045039                0
## 6                 263.94365 27.517529             5.510144                0
## 7                 216.48917  1.964413             6.062124                0
## 8                 210.57087 10.139568             3.401374                0
## 9                 151.16419 25.820732             7.396061                0
## 10                144.17597 28.388409             1.148904                0
##    BehavioralProblems        ADL Confusion Disorientation PersonalityChanges
## 1                   0 1.72588346         0              0                  0
## 2                   0 2.59242413         0              0                  0
## 3                   0 7.11954774         0              1                  0
## 4                   1 6.48122586         0              0                  0
## 5                   0 0.01469122         0              0                  1
## 6                   0 9.01568628         1              0                  0
## 7                   0 9.23632828         0              0                  0
## 8                   0 4.51724827         1              0                  0
## 9                   1 0.75623181         0              0                  1
## 10                  1 4.55439387         0              0                  0
##    DifficultyCompletingTasks Forgetfulness Diagnosis
## 1                          1             0         0
## 2                          0             1         0
## 3                          1             0         0
## 4                          0             0         0
## 5                          1             0         0
## 6                          0             0         0
## 7                          0             1         0
## 8                          0             1         1
## 9                          0             0         0
## 10                         0             0         0

Memberi label pada level faktor Diagnosis (0 = Tidak Alzheimer, 1 = Alzheimer)

df$Diagnosis <- factor(df$Diagnosis, levels = c(0,1), labels = c("Tidak Alzheimer", "Alzheimer"))

Standardisasi variabel numerik dan visualisasi distribusi diagnosis Alzheimer

# Flag the numeric columns, then exclude Diagnosis and Gender from scaling.
num_cols <- vapply(df, is.numeric, logical(1))
num_cols[c("Diagnosis", "Gender")] <- FALSE
# Replace the flagged columns with their centred and scaled values.
# NOTE(review): this runs on the full data set, before the train/test split,
# so the scaling parameters are computed from rows that later become test data.
df[num_cols] <- scale(df[num_cols])

# Bar chart of the number of patients per Diagnosis level.
ggplot(data = df, mapping = aes(x = Diagnosis)) +
  geom_bar(fill = "lightblue") +
  labs(
    title = "Distribusi Diagnosis Alzheimer",
    x = "Status Diagnosis",
    y = "Jumlah Pasien"
  )

Menampilkan jumlah tiap kategori pada label Diagnosis

cat("Jumlah tiap kategori:\n")
## Jumlah tiap kategori:
print(table(df$Diagnosis))
## 
## Tidak Alzheimer       Alzheimer 
##            1389             760

Menampilkan persentase label diagnosis

label_percentages <- prop.table(table(df$Diagnosis)) * 100
cat("\nPersentase Tiap Label Diagnosis:\n")
## 
## Persentase Tiap Label Diagnosis:
print(round(label_percentages, 2))
## 
## Tidak Alzheimer       Alzheimer 
##           64.63           35.37

Memilih variabel numerik

# Keep the names of numeric columns that take more than one distinct value.
is_usable <- vapply(
  df,
  function(col) is.numeric(col) && length(unique(col)) > 1,
  logical(1)
)
valid_vars <- names(df)[is_usable]

Menghitung p-value ANOVA

# One-way ANOVA of each candidate variable against Diagnosis.
# Result: a numeric vector of p-values named after valid_vars. A variable
# whose model cannot be fitted (or yields no p-value) gets NA_real_, and a
# fitting error is reported as a warning instead of being silently dropped.
anova_pvals <- vapply(valid_vars, function(var) {
  # Backticks keep non-syntactic column names valid inside the formula.
  fml <- as.formula(paste0("`", var, "` ~ Diagnosis"))
  tryCatch(
    {
      aov_model <- aov(fml, data = df)
      # Diagnosis is the only term, so its p-value is the first table row.
      p_val <- summary(aov_model)[[1]][["Pr(>F)"]][1]
      if (is.null(p_val)) NA_real_ else p_val
    },
    error = function(e) {
      warning(
        "ANOVA failed for ", var, ": ", conditionMessage(e),
        call. = FALSE
      )
      NA_real_
    }
  )
}, FUN.VALUE = numeric(1))

Seleksi fitur

# Keep the variables whose ANOVA p-value is below 0.05. which() drops NA
# p-values, so they cannot end up as NA entries in the feature names.
selected_features <- names(anova_pvals)[which(anova_pvals < 0.05)]
cat("Fitur terpilih (p < 0.05):\n")
## Fitur terpilih (p < 0.05):
print(selected_features)
## [1] "EducationLevel"       "SleepQuality"         "CholesterolHDL"      
## [4] "MMSE"                 "FunctionalAssessment" "MemoryComplaints"    
## [7] "BehavioralProblems"   "ADL"

Membuat dataset berisi diagnosis dan fitur terpilih

final_data <- df[, c("Diagnosis", selected_features)]
head(final_data)
##         Diagnosis EducationLevel SleepQuality CholesterolHDL        MMSE
## 1 Tidak Alzheimer      0.7886499   1.11965745     -1.1141698  0.77885552
## 2 Tidak Alzheimer     -1.4224508   0.05682309      0.8455334  0.68013845
## 3 Tidak Alzheimer     -0.3169004   1.48703408      0.4455111 -0.85902164
## 4 Tidak Alzheimer     -0.3169004   0.76065615      0.3886897 -0.08870211
## 5 Tidak Alzheimer     -1.4224508  -0.82437383     -0.1118981 -0.14367832
## 6 Tidak Alzheimer     -0.3169004   0.11957058      0.8477818  1.48173376
##   FunctionalAssessment MemoryComplaints BehavioralProblems        ADL
## 1            0.4973901       -0.5123573         -0.4311563 -1.1041775
## 2            0.7047429       -0.5123573         -0.4311563 -0.8104125
## 3            0.2817472       -0.5123573         -0.4311563  0.7243229
## 4            1.3430335       -0.5123573          2.3182650  0.5079260
## 5            0.3335878       -0.5123573         -0.4311563 -1.6842869
## 6            0.1486786       -0.5123573         -0.4311563  1.3671308

3. Split data

Training dan testing (70:30)

# Fix the RNG seed so the partition below is reproducible.
set.seed(123)
# Row indices for the training set: p = 0.7 of the rows, drawn with respect to
# final_data$Diagnosis; list = FALSE returns the indices as a matrix, not a list.
train_idx <- createDataPartition(final_data$Diagnosis, p = 0.7, list = FALSE)
# Rows picked by train_idx form the training set; all remaining rows form the
# test set.
train_data <- final_data[train_idx, ]
test_data  <- final_data[-train_idx, ]

Simpan training dan testing ke file csv

write.csv(train_data, "C:/Users/shafi/OneDrive/Documents/Semester 4/Analisis Multivariat/Project uas/train_data.csv", row.names = FALSE)
write.csv(test_data, "C:/Users/shafi/OneDrive/Documents/Semester 4/Analisis Multivariat/Project uas/test_data.csv", row.names = FALSE)

Menampilkan distribusi diagnosis sebelum oversampling

cat("Distribusi sebelum oversampling:\n")
## Distribusi sebelum oversampling:
print(table(train_data$Diagnosis))
## 
## Tidak Alzheimer       Alzheimer 
##             973             532

Oversampling menggunakan ROSE

Menampilkan distribusi setelah oversampling

cat("Distribusi setelah oversampling:\n")
## Distribusi setelah oversampling:
print(table(data_balanced$Diagnosis))
## 
## Tidak Alzheimer       Alzheimer 
##             973             973

Memastikan label diagnosis jadi faktor

data_balanced$Diagnosis <- factor(data_balanced$Diagnosis, levels = c("Tidak Alzheimer", "Alzheimer"))

Menyimpan data oversampled

write.csv(data_balanced, "C:/Users/shafi/OneDrive/Documents/Semester 4/Analisis Multivariat/Project uas/train_data_oversampled.csv", row.names = FALSE)

4. Modeling Analisis Diskriminan

Import library

library(caret)     
library(MASS)       
library(dplyr)      
library(MVN)        
## Warning: package 'MVN' was built under R version 4.4.3
library(biotools)
## Warning: package 'biotools' was built under R version 4.4.3
## ---
## biotools version 4.3

Load data

# Read the oversampled training set back from CSV (the path is relative to
# the current working directory) and restore Diagnosis as a two-level factor
# ordered "Tidak Alzheimer", "Alzheimer".
diagnosis_levels <- c("Tidak Alzheimer", "Alzheimer")
data <- read.csv(file = "train_data_oversampled.csv")
data[["Diagnosis"]] <- factor(data[["Diagnosis"]], levels = diagnosis_levels)

5. Uji asumsi sebelum LDA

Multivariate normality

# Royston multivariate-normality test on every column except Diagnosis;
# both plot arguments are set to FALSE.
predictors_only <- subset(data, select = -Diagnosis)
mvn_res <- mvn(
  data = predictors_only,
  mvnTest = "royston",
  univariatePlot = FALSE,
  multivariatePlot = FALSE
)
print(mvn_res$multivariateNormality)
##      Test        H      p value MVN
## 1 Royston 1216.625 2.45327e-257  NO

Homogenitas matriks kovarians Box M

# Box's M test for homogeneity of covariance matrices: the predictors (all
# columns except Diagnosis) are compared across the Diagnosis groups.
# The "data:" line of the printed result echoes the first argument expression
# exactly as it is written here.
boxm_res <- boxM(data %>% dplyr::select(-Diagnosis),
                 data$Diagnosis)
print(boxm_res)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  data %>% dplyr::select(-Diagnosis)
## Chi-Sq (approx.) = 467.98, df = 36, p-value < 2.2e-16

Bartlett’s test of sphericity

# Bartlett's test of sphericity: tests whether the correlation matrix of the
# columns of `df` is an identity matrix.
#
# Args:
#   df: data frame (or matrix) of numeric columns; rows containing any NA
#       are dropped before the correlation matrix is computed.
# Returns:
#   A one-row data frame with `statistic` (chi-square), `df` (degrees of
#   freedom) and `p.value` (upper-tail probability).
bartlett_test <- function(df) {
  # drop = FALSE keeps a single-column input two-dimensional.
  x <- df[complete.cases(df), , drop = FALSE]
  n <- nrow(x)
  p <- ncol(x)
  # log|R| taken directly on the log scale instead of log(det(R)).
  log_det <- as.numeric(determinant(cor(x), logarithm = TRUE)$modulus)
  # Statistic: -(n - 1 - (2p + 5) / 6) * log|R|. log|R| is <= 0 for a
  # correlation matrix, so the leading minus sign is what makes the
  # chi-square statistic non-negative.
  chisq <- -(n - 1 - (2 * p + 5) / 6) * log_det
  df_chi <- p * (p - 1) / 2
  pval <- pchisq(chisq, df_chi, lower.tail = FALSE)
  data.frame(statistic = chisq, df = df_chi, p.value = pval)
}
bart_res <- bartlett_test(data %>% dplyr::select(-Diagnosis))
print(bart_res)
##   statistic df p.value
## 1 -83.59947 28       1

6. Membangun model

lda_model <- lda(Diagnosis ~ ., data = data)

7. Prediksi dan evaluasi

# Classify the rows of `data` with the fitted LDA model and compare the
# predicted classes against data$Diagnosis.
# NOTE(review): `data` is the same object the model was fitted on, so these
# are training-set metrics - confirm whether test_data was intended.
lda_pred <- predict(lda_model, newdata = data)
pred <- lda_pred$class
conf <- confusionMatrix(data = pred, reference = data$Diagnosis)
print(conf)
## Confusion Matrix and Statistics
## 
##                  Reference
## Prediction        Tidak Alzheimer Alzheimer
##   Tidak Alzheimer             811       134
##   Alzheimer                   162       839
##                                           
##                Accuracy : 0.8479          
##                  95% CI : (0.8312, 0.8636)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.6958          
##                                           
##  Mcnemar's Test P-Value : 0.1166          
##                                           
##             Sensitivity : 0.8335          
##             Specificity : 0.8623          
##          Pos Pred Value : 0.8582          
##          Neg Pred Value : 0.8382          
##              Prevalence : 0.5000          
##          Detection Rate : 0.4168          
##    Detection Prevalence : 0.4856          
##       Balanced Accuracy : 0.8479          
##                                           
##        'Positive' Class : Tidak Alzheimer 
## 

Confusion matrix

# Heat map of the confusion matrix stored in `conf`.
# The table has predictions as its first dimension and reference (true)
# labels as its second, so as.data.frame() yields the prediction column
# first. The names are assigned in that order so the axes are not swapped.
cm_df <- as.data.frame(conf$table)
names(cm_df) <- c("Predicted", "Actual", "Freq")

library(ggplot2)
# One tile per (actual, predicted) pair, shaded and labelled by its count.
ggplot(cm_df, aes(x = Actual, y = Predicted, fill = Freq)) +
  geom_tile(color = "grey70") +
  geom_text(aes(label = Freq), size = 5) +
  scale_fill_gradient(low = "white", high = "steelblue") +
  labs(
    title = "Confusion Matrix",
    x = "Data Asli",
    y = "Prediksi",
    fill = "Jumlah"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid = element_blank()
  )

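Analisis Diskriminan Linear (LDA) adalah metode klasifikasi yang memisahkan kelompok data berdasarkan kombinasi linear fitur. Pada hasil ini, model LDA mencapai akurasi sebesar 84,79% pada data latih hasil oversampling (data yang sama dengan yang digunakan untuk membangun model), yang menunjukkan bahwa model cukup baik dalam membedakan antara penderita Alzheimer dan non-Alzheimer pada data tersebut; kinerja pada data uji perlu dievaluasi secara terpisah.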