Nama : Shafira Nabila Noer Poerwanto
NIM : 23031554011
Dosen Pengampu : Ike Fitriyaningsih, M.Si
Mata Kuliah : Analisis Multivariat
Universitas Negeri Surabaya
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.4.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.4 ✔ stringr 1.5.1
## ✔ purrr 1.0.4 ✔ tibble 3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(MASS)
## Warning: package 'MASS' was built under R version 4.4.3
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
Load data
df <- read.csv("C:/Users/shafi/OneDrive/Documents/Semester 4/Analisis Multivariat/Project uas/alzheimers_disease_data.csv")
Cek struktur data
str(df)
## 'data.frame': 2149 obs. of 35 variables:
## $ PatientID : int 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 ...
## $ Age : int 73 89 73 74 89 86 68 75 72 87 ...
## $ Gender : int 0 0 0 1 0 1 0 0 1 0 ...
## $ Ethnicity : int 0 0 3 0 0 1 3 0 1 0 ...
## $ EducationLevel : int 2 0 1 1 0 1 2 1 0 0 ...
## $ BMI : num 22.9 26.8 17.8 33.8 20.7 ...
## $ Smoking : int 0 0 0 1 0 0 1 0 0 1 ...
## $ AlcoholConsumption : num 13.3 4.54 19.56 12.21 18.45 ...
## $ PhysicalActivity : num 6.33 7.62 7.84 8.43 6.31 ...
## $ DietQuality : num 1.347 0.519 1.826 7.436 0.795 ...
## $ SleepQuality : num 9.03 7.15 9.67 8.39 5.6 ...
## $ FamilyHistoryAlzheimers : int 0 0 1 0 0 0 0 0 0 0 ...
## $ CardiovascularDisease : int 0 0 0 0 0 0 0 0 0 1 ...
## $ Diabetes : int 1 0 0 0 0 1 0 0 0 0 ...
## $ Depression : int 1 0 0 0 0 0 0 0 0 0 ...
## $ HeadInjury : int 0 0 0 0 0 0 1 0 0 0 ...
## $ Hypertension : int 0 0 0 0 0 0 0 0 1 0 ...
## $ SystolicBP : int 142 115 99 118 94 168 143 117 117 130 ...
## $ DiastolicBP : int 72 64 116 115 117 62 88 63 119 78 ...
## $ CholesterolTotal : num 242 231 284 160 238 ...
## $ CholesterolLDL : num 56.2 193.4 153.3 65.4 92.9 ...
## $ CholesterolHDL : num 33.7 79 69.8 68.5 56.9 ...
## $ CholesterolTriglycerides : num 162.2 294.6 83.6 277.6 291.2 ...
## $ MMSE : num 21.46 20.61 7.36 13.99 13.52 ...
## $ FunctionalAssessment : num 6.52 7.12 5.9 8.97 6.05 ...
## $ MemoryComplaints : int 0 0 0 0 0 0 0 0 0 0 ...
## $ BehavioralProblems : int 0 0 0 1 0 0 0 0 1 1 ...
## $ ADL : num 1.7259 2.5924 7.1195 6.4812 0.0147 ...
## $ Confusion : int 0 0 0 0 0 1 0 1 0 0 ...
## $ Disorientation : int 0 0 1 0 0 0 0 0 0 0 ...
## $ PersonalityChanges : int 0 0 0 0 1 0 0 0 1 0 ...
## $ DifficultyCompletingTasks: int 1 0 1 0 1 0 0 0 0 0 ...
## $ Forgetfulness : int 0 1 0 0 0 0 1 1 0 0 ...
## $ Diagnosis : int 0 0 0 0 0 0 0 1 0 0 ...
## $ DoctorInCharge : chr "XXXConfid" "XXXConfid" "XXXConfid" "XXXConfid" ...
Cek data duplikat dan nilai yang hilang (missing value)
cat("Jumlah duplikasi:", sum(duplicated(df)), "\n")
## Jumlah duplikasi: 0
print(colSums(is.na(df)))
## PatientID Age Gender
## 0 0 0
## Ethnicity EducationLevel BMI
## 0 0 0
## Smoking AlcoholConsumption PhysicalActivity
## 0 0 0
## DietQuality SleepQuality FamilyHistoryAlzheimers
## 0 0 0
## CardiovascularDisease Diabetes Depression
## 0 0 0
## HeadInjury Hypertension SystolicBP
## 0 0 0
## DiastolicBP CholesterolTotal CholesterolLDL
## 0 0 0
## CholesterolHDL CholesterolTriglycerides MMSE
## 0 0 0
## FunctionalAssessment MemoryComplaints BehavioralProblems
## 0 0 0
## ADL Confusion Disorientation
## 0 0 0
## PersonalityChanges DifficultyCompletingTasks Forgetfulness
## 0 0 0
## Diagnosis DoctorInCharge
## 0 0
Drop kolom yang tidak perlu
df <- df %>% dplyr::select(-PatientID, -DoctorInCharge)
str(df)
## 'data.frame': 2149 obs. of 33 variables:
## $ Age : int 73 89 73 74 89 86 68 75 72 87 ...
## $ Gender : int 0 0 0 1 0 1 0 0 1 0 ...
## $ Ethnicity : int 0 0 3 0 0 1 3 0 1 0 ...
## $ EducationLevel : int 2 0 1 1 0 1 2 1 0 0 ...
## $ BMI : num 22.9 26.8 17.8 33.8 20.7 ...
## $ Smoking : int 0 0 0 1 0 0 1 0 0 1 ...
## $ AlcoholConsumption : num 13.3 4.54 19.56 12.21 18.45 ...
## $ PhysicalActivity : num 6.33 7.62 7.84 8.43 6.31 ...
## $ DietQuality : num 1.347 0.519 1.826 7.436 0.795 ...
## $ SleepQuality : num 9.03 7.15 9.67 8.39 5.6 ...
## $ FamilyHistoryAlzheimers : int 0 0 1 0 0 0 0 0 0 0 ...
## $ CardiovascularDisease : int 0 0 0 0 0 0 0 0 0 1 ...
## $ Diabetes : int 1 0 0 0 0 1 0 0 0 0 ...
## $ Depression : int 1 0 0 0 0 0 0 0 0 0 ...
## $ HeadInjury : int 0 0 0 0 0 0 1 0 0 0 ...
## $ Hypertension : int 0 0 0 0 0 0 0 0 1 0 ...
## $ SystolicBP : int 142 115 99 118 94 168 143 117 117 130 ...
## $ DiastolicBP : int 72 64 116 115 117 62 88 63 119 78 ...
## $ CholesterolTotal : num 242 231 284 160 238 ...
## $ CholesterolLDL : num 56.2 193.4 153.3 65.4 92.9 ...
## $ CholesterolHDL : num 33.7 79 69.8 68.5 56.9 ...
## $ CholesterolTriglycerides : num 162.2 294.6 83.6 277.6 291.2 ...
## $ MMSE : num 21.46 20.61 7.36 13.99 13.52 ...
## $ FunctionalAssessment : num 6.52 7.12 5.9 8.97 6.05 ...
## $ MemoryComplaints : int 0 0 0 0 0 0 0 0 0 0 ...
## $ BehavioralProblems : int 0 0 0 1 0 0 0 0 1 1 ...
## $ ADL : num 1.7259 2.5924 7.1195 6.4812 0.0147 ...
## $ Confusion : int 0 0 0 0 0 1 0 1 0 0 ...
## $ Disorientation : int 0 0 1 0 0 0 0 0 0 0 ...
## $ PersonalityChanges : int 0 0 0 0 1 0 0 0 1 0 ...
## $ DifficultyCompletingTasks: int 1 0 1 0 1 0 0 0 0 0 ...
## $ Forgetfulness : int 0 1 0 0 0 0 1 1 0 0 ...
## $ Diagnosis : int 0 0 0 0 0 0 0 1 0 0 ...
Konversi diagnosis jadi faktor
df$Diagnosis <- as.factor(df$Diagnosis)
str(df)
## 'data.frame': 2149 obs. of 33 variables:
## $ Age : int 73 89 73 74 89 86 68 75 72 87 ...
## $ Gender : int 0 0 0 1 0 1 0 0 1 0 ...
## $ Ethnicity : int 0 0 3 0 0 1 3 0 1 0 ...
## $ EducationLevel : int 2 0 1 1 0 1 2 1 0 0 ...
## $ BMI : num 22.9 26.8 17.8 33.8 20.7 ...
## $ Smoking : int 0 0 0 1 0 0 1 0 0 1 ...
## $ AlcoholConsumption : num 13.3 4.54 19.56 12.21 18.45 ...
## $ PhysicalActivity : num 6.33 7.62 7.84 8.43 6.31 ...
## $ DietQuality : num 1.347 0.519 1.826 7.436 0.795 ...
## $ SleepQuality : num 9.03 7.15 9.67 8.39 5.6 ...
## $ FamilyHistoryAlzheimers : int 0 0 1 0 0 0 0 0 0 0 ...
## $ CardiovascularDisease : int 0 0 0 0 0 0 0 0 0 1 ...
## $ Diabetes : int 1 0 0 0 0 1 0 0 0 0 ...
## $ Depression : int 1 0 0 0 0 0 0 0 0 0 ...
## $ HeadInjury : int 0 0 0 0 0 0 1 0 0 0 ...
## $ Hypertension : int 0 0 0 0 0 0 0 0 1 0 ...
## $ SystolicBP : int 142 115 99 118 94 168 143 117 117 130 ...
## $ DiastolicBP : int 72 64 116 115 117 62 88 63 119 78 ...
## $ CholesterolTotal : num 242 231 284 160 238 ...
## $ CholesterolLDL : num 56.2 193.4 153.3 65.4 92.9 ...
## $ CholesterolHDL : num 33.7 79 69.8 68.5 56.9 ...
## $ CholesterolTriglycerides : num 162.2 294.6 83.6 277.6 291.2 ...
## $ MMSE : num 21.46 20.61 7.36 13.99 13.52 ...
## $ FunctionalAssessment : num 6.52 7.12 5.9 8.97 6.05 ...
## $ MemoryComplaints : int 0 0 0 0 0 0 0 0 0 0 ...
## $ BehavioralProblems : int 0 0 0 1 0 0 0 0 1 1 ...
## $ ADL : num 1.7259 2.5924 7.1195 6.4812 0.0147 ...
## $ Confusion : int 0 0 0 0 0 1 0 1 0 0 ...
## $ Disorientation : int 0 0 1 0 0 0 0 0 0 0 ...
## $ PersonalityChanges : int 0 0 0 0 1 0 0 0 1 0 ...
## $ DifficultyCompletingTasks: int 1 0 1 0 1 0 0 0 0 0 ...
## $ Forgetfulness : int 0 1 0 0 0 0 1 1 0 0 ...
## $ Diagnosis : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
Menampilkan 10 baris pertama data
head(df, 10)
## Age Gender Ethnicity EducationLevel BMI Smoking AlcoholConsumption
## 1 73 0 0 2 22.92775 0 13.2972177
## 2 89 0 0 0 26.82768 0 4.5425238
## 3 73 0 3 1 17.79588 0 19.5550845
## 4 74 1 0 1 33.80082 1 12.2092655
## 5 89 0 0 0 20.71697 0 18.4543561
## 6 86 1 1 1 30.62689 0 4.1401438
## 7 68 0 3 2 38.38762 1 0.6460473
## 8 75 0 0 1 18.77601 0 13.7238257
## 9 72 1 1 0 27.83319 0 12.1678476
## 10 87 0 0 0 35.45630 1 16.0286882
## PhysicalActivity DietQuality SleepQuality FamilyHistoryAlzheimers
## 1 6.3271125 1.3472143 9.025679 0
## 2 7.6198845 0.5187671 7.151293 0
## 3 7.8449878 1.8263347 9.673574 1
## 4 8.4280014 7.4356041 8.392554 0
## 5 6.3104607 0.7954975 5.597238 0
## 6 0.2110616 1.5849220 7.261953 0
## 7 9.2576949 5.8973879 5.477686 0
## 8 4.6494507 8.3419032 4.213210 0
## 9 1.5313598 6.7368820 5.748224 0
## 10 6.4407727 8.0860191 7.551773 0
## CardiovascularDisease Diabetes Depression HeadInjury Hypertension SystolicBP
## 1 0 1 1 0 0 142
## 2 0 0 0 0 0 115
## 3 0 0 0 0 0 99
## 4 0 0 0 0 0 118
## 5 0 0 0 0 0 94
## 6 0 1 0 0 0 168
## 7 0 0 0 1 0 143
## 8 0 0 0 0 0 117
## 9 0 0 0 0 1 117
## 10 1 0 0 0 0 130
## DiastolicBP CholesterolTotal CholesterolLDL CholesterolHDL
## 1 72 242.3668 56.15090 33.68256
## 2 64 231.1626 193.40800 79.02848
## 3 116 284.1819 153.32276 69.77229
## 4 115 159.5822 65.36664 68.45749
## 5 117 237.6022 92.86970 56.87430
## 6 62 280.7125 198.33463 79.08050
## 7 88 263.7341 52.47067 66.53337
## 8 63 151.3831 69.62351 77.34682
## 9 119 233.6058 144.04574 43.07589
## 10 78 281.6301 130.49758 74.29125
## CholesterolTriglycerides MMSE FunctionalAssessment MemoryComplaints
## 1 162.18914 21.463532 6.518877 0
## 2 294.63091 20.613267 7.118696 0
## 3 83.63832 7.356249 5.895077 0
## 4 277.57736 13.991127 8.965106 0
## 5 291.19878 13.517609 6.045039 0
## 6 263.94365 27.517529 5.510144 0
## 7 216.48917 1.964413 6.062124 0
## 8 210.57087 10.139568 3.401374 0
## 9 151.16419 25.820732 7.396061 0
## 10 144.17597 28.388409 1.148904 0
## BehavioralProblems ADL Confusion Disorientation PersonalityChanges
## 1 0 1.72588346 0 0 0
## 2 0 2.59242413 0 0 0
## 3 0 7.11954774 0 1 0
## 4 1 6.48122586 0 0 0
## 5 0 0.01469122 0 0 1
## 6 0 9.01568628 1 0 0
## 7 0 9.23632828 0 0 0
## 8 0 4.51724827 1 0 0
## 9 1 0.75623181 0 0 1
## 10 1 4.55439387 0 0 0
## DifficultyCompletingTasks Forgetfulness Diagnosis
## 1 1 0 0
## 2 0 1 0
## 3 1 0 0
## 4 0 0 0
## 5 1 0 0
## 6 0 0 0
## 7 0 1 0
## 8 0 1 1
## 9 0 0 0
## 10 0 0 0
Memberi label pada level faktor diagnosis (0 = "Tidak Alzheimer", 1 = "Alzheimer")
df$Diagnosis <- factor(df$Diagnosis, levels = c(0,1), labels = c("Tidak Alzheimer", "Alzheimer"))
Standardisasi variabel numerik dan visualisasi distribusi diagnosis Alzheimer
# Flag the columns that is.numeric() reports as numeric (integer or double).
num_cols <- sapply(df, is.numeric)
# Keep the class label out of the scaling step.
num_cols["Diagnosis"] <- FALSE
# Leave Gender on its original 0/1 coding.
num_cols["Gender"] <- FALSE
# Centre and scale every remaining numeric column in place (scale() defaults).
# NOTE(review): this runs on the full data set, before the train/test split
# further down, and it also rescales other integer-coded columns such as
# MemoryComplaints -- confirm that both are intended.
df[num_cols] <- scale(df[num_cols])
# Bar chart of how many patients fall in each Diagnosis level.
ggplot(df, aes(x = Diagnosis)) +
geom_bar(fill = "lightblue") +
ggtitle("Distribusi Diagnosis Alzheimer") +
xlab("Status Diagnosis") + ylab("Jumlah Pasien")
Menampilkan jumlah tiap kategori pada label Diagnosis
cat("Jumlah tiap kategori:\n")
## Jumlah tiap kategori:
print(table(df$Diagnosis))
##
## Tidak Alzheimer Alzheimer
## 1389 760
Menampilkan persentase label diagnosis
label_percentages <- prop.table(table(df$Diagnosis)) * 100
cat("\nPersentase Tiap Label Diagnosis:\n")
##
## Persentase Tiap Label Diagnosis:
print(round(label_percentages, 2))
##
## Tidak Alzheimer Alzheimer
## 64.63 35.37
Memilih variabel numerik
valid_vars <- names(df)[sapply(df, function(x) is.numeric(x) && length(unique(x)) > 1)]
Menghitung p-value ANOVA
# One-way ANOVA p-value of each candidate variable against Diagnosis.
#
# For every name in valid_vars, fits `<var> ~ Diagnosis` with aov() on df and
# keeps the p-value of the Diagnosis term. vapply() pins the result to a named
# double vector even when valid_vars is empty, where sapply() would return a
# list. A variable whose model cannot be fitted gets NA_real_ plus a warning
# that names it, instead of failing silently.
anova_pvals <- vapply(valid_vars, function(var) {
  # Backticks keep column names that are not syntactic R names valid.
  formula <- as.formula(paste0("`", var, "` ~ Diagnosis"))
  tryCatch({
    aov_model <- aov(formula, data = df)
    # The first row of the ANOVA table is the Diagnosis term.
    pval <- summary(aov_model)[[1]][["Pr(>F)"]][1]
    # Guard against a table without a p-value column (e.g. no residual df).
    if (length(pval) == 1L) as.numeric(pval) else NA_real_
  }, error = function(e) {
    warning("ANOVA failed for ", var, ": ", conditionMessage(e), call. = FALSE)
    NA_real_
  })
}, numeric(1))
Seleksi fitur
# Keep the variables whose ANOVA p-value is below the 5% level.
# which() drops NA p-values (variables whose ANOVA could not be computed);
# subsetting with the raw logical vector would return NA names for them and
# break the column selection that uses selected_features.
selected_features <- names(anova_pvals)[which(anova_pvals < 0.05)]
cat("Fitur terpilih (p < 0.05):\n")
## Fitur terpilih (p < 0.05):
print(selected_features)
## [1] "EducationLevel" "SleepQuality" "CholesterolHDL"
## [4] "MMSE" "FunctionalAssessment" "MemoryComplaints"
## [7] "BehavioralProblems" "ADL"
Membuat dataset berisi diagnosis dan fitur terpilih
final_data <- df[, c("Diagnosis", selected_features)]
head(final_data)
## Diagnosis EducationLevel SleepQuality CholesterolHDL MMSE
## 1 Tidak Alzheimer 0.7886499 1.11965745 -1.1141698 0.77885552
## 2 Tidak Alzheimer -1.4224508 0.05682309 0.8455334 0.68013845
## 3 Tidak Alzheimer -0.3169004 1.48703408 0.4455111 -0.85902164
## 4 Tidak Alzheimer -0.3169004 0.76065615 0.3886897 -0.08870211
## 5 Tidak Alzheimer -1.4224508 -0.82437383 -0.1118981 -0.14367832
## 6 Tidak Alzheimer -0.3169004 0.11957058 0.8477818 1.48173376
## FunctionalAssessment MemoryComplaints BehavioralProblems ADL
## 1 0.4973901 -0.5123573 -0.4311563 -1.1041775
## 2 0.7047429 -0.5123573 -0.4311563 -0.8104125
## 3 0.2817472 -0.5123573 -0.4311563 0.7243229
## 4 1.3430335 -0.5123573 2.3182650 0.5079260
## 5 0.3335878 -0.5123573 -0.4311563 -1.6842869
## 6 0.1486786 -0.5123573 -0.4311563 1.3671308
Training dan testing (70:30)
# Fix the RNG seed so the random partition below is reproducible.
set.seed(123)
# caret::createDataPartition draws row indices for about 70% of the rows, using
# Diagnosis as the outcome; list = FALSE returns them as a matrix, not a list.
train_idx <- createDataPartition(final_data$Diagnosis, p = 0.7, list = FALSE)
# The selected rows form the training set; all remaining rows form the test set.
train_data <- final_data[train_idx, ]
test_data <- final_data[-train_idx, ]
Simpan training dan testing ke file csv
write.csv(train_data, "C:/Users/shafi/OneDrive/Documents/Semester 4/Analisis Multivariat/Project uas/train_data.csv", row.names = FALSE)
write.csv(test_data, "C:/Users/shafi/OneDrive/Documents/Semester 4/Analisis Multivariat/Project uas/test_data.csv", row.names = FALSE)
Menampilkan distribusi diagnosis sebelum oversampling
cat("Distribusi sebelum oversampling:\n")
## Distribusi sebelum oversampling:
print(table(train_data$Diagnosis))
##
## Tidak Alzheimer Alzheimer
## 973 532
Oversampling menggunakan ROSE (catatan: kode yang membentuk objek `data_balanced` tidak ditampilkan pada dokumen ini)
Menampilkan distribusi setelah oversampling
cat("Distribusi setelah oversampling:\n")
## Distribusi setelah oversampling:
print(table(data_balanced$Diagnosis))
##
## Tidak Alzheimer Alzheimer
## 973 973
Memastikan label diagnosis jadi faktor
data_balanced$Diagnosis <- factor(data_balanced$Diagnosis, levels = c("Tidak Alzheimer", "Alzheimer"))
Menyimpan data oversampled
write.csv(data_balanced, "C:/Users/shafi/OneDrive/Documents/Semester 4/Analisis Multivariat/Project uas/train_data_oversampled.csv", row.names = FALSE)
Import library
library(caret)
library(MASS)
library(dplyr)
library(MVN)
## Warning: package 'MVN' was built under R version 4.4.3
library(biotools)
## Warning: package 'biotools' was built under R version 4.4.3
## ---
## biotools version 4.3
Load data
# Read the oversampled training set back from CSV.
# NOTE(review): this is a relative path, while the file was written with an
# absolute path -- it only resolves if the working directory is that folder.
data <- read.csv("train_data_oversampled.csv")
# Rebuild Diagnosis as a factor with "Tidak Alzheimer" as the first level.
data$Diagnosis <- factor(data$Diagnosis,
levels = c("Tidak Alzheimer", "Alzheimer"))
Multivariate normality
# Royston's multivariate normality test on the predictors only (Diagnosis is
# dropped); both plot options are switched off.
# NOTE(review): the test is run on all rows pooled together, not separately
# per Diagnosis group -- confirm this matches the assumption being checked.
mvn_res <- mvn(data %>% dplyr::select(-Diagnosis),
mvnTest = "royston",
univariatePlot = FALSE,
multivariatePlot = FALSE)
# Show only the multivariate test result.
print(mvn_res$multivariateNormality)
## Test H p value MVN
## 1 Royston 1216.625 2.45327e-257 NO
Homogenitas matriks kovarians Box M
# Box's M test: compares the covariance matrices of the predictors (Diagnosis
# dropped) between the groups defined by data$Diagnosis.
boxm_res <- boxM(data %>% dplyr::select(-Diagnosis),
data$Diagnosis)
print(boxm_res)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: data %>% dplyr::select(-Diagnosis)
## Chi-Sq (approx.) = 467.98, df = 36, p-value < 2.2e-16
Bartlett’s test of sphericity
# Bartlett's test of sphericity.
#
# Tests the null hypothesis that the correlation matrix of the columns of `df`
# is an identity matrix (i.e. the variables are uncorrelated).
#
# @param df Data frame (or matrix) of numeric variables, one per column.
#   Rows containing missing values are dropped before the test.
# @return One-row data frame with `statistic` (chi-square statistic), `df`
#   (degrees of freedom, p * (p - 1) / 2) and `p.value` (upper tail).
bartlett_test <- function(df) {
  # drop = FALSE keeps a single-column input as a data frame.
  x <- df[complete.cases(df), , drop = FALSE]
  n <- nrow(x)
  p <- ncol(x)
  # det(R) lies in [0, 1], so log(det(R)) <= 0. The leading minus sign makes
  # the statistic non-negative; without it the statistic is negative and the
  # upper-tail p-value is pushed to 1.
  chisq <- -(n - 1 - (2 * p + 5) / 6) * log(det(cor(x)))
  df_chi <- p * (p - 1) / 2
  pval <- pchisq(chisq, df_chi, lower.tail = FALSE)
  data.frame(statistic = chisq, df = df_chi, p.value = pval)
}
bart_res <- bartlett_test(data %>% dplyr::select(-Diagnosis))
print(bart_res)
## statistic df p.value
## 1 -83.59947 28 1
# Fit a linear discriminant model with Diagnosis as the class and every other
# column of `data` as a predictor.
lda_model <- lda(Diagnosis ~ ., data = data)
# NOTE(review): predictions are made on the same rows the model was fitted on,
# so the statistics below describe in-sample fit, not held-out performance.
pred <- predict(lda_model, data)$class
# Cross-tabulate predicted against reference classes and derive the summary
# statistics.
conf <- confusionMatrix(pred, data$Diagnosis)
print(conf)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Tidak Alzheimer Alzheimer
## Tidak Alzheimer 811 134
## Alzheimer 162 839
##
## Accuracy : 0.8479
## 95% CI : (0.8312, 0.8636)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6958
##
## Mcnemar's Test P-Value : 0.1166
##
## Sensitivity : 0.8335
## Specificity : 0.8623
## Pos Pred Value : 0.8582
## Neg Pred Value : 0.8382
## Prevalence : 0.5000
## Detection Rate : 0.4168
## Detection Prevalence : 0.4856
## Balanced Accuracy : 0.8479
##
## 'Positive' Class : Tidak Alzheimer
##
Confusion matrix
# Heat map of the confusion matrix.
# conf$table is laid out as Prediction x Reference, so as.data.frame() returns
# its columns in the order (Prediction, Reference, Freq). The names below
# follow that order; naming the first column "Actual" would swap the axes of
# the plot.
cm_df <- as.data.frame(conf$table)
names(cm_df) <- c("Predicted", "Actual", "Freq")
library(ggplot2)
ggplot(cm_df, aes(x = Actual, y = Predicted, fill = Freq)) +
  geom_tile(color = "grey70") +
  # Print the cell count on top of each tile.
  geom_text(aes(label = Freq), size = 5) +
  scale_fill_gradient(low = "white", high = "steelblue") +
  labs(
    title = "Confusion Matrix",
    x = "Data Asli",
    y = "Prediksi",
    fill = "Jumlah"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid = element_blank()
  )
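Analisis Diskriminan Linear (LDA) adalah metode klasifikasi yang memisahkan kelompok data berdasarkan kombinasi linear fitur. Pada hasil ini, model LDA mencapai akurasi sebesar 84,79% pada data latih hasil oversampling, yaitu data yang sama dengan yang digunakan untuk membangun model. Angka ini menunjukkan bahwa model cukup baik dalam membedakan antara penderita Alzheimer dan non-Alzheimer pada data latih; kinerja pada data uji perlu dievaluasi secara terpisah.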