Klasifikasi Status Alzheimer Menggunakan Regresi Logistik

Nama : Jesika Trisyanda

Dosen Pengampu :Ike Fitriyaningsih, M.Si

Mata Kuliah : Analisis Multivariat

Universitas Negeri Surabaya

suppressPackageStartupMessages({
  library(ROSE)
  library(dplyr)
  library(ggplot2)
  library(ggpubr)
  library(tidyr)
  library(ggcorrplot)
  library(tidyverse)
  library(ResourceSelection)
  library(tidymodels)
  library(MASS)
  library(corrplot)
  library(psych)
  library(car)
  library(caret)
  library(parsnip)
})

PRE PROCESSING

df <- read.csv("alzheimers_disease_data.csv")

# Info struktur data
str(df)

## 'data.frame':    2149 obs. of  35 variables:
##  $ PatientID                : int  4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 ...
##  $ Age                      : int  73 89 73 74 89 86 68 75 72 87 ...
##  $ Gender                   : int  0 0 0 1 0 1 0 0 1 0 ...
##  $ Ethnicity                : int  0 0 3 0 0 1 3 0 1 0 ...
##  $ EducationLevel           : int  2 0 1 1 0 1 2 1 0 0 ...
##  $ BMI                      : num  22.9 26.8 17.8 33.8 20.7 ...
##  $ Smoking                  : int  0 0 0 1 0 0 1 0 0 1 ...
##  $ AlcoholConsumption       : num  13.3 4.54 19.56 12.21 18.45 ...
##  $ PhysicalActivity         : num  6.33 7.62 7.84 8.43 6.31 ...
##  $ DietQuality              : num  1.347 0.519 1.826 7.436 0.795 ...
##  $ SleepQuality             : num  9.03 7.15 9.67 8.39 5.6 ...
##  $ FamilyHistoryAlzheimers  : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ CardiovascularDisease    : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Diabetes                 : int  1 0 0 0 0 1 0 0 0 0 ...
##  $ Depression               : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ HeadInjury               : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ Hypertension             : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ SystolicBP               : int  142 115 99 118 94 168 143 117 117 130 ...
##  $ DiastolicBP              : int  72 64 116 115 117 62 88 63 119 78 ...
##  $ CholesterolTotal         : num  242 231 284 160 238 ...
##  $ CholesterolLDL           : num  56.2 193.4 153.3 65.4 92.9 ...
##  $ CholesterolHDL           : num  33.7 79 69.8 68.5 56.9 ...
##  $ CholesterolTriglycerides : num  162.2 294.6 83.6 277.6 291.2 ...
##  $ MMSE                     : num  21.46 20.61 7.36 13.99 13.52 ...
##  $ FunctionalAssessment     : num  6.52 7.12 5.9 8.97 6.05 ...
##  $ MemoryComplaints         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BehavioralProblems       : int  0 0 0 1 0 0 0 0 1 1 ...
##  $ ADL                      : num  1.7259 2.5924 7.1195 6.4812 0.0147 ...
##  $ Confusion                : int  0 0 0 0 0 1 0 1 0 0 ...
##  $ Disorientation           : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ PersonalityChanges       : int  0 0 0 0 1 0 0 0 1 0 ...
##  $ DifficultyCompletingTasks: int  1 0 1 0 1 0 0 0 0 0 ...
##  $ Forgetfulness            : int  0 1 0 0 0 0 1 1 0 0 ...
##  $ Diagnosis                : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ DoctorInCharge           : chr  "XXXConfid" "XXXConfid" "XXXConfid" "XXXConfid" ...

# Cek data duplikat
duplicates <- sum(duplicated(df))
cat("Jumlah duplikasi:", duplicates, "\n")

## Jumlah duplikasi: 0

# Cek kolom kosong
colSums(is.na(df))

##                 PatientID                       Age                    Gender 
##                         0                         0                         0 
##                 Ethnicity            EducationLevel                       BMI 
##                         0                         0                         0 
##                   Smoking        AlcoholConsumption          PhysicalActivity 
##                         0                         0                         0 
##               DietQuality              SleepQuality   FamilyHistoryAlzheimers 
##                         0                         0                         0 
##     CardiovascularDisease                  Diabetes                Depression 
##                         0                         0                         0 
##                HeadInjury              Hypertension                SystolicBP 
##                         0                         0                         0 
##               DiastolicBP          CholesterolTotal            CholesterolLDL 
##                         0                         0                         0 
##            CholesterolHDL  CholesterolTriglycerides                      MMSE 
##                         0                         0                         0 
##      FunctionalAssessment          MemoryComplaints        BehavioralProblems 
##                         0                         0                         0 
##                       ADL                 Confusion            Disorientation 
##                         0                         0                         0 
##        PersonalityChanges DifficultyCompletingTasks             Forgetfulness 
##                         0                         0                         0 
##                 Diagnosis            DoctorInCharge 
##                         0                         0

# Drop kolom tidak diperlukan
df <- df %>% dplyr::select(-PatientID, -DoctorInCharge)

# Konversi Diagnosis jadi faktor
df$Diagnosis <- as.factor(df$Diagnosis)

# Ubah nilai Diagnosis jadi faktor dengan label
df$Diagnosis <- factor(df$Diagnosis, levels = c(0, 1), labels = c("Tidak Alzheimer", "Alzheimer"))

EDA

names(df)

##  [1] "Age"                       "Gender"                   
##  [3] "Ethnicity"                 "EducationLevel"           
##  [5] "BMI"                       "Smoking"                  
##  [7] "AlcoholConsumption"        "PhysicalActivity"         
##  [9] "DietQuality"               "SleepQuality"             
## [11] "FamilyHistoryAlzheimers"   "CardiovascularDisease"    
## [13] "Diabetes"                  "Depression"               
## [15] "HeadInjury"                "Hypertension"             
## [17] "SystolicBP"                "DiastolicBP"              
## [19] "CholesterolTotal"          "CholesterolLDL"           
## [21] "CholesterolHDL"            "CholesterolTriglycerides" 
## [23] "MMSE"                      "FunctionalAssessment"     
## [25] "MemoryComplaints"          "BehavioralProblems"       
## [27] "ADL"                       "Confusion"                
## [29] "Disorientation"            "PersonalityChanges"       
## [31] "DifficultyCompletingTasks" "Forgetfulness"            
## [33] "Diagnosis"

summary(df)

##       Age            Gender         Ethnicity      EducationLevel 
##  Min.   :60.00   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:67.00   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.000  
##  Median :75.00   Median :1.0000   Median :0.0000   Median :1.000  
##  Mean   :74.91   Mean   :0.5063   Mean   :0.6975   Mean   :1.287  
##  3rd Qu.:83.00   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:2.000  
##  Max.   :90.00   Max.   :1.0000   Max.   :3.0000   Max.   :3.000  
##       BMI           Smoking       AlcoholConsumption  PhysicalActivity  
##  Min.   :15.01   Min.   :0.0000   Min.   : 0.002003   Min.   :0.003616  
##  1st Qu.:21.61   1st Qu.:0.0000   1st Qu.: 5.139810   1st Qu.:2.570626  
##  Median :27.82   Median :0.0000   Median : 9.934412   Median :4.766424  
##  Mean   :27.66   Mean   :0.2885   Mean   :10.039442   Mean   :4.920202  
##  3rd Qu.:33.87   3rd Qu.:1.0000   3rd Qu.:15.157931   3rd Qu.:7.427899  
##  Max.   :39.99   Max.   :1.0000   Max.   :19.989293   Max.   :9.987429  
##   DietQuality        SleepQuality    FamilyHistoryAlzheimers
##  Min.   :0.009385   Min.   : 4.003   Min.   :0.0000         
##  1st Qu.:2.458455   1st Qu.: 5.483   1st Qu.:0.0000         
##  Median :5.076087   Median : 7.116   Median :0.0000         
##  Mean   :4.993138   Mean   : 7.051   Mean   :0.2522         
##  3rd Qu.:7.558625   3rd Qu.: 8.563   3rd Qu.:1.0000         
##  Max.   :9.998346   Max.   :10.000   Max.   :1.0000         
##  CardiovascularDisease    Diabetes        Depression       HeadInjury    
##  Min.   :0.0000        Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000        1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000        Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.1443        Mean   :0.1508   Mean   :0.2006   Mean   :0.0926  
##  3rd Qu.:0.0000        3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.0000        Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##   Hypertension      SystolicBP     DiastolicBP     CholesterolTotal
##  Min.   :0.0000   Min.   : 90.0   Min.   : 60.00   Min.   :150.1   
##  1st Qu.:0.0000   1st Qu.:112.0   1st Qu.: 74.00   1st Qu.:190.3   
##  Median :0.0000   Median :134.0   Median : 91.00   Median :225.1   
##  Mean   :0.1489   Mean   :134.3   Mean   : 89.85   Mean   :225.2   
##  3rd Qu.:0.0000   3rd Qu.:157.0   3rd Qu.:105.00   3rd Qu.:262.0   
##  Max.   :1.0000   Max.   :179.0   Max.   :119.00   Max.   :300.0   
##  CholesterolLDL   CholesterolHDL  CholesterolTriglycerides      MMSE          
##  Min.   : 50.23   Min.   :20.00   Min.   : 50.41           Min.   : 0.005312  
##  1st Qu.: 87.20   1st Qu.:39.10   1st Qu.:137.58           1st Qu.: 7.167602  
##  Median :123.34   Median :59.77   Median :230.30           Median :14.441660  
##  Mean   :124.34   Mean   :59.46   Mean   :228.28           Mean   :14.755132  
##  3rd Qu.:161.73   3rd Qu.:78.94   3rd Qu.:314.84           3rd Qu.:22.161028  
##  Max.   :199.97   Max.   :99.98   Max.   :399.94           Max.   :29.991381  
##  FunctionalAssessment MemoryComplaints BehavioralProblems      ADL           
##  Min.   :0.0004596    Min.   :0.000    Min.   :0.0000     Min.   : 0.001288  
##  1st Qu.:2.5662809    1st Qu.:0.000    1st Qu.:0.0000     1st Qu.: 2.342836  
##  Median :5.0944387    Median :0.000    Median :0.0000     Median : 5.038973  
##  Mean   :5.0800550    Mean   :0.208    Mean   :0.1568     Mean   : 4.982958  
##  3rd Qu.:7.5469813    3rd Qu.:0.000    3rd Qu.:0.0000     3rd Qu.: 7.581490  
##  Max.   :9.9964671    Max.   :1.000    Max.   :1.0000     Max.   : 9.999747  
##    Confusion      Disorientation   PersonalityChanges DifficultyCompletingTasks
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000     Min.   :0.0000           
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000     1st Qu.:0.0000           
##  Median :0.0000   Median :0.0000   Median :0.0000     Median :0.0000           
##  Mean   :0.2052   Mean   :0.1582   Mean   :0.1508     Mean   :0.1587           
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000     3rd Qu.:0.0000           
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000     Max.   :1.0000           
##  Forgetfulness              Diagnosis   
##  Min.   :0.0000   Tidak Alzheimer:1389  
##  1st Qu.:0.0000   Alzheimer      : 760  
##  Median :0.0000                         
##  Mean   :0.3015                         
##  3rd Qu.:1.0000                         
##  Max.   :1.0000

# Histogram umur
ggplot(df, aes(x = Age)) + geom_histogram(binwidth = 5, fill = "skyblue", color = "black") + theme_minimal()

df$Gender <- factor(df$Gender, levels = c(0, 1), labels = c("Male", "Female"))
ggplot(df, aes(x = Gender)) +
  geom_bar(fill = "skyblue", color = "black") +
  labs(title = "Distribusi Gender", x = "Gender", y = "Jumlah") +
  theme_minimal()

ggplot(df, aes(x = Diagnosis, fill = Gender)) +
  geom_bar(position = "dodge") +
  labs(title = "Distribusi Gender terhadap Diagnosis",
       x = "Diagnosis", y = "Jumlah") +
  theme_minimal()

ggplot(df, aes(x = Diagnosis, y = Age, fill = Diagnosis)) +
  geom_boxplot() +
  labs(title = "Distribusi Usia berdasarkan Diagnosis",
       x = "Diagnosis", y = "Usia") +
  theme_minimal()

ggplot(df, aes(x = Age)) +
  geom_histogram(binwidth = 5, fill ="skyblue", color = "black") +
  facet_wrap(~ Diagnosis) +
  labs(title = "Histogram Usia terhadap Diagnosis",
       x = "Usia", y = "Jumlah") +
  theme_minimal()

ggplot(df, aes(x = Diagnosis)) +
  geom_bar(fill = "skyblue", color = "black") +
  ggtitle("Distribusi Diagnosis Alzheimer") +
  xlab("Status Diagnosis") +
  ylab("Jumlah Pasien")

cat("Jumlah tiap kategori:\n")

## Jumlah tiap kategori:

print(table(df$Diagnosis))

## 
## Tidak Alzheimer       Alzheimer 
##            1389             760

label_percentages <- prop.table(table(df$Diagnosis)) * 100
cat("\nPersentase Tiap Label Diagnosis:\n")

## 
## Persentase Tiap Label Diagnosis:

print(round(label_percentages, 2))

## 
## Tidak Alzheimer       Alzheimer 
##           64.63           35.37

# Korelasi antar fitur numerik


numeric_df <- df[, sapply(df, is.numeric)]

library(corrplot)

corrplot(cor(numeric_df),
         method = "color",
         tl.cex = 0.6,
         addCoef.col = "black",
         number.cex = 0.3,
         type = "full")

FITUR SELECTION MENGGUNAKAN ANOVA

# Standarisasi variabel numerik (kecuali Diagnosis)
num_cols <- sapply(df, is.numeric)
df[num_cols] <- scale(df[num_cols])
  
valid_vars <- names(df)[
  sapply(df, function(x) is.numeric(x) && length(unique(x)) > 1)
]

# Hitung p-value ANOVA
anova_pvals <- sapply(valid_vars, function(var) {
  formula <- as.formula(paste0("`", var, "` ~ Diagnosis"))
  aov_model <- aov(formula, data = df)
  summary(aov_model)[[1]][["Pr(>F)"]][1]
})

# Ambil fitur dengan p-value < 0.05
selected_features <- names(anova_pvals[anova_pvals < 0.05])
print(selected_features)

## [1] "EducationLevel"       "SleepQuality"         "CholesterolHDL"      
## [4] "MMSE"                 "FunctionalAssessment" "MemoryComplaints"    
## [7] "BehavioralProblems"   "ADL"

final_data <- df[, c("Diagnosis", selected_features)]
# Cek hasil
head(final_data)

##         Diagnosis EducationLevel SleepQuality CholesterolHDL        MMSE
## 1 Tidak Alzheimer      0.7886499   1.11965745     -1.1141698  0.77885552
## 2 Tidak Alzheimer     -1.4224508   0.05682309      0.8455334  0.68013845
## 3 Tidak Alzheimer     -0.3169004   1.48703408      0.4455111 -0.85902164
## 4 Tidak Alzheimer     -0.3169004   0.76065615      0.3886897 -0.08870211
## 5 Tidak Alzheimer     -1.4224508  -0.82437383     -0.1118981 -0.14367832
## 6 Tidak Alzheimer     -0.3169004   0.11957058      0.8477818  1.48173376
##   FunctionalAssessment MemoryComplaints BehavioralProblems        ADL
## 1            0.4973901       -0.5123573         -0.4311563 -1.1041775
## 2            0.7047429       -0.5123573         -0.4311563 -0.8104125
## 3            0.2817472       -0.5123573         -0.4311563  0.7243229
## 4            1.3430335       -0.5123573          2.3182650  0.5079260
## 5            0.3335878       -0.5123573         -0.4311563 -1.6842869
## 6            0.1486786       -0.5123573         -0.4311563  1.3671308

set.seed(123)
sample_size <- floor(0.7 * nrow(final_data))
train_indices <- sample(seq_len(nrow(final_data)), size = sample_size)

UJI ASUMSI SEBELUM MODELING

# (VIF)
model_vif <- glm(Diagnosis ~ ., data = final_data, family = binomial)
vif_values <- vif(model_vif)
print("VIF Tiap Variabel:")

## [1] "VIF Tiap Variabel:"

print(vif_values)

##       EducationLevel         SleepQuality       CholesterolHDL 
##             1.003401             1.002325             1.005188 
##                 MMSE FunctionalAssessment     MemoryComplaints 
##             1.192906             1.293479             1.250640 
##   BehavioralProblems                  ADL 
##             1.259085             1.302034

# Outlier dan leverage plot
plot(model_vif, which = 4, caption = "Influence plot")

# Hosmer-Lemeshow Test
hoslem <- hoslem.test(as.integer(final_data$Diagnosis == "Alzheimer"), fitted(model_vif), g = 10)
print(hoslem)

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  as.integer(final_data$Diagnosis == "Alzheimer"), fitted(model_vif)
## X-squared = 62.119, df = 8, p-value = 1.787e-10

# Plot residual deviance
plot(fitted(model_vif), residuals(model_vif, type = "deviance"),
     xlab = "Fitted values", ylab = "Deviance Residuals",
     main = "Residuals vs Fitted")
abline(h = 0, col = "red")

SPLIT DATA

train_data <- final_data[train_indices, ]
test_data <- final_data[-train_indices, ]

# Simpan ke CSV
write.csv(train_data, "train_data.csv", row.names = FALSE)
write.csv(test_data, "test_data.csv", row.names = FALSE)

set.seed(123)
train_idx <- createDataPartition(final_data$Diagnosis, p = 0.7, list = FALSE)
train_data <- final_data[train_idx, ]
test_data  <- final_data[-train_idx, ]

write.csv(train_data, "train_data.csv", row.names = FALSE)
write.csv(test_data, "test_data.csv", row.names = FALSE)

OVERSAMPLING

data_train <- read.csv("train_data.csv")  

ggplot(data_train, aes(x = Diagnosis)) +
  geom_bar(fill = "lightblue", color = "black") +
  ggtitle("Distribusi Diagnosis Alzheimer") +
  xlab("Status Diagnosis") +
  ylab("Jumlah Pasien")

cat("Jumlah tiap kategori:\n")

## Jumlah tiap kategori:

print(table(data_train$Diagnosis))

## 
##       Alzheimer Tidak Alzheimer 
##             532             973

cat("\nPersentase Tiap Label Diagnosis:\n")

## 
## Persentase Tiap Label Diagnosis:

print(round(label_percentages, 2))

## 
## Tidak Alzheimer       Alzheimer 
##           64.63           35.37

set.seed(123)  
data_balanced <- ovun.sample(Diagnosis ~ ., data = data_train, method = "over", N = max(table(data_train$Diagnosis)) * length(unique(data_train$Diagnosis)))$data

ggplot(data_balanced, aes(x = Diagnosis)) +
  geom_bar(fill = "lightblue", color = "black") +
  ggtitle("Distribusi Diagnosis Alzheimer (Setelah Oversampling)") +
  xlab("Status Diagnosis") +
  ylab("Jumlah Pasien")

cat("Jumlah tiap kategori (setelah oversampling):\n")

## Jumlah tiap kategori (setelah oversampling):

print(table(data_balanced$Diagnosis))

## 
##       Alzheimer Tidak Alzheimer 
##             973             973

data_balanced$Diagnosis <- ifelse(data_balanced$Diagnosis == "Alzheimer", 1, 0)
write.csv(data_balanced, "train_data_oversampled.csv", row.names = FALSE)

MODELING LOGISTIK REGRESI

train_data <- read.csv("train_data_oversampled.csv")
train_data$Diagnosis <- factor(train_data$Diagnosis, 
                               levels = c(0, 1), 
                               labels = c("Tidak Alzheimer", "Alzheimer"))

summary(train_data)

##            Diagnosis   EducationLevel      SleepQuality      CholesterolHDL    
##  Tidak Alzheimer:973   Min.   :-1.42245   Min.   :-1.72857   Min.   :-1.70534  
##  Alzheimer      :973   1st Qu.:-0.31690   1st Qu.:-0.86290   1st Qu.:-0.83524  
##                        Median :-0.31690   Median : 0.04097   Median : 0.06394  
##                        Mean   :-0.02148   Mean   :-0.01481   Mean   : 0.02811  
##                        3rd Qu.: 0.78865   3rd Qu.: 0.82859   3rd Qu.: 0.85873  
##                        Max.   : 1.89420   Max.   : 1.67204   Max.   : 1.75100  
##       MMSE          FunctionalAssessment MemoryComplaints   BehavioralProblems
##  Min.   :-1.71248   Min.   :-1.7520      Min.   :-0.51236   Min.   :-0.43116  
##  1st Qu.:-0.94214   1st Qu.:-1.0373      1st Qu.:-0.51236   1st Qu.:-0.43116  
##  Median :-0.18240   Median :-0.1852      Median :-0.51236   Median :-0.43116  
##  Mean   :-0.08885   Mean   :-0.1454      Mean   : 0.09016   Mean   : 0.03933  
##  3rd Qu.: 0.74578   3rd Qu.: 0.7191      3rd Qu.:-0.51236   3rd Qu.:-0.43116  
##  Max.   : 1.76895   Max.   : 1.6996      Max.   : 1.95085   Max.   : 2.31826  
##       ADL         
##  Min.   :-1.6888  
##  1st Qu.:-1.0520  
##  Median :-0.2259  
##  Mean   :-0.1426  
##  3rd Qu.: 0.7321  
##  Max.   : 1.7007

model <- logistic_reg(mixture = 1, penalty = 0.1) %>%
  set_engine("glmnet") %>%
  set_mode("classification") %>%
  fit(Diagnosis ~ ., data = train_data)

tidy(model)

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loaded glmnet 4.1-8

## # A tibble: 9 × 3
##   term                 estimate penalty
##   <chr>                   <dbl>   <dbl>
## 1 (Intercept)           -0.136      0.1
## 2 EducationLevel         0          0.1
## 3 SleepQuality           0          0.1
## 4 CholesterolHDL         0          0.1
## 5 MMSE                  -0.0758     0.1
## 6 FunctionalAssessment  -0.457      0.1
## 7 MemoryComplaints       0.172      0.1
## 8 BehavioralProblems     0.0147     0.1
## 9 ADL                   -0.305      0.1

table(train_data$Diagnosis)

## 
## Tidak Alzheimer       Alzheimer 
##             973             973

head(train_data, 5)

##         Diagnosis EducationLevel SleepQuality CholesterolHDL       MMSE
## 1 Tidak Alzheimer     -1.4224508   0.05682309      0.8455334  0.6801384
## 2 Tidak Alzheimer     -1.4224508  -0.82437383     -0.1118981 -0.1436783
## 3 Tidak Alzheimer     -0.3169004   0.11957058      0.8477818  1.4817338
## 4 Tidak Alzheimer     -1.4224508  -0.73876003     -0.7082206  1.2847330
## 5 Tidak Alzheimer     -1.4224508   0.28390795      0.6408057  1.5828443
##   FunctionalAssessment MemoryComplaints BehavioralProblems        ADL
## 1            0.7047429       -0.5123573         -0.4311563 -0.8104125
## 2            0.3335878       -0.5123573         -0.4311563 -1.6842869
## 3            0.1486786       -0.5123573         -0.4311563  1.3671308
## 4            0.8006261       -0.5123573          2.3182650 -1.4328981
## 5           -1.3589699       -0.5123573          2.3182650 -0.1452872

CEK MODEL DENGAN DATA TEST

test <- read.csv("test_data.csv")

test$Diagnosis <- factor(test$Diagnosis, levels = c("Tidak Alzheimer", "Alzheimer"))

#Prediksi kelas dengan model
pred_class <- predict(model, new_data = test, type = "class")

pred_class <- factor(pred_class$.pred_class, levels = levels(test$Diagnosis))

#Evaluasi model dengan confusionMatrix
conf_matrix <- confusionMatrix(pred_class, test$Diagnosis)

print(conf_matrix)

## Confusion Matrix and Statistics
## 
##                  Reference
## Prediction        Tidak Alzheimer Alzheimer
##   Tidak Alzheimer             329        59
##   Alzheimer                    87       169
##                                          
##                Accuracy : 0.7733         
##                  95% CI : (0.739, 0.8051)
##     No Information Rate : 0.646          
##     P-Value [Acc > NIR] : 1.787e-12      
##                                          
##                   Kappa : 0.5177         
##                                          
##  Mcnemar's Test P-Value : 0.02545        
##                                          
##             Sensitivity : 0.7909         
##             Specificity : 0.7412         
##          Pos Pred Value : 0.8479         
##          Neg Pred Value : 0.6602         
##              Prevalence : 0.6460         
##          Detection Rate : 0.5109         
##    Detection Prevalence : 0.6025         
##       Balanced Accuracy : 0.7660         
##                                          
##        'Positive' Class : Tidak Alzheimer
##

Script Metode Regresi Logistik

Klasifikasi Status Alzheimer Menggunakan Regresi Logistik

Nama : Jesika Trisyanda

Dosen Pengampu :Ike Fitriyaningsih, M.Si

Mata Kuliah : Analisis Multivariat

Universitas Negeri Surabaya

PRE PROCESSING

EDA

FITUR SELECTION MENGGUNAKAN ANOVA

UJI ASUMSI SEBELUM MODELING

SPLIT DATA

OVERSAMPLING

MODELING LOGISTIK REGRESI

CEK MODEL DENGAN DATA TEST