1) Step 1: Data Import and Preparation

library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
heart_dataset <- read_excel("C:/Users/denih/OneDrive/Dokumen/1. Statmul/3. TUGAS FIX/Tugas 7/heart_dataset.xlsx")
X1 <- heart_dataset$age
X2 <- heart_dataset$thalach
X3 <- heart_dataset$oldpeak
Y <- heart_dataset$target  # Target (dependent) variable
Data <- data.frame(X1 = X1, X2 = X2, X3 = X3, Y = Y)

# Clean the data (handle missing values)
complete_rows <- complete.cases(Data)
Data_clean <- Data[complete_rows, ]
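
As an optional sanity check (not in the original script), the number of rows dropped by the cleaning step can be reported directly:

# Rows removed because of missing values, and rows remaining
sum(!complete_rows)
nrow(Data_clean)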

2) Step 2: Multivariate Normality Test

X <- Data_clean[, 1:3]  # Independent variables
data <- data.matrix(X)

# Compute the mean vector & covariance matrix
mean_vec <- colMeans(data)
cov_data <- cov(data)

# Compute squared Mahalanobis distances
di <- mahalanobis(data, center = mean_vec, cov = cov_data)

# Compute ranks and the corresponding chi-square quantiles
rank <- rank(di)
p <- (rank - 0.5) / nrow(data)
chi_square <- qchisq(p, df = ncol(data))

# Add the results to Data_clean
Data_clean$di_kuadrat <- di
Data_clean$k <- rank
Data_clean$p_k <- p
Data_clean$Chi_Square <- chi_square

head(Data_clean, 10)
##    X1  X2  X3 Y di_kuadrat   k        p_k Chi_Square
## 1  63 150 2.3 1  2.2493741 145 0.47689769  2.2451899
## 2  37 187 3.5 1 13.4452222 299 0.98514851 10.4866862
## 3  41 172 1.4 1  2.9719875 190 0.62541254  3.1126049
## 4  56 178 0.8 1  2.1007748 136 0.44719472  2.0955908
## 5  57 163 0.6 1  0.7210974  33 0.10726073  0.6160899
## 6  57 148 0.4 1  0.4952839  22 0.07095710  0.4532148
## 7  56 153 1.3 1  0.1626224   6 0.01815182  0.1728517
## 8  44 173 0.0 1  1.9460002 125 0.41089109  1.9203163
## 9  52 162 0.5 1  0.3798488  12 0.03795380  0.2892027
## 10 57 174 1.6 1  2.4432572 153 0.50330033  2.3835738
# QQ plot and Kolmogorov-Smirnov test
ks.test(di, "pchisq", df = 3)
## Warning in ks.test.default(di, "pchisq", df = 3): ties should not be present
## for the one-sample Kolmogorov-Smirnov test
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  di
## D = 0.040828, p-value = 0.6934
## alternative hypothesis: two-sided
# QQ plot: squared Mahalanobis distance vs. chi-square quantile
plot(Data_clean$di_kuadrat, Data_clean$Chi_Square,
     main = "QQ Plot", xlab = "Squared Mahalanobis Distance",
     ylab = "Chi-Square Quantile", col = "blue")
abline(lm(Data_clean$Chi_Square ~ Data_clean$di_kuadrat), col = "red")
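
As an additional rule-of-thumb check for multivariate normality, roughly 50% of the squared Mahalanobis distances should fall below the chi-square median; the sketch below (using the objects defined above) computes that proportion:

# Proportion of d² values below the chi-square median with df = 3;
# a value near 0.5 is consistent with multivariate normality
mean(di <= qchisq(0.5, df = ncol(data)))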

3) Step 3: Multicollinearity Test

# Correlations among the independent variables
cor(X)
##            X1         X2         X3
## X1  1.0000000 -0.3985219  0.2100126
## X2 -0.3985219  1.0000000 -0.3441869
## X3  0.2100126 -0.3441869  1.0000000
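
The pairwise correlations are all moderate (|r| < 0.4). As an optional cross-check not in the original code, variance inflation factors can be read off the diagonal of the inverse correlation matrix; values far below 10 indicate no serious multicollinearity:

# VIF of each predictor = diagonal of the inverse correlation matrix
vif_values <- diag(solve(cor(X)))
vif_values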

4) Step 4: Homogeneity Test of the Variance-Covariance Matrices

library(biotools)
## Warning: package 'biotools' was built under R version 4.4.3
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 4.4.3
## ---
## biotools version 4.2
# Split the data by group
X_matrix <- as.matrix(Data_clean[, 1:3])
Y_factor <- as.factor(Data_clean$Y)

# Covariance matrix of each group
grup_data <- split(as.data.frame(X_matrix), Y_factor)
Sj_list <- lapply(grup_data, cov)
Sj_list
## $`0`
##            X1         X2        X3
## X1  63.394742 -23.886279  1.066445
## X2 -23.886279 510.704961 -6.267132
## X3   1.066445  -6.267132  1.690883
## 
## $`1`
##            X1         X2         X3
## X1  91.214930 -96.288211  1.3017775
## X2 -96.288211 367.652846 -2.7249593
## X3   1.301778  -2.724959  0.6094664
# Sample size of each group
n <- sapply(grup_data, nrow)

# Pooled covariance matrix (W)
# W = [(n₁-1)S₁ + (n₂-1)S₂] / [(n₁-1) + (n₂-1)]
W <- ((n[1]-1)*Sj_list[[1]] + (n[2]-1)*Sj_list[[2]]) / (sum(n) - length(n))
W
##            X1         X2        X3
## X1  78.552585 -63.334508  1.194666
## X2 -63.334508 432.762945 -4.337178
## X3   1.194666  -4.337178  1.101673
# Determinant of each group's covariance matrix
det_Sj <- sapply(Sj_list, det)

# Determinant of W
det_W <- det(W)

# Box's M test (uncorrected statistic, without Box's scaling factor)
# M = (n - g) ln|W| - Σ (n_j - 1) ln|S_j|
BoxM <- (sum(n) - length(n)) * log(det_W) - sum((n-1) * log(det_Sj))
BoxM
## [1] 65.80281
# p-value for Box's M
# Box's M is approximately chi-square with df = 0.5 * p * (p + 1) * (g - 1),
# where p = number of variables and g = number of groups
p_var <- ncol(X_matrix)
g <- length(unique(Y_factor))
df <- 0.5 * p_var * (p_var + 1) * (g - 1)
p_value <- 1 - pchisq(BoxM, df)
p_value
## [1] 2.95719e-12
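
The p-value is far below 0.05, so the group covariance matrices are not homogeneous. Since biotools is already loaded, the manual calculation can be cross-checked with boxM(); note that boxM() applies Box's scaling correction, so its chi-square statistic may differ slightly from the uncorrected M above:

# Cross-check of Box's M using biotools (applies the correction factor)
boxM(X_matrix, Y_factor)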

5) Step 5: Discriminant Analysis

# Split the data by the value of Y
klp1 <- Data_clean[Data_clean$Y == 1, 1:3]
klp2 <- Data_clean[Data_clean$Y == 0, 1:3]

# Mean vector of the group with Y == 1
x1 <- colMeans(klp1)
x1
##          X1          X2          X3 
##  52.4969697 158.4666667   0.5830303
# Mean vector of the group with Y == 0
x2 <- colMeans(klp2)
x2
##         X1         X2         X3 
##  56.601449 139.101449   1.585507
# Difference between the group mean vectors
x1_x2 <- x1 - x2
x1_x2
##        X1        X2        X3 
## -4.104480 19.365217 -1.002477
# Covariance matrix of group 1 (S1)
s1 <- cov(klp1)
s1
##            X1         X2         X3
## X1  91.214930 -96.288211  1.3017775
## X2 -96.288211 367.652846 -2.7249593
## X3   1.301778  -2.724959  0.6094664
# Covariance matrix of group 2 (S2)
s2 <- cov(klp2)
s2
##            X1         X2        X3
## X1  63.394742 -23.886279  1.066445
## X2 -23.886279 510.704961 -6.267132
## X3   1.066445  -6.267132  1.690883
# Pooled covariance matrix
n1 <- nrow(klp1)
n2 <- nrow(klp2)
spl <- ((n1-1)*s1 + (n2-1)*s2) / (n1 + n2 - 2)
spl
##            X1         X2        X3
## X1  78.552585 -63.334508  1.194666
## X2 -63.334508 432.762945 -4.337178
## X3   1.194666  -4.337178  1.101673
# Inverse of the pooled covariance matrix
spl_inv <- solve(spl)
spl_inv
##              X1          X2           X3
## X1  0.014495409 0.002044523 -0.007669890
## X2  0.002044523 0.002694023  0.008389002
## X3 -0.007669890 0.008389002  0.949054557
# Fisher discriminant function coefficients
a <- spl_inv %*% (x1 - x2)
a
##           [,1]
## X1 -0.01221458
## X2  0.03536886
## X3 -0.75746955
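
As a worked illustration, the discriminant score of a single observation is the inner product of its predictor values with these coefficients, e.g. for the first row of the data:

# Discriminant score of observation 1 (X1 = 63, X2 = 150, X3 = 2.3)
as.numeric(Data_clean[1, 1:3]) %*% a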

6) Step 6: Discriminant Scores and the Cutting Score

# Discriminant scores for group 1
Z1 <- as.matrix(klp1) %*% a
z1_bar <- mean(Z1)
z1_bar
## [1] 4.521929
# Discriminant scores for group 0
Z2 <- as.matrix(klp2) %*% a
z2_bar <- mean(Z2)
z2_bar
## [1] 3.027523
# Cutting score
# Equal group sizes:   m = (z1_bar + z2_bar) / 2
# Unequal group sizes: m = (n1*z1_bar + n2*z2_bar) / (n1 + n2)
m <- (n1 * z1_bar + n2 * z2_bar) / (n1 + n2)
m
## [1] 3.841309
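
Since z1_bar > z2_bar, observations with a discriminant score at or above m are assigned to group 1 and the rest to group 0. A minimal sketch of this manual classification, using the objects defined above:

# Classify every observation with the manual discriminant function
Z_all <- as.matrix(Data_clean[, 1:3]) %*% a
pred_manual <- ifelse(as.vector(Z_all) >= m, 1, 0)
table(Actual = Data_clean$Y, Predicted = pred_manual)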

7) Step 7: Discriminant Analysis with the lda() Function

library(MASS)
fit <- lda(Y ~ X1 + X2 + X3, data = Data_clean)
fit
## Call:
## lda(Y ~ X1 + X2 + X3, data = Data_clean)
## 
## Prior probabilities of groups:
##         0         1 
## 0.4554455 0.5445545 
## 
## Group means:
##         X1       X2        X3
## 0 56.60145 139.1014 1.5855072
## 1 52.49697 158.4667 0.5830303
## 
## Coefficients of linear discriminants:
##             LD1
## X1 -0.009991815
## X2  0.028932555
## X3 -0.619627782
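
The lda() coefficients are proportional to the manual Fisher coefficients from Step 5, since the discriminant direction is only determined up to a scale factor. A quick check is the elementwise ratio, which should be roughly constant:

# Ratio of the manual Fisher coefficients to the lda() scaling
as.vector(a) / as.vector(fit$scaling)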

8) Step 8: Evaluating the Classification Results

# Predictions and confusion matrix
fit_val <- predict(fit, Data_clean[, 1:3])
ct <- table(Data_clean$Y, fit_val$class)
ct
##    
##       0   1
##   0  89  49
##   1  28 137
# Percent correct for each class
diag(prop.table(ct, 1))
##         0         1 
## 0.6449275 0.8303030
# Overall accuracy
sum(diag(prop.table(ct)))
## [1] 0.7458746
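
The accuracy above is computed on the training data and is therefore optimistic. A leave-one-out cross-validated estimate can be obtained from lda() with CV = TRUE (an additional check, not part of the original output):

# Leave-one-out cross-validation with MASS::lda
fit_cv <- lda(Y ~ X1 + X2 + X3, data = Data_clean, CV = TRUE)
ct_cv <- table(Data_clean$Y, fit_cv$class)
sum(diag(prop.table(ct_cv)))  # cross-validated overall accuracy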