hcv_data <- read.csv("hcvdat0.csv")
data_pca <- na.omit(hcv_data[, 5:14])
dim(data_pca)
## [1] 589 10
head(data_pca)
## ALB ALP ALT AST BIL CHE CHOL CREA GGT PROT
## 1 38.5 52.5 7.7 22.1 7.5 6.93 3.23 106 12.1 69.0
## 2 38.5 70.3 18.0 24.7 3.9 11.17 4.80 74 15.6 76.5
## 3 46.9 74.7 36.2 52.6 6.1 8.84 5.20 86 33.2 79.3
## 4 43.2 52.0 30.6 22.6 18.9 7.33 4.74 80 33.8 75.7
## 5 39.2 74.1 32.6 24.8 9.6 9.15 4.32 76 29.9 68.7
## 6 41.6 43.3 18.5 19.7 12.3 9.92 6.05 111 91.0 74.0
matriks_kor <- cor(data_pca)
corrplot(matriks_kor,
method = "color",
type = "upper",
addCoef.col = "black",
number.cex = 0.7,
tl.col = "black",
tl.srt = 45,
title = "Matriks Korelasi Variabel Darah",
mar = c(0, 0, 1, 0))
hasil_kmo <- KMO(matriks_kor)
print(paste("Nilai KMO Keseluruhan:", round(hasil_kmo$MSA, 3)))
## [1] "Nilai KMO Keseluruhan: 0.614"
print("Nilai MSA per Variabel:")
## [1] "Nilai MSA per Variabel:"
round(hasil_kmo$MSAi, 3)
## ALB ALP ALT AST BIL CHE CHOL CREA GGT PROT
## 0.628 0.527 0.593 0.528 0.747 0.733 0.696 0.529 0.578 0.576
hasil_bartlett <- cortest.bartlett(matriks_kor, n = nrow(data_pca))
print(paste("Chi-Square:", round(hasil_bartlett$chisq, 3)))
## [1] "Chi-Square: 1113.575"
print(paste("P-Value :", hasil_bartlett$p.value))
## [1] "P-Value : 7.28741336033632e-204"
uji_normalitas <- lapply(data_pca, shapiro.test)
p_values <- sapply(uji_normalitas, function(x) x$p.value)
tabel_normalitas <- data.frame(
Variabel = names(p_values),
P_Value = round(p_values, 4),
Status = ifelse(p_values > 0.05, "Normal", "Tidak Normal")
)
print(tabel_normalitas)
## Variabel P_Value Status
## ALB ALB 0e+00 Tidak Normal
## ALP ALP 0e+00 Tidak Normal
## ALT ALT 0e+00 Tidak Normal
## AST AST 0e+00 Tidak Normal
## BIL BIL 0e+00 Tidak Normal
## CHE CHE 0e+00 Tidak Normal
## CHOL CHOL 2e-04 Tidak Normal
## CREA CREA 0e+00 Tidak Normal
## GGT GGT 0e+00 Tidak Normal
## PROT PROT 0e+00 Tidak Normal
hasil_pcor <- pcor(data_pca)
cat("--- Matriks Korelasi Parsial ---\n")
## --- Matriks Korelasi Parsial ---
print(round(hasil_pcor$estimate, 3))
## ALB ALP ALT AST BIL CHE CHOL CREA GGT PROT
## ALB 1.000 -0.136 0.059 -0.154 -0.043 0.168 -0.013 0.046 0.009 0.529
## ALP -0.136 1.000 0.162 -0.197 0.036 0.025 0.087 0.115 0.441 0.016
## ALT 0.059 0.162 1.000 0.225 -0.124 0.191 0.071 -0.067 0.063 -0.091
## AST -0.154 -0.197 0.225 1.000 0.202 -0.063 -0.183 -0.060 0.450 0.177
## BIL -0.043 0.036 -0.124 0.202 1.000 -0.206 -0.027 0.001 0.069 0.049
## CHE 0.168 0.025 0.191 -0.063 -0.206 1.000 0.306 0.021 -0.045 0.121
## CHOL -0.013 0.087 0.071 -0.183 -0.027 0.306 1.000 -0.078 0.086 0.147
## CREA 0.046 0.115 -0.067 -0.060 0.001 0.021 -0.078 1.000 0.097 -0.027
## GGT 0.009 0.441 0.063 0.450 0.069 -0.045 0.086 0.097 1.000 -0.027
## PROT 0.529 0.016 -0.091 0.177 0.049 0.121 0.147 -0.027 -0.027 1.000
pca_result <- prcomp(data_pca, center = TRUE, scale. = TRUE)
summary(pca_result)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.5531 1.3559 1.1298 1.0385 0.92654 0.81835 0.78637
## Proportion of Variance 0.2412 0.1839 0.1277 0.1078 0.08585 0.06697 0.06184
## Cumulative Proportion 0.2412 0.4251 0.5527 0.6606 0.74642 0.81339 0.87522
## PC8 PC9 PC10
## Standard deviation 0.70971 0.6504 0.5666
## Proportion of Variance 0.05037 0.0423 0.0321
## Cumulative Proportion 0.92559 0.9679 1.0000
eigenvalues <- get_eigenvalue(pca_result)
tabel_varians <- data.frame(
Komponen = paste0("PC", 1:nrow(eigenvalues)),
Eigenvalue = round(eigenvalues$eigenvalue, 4),
Pct_Varians = round(eigenvalues$variance.percent, 2),
Kumulatif = round(eigenvalues$cumulative.variance.percent, 2)
)
print(tabel_varians)
## Komponen Eigenvalue Pct_Varians Kumulatif
## 1 PC1 2.4122 24.12 24.12
## 2 PC2 1.8385 18.39 42.51
## 3 PC3 1.2765 12.76 55.27
## 4 PC4 1.0784 10.78 66.06
## 5 PC5 0.8585 8.58 74.64
## 6 PC6 0.6697 6.70 81.34
## 7 PC7 0.6184 6.18 87.52
## 8 PC8 0.5037 5.04 92.56
## 9 PC9 0.4230 4.23 96.79
## 10 PC10 0.3210 3.21 100.00
fviz_eig(pca_result,
addlabels = TRUE,
ylim = c(0, 40),
main = "Scree Plot: Persentase Varians yang Dijelaskan")
round(pca_result$rotation[, 1:4], 3)
## PC1 PC2 PC3 PC4
## ALB 0.444 0.099 -0.400 0.174
## ALP -0.153 0.471 0.319 0.224
## ALT 0.033 0.454 0.152 -0.407
## AST -0.342 0.300 -0.417 -0.226
## BIL -0.342 0.024 -0.419 0.113
## CHE 0.445 0.260 0.123 -0.062
## CHOL 0.347 0.289 0.184 -0.044
## CREA -0.068 0.110 0.139 0.811
## GGT -0.301 0.519 -0.073 0.073
## PROT 0.361 0.198 -0.541 0.145
Dihitung menggunakan rumus: ρ(Yi, Zk) = e_ik × √λi (sesuai materi slide 18–22, untuk data yang sudah distandarisasi)
eigenvectors <- pca_result$rotation
lambda <- pca_result$sdev^2
m <- 4
korelasi_pc_var <- sapply(1:m, function(i) eigenvectors[, i] * sqrt(lambda[i]))
colnames(korelasi_pc_var) <- paste0("PC", 1:m)
cat("--- Korelasi Variabel dengan PC (rho_Yi,Zk) ---\n")
## --- Korelasi Variabel dengan PC (rho_Yi,Zk) ---
print(round(korelasi_pc_var, 3))
## PC1 PC2 PC3 PC4
## ALB 0.689 0.134 -0.452 0.181
## ALP -0.238 0.639 0.361 0.233
## ALT 0.051 0.615 0.171 -0.423
## AST -0.531 0.407 -0.471 -0.235
## BIL -0.531 0.033 -0.473 0.118
## CHE 0.692 0.353 0.139 -0.065
## CHOL 0.539 0.392 0.208 -0.046
## CREA -0.106 0.149 0.157 0.842
## GGT -0.468 0.703 -0.083 0.076
## PROT 0.561 0.269 -0.611 0.151
corrplot(korelasi_pc_var,
method = "color",
is.corr = FALSE,
cl.lim = c(-1, 1),
addCoef.col = "black",
number.cex = 0.7,
title = "Korelasi Variabel dengan PC",
mar = c(0, 0, 2, 0),
tl.col = "black",
tl.srt = 45)
Komunalitas menunjukkan proporsi varians tiap variabel yang dijelaskan oleh 4 PC yang dipertahankan.
komunalitas_pca <- rowSums(korelasi_pc_var^2)
cat("--- Komunalitas PCA (h2) per Variabel ---\n")
## --- Komunalitas PCA (h2) per Variabel ---
print(round(komunalitas_pca, 3))
## ALB ALP ALT AST BIL CHE CHOL CREA GGT PROT
## 0.730 0.649 0.589 0.724 0.521 0.626 0.489 0.767 0.726 0.784
skor_pc <- as.data.frame(pca_result$x[, 1:m])
colnames(skor_pc) <- paste0("PC", 1:m)
cat("--- Skor PC (6 observasi pertama) ---\n")
## --- Skor PC (6 observasi pertama) ---
print(round(head(skor_pc), 4))
## PC1 PC2 PC3 PC4
## 1 -0.9941 -1.8676 0.0900 0.5930
## 2 0.8356 -0.1542 0.0803 0.0190
## 3 0.8766 0.8475 -1.0619 0.1197
## 4 0.0903 -0.4444 -0.8659 0.0790
## 5 -0.3899 -0.2577 0.6303 -0.2600
## 6 0.6141 0.2646 -0.2158 0.5701
data_pca %>%
pivot_longer(everything(), names_to = "Variabel", values_to = "Nilai") %>%
ggplot(aes(x = Variabel, y = Nilai)) +
geom_boxplot(fill = "steelblue", alpha = 0.7,
outlier.color = "red", outlier.size = 1) +
facet_wrap(~Variabel, scales = "free_y", ncol = 5) +
labs(title = "Distribusi Variabel Darah (Boxplot)", x = NULL, y = "Nilai") +
theme_minimal() +
theme(strip.text = element_text(face = "bold", size = 10),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
plot.title = element_text(face = "bold", hjust = 0.5))
fviz_pca_biplot(pca_result,
repel = TRUE,
geom.ind = "point",
col.ind = "steelblue",
col.var = "red",
label = "var",
title = "Biplot PCA: Skor Observasi & Loading Variabel")
Sebelum rotasi, dilihat pola loading awal untuk menilai apakah rotasi diperlukan (sesuai materi Stage 5 slide 20).
fa_unrotated <- fa(data_pca, nfactors = 3, rotate = "none", fm = "ml")
loading_unrot <- as.data.frame(unclass(fa_unrotated$loadings))
colnames(loading_unrot) <- paste0("ML", 1:3)
loading_unrot$Komunalitas <- fa_unrotated$communality
cat("--- Unrotated Factor Loading Matrix ---\n")
## --- Unrotated Factor Loading Matrix ---
print(round(loading_unrot, 3))
## ML1 ML2 ML3 Komunalitas
## ALB -0.501 0.480 -0.057 0.485
## ALP 0.420 0.252 0.283 0.321
## ALT 0.151 0.255 0.280 0.166
## AST 0.503 0.220 -0.300 0.391
## BIL 0.327 -0.022 -0.381 0.253
## CHE -0.382 0.394 0.464 0.516
## CHOL -0.224 0.348 0.404 0.334
## CREA 0.118 0.050 0.043 0.018
## GGT 0.763 0.459 0.044 0.794
## PROT -0.441 0.690 -0.271 0.744
Rotasi Varimax dilakukan untuk menyederhanakan kolom matriks loading — memaksimalkan loading tinggi pada satu faktor dan mendekati nol pada faktor lainnya (slide 14–15).
fa_res <- fa(data_pca, nfactors = 3, rotate = "varimax", fm = "ml")
loading_rot <- as.data.frame(unclass(fa_res$loadings))
colnames(loading_rot) <- paste0("ML", 1:3)
loading_rot$Komunalitas <- fa_res$communality
cat("--- Rotated Factor Loading Matrix (Varimax) ---\n")
## --- Rotated Factor Loading Matrix (Varimax) ---
print(round(loading_rot, 3))
## ML1 ML2 ML3 Komunalitas
## ALB -0.125 0.639 0.247 0.485
## ALP 0.558 -0.079 0.059 0.321
## ALT 0.357 0.044 0.191 0.166
## AST 0.387 0.062 -0.488 0.391
## BIL 0.090 -0.030 -0.493 0.253
## CHE 0.106 0.332 0.628 0.516
## CHOL 0.176 0.245 0.493 0.334
## CREA 0.132 -0.027 -0.017 0.018
## GGT 0.840 0.022 -0.298 0.794
## PROT -0.042 0.860 0.054 0.744
cat("=== UNROTATED ===\n")
## === UNROTATED ===
print(round(as.data.frame(unclass(fa_unrotated$loadings)), 3))
## ML1 ML2 ML3
## ALB -0.501 0.480 -0.057
## ALP 0.420 0.252 0.283
## ALT 0.151 0.255 0.280
## AST 0.503 0.220 -0.300
## BIL 0.327 -0.022 -0.381
## CHE -0.382 0.394 0.464
## CHOL -0.224 0.348 0.404
## CREA 0.118 0.050 0.043
## GGT 0.763 0.459 0.044
## PROT -0.441 0.690 -0.271
cat("\n=== ROTATED (Varimax) ===\n")
##
## === ROTATED (Varimax) ===
print(round(as.data.frame(unclass(fa_res$loadings)), 3))
## ML1 ML2 ML3
## ALB -0.125 0.639 0.247
## ALP 0.558 -0.079 0.059
## ALT 0.357 0.044 0.191
## AST 0.387 0.062 -0.488
## BIL 0.090 -0.030 -0.493
## CHE 0.106 0.332 0.628
## CHOL 0.176 0.245 0.493
## CREA 0.132 -0.027 -0.017
## GGT 0.840 0.022 -0.298
## PROT -0.042 0.860 0.054
cat("\n--- Proporsi Varians ---\n")
##
## --- Proporsi Varians ---
cat("Unrotated:\n")
## Unrotated:
print(round(fa_unrotated$Vaccounted, 3))
## ML1 ML2 ML3
## SS loadings 1.797 1.372 0.853
## Proportion Var 0.180 0.137 0.085
## Cumulative Var 0.180 0.317 0.402
## Proportion Explained 0.447 0.341 0.212
## Cumulative Proportion 0.447 0.788 1.000
cat("\nRotated:\n")
##
## Rotated:
print(round(fa_res$Vaccounted, 3))
## ML1 ML2 ML3
## SS loadings 1.378 1.332 1.312
## Proportion Var 0.138 0.133 0.131
## Cumulative Var 0.138 0.271 0.402
## Proportion Explained 0.343 0.331 0.326
## Cumulative Proportion 0.343 0.674 1.000
Komunalitas FA diambil langsung dari hasil fa(),
menunjukkan total varians tiap variabel yang dijelaskan oleh semua
faktor yang dipertahankan — nilai ini tidak berubah setelah
rotasi (slide 22).
komunalitas_fa <- data.frame(
Variabel = names(fa_res$communality),
Unrotated_h2 = round(fa_unrotated$communality, 3),
Rotated_h2 = round(fa_res$communality, 3)
)
print(komunalitas_fa)
## Variabel Unrotated_h2 Rotated_h2
## ALB ALB 0.485 0.485
## ALP ALP 0.321 0.321
## ALT ALT 0.166 0.166
## AST AST 0.391 0.391
## BIL BIL 0.253 0.253
## CHE CHE 0.516 0.516
## CHOL CHOL 0.334 0.334
## CREA CREA 0.018 0.018
## GGT GGT 0.794 0.794
## PROT PROT 0.744 0.744
Variabel dengan loading signifikan (> 0.30) di lebih dari satu faktor disebut cross-loading. Rasio kuadrat loading digunakan untuk menentukan tingkat masalah (slide 10): - Rasio 1.0–1.5 → problematic cross-loading (kandidat eliminasi) - Rasio 1.5–2.0 → potential cross-loading - Rasio > 2.0 → ignorable cross-loading
loading_mat <- as.matrix(unclass(fa_res$loadings))
cross_result <- do.call(rbind, lapply(1:nrow(loading_mat), function(i) {
idx <- which(abs(loading_mat[i, ]) >= 0.30)
if (length(idx) < 2) return(NULL)
ord <- order(abs(loading_mat[i, idx]), decreasing = TRUE)
l1 <- loading_mat[i, idx[ord[1]]]; l2 <- loading_mat[i, idx[ord[2]]]
rasio <- round(l1^2 / l2^2, 3)
data.frame(
Variabel = rownames(loading_mat)[i],
Faktor1 = colnames(loading_mat)[idx[ord[1]]], Loading1 = round(l1, 3),
Faktor2 = colnames(loading_mat)[idx[ord[2]]], Loading2 = round(l2, 3),
Rasio_Kuadrat = rasio,
Status = ifelse(rasio <= 1.5, "Problematic",
ifelse(rasio <= 2.0, "Potential", "Ignorable"))
)
}))
if (is.null(cross_result)) {
cat("Tidak ada cross-loading yang terdeteksi (cutoff = 0.30)\n")
} else {
cat("--- Variabel dengan Cross-Loading ---\n")
print(cross_result)
}
## --- Variabel dengan Cross-Loading ---
## Variabel Faktor1 Loading1 Faktor2 Loading2 Rasio_Kuadrat Status
## 1 AST ML3 -0.488 ML1 0.387 1.592 Potential
## 2 CHE ML3 0.628 ML2 0.332 3.578 Ignorable
fa.diagram(fa_res, main = "Struktur Faktor Variabel Tes Darah")