1. Load Data

hcv_data  <- read.csv("hcvdat0.csv")
data_pca  <- na.omit(hcv_data[, 5:14])
dim(data_pca)
## [1] 589  10
head(data_pca)
##    ALB  ALP  ALT  AST  BIL   CHE CHOL CREA  GGT PROT
## 1 38.5 52.5  7.7 22.1  7.5  6.93 3.23  106 12.1 69.0
## 2 38.5 70.3 18.0 24.7  3.9 11.17 4.80   74 15.6 76.5
## 3 46.9 74.7 36.2 52.6  6.1  8.84 5.20   86 33.2 79.3
## 4 43.2 52.0 30.6 22.6 18.9  7.33 4.74   80 33.8 75.7
## 5 39.2 74.1 32.6 24.8  9.6  9.15 4.32   76 29.9 68.7
## 6 41.6 43.3 18.5 19.7 12.3  9.92 6.05  111 91.0 74.0

2. Matriks Korelasi

matriks_kor <- cor(data_pca)

corrplot(matriks_kor,
         method      = "color",
         type        = "upper",
         addCoef.col = "black",
         number.cex  = 0.7,
         tl.col      = "black",
         tl.srt      = 45,
         title       = "Matriks Korelasi Variabel Darah",
         mar         = c(0, 0, 1, 0))

3. Uji Asumsi

3.1 Measure of Sampling Adequacy (KMO)

hasil_kmo <- KMO(matriks_kor)
print(paste("Nilai KMO Keseluruhan:", round(hasil_kmo$MSA, 3)))
## [1] "Nilai KMO Keseluruhan: 0.614"
print("Nilai MSA per Variabel:")
## [1] "Nilai MSA per Variabel:"
round(hasil_kmo$MSAi, 3)
##   ALB   ALP   ALT   AST   BIL   CHE  CHOL  CREA   GGT  PROT 
## 0.628 0.527 0.593 0.528 0.747 0.733 0.696 0.529 0.578 0.576

3.2 Bartlett’s Test of Sphericity

hasil_bartlett <- cortest.bartlett(matriks_kor, n = nrow(data_pca))
print(paste("Chi-Square:", round(hasil_bartlett$chisq, 3)))
## [1] "Chi-Square: 1113.575"
print(paste("P-Value   :", hasil_bartlett$p.value))
## [1] "P-Value   : 7.28741336033632e-204"

3.3 Uji Normalitas (Shapiro-Wilk)

uji_normalitas <- lapply(data_pca, shapiro.test)
p_values       <- sapply(uji_normalitas, function(x) x$p.value)

tabel_normalitas <- data.frame(
  Variabel = names(p_values),
  P_Value  = round(p_values, 4),
  Status   = ifelse(p_values > 0.05, "Normal", "Tidak Normal")
)
print(tabel_normalitas)
##      Variabel P_Value       Status
## ALB       ALB   0e+00 Tidak Normal
## ALP       ALP   0e+00 Tidak Normal
## ALT       ALT   0e+00 Tidak Normal
## AST       AST   0e+00 Tidak Normal
## BIL       BIL   0e+00 Tidak Normal
## CHE       CHE   0e+00 Tidak Normal
## CHOL     CHOL   2e-04 Tidak Normal
## CREA     CREA   0e+00 Tidak Normal
## GGT       GGT   0e+00 Tidak Normal
## PROT     PROT   0e+00 Tidak Normal

3.4 Matriks Korelasi Parsial

hasil_pcor <- pcor(data_pca)
cat("--- Matriks Korelasi Parsial ---\n")
## --- Matriks Korelasi Parsial ---
print(round(hasil_pcor$estimate, 3))
##         ALB    ALP    ALT    AST    BIL    CHE   CHOL   CREA    GGT   PROT
## ALB   1.000 -0.136  0.059 -0.154 -0.043  0.168 -0.013  0.046  0.009  0.529
## ALP  -0.136  1.000  0.162 -0.197  0.036  0.025  0.087  0.115  0.441  0.016
## ALT   0.059  0.162  1.000  0.225 -0.124  0.191  0.071 -0.067  0.063 -0.091
## AST  -0.154 -0.197  0.225  1.000  0.202 -0.063 -0.183 -0.060  0.450  0.177
## BIL  -0.043  0.036 -0.124  0.202  1.000 -0.206 -0.027  0.001  0.069  0.049
## CHE   0.168  0.025  0.191 -0.063 -0.206  1.000  0.306  0.021 -0.045  0.121
## CHOL -0.013  0.087  0.071 -0.183 -0.027  0.306  1.000 -0.078  0.086  0.147
## CREA  0.046  0.115 -0.067 -0.060  0.001  0.021 -0.078  1.000  0.097 -0.027
## GGT   0.009  0.441  0.063  0.450  0.069 -0.045  0.086  0.097  1.000 -0.027
## PROT  0.529  0.016 -0.091  0.177  0.049  0.121  0.147 -0.027 -0.027  1.000

4. Principal Component Analysis

pca_result <- prcomp(data_pca, center = TRUE, scale. = TRUE)
summary(pca_result)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     1.5531 1.3559 1.1298 1.0385 0.92654 0.81835 0.78637
## Proportion of Variance 0.2412 0.1839 0.1277 0.1078 0.08585 0.06697 0.06184
## Cumulative Proportion  0.2412 0.4251 0.5527 0.6606 0.74642 0.81339 0.87522
##                            PC8    PC9   PC10
## Standard deviation     0.70971 0.6504 0.5666
## Proportion of Variance 0.05037 0.0423 0.0321
## Cumulative Proportion  0.92559 0.9679 1.0000

4.1 Tabel Eigenvalue, Proporsi Varians, dan Kumulatif

eigenvalues   <- get_eigenvalue(pca_result)
tabel_varians <- data.frame(
  Komponen    = paste0("PC", 1:nrow(eigenvalues)),
  Eigenvalue  = round(eigenvalues$eigenvalue, 4),
  Pct_Varians = round(eigenvalues$variance.percent, 2),
  Kumulatif   = round(eigenvalues$cumulative.variance.percent, 2)
)
print(tabel_varians)
##    Komponen Eigenvalue Pct_Varians Kumulatif
## 1       PC1     2.4122       24.12     24.12
## 2       PC2     1.8385       18.39     42.51
## 3       PC3     1.2765       12.76     55.27
## 4       PC4     1.0784       10.78     66.06
## 5       PC5     0.8585        8.58     74.64
## 6       PC6     0.6697        6.70     81.34
## 7       PC7     0.6184        6.18     87.52
## 8       PC8     0.5037        5.04     92.56
## 9       PC9     0.4230        4.23     96.79
## 10     PC10     0.3210        3.21    100.00

4.2 Scree Plot

fviz_eig(pca_result,
         addlabels = TRUE,
         ylim      = c(0, 40),
         main      = "Scree Plot: Persentase Varians yang Dijelaskan")

4.3 Loading (Eigenvectors) 4 PC Pertama

round(pca_result$rotation[, 1:4], 3)
##         PC1   PC2    PC3    PC4
## ALB   0.444 0.099 -0.400  0.174
## ALP  -0.153 0.471  0.319  0.224
## ALT   0.033 0.454  0.152 -0.407
## AST  -0.342 0.300 -0.417 -0.226
## BIL  -0.342 0.024 -0.419  0.113
## CHE   0.445 0.260  0.123 -0.062
## CHOL  0.347 0.289  0.184 -0.044
## CREA -0.068 0.110  0.139  0.811
## GGT  -0.301 0.519 -0.073  0.073
## PROT  0.361 0.198 -0.541  0.145

4.4 Korelasi antara Variabel dan Principal Component

Dihitung menggunakan rumus: ρ(Yi, Zk) = e_ik × √λi (sesuai materi slide 18–22, untuk data yang sudah distandarisasi)

eigenvectors    <- pca_result$rotation
lambda          <- pca_result$sdev^2
m               <- 4

korelasi_pc_var <- sapply(1:m, function(i) eigenvectors[, i] * sqrt(lambda[i]))
colnames(korelasi_pc_var) <- paste0("PC", 1:m)

cat("--- Korelasi Variabel dengan PC (rho_Yi,Zk) ---\n")
## --- Korelasi Variabel dengan PC (rho_Yi,Zk) ---
print(round(korelasi_pc_var, 3))
##         PC1   PC2    PC3    PC4
## ALB   0.689 0.134 -0.452  0.181
## ALP  -0.238 0.639  0.361  0.233
## ALT   0.051 0.615  0.171 -0.423
## AST  -0.531 0.407 -0.471 -0.235
## BIL  -0.531 0.033 -0.473  0.118
## CHE   0.692 0.353  0.139 -0.065
## CHOL  0.539 0.392  0.208 -0.046
## CREA -0.106 0.149  0.157  0.842
## GGT  -0.468 0.703 -0.083  0.076
## PROT  0.561 0.269 -0.611  0.151
corrplot(korelasi_pc_var,
         method      = "color",
         is.corr     = FALSE,
         cl.lim      = c(-1, 1),
         addCoef.col = "black",
         number.cex  = 0.7,
         title       = "Korelasi Variabel dengan PC",
         mar         = c(0, 0, 2, 0),
         tl.col      = "black",
         tl.srt      = 45)

4.5 Komunalitas PCA (h²)

Komunalitas menunjukkan proporsi varians tiap variabel yang dijelaskan oleh 4 PC yang dipertahankan.

komunalitas_pca <- rowSums(korelasi_pc_var^2)
cat("--- Komunalitas PCA (h2) per Variabel ---\n")
## --- Komunalitas PCA (h2) per Variabel ---
print(round(komunalitas_pca, 3))
##   ALB   ALP   ALT   AST   BIL   CHE  CHOL  CREA   GGT  PROT 
## 0.730 0.649 0.589 0.724 0.521 0.626 0.489 0.767 0.726 0.784

4.6 Skor PC

skor_pc <- as.data.frame(pca_result$x[, 1:m])
colnames(skor_pc) <- paste0("PC", 1:m)
cat("--- Skor PC (6 observasi pertama) ---\n")
## --- Skor PC (6 observasi pertama) ---
print(round(head(skor_pc), 4))
##       PC1     PC2     PC3     PC4
## 1 -0.9941 -1.8676  0.0900  0.5930
## 2  0.8356 -0.1542  0.0803  0.0190
## 3  0.8766  0.8475 -1.0619  0.1197
## 4  0.0903 -0.4444 -0.8659  0.0790
## 5 -0.3899 -0.2577  0.6303 -0.2600
## 6  0.6141  0.2646 -0.2158  0.5701

4.7 Boxplot Distribusi Variabel Darah

data_pca %>%
  pivot_longer(everything(), names_to = "Variabel", values_to = "Nilai") %>%
  ggplot(aes(x = Variabel, y = Nilai)) +
  geom_boxplot(fill = "steelblue", alpha = 0.7,
               outlier.color = "red", outlier.size = 1) +
  facet_wrap(~Variabel, scales = "free_y", ncol = 5) +
  labs(title = "Distribusi Variabel Darah (Boxplot)", x = NULL, y = "Nilai") +
  theme_minimal() +
  theme(strip.text     = element_text(face = "bold", size = 10),
        axis.text.x    = element_blank(),
        axis.ticks.x   = element_blank(),
        plot.title     = element_text(face = "bold", hjust = 0.5))

4.8 Biplot: Pengelompokan Observasi dan Loading Variabel

fviz_pca_biplot(pca_result,
                repel    = TRUE,
                geom.ind = "point",
                col.ind  = "steelblue",
                col.var  = "red",
                label    = "var",
                title    = "Biplot PCA: Skor Observasi & Loading Variabel")


5. Factor Analysis

5.1 Unrotated Factor Matrix

Sebelum rotasi, dilihat pola loading awal untuk menilai apakah rotasi diperlukan (sesuai materi Stage 5 slide 20).

fa_unrotated <- fa(data_pca, nfactors = 3, rotate = "none", fm = "ml")

loading_unrot <- as.data.frame(unclass(fa_unrotated$loadings))
colnames(loading_unrot) <- paste0("ML", 1:3)
loading_unrot$Komunalitas <- fa_unrotated$communality

cat("--- Unrotated Factor Loading Matrix ---\n")
## --- Unrotated Factor Loading Matrix ---
print(round(loading_unrot, 3))
##         ML1    ML2    ML3 Komunalitas
## ALB  -0.501  0.480 -0.057       0.485
## ALP   0.420  0.252  0.283       0.321
## ALT   0.151  0.255  0.280       0.166
## AST   0.503  0.220 -0.300       0.391
## BIL   0.327 -0.022 -0.381       0.253
## CHE  -0.382  0.394  0.464       0.516
## CHOL -0.224  0.348  0.404       0.334
## CREA  0.118  0.050  0.043       0.018
## GGT   0.763  0.459  0.044       0.794
## PROT -0.441  0.690 -0.271       0.744

5.2 Rotasi Varimax dan Rotated Factor Matrix

Rotasi Varimax dilakukan untuk menyederhanakan kolom matriks loading — memaksimalkan loading tinggi pada satu faktor dan mendekati nol pada faktor lainnya (slide 14–15).

fa_res <- fa(data_pca, nfactors = 3, rotate = "varimax", fm = "ml")

loading_rot <- as.data.frame(unclass(fa_res$loadings))
colnames(loading_rot) <- paste0("ML", 1:3)
loading_rot$Komunalitas <- fa_res$communality

cat("--- Rotated Factor Loading Matrix (Varimax) ---\n")
## --- Rotated Factor Loading Matrix (Varimax) ---
print(round(loading_rot, 3))
##         ML1    ML2    ML3 Komunalitas
## ALB  -0.125  0.639  0.247       0.485
## ALP   0.558 -0.079  0.059       0.321
## ALT   0.357  0.044  0.191       0.166
## AST   0.387  0.062 -0.488       0.391
## BIL   0.090 -0.030 -0.493       0.253
## CHE   0.106  0.332  0.628       0.516
## CHOL  0.176  0.245  0.493       0.334
## CREA  0.132 -0.027 -0.017       0.018
## GGT   0.840  0.022 -0.298       0.794
## PROT -0.042  0.860  0.054       0.744

5.3 Perbandingan Unrotated vs Rotated Loading

cat("=== UNROTATED ===\n")
## === UNROTATED ===
print(round(as.data.frame(unclass(fa_unrotated$loadings)), 3))
##         ML1    ML2    ML3
## ALB  -0.501  0.480 -0.057
## ALP   0.420  0.252  0.283
## ALT   0.151  0.255  0.280
## AST   0.503  0.220 -0.300
## BIL   0.327 -0.022 -0.381
## CHE  -0.382  0.394  0.464
## CHOL -0.224  0.348  0.404
## CREA  0.118  0.050  0.043
## GGT   0.763  0.459  0.044
## PROT -0.441  0.690 -0.271
cat("\n=== ROTATED (Varimax) ===\n")
## 
## === ROTATED (Varimax) ===
print(round(as.data.frame(unclass(fa_res$loadings)), 3))
##         ML1    ML2    ML3
## ALB  -0.125  0.639  0.247
## ALP   0.558 -0.079  0.059
## ALT   0.357  0.044  0.191
## AST   0.387  0.062 -0.488
## BIL   0.090 -0.030 -0.493
## CHE   0.106  0.332  0.628
## CHOL  0.176  0.245  0.493
## CREA  0.132 -0.027 -0.017
## GGT   0.840  0.022 -0.298
## PROT -0.042  0.860  0.054
cat("\n--- Proporsi Varians ---\n")
## 
## --- Proporsi Varians ---
cat("Unrotated:\n")
## Unrotated:
print(round(fa_unrotated$Vaccounted, 3))
##                         ML1   ML2   ML3
## SS loadings           1.797 1.372 0.853
## Proportion Var        0.180 0.137 0.085
## Cumulative Var        0.180 0.317 0.402
## Proportion Explained  0.447 0.341 0.212
## Cumulative Proportion 0.447 0.788 1.000
cat("\nRotated:\n")
## 
## Rotated:
print(round(fa_res$Vaccounted, 3))
##                         ML1   ML2   ML3
## SS loadings           1.378 1.332 1.312
## Proportion Var        0.138 0.133 0.131
## Cumulative Var        0.138 0.271 0.402
## Proportion Explained  0.343 0.331 0.326
## Cumulative Proportion 0.343 0.674 1.000

5.4 Komunalitas Factor Analysis

Komunalitas FA diambil langsung dari hasil fa(), menunjukkan total varians tiap variabel yang dijelaskan oleh semua faktor yang dipertahankan — nilai ini tidak berubah setelah rotasi (slide 22).

komunalitas_fa <- data.frame(
  Variabel     = names(fa_res$communality),
  Unrotated_h2 = round(fa_unrotated$communality, 3),
  Rotated_h2   = round(fa_res$communality, 3)
)
print(komunalitas_fa)
##      Variabel Unrotated_h2 Rotated_h2
## ALB       ALB        0.485      0.485
## ALP       ALP        0.321      0.321
## ALT       ALT        0.166      0.166
## AST       AST        0.391      0.391
## BIL       BIL        0.253      0.253
## CHE       CHE        0.516      0.516
## CHOL     CHOL        0.334      0.334
## CREA     CREA        0.018      0.018
## GGT       GGT        0.794      0.794
## PROT     PROT        0.744      0.744

5.5 Cross-Loading Check

Variabel dengan loading signifikan (> 0.30) di lebih dari satu faktor disebut cross-loading. Rasio kuadrat loading digunakan untuk menentukan tingkat masalah (slide 10): - Rasio 1.0–1.5 → problematic cross-loading (kandidat eliminasi) - Rasio 1.5–2.0 → potential cross-loading - Rasio > 2.0 → ignorable cross-loading

loading_mat <- as.matrix(unclass(fa_res$loadings))

cross_result <- do.call(rbind, lapply(1:nrow(loading_mat), function(i) {
  idx <- which(abs(loading_mat[i, ]) >= 0.30)
  if (length(idx) < 2) return(NULL)
  ord <- order(abs(loading_mat[i, idx]), decreasing = TRUE)
  l1  <- loading_mat[i, idx[ord[1]]]; l2 <- loading_mat[i, idx[ord[2]]]
  rasio <- round(l1^2 / l2^2, 3)
  data.frame(
    Variabel      = rownames(loading_mat)[i],
    Faktor1       = colnames(loading_mat)[idx[ord[1]]], Loading1 = round(l1, 3),
    Faktor2       = colnames(loading_mat)[idx[ord[2]]], Loading2 = round(l2, 3),
    Rasio_Kuadrat = rasio,
    Status        = ifelse(rasio <= 1.5, "Problematic",
                    ifelse(rasio <= 2.0, "Potential", "Ignorable"))
  )
}))

if (is.null(cross_result)) {
  cat("Tidak ada cross-loading yang terdeteksi (cutoff = 0.30)\n")
} else {
  cat("--- Variabel dengan Cross-Loading ---\n")
  print(cross_result)
}
## --- Variabel dengan Cross-Loading ---
##   Variabel Faktor1 Loading1 Faktor2 Loading2 Rasio_Kuadrat    Status
## 1      AST     ML3   -0.488     ML1    0.387         1.592 Potential
## 2      CHE     ML3    0.628     ML2    0.332         3.578 Ignorable

5.6 Factor Diagram

fa.diagram(fa_res, main = "Struktur Faktor Variabel Tes Darah")