Temuan Utama (terisi otomatis setelah analisis dijalankan):
# >> GANTI PATH SESUAI LOKASI FILE <<
data_raw <- read_excel("C:/Users/Faishal Muflih/Documents/cv fais terbaru/file_show.xlsx")
categorical_vars <- c("Gallstone Status", "Gender", "Comorbidity",
"CAD", "Hypothyroidism", "Hyperlipidemia", "DM")
data_num <- data_raw %>%
select(-any_of(categorical_vars)) %>%
select(where(is.numeric)) %>%
select(where(~ n_distinct(.) > 2)) %>%
na.omit()
n <- nrow(data_num)
p <- ncol(data_num)
cat(sprintf("Dimensi data mentah : %d baris × %d kolom\n", nrow(data_raw), ncol(data_raw)))## Dimensi data mentah : 319 baris × 39 kolom
## Variabel kategorik dibuang: 7
## Variabel numerik tersisa : 32
## Observasi valid : 319
Variabel yang dikeluarkan (binary/kategorikal): Gallstone Status, Gender, Comorbidity, CAD, Hypothyroidism, Hyperlipidemia, DM
data.frame(
Informasi = c("Sumber","Observasi (raw)","Variabel (raw)",
"Tujuan","Metode","Software"),
Detail = c(
"Data Klinis — Body Composition & Biomarker Gallstone",
paste0(nrow(data_raw), " pasien"),
paste0(ncol(data_raw), " variabel"),
"Reduksi dimensi & identifikasi struktur laten",
"PCA + VARIMAX Rotation",
paste0("R ", R.version$major, ".", R.version$minor)
)
) %>% kb(caption = "Informasi Umum Dataset")| Informasi | Detail |
|---|---|
| Sumber | Data Klinis — Body Composition & Biomarker Gallstone |
| Observasi (raw) | 319 pasien |
| Variabel (raw) | 39 variabel |
| Tujuan | Reduksi dimensi & identifikasi struktur laten |
| Metode | PCA + VARIMAX Rotation |
| Software | R 4.5.2 |
Tujuan PCA:
Syarat Desain (Hair et al., 2019): semua variabel metrik, n ≥ 50, rasio n/p ≥ 5:1, R-type analysis.
## 1. Sample Size & Ratio
## Observations: 319
## Variables: 32
## Ratio: 9.97 :1
## Status: PASSED
data.frame(
Kriteria = c("Semua variabel numerik","Obs (n >= 50)","Rasio n/p >= 5:1"),
Nilai = c("Ya", as.character(n), sprintf("%.1f:1", n / p)),
Syarat = c("Wajib", ">= 50", ">= 5:1"),
Status = c("✅ LULUS",
ifelse(n >= 50, "✅ LULUS", "❌ GAGAL"),
ifelse(n / p >= 5, "✅ LULUS", "⚠️ Perhatian"))
) %>% kb(caption = "Design Checklist")| Kriteria | Nilai | Syarat | Status |
|---|---|---|---|
| Semua variabel numerik | Ya | Wajib | ✅ LULUS | |
| Obs (n >= 50) | 319 | >= 50 | ✅ LULUS | |
| Rasio n/p >= 5:1 | 10.0:1 | >= 5:1 | ✅ LULUS | |
## 2. Descriptive Statistics
desc <- data.frame(
Variable = colnames(data_num),
Mean = round(sapply(data_num, mean, na.rm = TRUE), 3),
SD = round(sapply(data_num, sd, na.rm = TRUE), 3),
Min = round(sapply(data_num, min, na.rm = TRUE), 3),
Max = round(sapply(data_num, max, na.rm = TRUE), 3),
Skewness = round(sapply(data_num, psych::skew), 3),
Kurtosis = round(sapply(data_num, psych::kurtosi), 3)
)
print(desc, row.names = FALSE)## Variable Mean SD Min Max
## Age 48.069 12.115 20.00 96.00
## Height 167.157 10.053 145.00 191.00
## Weight 80.565 15.709 42.90 143.50
## Body Mass Index (BMI) 28.877 5.314 17.40 49.70
## Total Body Water (TBW) 40.588 7.930 13.00 66.20
## Extracellular Water (ECW) 17.071 3.162 9.00 27.80
## Intracellular Water (ICW) 23.634 5.349 13.80 57.10
## Extracellular Fluid/Total Body Water (ECF/TBW) 42.212 3.244 29.23 52.00
## Total Body Fat Ratio (TBFR) (%) 28.275 8.444 6.30 50.92
## Lean Mass (LM) (%) 71.638 8.438 48.99 93.67
## Body Protein Content (Protein) (%) 15.939 2.335 5.56 24.81
## Visceral Fat Rating (VFR) 9.078 4.333 1.00 31.00
## Bone Mass (BM) 2.803 0.509 1.40 4.00
## Muscle Mass (MM) 54.273 10.604 4.70 78.80
## Obesity (%) 35.850 109.800 0.40 1954.00
## Total Fat Content (TFC) 23.488 9.608 3.10 62.50
## Visceral Fat Area (VFA) 12.172 5.262 0.90 41.00
## Visceral Muscle Area (VMA) (Kg) 30.403 4.461 18.90 41.10
## Hepatic Fat Accumulation (HFA) 1.150 1.059 0.00 4.00
## Glucose 108.689 44.849 69.00 575.00
## Total Cholesterol (TC) 203.495 45.759 60.00 360.00
## Low Density Lipoprotein (LDL) 126.652 38.541 11.00 293.00
## High Density Lipoprotein (HDL) 49.476 17.719 25.00 273.00
## Triglyceride 144.502 97.904 1.39 838.00
## Aspartat Aminotransferaz (AST) 21.685 16.698 8.00 195.00
## Alanin Aminotransferaz (ALT) 26.856 27.884 3.00 372.00
## Alkaline Phosphatase (ALP) 73.113 24.181 7.00 197.00
## Creatinine 0.801 0.176 0.46 1.46
## Glomerular Filtration Rate (GFR) 100.819 16.971 10.60 132.00
## C-Reactive Protein (CRP) 1.854 4.990 0.00 43.40
## Hemoglobin (HGB) 14.418 1.776 8.50 18.80
## Vitamin D 21.401 9.982 3.50 53.10
## Skewness Kurtosis
## 0.133 0.349
## -0.080 -0.701
## 0.428 0.315
## 0.663 1.133
## 0.206 -0.378
## 0.020 -0.371
## 0.943 3.468
## -0.509 1.374
## 0.133 -0.525
## -0.124 -0.507
## -0.047 1.820
## 0.790 2.134
## 0.205 -0.825
## -0.102 0.359
## 16.715 288.973
## 0.802 1.115
## 1.048 2.919
## -0.056 -0.473
## 0.174 -1.357
## 5.882 45.419
## 0.430 0.485
## 0.535 1.080
## 6.468 77.178
## 2.760 12.649
## 6.929 60.062
## 7.209 76.924
## 0.788 2.507
## 0.615 0.135
## -1.796 6.391
## 5.360 33.233
## -0.381 0.133
## 0.276 -0.238
desc %>% kb(caption = "Statistik Deskriptif Variabel Numerik") %>%
scroll_box(width = "100%", height = "400px")| Variable | Mean | SD | Min | Max | Skewness | Kurtosis | |
|---|---|---|---|---|---|---|---|
| Age | Age | 48.069 | 12.115 | 20.00 | 96.00 | 0.133 | 0.349 |
| Height | Height | 167.157 | 10.053 | 145.00 | 191.00 | -0.080 | -0.701 |
| Weight | Weight | 80.565 | 15.709 | 42.90 | 143.50 | 0.428 | 0.315 |
| Body Mass Index (BMI) | Body Mass Index (BMI) | 28.877 | 5.314 | 17.40 | 49.70 | 0.663 | 1.133 |
| Total Body Water (TBW) | Total Body Water (TBW) | 40.588 | 7.930 | 13.00 | 66.20 | 0.206 | -0.378 |
| Extracellular Water (ECW) | Extracellular Water (ECW) | 17.071 | 3.162 | 9.00 | 27.80 | 0.020 | -0.371 |
| Intracellular Water (ICW) | Intracellular Water (ICW) | 23.634 | 5.349 | 13.80 | 57.10 | 0.943 | 3.468 |
| Extracellular Fluid/Total Body Water (ECF/TBW) | Extracellular Fluid/Total Body Water (ECF/TBW) | 42.212 | 3.244 | 29.23 | 52.00 | -0.509 | 1.374 |
| Total Body Fat Ratio (TBFR) (%) | Total Body Fat Ratio (TBFR) (%) | 28.275 | 8.444 | 6.30 | 50.92 | 0.133 | -0.525 |
| Lean Mass (LM) (%) | Lean Mass (LM) (%) | 71.638 | 8.438 | 48.99 | 93.67 | -0.124 | -0.507 |
| Body Protein Content (Protein) (%) | Body Protein Content (Protein) (%) | 15.939 | 2.335 | 5.56 | 24.81 | -0.047 | 1.820 |
| Visceral Fat Rating (VFR) | Visceral Fat Rating (VFR) | 9.078 | 4.333 | 1.00 | 31.00 | 0.790 | 2.134 |
| Bone Mass (BM) | Bone Mass (BM) | 2.803 | 0.509 | 1.40 | 4.00 | 0.205 | -0.825 |
| Muscle Mass (MM) | Muscle Mass (MM) | 54.273 | 10.604 | 4.70 | 78.80 | -0.102 | 0.359 |
| Obesity (%) | Obesity (%) | 35.850 | 109.800 | 0.40 | 1954.00 | 16.715 | 288.973 |
| Total Fat Content (TFC) | Total Fat Content (TFC) | 23.488 | 9.608 | 3.10 | 62.50 | 0.802 | 1.115 |
| Visceral Fat Area (VFA) | Visceral Fat Area (VFA) | 12.172 | 5.262 | 0.90 | 41.00 | 1.048 | 2.919 |
| Visceral Muscle Area (VMA) (Kg) | Visceral Muscle Area (VMA) (Kg) | 30.403 | 4.461 | 18.90 | 41.10 | -0.056 | -0.473 |
| Hepatic Fat Accumulation (HFA) | Hepatic Fat Accumulation (HFA) | 1.150 | 1.059 | 0.00 | 4.00 | 0.174 | -1.357 |
| Glucose | Glucose | 108.689 | 44.849 | 69.00 | 575.00 | 5.882 | 45.419 |
| Total Cholesterol (TC) | Total Cholesterol (TC) | 203.495 | 45.759 | 60.00 | 360.00 | 0.430 | 0.485 |
| Low Density Lipoprotein (LDL) | Low Density Lipoprotein (LDL) | 126.652 | 38.541 | 11.00 | 293.00 | 0.535 | 1.080 |
| High Density Lipoprotein (HDL) | High Density Lipoprotein (HDL) | 49.476 | 17.719 | 25.00 | 273.00 | 6.468 | 77.178 |
| Triglyceride | Triglyceride | 144.502 | 97.904 | 1.39 | 838.00 | 2.760 | 12.649 |
| Aspartat Aminotransferaz (AST) | Aspartat Aminotransferaz (AST) | 21.685 | 16.698 | 8.00 | 195.00 | 6.929 | 60.062 |
| Alanin Aminotransferaz (ALT) | Alanin Aminotransferaz (ALT) | 26.856 | 27.884 | 3.00 | 372.00 | 7.209 | 76.924 |
| Alkaline Phosphatase (ALP) | Alkaline Phosphatase (ALP) | 73.113 | 24.181 | 7.00 | 197.00 | 0.788 | 2.507 |
| Creatinine | Creatinine | 0.801 | 0.176 | 0.46 | 1.46 | 0.615 | 0.135 |
| Glomerular Filtration Rate (GFR) | Glomerular Filtration Rate (GFR) | 100.819 | 16.971 | 10.60 | 132.00 | -1.796 | 6.391 |
| C-Reactive Protein (CRP) | C-Reactive Protein (CRP) | 1.854 | 4.990 | 0.00 | 43.40 | 5.360 | 33.233 |
| Hemoglobin (HGB) | Hemoglobin (HGB) | 14.418 | 1.776 | 8.50 | 18.80 | -0.381 | 0.133 |
| Vitamin D | Vitamin D | 21.401 | 9.982 | 3.50 | 53.10 | 0.276 | -0.238 |
Variabel dengan |Skewness| > 1 menunjukkan
distribusi menceng. PCA dengan scale = TRUE
menstandardisasi data (Z-score) sehingga perbedaan satuan tidak
memengaruhi hasil.
Syarat: > 30% pasang variabel harus memiliki |r| > 0.3.
\[r_{xy} = \frac{\sum(x_i - \bar{x})(y_i - \bar{y})}{\sqrt{\sum(x_i-\bar{x})^2} \cdot \sqrt{\sum(y_i-\bar{y})^2}}\]
## 3. Correlation Matrix
high_corr <- which(abs(mat_corr) > 0.7 & upper.tri(mat_corr), arr.ind = TRUE)
cat("Pairs with |r| > 0.70:", nrow(high_corr), "\n")## Pairs with |r| > 0.70: 39
if (nrow(high_corr) > 0) {
for (i in seq_len(min(10, nrow(high_corr)))) {
r1 <- rownames(mat_corr)[high_corr[i, 1]]
r2 <- colnames(mat_corr)[high_corr[i, 2]]
cat(sprintf(" %s <--> %s r = %.3f\n", r1, r2,
mat_corr[high_corr[i, 1], high_corr[i, 2]]))
}
if (nrow(high_corr) > 10) cat(" ... and", nrow(high_corr) - 10, "more pairs\n")
}## Weight <--> Body Mass Index (BMI) r = 0.791
## Height <--> Total Body Water (TBW) r = 0.713
## Weight <--> Total Body Water (TBW) r = 0.767
## Weight <--> Extracellular Water (ECW) r = 0.789
## Total Body Water (TBW) <--> Extracellular Water (ECW) r = 0.904
## Height <--> Intracellular Water (ICW) r = 0.739
## Total Body Water (TBW) <--> Intracellular Water (ICW) r = 0.817
## Extracellular Water (ECW) <--> Intracellular Water (ICW) r = 0.784
## Body Mass Index (BMI) <--> Total Body Fat Ratio (TBFR) (%) r = 0.753
## Body Mass Index (BMI) <--> Lean Mass (LM) (%) r = -0.752
## ... and 29 more pairs
corrplot(mat_corr,
method = "color", type = "upper",
addCoef.col = "black", number.cex = 0.45,
tl.cex = 0.60, tl.col = "black",
col = colorRampPalette(c("#053061","#2166AC","#F7F7F7","#D6604D","#67001F"))(200),
title = "Correlation Matrix",
mar = c(0, 0, 2, 0))Matriks Korelasi — Merah: positif, Biru: negatif
KMO — MSA Scale: ≥ 0.90 Marvelous | ≥ 0.80 Meritorious | ≥ 0.70 Middling | ≥ 0.60 Mediocre | ≥ 0.50 Miserable | < 0.50 Unacceptable → dibuang
Prosedur: Variabel dengan MSA < 0.50 dibuang satu per satu (terkecil dahulu), KMO dihitung ulang setiap iterasi.
## 4. KMO Test
data_iter <- data_num
dropped_vars <- character(0)
repeat {
mat_iter <- cor(data_iter, use = "complete.obs")
kmo_iter <- KMO(mat_iter)
msa_iter <- data.frame(Variable = names(kmo_iter$MSAi),
MSA = round(kmo_iter$MSAi, 4)) %>% arrange(MSA)
if (nrow(filter(msa_iter, MSA < 0.50)) == 0) break
worst <- msa_iter$Variable[1]
cat("Dropping:", worst, "(MSA =", msa_iter$MSA[1], ")\n")
dropped_vars <- c(dropped_vars, worst)
data_iter <- select(data_iter, -all_of(worst))
}## Dropping: Total Cholesterol (TC) (MSA = 0.4001 )
## Dropping: Low Density Lipoprotein (LDL) (MSA = 0.3743 )
## Dropping: Extracellular Fluid/Total Body Water (ECF/TBW) (MSA = 0.4832 )
mat_final <- cor(data_iter, use = "complete.obs")
kmo_final <- KMO(mat_final)
msa_final <- data.frame(Variable = names(kmo_final$MSAi),
MSA = round(kmo_final$MSAi, 4)) %>% arrange(MSA)
cat("\nOverall MSA:", round(kmo_final$MSA, 4), "\n\n")##
## Overall MSA: 0.8388
## Variable MSA
## Alkaline Phosphatase (ALP) 0.5054
## Age 0.5266
## C-Reactive Protein (CRP) 0.5630
## Aspartat Aminotransferaz (AST) 0.5959
## Glomerular Filtration Rate (GFR) 0.5976
## Alanin Aminotransferaz (ALT) 0.6562
## Vitamin D 0.6833
## Glucose 0.7289
## Body Protein Content (Protein) (%) 0.7578
## Height 0.7711
## Visceral Fat Rating (VFR) 0.7730
## Body Mass Index (BMI) 0.7777
## Weight 0.7881
## Visceral Fat Area (VFA) 0.8111
## Lean Mass (LM) (%) 0.8318
## Triglyceride 0.8374
## Total Body Fat Ratio (TBFR) (%) 0.8462
## Intracellular Water (ICW) 0.8492
## Obesity (%) 0.8500
## Creatinine 0.8661
## Total Body Water (TBW) 0.8753
## Extracellular Water (ECW) 0.8894
## Total Fat Content (TFC) 0.9002
## High Density Lipoprotein (HDL) 0.9145
## Hepatic Fat Accumulation (HFA) 0.9378
## Hemoglobin (HGB) 0.9390
## Muscle Mass (MM) 0.9674
## Bone Mass (BM) 0.9761
## Visceral Muscle Area (VMA) (Kg) 0.9797
low_final <- filter(msa_final, MSA < 0.50)
cat("\nStatus:", ifelse(nrow(low_final) == 0, "PASSED", "REVIEW"), "\n")##
## Status: PASSED
kmo_kat <- dplyr::case_when(
kmo_final$MSA >= 0.90 ~ "Marvelous",
kmo_final$MSA >= 0.80 ~ "Meritorious",
kmo_final$MSA >= 0.70 ~ "Middling",
kmo_final$MSA >= 0.60 ~ "Mediocre",
kmo_final$MSA >= 0.50 ~ "Miserable",
TRUE ~ "Unacceptable"
)
data.frame(
Metrik = c("Variabel Awal","Variabel Dibuang (MSA < 0.50)",
"Variabel Final","KMO Overall Final","Klasifikasi","Status"),
Nilai = c(p, length(dropped_vars), ncol(data_iter),
round(kmo_final$MSA, 4), kmo_kat,
ifelse(nrow(low_final) == 0,
"✅ PASSED — Semua MSA >= 0.50",
"⚠️ REVIEW — Ada MSA < 0.50"))
) %>% kb(caption = "Ringkasan KMO Final") %>%
row_spec(6, bold = TRUE,
color = ifelse(nrow(low_final) == 0, "darkgreen", "darkorange"))| Metrik | Nilai |
|---|---|
| Variabel Awal | 32 |
| Variabel Dibuang (MSA < 0.50) | 3 |
| Variabel Final | 29 |
| KMO Overall Final | 0.8388 |
| Klasifikasi | Meritorious |
| Status | ✅ PASSED — Semua MSA >= 0.50 | |
msa_final %>%
mutate(Kategori = dplyr::case_when(
MSA >= 0.90 ~ "Marvelous",
MSA >= 0.80 ~ "Meritorious",
MSA >= 0.70 ~ "Middling",
MSA >= 0.60 ~ "Mediocre",
MSA >= 0.50 ~ "Miserable",
TRUE ~ "⚠️ Unacceptable"
)) %>%
kb(caption = "MSA per Variabel Final (urut ascending)") %>%
scroll_box(width = "100%", height = "380px")| Variable | MSA | Kategori | |
|---|---|---|---|
| Alkaline Phosphatase (ALP) | Alkaline Phosphatase (ALP) | 0.5054 | Miserable |
| Age | Age | 0.5266 | Miserable |
| C-Reactive Protein (CRP) | C-Reactive Protein (CRP) | 0.5630 | Miserable |
| Aspartat Aminotransferaz (AST) | Aspartat Aminotransferaz (AST) | 0.5959 | Miserable |
| Glomerular Filtration Rate (GFR) | Glomerular Filtration Rate (GFR) | 0.5976 | Miserable |
| Alanin Aminotransferaz (ALT) | Alanin Aminotransferaz (ALT) | 0.6562 | Mediocre |
| Vitamin D | Vitamin D | 0.6833 | Mediocre |
| Glucose | Glucose | 0.7289 | Middling |
| Body Protein Content (Protein) (%) | Body Protein Content (Protein) (%) | 0.7578 | Middling |
| Height | Height | 0.7711 | Middling |
| Visceral Fat Rating (VFR) | Visceral Fat Rating (VFR) | 0.7730 | Middling |
| Body Mass Index (BMI) | Body Mass Index (BMI) | 0.7777 | Middling |
| Weight | Weight | 0.7881 | Middling |
| Visceral Fat Area (VFA) | Visceral Fat Area (VFA) | 0.8111 | Meritorious |
| Lean Mass (LM) (%) | Lean Mass (LM) (%) | 0.8318 | Meritorious |
| Triglyceride | Triglyceride | 0.8374 | Meritorious |
| Total Body Fat Ratio (TBFR) (%) | Total Body Fat Ratio (TBFR) (%) | 0.8462 | Meritorious |
| Intracellular Water (ICW) | Intracellular Water (ICW) | 0.8492 | Meritorious |
| Obesity (%) | Obesity (%) | 0.8500 | Meritorious |
| Creatinine | Creatinine | 0.8661 | Meritorious |
| Total Body Water (TBW) | Total Body Water (TBW) | 0.8753 | Meritorious |
| Extracellular Water (ECW) | Extracellular Water (ECW) | 0.8894 | Meritorious |
| Total Fat Content (TFC) | Total Fat Content (TFC) | 0.9002 | Marvelous |
| High Density Lipoprotein (HDL) | High Density Lipoprotein (HDL) | 0.9145 | Marvelous |
| Hepatic Fat Accumulation (HFA) | Hepatic Fat Accumulation (HFA) | 0.9378 | Marvelous |
| Hemoglobin (HGB) | Hemoglobin (HGB) | 0.9390 | Marvelous |
| Muscle Mass (MM) | Muscle Mass (MM) | 0.9674 | Marvelous |
| Bone Mass (BM) | Bone Mass (BM) | 0.9761 | Marvelous |
| Visceral Muscle Area (VMA) (Kg) | Visceral Muscle Area (VMA) (Kg) | 0.9797 | Marvelous |
ggplot(msa_final %>% mutate(
Short = abbreviate(Variable, minlength = 10),
Category = dplyr::case_when(
MSA < 0.50 ~ "< 0.50",
MSA < 0.70 ~ "0.50 - 0.70",
TRUE ~ ">= 0.70"
)
), aes(x = reorder(Short, MSA), y = MSA, fill = Category)) +
geom_bar(stat = "identity", width = 0.75) +
geom_hline(yintercept = 0.50, linetype = "dashed", color = "red", linewidth = 0.8) +
geom_hline(yintercept = 0.70, linetype = "dashed", color = "orange", linewidth = 0.8) +
geom_hline(yintercept = 0.80, linetype = "dashed", color = "green", linewidth = 0.8) +
scale_fill_manual(values = c("< 0.50" = "#D73027",
"0.50 - 0.70" = "#FEE090",
">= 0.70" = "#4575B4")) +
coord_flip() +
labs(title = "KMO Per-Variable MSA",
x = "Variable", y = "MSA", fill = "Category") +
theme_minimal(base_size = 11)MSA per Variabel — semua harus >= 0.50
KMO Overall = 0.8388 (Meritorious) ✅. 3 variabel dibuang: Total Cholesterol (TC), Low Density Lipoprotein (LDL), Extracellular Fluid/Total Body Water (ECF/TBW).
H₀: Matriks korelasi = Identitas (tidak ada korelasi) | Tolak H₀ jika p < 0.05
\[\chi^2 = -\left[(n-1) - \frac{2p+5}{6}\right] \ln|R|\]
## 5. Bartlett's Test of Sphericity
bartlett_final <- cortest.bartlett(mat_final, n = n, diag = TRUE)
cat("Chi-square:", round(bartlett_final$chisq, 3), "\n")## Chi-square: 11645.02
## df: 406
## p-value: 0e+00
## Status: PASSED
b_pass <- bartlett_final$p.value < 0.05
data.frame(
Statistik = c("Chi-square (χ²)","Degrees of Freedom",
"p-value","Level Signifikansi","Keputusan"),
Nilai = c(
round(bartlett_final$chisq, 4),
bartlett_final$df,
format(bartlett_final$p.value, scientific = TRUE),
"α = 0.05",
ifelse(b_pass,
"✅ TOLAK H₀ — Korelasi signifikan, PCA justified",
"❌ GAGAL TOLAK H₀ — Data tidak cocok untuk PCA")
)
) %>% kb(caption = "Bartlett's Test of Sphericity") %>%
row_spec(5, bold = TRUE, color = ifelse(b_pass, "darkgreen","red"))| Statistik | Nilai |
|---|---|
| Chi-square (χ²) | 11645.0164 |
| Degrees of Freedom | 406 |
| p-value | 0e+00 |
| Level Signifikansi | α = 0.05 |
| Keputusan | ✅ TOLAK H₀ — Korelasi signifikan, PCA justified | |
## Assumption Test Summary
summary_df <- data.frame(
Assumption = c(
"Sample size (n >= 50)",
"Obs-to-variable ratio (>= 5:1)",
"KMO Overall MSA (>= 0.50)",
"Per-variable MSA (all >= 0.50)",
"Bartlett's Test (p < 0.05)"
),
Result = c(
as.character(n),
paste0(round(n / p, 2), ":1"),
as.character(round(kmo_final$MSA, 4)),
paste0(nrow(low_final), " variable(s) below 0.50"),
format(bartlett_final$p.value, digits = 3, scientific = TRUE)
),
Status = c(
ifelse(n >= 50, "PASSED", "FAILED"),
ifelse((n / p) >= 5, "PASSED", "FAILED"),
ifelse(kmo_final$MSA >= 0.50, "PASSED", "FAILED"),
ifelse(nrow(low_final) == 0, "PASSED", "REVIEW"),
ifelse(bartlett_final$p.value < 0.05, "PASSED", "FAILED")
)
)
print(summary_df, row.names = FALSE)## Assumption Result Status
## Sample size (n >= 50) 319 PASSED
## Obs-to-variable ratio (>= 5:1) 9.97:1 PASSED
## KMO Overall MSA (>= 0.50) 0.8388 PASSED
## Per-variable MSA (all >= 0.50) 0 variable(s) below 0.50 PASSED
## Bartlett's Test (p < 0.05) 0e+00 PASSED
summary_df %>%
kb(caption = "Ringkasan Semua Asumsi PCA") %>%
column_spec(3, bold = TRUE,
color = ifelse(grepl("PASSED", summary_df$Status),
"darkgreen","darkorange"))| Assumption | Result | Status |
|---|---|---|
| Sample size (n >= 50) | 319 | PASSED |
| Obs-to-variable ratio (>= 5:1) | 9.97:1 | PASSED |
| KMO Overall MSA (>= 0.50) | 0.8388 | PASSED |
| Per-variable MSA (all >= 0.50) | 0 variable(s) below 0.50 | PASSED |
| Bartlett’s Test (p < 0.05) | 0e+00 | PASSED |
Kesimpulan: Seluruh asumsi PCA terpenuhi ✅ — analisis dapat dilanjutkan ke tahap PCA.
# Bridge: data_pca = data_iter (29 variabel post-KMO)
data_pca <- data_iter
p_final <- ncol(data_pca)
n_final <- nrow(data_pca)
drop_log <- dropped_varsTransformasi linear: Yi = ei1·Z1 + ei2·Z2 + … + eip·Zp
Loading: ρ(Yi, Zk) = eik × √λi di mana λi = eigenvalue ke-i
pca_result <- prcomp(data_pca, scale. = TRUE, center = TRUE)
eigenvalues <- pca_result$sdev^2
prop_var <- eigenvalues / sum(eigenvalues)
cum_var <- cumsum(prop_var)
n_kaiser <- sum(eigenvalues > 1)
n_var70 <- which(cum_var >= 0.70)[1]
n_var80 <- which(cum_var >= 0.80)[1]
n_comp <- n_kaiser
cat(sprintf("Komponen dengan eigenvalue > 1 (Kaiser Rule): %d\n", n_comp))## Komponen dengan eigenvalue > 1 (Kaiser Rule): 7
## Variansi kumulatif yang dijelaskan: 76.45%
eig_df <- data.frame(
Komponen = paste0("PC", 1:length(eigenvalues)),
Eigenvalue = round(eigenvalues, 4),
`Variansi(%)` = round(prop_var * 100, 2),
`Kumulatif(%)` = round(cum_var * 100, 2),
Keputusan = ifelse(eigenvalues > 1, "✅ Retain", "❌ Drop"),
check.names = FALSE
)
head(eig_df, 15) %>%
kb(caption = "Tabel Eigenvalue — 15 Komponen Pertama") %>%
column_spec(5, bold = TRUE,
color = ifelse(head(eig_df,15)$Keputusan == "✅ Retain",
"darkgreen","red"))| Komponen | Eigenvalue | Variansi(%) | Kumulatif(%) | Keputusan |
|---|---|---|---|---|
| PC1 | 8.7859 | 30.30 | 30.30 | ✅ Retain | |
| PC2 | 6.1447 | 21.19 | 51.48 | ✅ Retain | |
| PC3 | 2.0819 | 7.18 | 58.66 | ✅ Retain | |
| PC4 | 1.6823 | 5.80 | 64.46 | ✅ Retain | |
| PC5 | 1.3655 | 4.71 | 69.17 | ✅ Retain | |
| PC6 | 1.1053 | 3.81 | 72.98 | ✅ Retain | |
| PC7 | 1.0051 | 3.47 | 76.45 | ✅ Retain | |
| PC8 | 0.9758 | 3.36 | 79.82 | ❌ Drop | |
| PC9 | 0.8816 | 3.04 | 82.86 | ❌ Drop | |
| PC10 | 0.7620 | 2.63 | 85.48 | ❌ Drop | |
| PC11 | 0.7099 | 2.45 | 87.93 | ❌ Drop | |
| PC12 | 0.6177 | 2.13 | 90.06 | ❌ Drop | |
| PC13 | 0.5336 | 1.84 | 91.90 | ❌ Drop | |
| PC14 | 0.4492 | 1.55 | 93.45 | ❌ Drop | |
| PC15 | 0.3856 | 1.33 | 94.78 | ❌ Drop | |
data.frame(
Kriteria = c("Kaiser's Rule (eigenvalue > 1)",
"Variansi Kumulatif >= 70%",
"Variansi Kumulatif >= 80%"),
N_Komponen = c(n_kaiser, n_var70, n_var80),
Kumulatif = c(sprintf("%.2f%%", cum_var[n_kaiser]*100),
sprintf("%.2f%%", cum_var[n_var70]*100),
sprintf("%.2f%%", cum_var[n_var80]*100)),
Peran = c("✅ Kriteria UTAMA","— Pendukung","— Pendukung")
) %>% kb(caption = "Perbandingan Kriteria Retensi") %>%
row_spec(1, bold = TRUE, background = "#EBF5FB")| Kriteria | N_Komponen | Kumulatif | Peran |
|---|---|---|---|
| Kaiser’s Rule (eigenvalue > 1) | 7 | 76.45% | ✅ Kriteria UTAMA | |
| Variansi Kumulatif >= 70% | 6 | 72.98% | — Pendukung |
| Variansi Kumulatif >= 80% | 9 | 82.86% | — Pendukung |
fviz_eig(pca_result, ncp = p_final, addlabels = TRUE,
barfill = "#3498DB", barcolor = "#2980B9",
linecolor = "#E74C3C") +
geom_hline(yintercept = 100 / p_final,
linetype = "dashed", color = "#E74C3C", linewidth = 0.9) +
annotate("text",
x = n_comp + 0.8,
y = eigenvalues[n_comp] / sum(eigenvalues) * 100 + 1.5,
label = sprintf("Retain PC1-PC%d\n(%.2f%%)", n_comp, cum_var[n_comp]*100),
color = "#1A5276", size = 3.5, hjust = 0) +
labs(title = "Scree Plot PCA — Gallstone Dataset",
subtitle = sprintf("Kaiser: %d komponen | Kumulatif: %.2f%%",
n_comp, cum_var[n_comp]*100)) +
theme_minimal(base_size = 12) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))Scree Plot — Garis putus = rata-rata eigenvalue (Kaiser Rule)
Keputusan: 7 principal component dipertahankan (PC1–PC7), menjelaskan total 76.45% variansi dari 29 variabel.
loadings_mat <- pca_result$rotation[,1:n_comp] %*% diag(sqrt(eigenvalues[1:n_comp]))
colnames(loadings_mat) <- paste0("PC", 1:n_comp)
h2_u <- rowSums(loadings_mat^2)
load_df <- as.data.frame(round(loadings_mat, 3))
load_df$h2 <- round(h2_u, 3)
load_df <- load_df %>% arrange(desc(h2))
low_h2_vars <- rownames(load_df)[load_df$h2 < 0.50]load_df %>%
kb(caption = "Unrotated Loading Matrix + Communality (h²)") %>%
column_spec(n_comp + 2, bold = TRUE,
color = ifelse(load_df$h2 >= 0.70, "darkgreen",
ifelse(load_df$h2 >= 0.50, "#E67E22", "red"))) %>%
scroll_box(width = "100%", height = "450px")| PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | h2 | |
|---|---|---|---|---|---|---|---|---|
| Weight | 0.894 | -0.382 | -0.111 | 0.100 | 0.047 | 0.061 | -0.003 | 0.973 |
| Lean Mass (LM) (%) | 0.022 | 0.965 | 0.066 | 0.013 | 0.019 | -0.025 | -0.028 | 0.939 |
| Muscle Mass (MM) | 0.930 | 0.236 | -0.061 | 0.093 | 0.051 | 0.050 | -0.030 | 0.939 |
| Total Body Fat Ratio (TBFR) (%) | -0.017 | -0.964 | -0.069 | -0.010 | -0.009 | 0.019 | 0.021 | 0.936 |
| Total Fat Content (TFC) | 0.383 | -0.875 | -0.105 | 0.042 | 0.020 | 0.046 | 0.014 | 0.929 |
| Alanin Aminotransferaz (ALT) | 0.374 | 0.156 | -0.242 | -0.830 | 0.086 | 0.061 | -0.019 | 0.923 |
| Body Mass Index (BMI) | 0.523 | -0.801 | -0.034 | 0.008 | 0.032 | -0.001 | 0.015 | 0.918 |
| Visceral Fat Area (VFA) | 0.673 | -0.666 | -0.073 | 0.071 | 0.059 | 0.039 | 0.012 | 0.912 |
| Total Body Water (TBW) | 0.923 | 0.196 | -0.048 | 0.080 | 0.073 | 0.008 | 0.001 | 0.904 |
| Aspartat Aminotransferaz (AST) | 0.271 | 0.154 | -0.268 | -0.848 | 0.124 | -0.024 | -0.023 | 0.904 |
| Extracellular Water (ECW) | 0.919 | 0.054 | -0.035 | 0.056 | 0.125 | -0.097 | 0.046 | 0.879 |
| Visceral Fat Rating (VFR) | 0.711 | -0.485 | 0.320 | -0.074 | 0.130 | -0.092 | 0.022 | 0.875 |
| Intracellular Water (ICW) | 0.860 | 0.302 | -0.119 | 0.099 | 0.022 | 0.055 | -0.034 | 0.859 |
| Height | 0.643 | 0.592 | -0.137 | 0.155 | 0.015 | 0.129 | -0.018 | 0.824 |
| Visceral Muscle Area (VMA) (Kg) | 0.860 | 0.220 | -0.032 | 0.071 | 0.027 | 0.087 | -0.021 | 0.803 |
| Glomerular Filtration Rate (GFR) | -0.084 | 0.116 | -0.818 | 0.109 | -0.219 | -0.172 | 0.116 | 0.794 |
| Bone Mass (BM) | 0.848 | 0.213 | -0.040 | 0.121 | 0.064 | -0.032 | -0.012 | 0.785 |
| Age | 0.019 | -0.330 | 0.749 | -0.208 | 0.169 | -0.093 | 0.003 | 0.751 |
| Obesity (%) | 0.044 | -0.208 | 0.005 | -0.097 | -0.223 | -0.245 | -0.756 | 0.736 |
| Creatinine | 0.448 | 0.454 | 0.508 | -0.019 | 0.115 | 0.090 | -0.111 | 0.698 |
| Body Protein Content (Protein) (%) | -0.021 | 0.722 | 0.271 | -0.011 | -0.024 | 0.155 | -0.080 | 0.626 |
| Vitamin D | -0.010 | 0.144 | 0.151 | 0.017 | 0.397 | -0.496 | 0.422 | 0.626 |
| Triglyceride | 0.431 | 0.026 | 0.180 | -0.076 | -0.588 | -0.088 | 0.084 | 0.584 |
| Glucose | 0.201 | -0.085 | 0.345 | -0.049 | -0.611 | -0.168 | 0.074 | 0.575 |
| Hemoglobin (HGB) | 0.580 | 0.438 | 0.061 | -0.054 | -0.120 | -0.109 | -0.009 | 0.561 |
| Alkaline Phosphatase (ALP) | 0.065 | -0.150 | 0.144 | -0.301 | -0.347 | 0.360 | 0.397 | 0.546 |
| C-Reactive Protein (CRP) | -0.033 | -0.200 | 0.052 | 0.081 | 0.078 | 0.680 | -0.066 | 0.524 |
| Hepatic Fat Accumulation (HFA) | 0.525 | -0.386 | 0.059 | -0.051 | -0.087 | -0.159 | -0.046 | 0.466 |
| High Density Lipoprotein (HDL) | -0.410 | -0.214 | 0.000 | -0.057 | 0.354 | 0.041 | -0.195 | 0.382 |
load_df %>% select(-h2) %>%
rownames_to_column("Variable") %>%
pivot_longer(-Variable, names_to = "PC", values_to = "Loading") %>%
ggplot(aes(x = PC, y = Variable, fill = Loading)) +
geom_tile(color = "white") +
geom_text(aes(label = sprintf("%.2f", Loading)), size = 2.5) +
scale_fill_gradient2(low = "#2166AC", mid = "white", high = "#D6604D",
midpoint = 0, limits = c(-1,1)) +
labs(title = "Heatmap Component Loadings (Unrotated)",
x = "Principal Component", y = NULL) +
theme_minimal(base_size = 9) +
theme(axis.text.y = element_text(size = 7))Heatmap Loading Unrotated
VARIMAX (ortogonal): Memaksimalkan variansi loading per kolom. Total variansi tidak berubah — hanya redistribusi antar faktor untuk interpretasi lebih mudah.
pca_rot <- principal(data_pca, nfactors = n_comp,
rotate = "varimax", scores = TRUE)
rot_load <- unclass(pca_rot$loadings)[, 1:n_comp]
colnames(rot_load) <- paste0("RC", 1:n_comp)
rot_df <- as.data.frame(round(rot_load, 3))
rot_df$h2 <- round(pca_rot$communality, 3)rot_df %>%
kb(caption = "Rotated Component Matrix (VARIMAX) + Communality") %>%
scroll_box(width = "100%", height = "450px")| RC1 | RC2 | RC3 | RC4 | RC5 | RC6 | RC7 | h2 | |
|---|---|---|---|---|---|---|---|---|
| Age | -0.161 | 0.216 | 0.807 | 0.008 | 0.106 | -0.126 | 0.002 | 0.751 |
| Height | 0.825 | -0.339 | -0.144 | 0.037 | 0.022 | 0.062 | -0.050 | 0.824 |
| Weight | 0.733 | 0.649 | 0.027 | 0.045 | 0.073 | 0.083 | 0.010 | 0.973 |
| Body Mass Index (BMI) | 0.231 | 0.917 | 0.115 | 0.020 | 0.066 | 0.066 | 0.033 | 0.918 |
| Total Body Water (TBW) | 0.934 | 0.115 | 0.030 | 0.100 | 0.079 | -0.035 | -0.011 | 0.904 |
| Extracellular Water (ECW) | 0.879 | 0.258 | 0.065 | 0.108 | 0.063 | -0.143 | -0.017 | 0.879 |
| Intracellular Water (ICW) | 0.916 | -0.001 | -0.065 | 0.095 | 0.076 | 0.024 | 0.009 | 0.859 |
| Total Body Fat Ratio (TBFR) (%) | -0.319 | 0.903 | 0.041 | -0.053 | -0.005 | 0.116 | 0.028 | 0.936 |
| Lean Mass (LM) (%) | 0.326 | -0.902 | -0.041 | 0.052 | -0.006 | -0.121 | -0.021 | 0.939 |
| Body Protein Content (Protein) (%) | 0.199 | -0.737 | 0.180 | 0.004 | 0.053 | 0.086 | -0.021 | 0.626 |
| Visceral Fat Rating (VFR) | 0.471 | 0.640 | 0.459 | 0.074 | 0.135 | -0.092 | 0.020 | 0.875 |
| Bone Mass (BM) | 0.876 | 0.077 | 0.015 | 0.046 | 0.073 | -0.066 | 0.014 | 0.785 |
| Muscle Mass (MM) | 0.958 | 0.076 | 0.011 | 0.095 | 0.082 | 0.015 | 0.006 | 0.939 |
| Obesity (%) | -0.051 | 0.151 | 0.039 | 0.059 | 0.090 | 0.069 | 0.832 | 0.736 |
| Total Fat Content (TFC) | 0.093 | 0.949 | 0.038 | -0.021 | 0.021 | 0.126 | 0.022 | 0.929 |
| Visceral Fat Area (VFA) | 0.432 | 0.842 | 0.078 | 0.007 | 0.044 | 0.086 | 0.012 | 0.912 |
| Visceral Muscle Area (VMA) (Kg) | 0.881 | 0.061 | 0.032 | 0.095 | 0.100 | 0.052 | -0.012 | 0.803 |
| Hepatic Fat Accumulation (HFA) | 0.328 | 0.516 | 0.123 | 0.071 | 0.212 | -0.082 | 0.142 | 0.466 |
| Glucose | 0.041 | 0.076 | 0.150 | -0.074 | 0.727 | -0.050 | 0.098 | 0.575 |
| High Density Lipoprotein (HDL) | -0.401 | 0.072 | 0.124 | 0.010 | -0.433 | 0.042 | 0.107 | 0.382 |
| Triglyceride | 0.298 | 0.064 | 0.025 | 0.040 | 0.697 | 0.009 | 0.060 | 0.584 |
| Aspartat Aminotransferaz (AST) | 0.155 | -0.023 | -0.037 | 0.934 | -0.024 | -0.065 | 0.026 | 0.904 |
| Alanin Aminotransferaz (ALT) | 0.251 | -0.004 | -0.012 | 0.927 | 0.024 | 0.019 | 0.000 | 0.923 |
| Alkaline Phosphatase (ALP) | -0.098 | 0.124 | 0.096 | 0.228 | 0.446 | 0.293 | -0.419 | 0.546 |
| Creatinine | 0.540 | -0.368 | 0.512 | 0.016 | 0.086 | 0.018 | 0.024 | 0.698 |
| Glomerular Filtration Rate (GFR) | -0.007 | -0.003 | -0.881 | 0.060 | -0.017 | -0.120 | 0.001 | 0.794 |
| C-Reactive Protein (CRP) | -0.033 | 0.122 | 0.133 | -0.078 | -0.173 | 0.649 | -0.182 | 0.524 |
| Hemoglobin (HGB) | 0.635 | -0.237 | 0.030 | 0.145 | 0.248 | -0.125 | 0.055 | 0.561 |
| Vitamin D | 0.033 | -0.070 | 0.159 | -0.028 | -0.162 | -0.692 | -0.296 | 0.626 |
data.frame(
Unrotated = paste0("PC", 1:n_comp),
Var_Unrot = paste0(round(prop_var[1:n_comp]*100, 2), "%"),
Rotated = paste0("RC", 1:n_comp),
Var_Rot = paste0(round(pca_rot$Vaccounted[2, 1:n_comp]*100, 2), "%"),
Kumulatif = paste0(round(pca_rot$Vaccounted[3, 1:n_comp]*100, 2), "%")
) %>% kb(caption = "Perbandingan Variansi Unrotated vs Rotated (Total tidak berubah)")| Unrotated | Var_Unrot | Rotated | Var_Rot | Kumulatif |
|---|---|---|---|---|
| PC1 | 30.3% | RC1 | 27.8% | 27.8% |
| PC2 | 21.19% | RC2 | 21.52% | 49.32% |
| PC3 | 7.18% | RC3 | 7.24% | 56.56% |
| PC4 | 5.8% | RC4 | 6.54% | 63.1% |
| PC5 | 4.71% | RC5 | 5.73% | 68.83% |
| PC6 | 3.81% | RC6 | 4.02% | 72.84% |
| PC7 | 3.47% | RC7 | 3.61% | 76.45% |
rot_df %>% select(-h2) %>%
rownames_to_column("Variable") %>%
pivot_longer(-Variable, names_to = "RC", values_to = "Loading") %>%
ggplot(aes(x = RC, y = Variable, fill = Loading)) +
geom_tile(color = "white") +
geom_text(aes(label = ifelse(abs(Loading) >= 0.30,
sprintf("%.2f", Loading), "")),
size = 2.5) +
scale_fill_gradient2(low = "#2166AC", mid = "white", high = "#D6604D",
midpoint = 0, limits = c(-1,1)) +
labs(title = "Heatmap Rotated Component Loadings (VARIMAX)",
subtitle = "Teks jika |loading| >= 0.30",
x = "Rotated Component", y = NULL) +
theme_minimal(base_size = 9) +
theme(axis.text.y = element_text(size = 7))Heatmap VARIMAX — teks jika |loading| >= 0.30
dom_list <- lapply(1:n_comp, function(k) {
ld <- rot_load[, k]
dom <- sort(ld[abs(ld) > 0.40], decreasing = TRUE)
if (length(dom) > 0)
data.frame(Faktor = paste0("RC", k), Variabel = names(dom),
Loading = round(dom, 4))
})
bind_rows(dom_list) %>%
kb(caption = "Variabel Dominan per Faktor (|loading| > 0.40)") %>%
scroll_box(width = "100%", height = "400px")| Faktor | Variabel | Loading | |
|---|---|---|---|
| Muscle Mass (MM) | RC1 | Muscle Mass (MM) | 0.9577 |
| Total Body Water (TBW) | RC1 | Total Body Water (TBW) | 0.9341 |
| Intracellular Water (ICW) | RC1 | Intracellular Water (ICW) | 0.9160 |
| Visceral Muscle Area (VMA) (Kg) | RC1 | Visceral Muscle Area (VMA) (Kg) | 0.8809 |
| Extracellular Water (ECW) | RC1 | Extracellular Water (ECW) | 0.8787 |
| Bone Mass (BM) | RC1 | Bone Mass (BM) | 0.8760 |
| Height | RC1 | Height | 0.8245 |
| Weight…8 | RC1 | Weight | 0.7331 |
| Hemoglobin (HGB) | RC1 | Hemoglobin (HGB) | 0.6349 |
| Creatinine…10 | RC1 | Creatinine | 0.5403 |
| Visceral Fat Rating (VFR)…11 | RC1 | Visceral Fat Rating (VFR) | 0.4712 |
| Visceral Fat Area (VFA)…12 | RC1 | Visceral Fat Area (VFA) | 0.4320 |
| High Density Lipoprotein (HDL)…13 | RC1 | High Density Lipoprotein (HDL) | -0.4015 |
| Total Fat Content (TFC) | RC2 | Total Fat Content (TFC) | 0.9495 |
| Body Mass Index (BMI) | RC2 | Body Mass Index (BMI) | 0.9170 |
| Total Body Fat Ratio (TBFR) (%) | RC2 | Total Body Fat Ratio (TBFR) (%) | 0.9031 |
| Visceral Fat Area (VFA)…17 | RC2 | Visceral Fat Area (VFA) | 0.8422 |
| Weight…18 | RC2 | Weight | 0.6487 |
| Visceral Fat Rating (VFR)…19 | RC2 | Visceral Fat Rating (VFR) | 0.6398 |
| Hepatic Fat Accumulation (HFA) | RC2 | Hepatic Fat Accumulation (HFA) | 0.5162 |
| Body Protein Content (Protein) (%) | RC2 | Body Protein Content (Protein) (%) | -0.7370 |
| Lean Mass (LM) (%) | RC2 | Lean Mass (LM) (%) | -0.9017 |
| Age | RC3 | Age | 0.8073 |
| Creatinine…24 | RC3 | Creatinine | 0.5121 |
| Visceral Fat Rating (VFR)…25 | RC3 | Visceral Fat Rating (VFR) | 0.4591 |
| Glomerular Filtration Rate (GFR) | RC3 | Glomerular Filtration Rate (GFR) | -0.8805 |
| Aspartat Aminotransferaz (AST) | RC4 | Aspartat Aminotransferaz (AST) | 0.9343 |
| Alanin Aminotransferaz (ALT) | RC4 | Alanin Aminotransferaz (ALT) | 0.9266 |
| Glucose | RC5 | Glucose | 0.7266 |
| Triglyceride | RC5 | Triglyceride | 0.6969 |
| Alkaline Phosphatase (ALP)…31 | RC5 | Alkaline Phosphatase (ALP) | 0.4459 |
| High Density Lipoprotein (HDL)…32 | RC5 | High Density Lipoprotein (HDL) | -0.4327 |
| C-Reactive Protein (CRP) | RC6 | C-Reactive Protein (CRP) | 0.6490 |
| Vitamin D | RC6 | Vitamin D | -0.6924 |
| Obesity (%) | RC7 | Obesity (%) | 0.8323 |
| Alkaline Phosphatase (ALP)…36 | RC7 | Alkaline Phosphatase (ALP) | -0.4189 |
fviz_pca_biplot(pca_result,
axes = c(1,2), geom.ind = "point",
col.ind = "grey60", alpha.ind = 0.3,
col.var = "contrib",
gradient.cols = c("#00AFBB","#E7B800","#FC4E07"),
repel = TRUE, label = "var",
arrowsize = 0.8, labelsize = 3.2) +
labs(title = "Biplot PCA: PC1 vs PC2",
subtitle = sprintf("PC1=%.1f%% | PC2=%.1f%% | Total=%.1f%%",
prop_var[1]*100, prop_var[2]*100,
(prop_var[1]+prop_var[2])*100)) +
theme_minimal(base_size = 12) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))Biplot PCA — panjang & arah panah = korelasi variabel dengan PC
if (n_comp >= 3) {
fviz_pca_biplot(pca_result, axes = c(1,3),
geom.ind = "point", col.ind = "grey60", alpha.ind = 0.3,
col.var = "contrib",
gradient.cols = c("#00AFBB","#E7B800","#FC4E07"),
repel = TRUE, label = "var") +
labs(title = "Biplot PCA: PC1 vs PC3",
subtitle = sprintf("PC1=%.1f%% | PC3=%.1f%%",
prop_var[1]*100, prop_var[3]*100)) +
theme_minimal(base_size = 12)
}Biplot PCA — PC1 vs PC3
Data dibagi 50:50 secara acak. Selisih variansi per faktor < 5% → hasil stabil.
set.seed(42)
idx_split <- sample(1:nrow(data_pca), nrow(data_pca)/2)
data_s1 <- data_pca[ idx_split, ]
data_s2 <- data_pca[-idx_split, ]
fa_s1 <- principal(data_s1, nfactors = n_comp, rotate = "varimax")
fa_s2 <- principal(data_s2, nfactors = n_comp, rotate = "varimax")
n_show <- min(n_comp, 5)
var_s1 <- round(fa_s1$Vaccounted[2, 1:n_show]*100, 2)
var_s2 <- round(fa_s2$Vaccounted[2, 1:n_show]*100, 2)
sel_s <- round(abs(var_s1 - var_s2), 2)
data.frame(
Faktor = paste0("RC", 1:n_show),
`Sampel1` = paste0(var_s1, "%"),
`Sampel2` = paste0(var_s2, "%"),
Selisih = paste0(sel_s, "%"),
Konsisten = ifelse(sel_s < 5, "✅ Ya", "❌ Tidak"),
check.names = FALSE
) %>%
kb(caption = sprintf("Validasi Split-Sample (n1=%d vs n2=%d)",
nrow(data_s1), nrow(data_s2))) %>%
column_spec(5, bold = TRUE, color = ifelse(sel_s < 5, "darkgreen","red"))| Faktor | Sampel1 | Sampel2 | Selisih | Konsisten | |
|---|---|---|---|---|---|
| RC1 | RC1 | 27.55% | 27.96% | 0.41% | ✅ Ya | |
| RC2 | RC2 | 25.22% | 20.8% | 4.42% | ✅ Ya | |
| RC3 | RC3 | 7.12% | 7.6% | 0.48% | ✅ Ya | |
| RC4 | RC4 | 6.59% | 6.75% | 0.16% | ✅ Ya | |
| RC5 | RC5 | 5.31% | 6.61% | 1.3% | ✅ Ya | |
Selisih terbesar = 4.42%. Hasil konsisten ✅ (semua selisih < 5%).
pc_scores <- as.data.frame(pca_result$x[, 1:n_comp])
colnames(pc_scores) <- paste0("PC", 1:n_comp)
fa_scores <- as.data.frame(pca_rot$scores)
colnames(fa_scores) <- paste0("Factor_", 1:n_comp)
cat("Verifikasi independensi PC Scores (harus ≈ 0):\n")## Verifikasi independensi PC Scores (harus ≈ 0):
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## PC1 1 0 0 0 0 0 0
## PC2 0 1 0 0 0 0 0
## PC3 0 0 1 0 0 0 0
## PC4 0 0 0 1 0 0 0
## PC5 0 0 0 0 1 0 0
## PC6 0 0 0 0 0 1 0
## PC7 0 0 0 0 0 0 1
write.csv(cbind(data_pca, pc_scores), "OUTPUT_PCA_scores.csv", row.names = FALSE)
write.csv(cbind(data_pca, fa_scores), "OUTPUT_Factor_scores.csv", row.names = FALSE)
cat("File output berhasil disimpan.\n")## File output berhasil disimpan.
kmo_kat_f <- dplyr::case_when(
kmo_final$MSA >= 0.90 ~ "Marvelous",
kmo_final$MSA >= 0.80 ~ "Meritorious",
kmo_final$MSA >= 0.70 ~ "Middling",
kmo_final$MSA >= 0.60 ~ "Mediocre",
kmo_final$MSA >= 0.50 ~ "Miserable",
TRUE ~ "Unacceptable"
)
data.frame(
Metrik = c(
"Dataset","Observasi (n)","Variabel Awal (numerik)","Variabel Dibuang (KMO)",
"Variabel Final PCA","KMO Overall (Final)","Bartlett p-value",
"Komponen Dipertahankan","Variansi Total Dijelaskan","Validasi Split-Sample"
),
Nilai = c(
"Body Composition & Biomarker — Gallstone",
as.character(n), as.character(p),
ifelse(length(drop_log)>0,
paste0(length(drop_log)," var: ",paste(drop_log,collapse=", ")),
"Tidak ada (semua MSA >= 0.50)"),
as.character(p_final),
sprintf("%.4f (%s)", kmo_final$MSA, kmo_kat_f),
format(bartlett_final$p.value, digits=3, scientific=TRUE),
sprintf("%d komponen (PC1-PC%d)", n_comp, n_comp),
sprintf("%.2f%%", cum_var[n_comp]*100),
ifelse(max(sel_s)<5, "Konsisten ✅","Ada perbedaan >5% ⚠️")
)
) %>%
kb(caption = "Ringkasan Akhir Hasil Analisis PCA") %>%
column_spec(1, bold = TRUE, width = "22em") %>%
row_spec(c(8,9,10), background = "#EBF5FB", bold = TRUE)| Metrik | Nilai |
|---|---|
| Dataset | Body Composition & Biomarker — Gallstone |
| Observasi (n) | 319 |
| Variabel Awal (numerik) | 32 |
| Variabel Dibuang (KMO) | 3 var: Total Cholesterol (TC), Low Density Lipoprotein (LDL), Extracellular Fluid/Total Body Water (ECF/TBW) |
| Variabel Final PCA | 29 |
| KMO Overall (Final) | 0.8388 (Meritorious) |
| Bartlett p-value | 0e+00 |
| Komponen Dipertahankan | 7 komponen (PC1-PC7) |
| Variansi Total Dijelaskan | 76.45% |
| Validasi Split-Sample | Konsisten ✅ | |
data.frame(
Komponen = paste0("PC", 1:n_comp),
Eigenvalue = round(eigenvalues[1:n_comp], 4),
`Var(%)` = round(prop_var[1:n_comp]*100, 2),
`Kumulatif(%)` = round(cum_var[1:n_comp]*100, 2),
check.names = FALSE
) %>% kb(caption = "Variansi per Komponen yang Dipertahankan")| Komponen | Eigenvalue | Var(%) | Kumulatif(%) |
|---|---|---|---|
| PC1 | 8.7859 | 30.30 | 30.30 |
| PC2 | 6.1447 | 21.19 | 51.48 |
| PC3 | 2.0819 | 7.18 | 58.66 |
| PC4 | 1.6823 | 5.80 | 64.46 |
| PC5 | 1.3655 | 4.71 | 69.17 |
| PC6 | 1.1053 | 3.81 | 72.98 |
| PC7 | 1.0051 | 3.47 | 76.45 |
KMO = 0.839 (Meritorious) ✅, Bartlett p ≈ 0 ✅, semua asumsi PASSED. 3 variabel dibuang via KMO iteratif → 29 variabel final.
7 komponen menjelaskan 76.45% variansi dari 29 variabel.
Validasi split-sample: selisih maks 4.42% → hasil stabil ✅.
Tanggal Analisis: 28 February 2026
Analisis Multivariat — PCA Gallstone Dataset