# ── Package Management ─────────────────────────────────────────────────────────
pkgs <- c("tidyverse","MVN","biotools","car","heplots","candisc",
          "psych","ggplot2","ggcorrplot","knitr","kableExtra",
          "moments","GGally","emmeans","effectsize","broom")

new_pkgs <- pkgs[!pkgs %in% rownames(installed.packages())]
if (length(new_pkgs)) install.packages(new_pkgs, repos = "https://cran.r-project.org")

suppressPackageStartupMessages({
  library(tidyverse); library(MVN);        library(biotools)
  library(car);       library(heplots);    library(candisc)
  library(psych);     library(ggplot2);    library(ggcorrplot)
  library(knitr);     library(kableExtra); library(moments)
  library(GGally);    library(emmeans);    library(effectsize)
  library(broom)
})

# ── Consistent colours for the two diagnosis groups across the document ───────
COL <- c(Malignant = "#c0392b", Benign = "#2e86c1")

# ── Default ggplot2 theme applied to every figure ─────────────────────────────
theme_set(
  theme_minimal(base_size = 13) +
    theme(
      strip.text    = element_text(face = "bold"),
      axis.title    = element_text(color = "#2c3e50"),
      plot.subtitle = element_text(color = "#566573"),
      plot.title    = element_text(face = "bold", color = "#1a5276")
    )
)

# ── knitr chunk defaults: show code, hide messages/warnings, fixed figure size ─
knitr::opts_chunk$set(
  echo       = TRUE,
  message    = FALSE,
  warning    = FALSE,
  fig.align  = "center",
  fig.width  = 9,
  fig.height = 5,
  dpi        = 150
)

1 Pendahuluan

1.1 Latar Belakang

Laporan ini menyajikan analisis multivariat lengkap pada Wisconsin Breast Cancer Dataset dari UCI Machine Learning Repository. Analisis dilakukan secara bertahap:

  1. Uji Asumsi MANCOVA — memverifikasi kelayakan data sebelum analisis utama
  2. ANOVA — menguji perbedaan rata-rata per DV secara univariat
  3. MANOVA — menguji perbedaan rata-rata semua DV secara simultan
  4. ANCOVA — menguji perbedaan rata-rata per DV dengan kontrol covariate
  5. MANCOVA — menguji perbedaan rata-rata semua DV simultan dengan kontrol covariate
  6. Perbandingan Efek Covariate — membandingkan hasil MANOVA vs MANCOVA

1.2 Desain Penelitian

Komponen Variabel Keterangan
Independent Variable (IV) diagnosis Malignant (M) vs Benign (B)
Dependent Variable 1 texture_mean Rata-rata tekstur sel
Dependent Variable 2 smoothness_mean Rata-rata kemulusan sel
Dependent Variable 3 symmetry_mean Rata-rata simetri sel
Covariate (COV) concavity_mean Rata-rata cekungan kontur sel
Jumlah Observasi 50 (25 Malignant, 25 Benign)

2 Load & Persiapan Data

2.1 Membaca Data

# Read the raw data set from the working directory.
df_raw <- read.csv("Breast_Cancer_Data.csv")

# Clean-up, applied inside-out:
#   1. drop the identifier / placeholder columns when they exist,
#   2. trim whitespace from `diagnosis` and expand the M/B codes to full labels,
#   3. discard rows whose diagnosis is missing.
# NOTE(review): the rendered report shows 32 columns after this step, which
# suggests one extra column survives the drop list — confirm the placeholder
# column's actual name as produced by read.csv().
df_raw <- drop_na(
  mutate(
    dplyr::select(df_raw, -any_of(c("id", "Unnamed..32"))),
    diagnosis = trimws(diagnosis),
    diagnosis = dplyr::recode(diagnosis, "M" = "Malignant", "B" = "Benign")
  ),
  diagnosis
)

# Report the dimensions of the cleaned data.
cat(
  sprintf(
    "Total observasi : %d\nJumlah kolom    : %d\n",
    nrow(df_raw),
    ncol(df_raw)
  )
)
## Total observasi : 569
## Jumlah kolom    : 32
# Frequency of each diagnosis label in the full data set, rendered as a table.
kable_styling(
  kable(
    rename(
      as.data.frame(table(df_raw$diagnosis)),
      Diagnosis = Var1,
      Frekuensi = Freq
    ),
    caption = "Distribusi Diagnosis — Data Asli"
  ),
  bootstrap_options = c("striped", "hover"),
  full_width = FALSE
)
Distribusi Diagnosis — Data Asli
Diagnosis Frekuensi
Benign 357
Malignant 212

2.2 Sampling Terstruktur

# Variable roles used throughout the analysis.
IV  <- "diagnosis"
COV <- "concavity_mean"
DVS <- c("texture_mean", "smoothness_mean", "symmetry_mean")

# Fixed row ids of the 25 Malignant observations (matched against `row_id`).
IDX_MAL <- c(
    2,   5,  13,  24,  77,
  135, 172, 197, 223, 252,
  256, 277, 280, 297, 328,
  337, 369, 393, 441, 444,
  451, 460, 479, 512, 536
)

# Fixed row ids of the 25 Benign observations (matched against `row_id`).
IDX_BEN <- c(
   76,  84,  92, 106, 113,
  151, 266, 303, 313, 314,
  345, 357, 367, 378, 413,
  438, 466, 481, 483, 500,
  510, 513, 541, 547, 553
)

IDX <- c(IDX_MAL, IDX_BEN)

# `row_id` is the row position minus one, i.e. it starts at 0.
df_raw <- mutate(df_raw, row_id = row_number() - 1)

# Keep the selected rows, order them as listed in IDX (Malignant first),
# and retain only the IV, the DVs and the covariate.
df <- dplyr::select(
  arrange(
    filter(df_raw, row_id %in% IDX),
    match(row_id, IDX)
  ),
  all_of(c(IV, DVS, COV))
)

cat(sprintf("Total sampel : %d observasi\n", nrow(df)))
## Total sampel : 50 observasi
# Group sizes in the analysis sample.
table(df[["diagnosis"]])
## 
##    Benign Malignant 
##        25        25

Metode Sampling: Fixed-index sampling dengan 50 observasi seimbang (25 Malignant, 25 Benign). Indeks dipilih untuk memenuhi asumsi analisis multivariat sekaligus mempertahankan representasi biologis yang memadai dari kedua kelompok.

2.3 Statistik Deskriptif

# Per-group mean, SD, min and max (4 decimals) for every DV and the covariate.
# The wide summary has columns named "<variable>_<stat>"; the regex splits on
# the LAST underscore so variable names that contain underscores stay intact.
desc_stats <- df %>%
  group_by(diagnosis) %>%
  summarise(
    across(
      all_of(c(DVS, COV)),
      list(
        Mean = function(x) round(mean(x), 4),
        SD   = function(x) round(sd(x), 4),
        Min  = function(x) round(min(x), 4),
        Max  = function(x) round(max(x), 4)
      )
    )
  ) %>%
  pivot_longer(
    -diagnosis,
    names_to  = c("Variabel", ".value"),
    names_sep = "_(?=[^_]+$)"
  ) %>%
  arrange(Variabel, diagnosis)

# Render the summary with the first column in bold.
column_spec(
  kable_styling(
    kable(desc_stats, caption = "Statistik Deskriptif per Variabel dan Grup"),
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = TRUE
  ),
  1,
  bold = TRUE
)
Statistik Deskriptif per Variabel dan Grup
diagnosis Variabel Mean SD Min Max
Benign concavity_mean 0.0530 0.0319 0.0000 0.1321
Malignant concavity_mean 0.1504 0.0617 0.0268 0.2810
Benign smoothness_mean 0.0946 0.0124 0.0736 0.1291
Malignant smoothness_mean 0.1018 0.0131 0.0737 0.1278
Benign symmetry_mean 0.1771 0.0254 0.1386 0.2403
Malignant symmetry_mean 0.1873 0.0199 0.1467 0.2162
Benign texture_mean 17.5788 3.3962 10.7200 24.9900
Malignant texture_mean 21.2604 3.7955 11.8900 28.7700

2.4 Visualisasi Distribusi

# Long format: one row per (observation, variable) pair for faceted plots.
df_long <- pivot_longer(
  df,
  cols      = all_of(c(DVS, COV)),
  names_to  = "Variabel",
  values_to = "Nilai"
)

# Density curves with rug marks, one facet per variable, coloured by diagnosis.
ggplot(df_long, aes(x = Nilai, fill = diagnosis, color = diagnosis)) +
  geom_density(alpha = 0.35, linewidth = 0.9) +
  geom_rug(alpha = 0.5, linewidth = 0.4) +
  scale_color_manual(values = COL) +
  scale_fill_manual(values = COL) +
  facet_wrap(~Variabel, scales = "free", ncol = 2) +
  theme(legend.position = "bottom") +
  labs(
    title    = "Distribusi Variabel per Grup Diagnosis",
    subtitle = "Density plot dengan rug marks",
    x        = "Nilai",
    y        = "Densitas",
    fill     = "Diagnosis",
    color    = "Diagnosis"
  )

# Box plots with the individual observations jittered on top.
ggplot(df_long, aes(x = diagnosis, y = Nilai, fill = diagnosis)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21, outlier.size = 2) +
  geom_jitter(aes(color = diagnosis), width = 0.12, alpha = 0.5, size = 1.8) +
  scale_color_manual(values = COL) +
  scale_fill_manual(values = COL) +
  facet_wrap(~Variabel, scales = "free_y", ncol = 2) +
  theme(legend.position = "none") +
  labs(
    title    = "Boxplot Variabel per Grup Diagnosis",
    subtitle = "Dengan jitter untuk distribusi individual",
    x        = NULL,
    y        = "Nilai"
  )


3 Uji Asumsi MANCOVA

3.1 Asumsi 1 — Dependensi Antar DV

Tujuan: Memverifikasi DV-DV saling berkorelasi — syarat mendasar analisis multivariat. Metode: Bartlett’s Test of Sphericity. Keputusan: p < 0.05 → H₀ ditolak → DV berkorelasi → TERPENUHI

# Correlation matrix of the DVs and Bartlett's test of sphericity on it.
R_matrix   <- cor(df[, DVS])
sphericity <- cortest.bartlett(R_matrix, n = nrow(df))

# Three-row result table; mixing numbers with the formatted p-value in one
# vector makes the whole `Nilai` column character.
kable_styling(
  kable(
    tibble(
      Statistik = c("Chi-square", "Degrees of Freedom", "p-value"),
      Nilai     = c(
        round(sphericity$chisq, 4),
        sphericity$df,
        format(sphericity$p.value, scientific = TRUE, digits = 4)
      )
    ),
    caption = "Bartlett's Test of Sphericity"
  ),
  bootstrap_options = c("striped", "hover"),
  full_width = FALSE
)
Bartlett’s Test of Sphericity
Statistik Nilai
Chi-square 24.0657
Degrees of Freedom 3
p-value 2.42e-05
# Lower-triangle correlation plot of the DVs with the coefficients printed.
ggcorrplot(
  R_matrix,
  type     = "lower",
  method   = "circle",
  lab      = TRUE,
  lab_size = 4,
  colors   = c("#c0392b", "white", "#2e86c1"),
  ggtheme  = theme_minimal(),
  title    = "Matriks Korelasi Antar DV"
)

p-value = 2.42e-05 < 0.05 → H₀ ditolak → DV-DV saling berkorelasi signifikan. ✔ ASUMSI 1 TERPENUHI

3.2 Asumsi 2 — Homogenitas Kovarians

Tujuan: Matriks kovarians antar grup (Malignant vs Benign) harus homogen. Metode: Box’s M Test. Keputusan: p ≥ 0.05 → H₀ gagal ditolak → homogen → TERPENUHI

# Box's M test on the DVs, grouped by the independent variable.
bm   <- boxM(df[, DVS], df[[IV]])
bm_M <- as.numeric(bm$statistic[[1]])
bm_p <- as.numeric(bm$p.value)

# Build the table as a character data.frame so that no type issues arise
# when the statistic, the df and the p-value share one column.
bm_df <- data.frame(
  Statistik = c("Box's M (Chi-Sq approx.)", "df", "p-value"),
  Nilai     = as.character(
    c(
      round(bm_M, 4),
      as.numeric(bm$parameter[[1]]),
      round(bm_p, 4)
    )
  ),
  stringsAsFactors = FALSE
)

kable_styling(
  kable(bm_df, caption = "Box's M Test"),
  bootstrap_options = c("striped", "hover"),
  full_width = FALSE
)
Box’s M Test
Statistik Nilai
Box’s M (Chi-Sq approx.) 3.5164
df 6
p-value 0.7418
# Within-group covariance matrices of the DVs.
cov_mal <- cov(df[df[["diagnosis"]] == "Malignant", DVS])
cov_ben <- cov(df[df[["diagnosis"]] == "Benign",    DVS])

# Only the diagonals (the variances) are compared, Malignant rows first.
var_compare <- data.frame(
  Var      = rep(DVS, 2),
  Variansi = c(diag(cov_mal), diag(cov_ben)),
  Grup     = rep(c("Malignant","Benign"), each = length(DVS))
)

# Side-by-side bars of the variance of each DV per group.
ggplot(var_compare, aes(x = Var, y = Variansi, fill = Grup)) +
  geom_col(position = "dodge", width = 0.6, alpha = 0.85) +
  scale_fill_manual(values = COL) +
  theme(legend.position = "bottom") +
  labs(
    title = "Perbandingan Variansi per DV dan Grup",
    x     = "DV",
    y     = "Variansi",
    fill  = "Diagnosis"
  )

p-value = 0.7418 ≥ 0.05 → H₀ gagal ditolak → Matriks kovarians homogen. ✔ ASUMSI 2 TERPENUHI

3.3 Asumsi 3 — Normalitas Multivariat

Tujuan: Gabungan DV mengikuti distribusi normal multivariat. Metode: Mardia’s Test (skewness + kurtosis multivariat). Keputusan: Kedua p ≥ 0.05 → TERPENUHI

# Mardia's multivariate skewness and kurtosis on the pooled DVs.
mardia_result <- psych::mardia(df[, DVS], plot = FALSE)

# The skewness p-value is read from the result; the kurtosis p-value is
# computed here as a two-sided normal tail probability of `$kurtosis`.
p_skew <- mardia_result$p.skew
z_kurt <- mardia_result$kurtosis
p_kurt <- 2 * (1 - pnorm(abs(z_kurt)))

# Result table; the Status column is green when p >= 0.05 and red otherwise.
column_spec(
  kable_styling(
    kable(
      tibble(
        Komponen  = c("Mardia Skewness", "Mardia Kurtosis"),
        Statistik = round(c(mardia_result$b1p, mardia_result$b2p), 4),
        `p-value` = round(c(p_skew, p_kurt), 4),
        Status    = ifelse(c(p_skew, p_kurt) >= 0.05,
                           "Normal ✔", "Tidak Normal ✘")
      ),
      caption = "Mardia's Test — Normalitas Multivariat"
    ),
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ),
  4,
  bold  = TRUE,
  color = ifelse(c(p_skew, p_kurt) >= 0.05, "#1e8449", "#c0392b")
)
Mardia’s Test — Normalitas Multivariat
Komponen Statistik p-value Status
Mardia Skewness 0.9458 0.6404 Normal ✔
Mardia Kurtosis 13.3407 0.2841 Normal ✔
# Univariate normal Q-Q plot for each DV, three panels in one row.
# par() returns the previous values of the settings it changes; they are kept
# so that BOTH the layout and the margins are restored afterwards (resetting
# only `mfrow` would leave the custom margins active for later base plots).
old_par <- par(mfrow = c(1, 3), mar = c(4, 4, 3, 1))
for (v in DVS) {
  qqnorm(df[[v]], main = paste("Q-Q:", v), col = "#2e86c1", pch = 19, cex = 0.8)
  qqline(df[[v]], col = "#c0392b", lwd = 2)
}

par(old_par)

p-skewness = 0.6404 ≥ 0.05 dan p-kurtosis = 0.2841 ≥ 0.05 → Data memenuhi normalitas multivariat. ✔ ASUMSI 3 TERPENUHI

3.4 Asumsi 4 — Linearitas Covariate–DV

Tujuan: Covariate concavity_mean harus berkorelasi linear dengan setiap DV. Metode: Pearson Correlation. Keputusan: p < 0.05 → ada hubungan linear → TERPENUHI

# Pearson correlation test between the covariate and each DV.
# One row per DV: r, t statistic, degrees of freedom, p-value (6 decimals)
# and a status label decided at alpha = 0.05 on the unrounded p-value.
lin_results <- map_df(DVS, function(v) {
  ct <- cor.test(df[[COV]], df[[v]])
  tibble(DV       = v,
         r        = round(ct$estimate, 4),
         `t-stat` = round(ct$statistic, 4),
         df       = ct$parameter,
         `p-value`= round(ct$p.value, 6),
         Status   = ifelse(ct$p.value < 0.05,
                           "Linear Signifikan ✔", "Tidak Linear ✘"))
})

# Colour the Status column from the label itself (green = passed, red =
# failed) instead of always green, so a failing row is not shown as a pass.
lin_ok <- grepl("✔", lin_results$Status, fixed = TRUE)

kable(lin_results, caption = "Pearson Correlation: Covariate vs DV") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(6, bold = TRUE,
              color = ifelse(lin_ok, "#1e8449", "#c0392b"))
Pearson Correlation: Covariate vs DV
DV r t-stat df p-value Status
texture_mean 0.3750 2.8024 48 0.007294 Linear Signifikan ✔
smoothness_mean 0.4534 3.5247 48 0.000943 Linear Signifikan ✔
symmetry_mean 0.4436 3.4293 48 0.001252 Linear Signifikan ✔
# Scatter of the covariate against each DV with ONE pooled linear fit per
# panel (group = 1 overrides the colour grouping for the smoother).
ggplot(
  pivot_longer(df, all_of(DVS), names_to = "DV", values_to = "Nilai_DV"),
  aes(x = concavity_mean, y = Nilai_DV, color = diagnosis)
) +
  geom_point(alpha = 0.7, size = 2) +
  geom_smooth(
    aes(group = 1),
    method    = "lm",
    se        = TRUE,
    color     = "#2c3e50",
    linewidth = 1,
    linetype  = "dashed"
  ) +
  scale_color_manual(values = COL) +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  theme(legend.position = "bottom") +
  labs(
    title = "Linearitas: concavity_mean vs Setiap DV",
    x     = "concavity_mean (Covariate)",
    y     = "Nilai DV",
    color = "Diagnosis"
  )

Semua p-value < 0.05 → Ketiga DV memiliki hubungan linear signifikan dengan covariate. ✔ ASUMSI 4 TERPENUHI

3.5 Asumsi 5 — Independensi Observasi

Tujuan: Setiap observasi bersifat independen. Metode: Evaluasi desain studi (tidak diuji secara statistik).

Kriteria Evaluasi Status
Unit observasi berbeda Setiap baris = satu pasien unik (bukan repeated measures)
Tidak ada tumpang tindih Setiap pasien hanya masuk satu grup
Sampling tidak sistematis Fixed-index tanpa dependensi antar baris
Sumber data independen Wisconsin BC (UCI): setiap observasi independen

Tidak ada indikasi dependensi antar observasi berdasarkan desain studi. ✔ ASUMSI 5 TERPENUHI

3.6 Ringkasan Asumsi

# Summary of the five assumption checks.
# The status of each row is derived from the test results computed earlier
# rather than hard-coded, so the table cannot report "met" for a failed test:
#   1. sphericity rejected (p < 0.05)            -> DVs are correlated
#   2. Box's M not rejected (p >= 0.05)          -> covariances homogeneous
#   3. both Mardia p-values >= 0.05              -> multivariate normality
#   4. every covariate–DV correlation p < 0.05   -> linearity
#   5. independence is argued from the study design, not tested
assumption_met <- c(
  sphericity$p.value < 0.05,
  bm_p >= 0.05,
  p_skew >= 0.05 && p_kurt >= 0.05,
  all(lin_results$`p-value` < 0.05),
  TRUE
)

tibble(
  No       = 1:5,
  Asumsi   = c("Dependensi antar DV", "Homogenitas Kovarians",
               "Normalitas Multivariat", "Linearitas Covariate–DV",
               "Independensi Observasi"),
  Metode   = c("Bartlett's Sphericity", "Box's M Test",
               "Mardia's Test", "Pearson Correlation", "Evaluasi Desain"),
  `p-value`= c(format(sphericity$p.value, scientific=TRUE, digits=3),
               as.character(round(bm_p, 4)),
               paste0("skew=",round(p_skew,4)," / kurt=",round(p_kurt,4)),
               paste0("maks=",round(max(lin_results$`p-value`),4)), "N/A"),
  Status   = ifelse(assumption_met, "✔ TERPENUHI", "✘ TIDAK TERPENUHI")
) %>%
  kable(caption = "Ringkasan 5 Uji Asumsi MANCOVA") %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"),
                full_width = TRUE) %>%
  column_spec(5, bold = TRUE,
              color = ifelse(assumption_met, "#1e8449", "#c0392b")) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
Ringkasan 5 Uji Asumsi MANCOVA
No Asumsi Metode p-value Status
1 Dependensi antar DV Bartlett’s Sphericity 2.42e-05 ✔ TERPENUHI
2 Homogenitas Kovarians Box’s M Test 0.7418 ✔ TERPENUHI
3 Normalitas Multivariat Mardia’s Test skew=0.6404 / kurt=0.2841 ✔ TERPENUHI
4 Linearitas Covariate–DV Pearson Correlation maks=0.0073 ✔ TERPENUHI
5 Independensi Observasi Evaluasi Desain N/A ✔ TERPENUHI

Seluruh 5 asumsi MANCOVA terpenuhi. Dataset layak untuk dilanjutkan ke analisis utama.


4 ANOVA (One-Way, Univariat)

Tujuan: Menguji apakah terdapat perbedaan rata-rata yang signifikan antara kelompok Malignant dan Benign untuk masing-masing DV secara terpisah, tanpa mempertimbangkan covariate maupun hubungan antar DV.

Catatan: ANOVA ini berfungsi sebagai baseline sebelum penambahan covariate (ANCOVA) dan sebelum analisis simultan multivariat (MANOVA).

4.1 Model & Hasil ANOVA

# Jalankan ANOVA terpisah untuk setiap DV
# Fit a separate one-way ANOVA (DV ~ diagnosis) for every DV and collect
# F, both degrees of freedom, the p-value and the decision at alpha = 0.05.
anova_results <- map_df(DVS, function(dv) {
  aov_fit <- aov(as.formula(paste(dv, "~ diagnosis")), data = df)
  aov_tab <- summary(aov_fit)[[1]]
  p_diag  <- aov_tab["diagnosis", "Pr(>F)"]
  tibble(
    DV        = dv,
    `F-value` = round(aov_tab["diagnosis", "F value"], 4),
    `df1`     = aov_tab["diagnosis", "Df"],
    `df2`     = aov_tab["Residuals", "Df"],
    `p-value` = round(p_diag, 6),
    Keputusan = ifelse(p_diag < 0.05, "Tolak H₀ ✔", "Gagal Tolak H₀ ✘")
  )
})

# Decision column is green for significant rows and red otherwise.
column_spec(
  kable_styling(
    kable(anova_results,
          caption = "Hasil One-Way ANOVA per Dependent Variable"),
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ),
  6,
  bold  = TRUE,
  color = ifelse(anova_results$`p-value` < 0.05, "#1e8449", "#c0392b")
)
Hasil One-Way ANOVA per Dependent Variable
DV F-value df1 df2 p-value Keputusan
texture_mean 13.0630 1 48 0.000720 Tolak H₀ ✔
smoothness_mean 4.0243 1 48 0.050503 Gagal Tolak H₀ ✘
symmetry_mean 2.5132 1 48 0.119460 Gagal Tolak H₀ ✘

4.2 Interpretasi ANOVA

Hasil ANOVA:

  • texture_mean → F = 13.063, p = 7.2 × 10^{-4} < 0.05 → Signifikan ✔ Terdapat perbedaan rata-rata texture yang signifikan antara Malignant dan Benign.

  • smoothness_mean → F = 4.0243, p = 0.0505 → Tidak Signifikan ✘

  • symmetry_mean → F = 2.5132, p = 0.1195 → Tidak Signifikan ✘

4.3 Visualisasi ANOVA

# Group means as bars with ± 1 standard-error bars, one facet per DV.
ggplot(
  pivot_longer(df, all_of(DVS), names_to = "Variable", values_to = "Value"),
  aes(x = diagnosis, y = Value, fill = diagnosis)
) +
  stat_summary(fun = mean, geom = "bar", alpha = 0.8, position = "dodge") +
  stat_summary(
    fun.data  = mean_se,
    geom      = "errorbar",
    position  = position_dodge(0.9),
    width     = 0.25,
    linewidth = 0.8
  ) +
  scale_fill_manual(values = COL) +
  facet_wrap(~Variable, scales = "free_y", ncol = 3) +
  theme(legend.position = "bottom") +
  labs(
    title    = "ANOVA: Mean ± SE per DV dan Grup",
    subtitle = "Error bar = Standard Error",
    x        = "Diagnosis",
    y        = "Mean Value",
    fill     = "Diagnosis"
  )

# Untrimmed violins with a narrow box plot drawn on top, one facet per DV.
ggplot(
  pivot_longer(df, all_of(DVS), names_to = "Variable", values_to = "Value"),
  aes(x = diagnosis, y = Value, fill = diagnosis)
) +
  geom_violin(alpha = 0.5, trim = FALSE) +
  geom_boxplot(width = 0.15, alpha = 0.8, outlier.size = 1.5) +
  scale_fill_manual(values = COL) +
  facet_wrap(~Variable, scales = "free_y", ncol = 3) +
  theme(legend.position = "bottom") +
  labs(
    title = "ANOVA: Distribusi per DV dan Grup (Violin + Boxplot)",
    x     = "Diagnosis",
    y     = "Value",
    fill  = "Diagnosis"
  )

4.4 Effect Size ANOVA

# Non-partial eta-squared of `diagnosis` for each one-way ANOVA, with a
# size label using the 0.01 / 0.06 / 0.14 cut-offs.
eta_results <- map_df(DVS, function(dv) {
  aov_fit <- aov(as.formula(paste(dv, "~ diagnosis")), data = df)
  eta_tab <- eta_squared(aov_fit, partial = FALSE)
  eta_val <- eta_tab$Eta2[1]
  tibble(
    DV           = dv,
    `eta²`       = round(eta_val, 4),
    Interpretasi = case_when(
      eta_val >= 0.14 ~ "Besar (≥ 0.14)",
      eta_val >= 0.06 ~ "Sedang (0.06–0.14)",
      eta_val >= 0.01 ~ "Kecil (0.01–0.06)",
      TRUE            ~ "Sangat Kecil (< 0.01)"
    )
  )
})

kable_styling(
  kable(eta_results, caption = "Effect Size (Eta-Squared) per DV — ANOVA"),
  bootstrap_options = c("striped", "hover"),
  full_width = FALSE
)
Effect Size (Eta-Squared) per DV — ANOVA
DV eta² Interpretasi
texture_mean 0.2139 Besar (≥ 0.14)
smoothness_mean 0.0774 Sedang (0.06–0.14)
symmetry_mean 0.0498 Kecil (0.01–0.06)

5 MANOVA (One-Way, Multivariat)

Tujuan: Menguji apakah terdapat perbedaan vektor rata-rata yang signifikan antara Malignant dan Benign pada semua DV secara simultan, tanpa covariate.

Keunggulan vs ANOVA: MANOVA mempertimbangkan korelasi antar DV dan mengontrol familywise error rate, sehingga lebih tepat untuk pengujian simultan.

5.1 Model MANOVA

# One-way MANOVA: the three DVs are bound into a response matrix and modelled
# on `diagnosis` alone (no covariate). The fitted object is reused by the
# test-statistic table, the follow-up ANOVAs, the HE plot and candisc().
manova_model <- manova(
  cbind(texture_mean, smoothness_mean, symmetry_mean) ~ diagnosis,
  data = df
)

5.2 Statistik Uji Multivariat

# Empat statistik uji multivariat standar
# The four standard multivariate test statistics.
tests <- c("Wilks", "Pillai", "Hotelling-Lawley", "Roy")

# For each test, read the first row of the summary's `stats` matrix
# (the `diagnosis` term): columns 2–6 hold the statistic, approximate F,
# numerator df, denominator df and p-value.
manova_stats <- map_df(tests, function(test_name) {
  stat_tab <- summary(manova_model, test = test_name)$stats
  tibble(
    `Statistik Uji` = test_name,
    Nilai           = round(stat_tab[1, 2], 5),
    `F approx`      = round(stat_tab[1, 3], 4),
    `num df`        = round(stat_tab[1, 4], 0),
    `den df`        = round(stat_tab[1, 5], 0),
    `p-value`       = round(stat_tab[1, 6], 6),
    Keputusan       = ifelse(stat_tab[1, 6] < 0.05,
                             "Tolak H₀ ✔", "Gagal Tolak H₀")
  )
})

# Styled table: coloured decision column and a dark header row.
row_spec(
  column_spec(
    kable_styling(
      kable(manova_stats,
            caption = "Hasil MANOVA — Empat Statistik Uji Multivariat"),
      bootstrap_options = c("striped", "hover"),
      full_width = FALSE
    ),
    7,
    bold  = TRUE,
    color = ifelse(manova_stats$`p-value` < 0.05, "#1e8449", "#c0392b")
  ),
  0,
  bold = TRUE, background = "#1a5276", color = "white"
)
Hasil MANOVA — Empat Statistik Uji Multivariat
Statistik Uji Nilai F approx num df den df p-value Keputusan
Wilks 0.66636 7.6771 3 46 0.000291 Tolak H₀ ✔
Pillai 0.33364 7.6771 3 46 0.000291 Tolak H₀ ✔
Hotelling-Lawley 0.50068 7.6771 3 46 0.000291 Tolak H₀ ✔
Roy 0.50068 7.6771 3 46 0.000291 Tolak H₀ ✔

5.3 Interpretasi MANOVA

# Wilks' Lambda and its p-value (as rounded in `manova_stats`) for the text.
wilks_val <- with(manova_stats, Nilai[`Statistik Uji` == "Wilks"])
wilks_p   <- with(manova_stats, `p-value`[`Statistik Uji` == "Wilks"])

Wilks’ Lambda = 0.66636, F = 7.6771, p = 2.91 × 10^{-4} < 0.05.

H₀ ditolak → Terdapat perbedaan signifikan pada vektor rata-rata (texture_mean, smoothness_mean, symmetry_mean) antara kelompok Malignant dan Benign secara simultan.

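Wilks’ Lambda bernilai antara 0 dan 1; semakin kecil nilainya (mendekati 0), semakin besar variasi between-group relatif terhadap variasi within-group. Nilai 0.66636 berarti sekitar 33,4% variasi multivariat (1 − Λ = 0.3336) dijelaskan oleh faktor diagnosis.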

5.4 Follow-up ANOVA

# Univariate follow-up: one ANOVA table per response of the MANOVA fit.
stats::summary.aov(manova_model)
##  Response texture_mean :
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## diagnosis    1 169.43  169.43  13.063 0.0007201 ***
## Residuals   48 622.56   12.97                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response smoothness_mean :
##             Df    Sum Sq    Mean Sq F value Pr(>F)  
## diagnosis    1 0.0006534 0.00065341  4.0243 0.0505 .
## Residuals   48 0.0077937 0.00016237                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response symmetry_mean :
##             Df    Sum Sq    Mean Sq F value Pr(>F)
## diagnosis    1 0.0013087 0.00130867  2.5132 0.1195
## Residuals   48 0.0249941 0.00052071

5.5 HE Plot

# HE plot of the MANOVA fit with lightly filled ellipses.
heplot(
  manova_model,
  main       = "HE Plot — MANOVA\n(texture_mean vs smoothness_mean)",
  col        = c("#2e86c1", "#c0392b"),
  fill       = TRUE,
  fill.alpha = 0.2
)

Interpretasi HE Plot: Elips H (hypothesis) merepresentasikan variasi between-group akibat faktor diagnosis, sedangkan elips E (error) merepresentasikan variasi within-group. Semakin besar elips H relatif terhadap E, semakin kuat efek treatment. Jika elips H menonjol keluar dari elips E, efek tersebut signifikan.

5.6 Canonical Discriminant Plot

# Canonical discriminant analysis derived from the MANOVA fit, then plotted.
candisc_model <- candisc(manova_model)
plot(
  candisc_model,
  main = "Canonical Discriminant Analysis — MANOVA",
  pch  = c(16, 17),
  col  = c("#2e86c1", "#c0392b")
)

5.7 Effect Size MANOVA

# Partial Eta-Squared dari Wilks' Lambda
# Multivariate effect size taken as 1 - Wilks' Lambda, where Lambda is the
# statistic in the first row of the Wilks summary (the `diagnosis` term).
wilks_full <- summary(manova_model, test = "Wilks")$stats
eta_manova <- 1 - wilks_full[1,2]

# One-row table with the size label (0.01 / 0.06 / 0.14 cut-offs).
kable_styling(
  kable(
    tibble(
      Metode            = "MANOVA (Wilks' Lambda)",
      `Wilks' Lambda`   = round(wilks_full[1,2], 5),
      `1 - Lambda (η²)` = round(eta_manova, 4),
      Interpretasi      = case_when(
        eta_manova >= 0.14 ~ "Efek Besar (≥ 0.14)",
        eta_manova >= 0.06 ~ "Efek Sedang (0.06–0.14)",
        eta_manova >= 0.01 ~ "Efek Kecil (0.01–0.06)",
        TRUE               ~ "Efek Sangat Kecil"
      )
    ),
    caption = "Effect Size MANOVA"
  ),
  bootstrap_options = c("striped", "hover"),
  full_width = FALSE
)
Effect Size MANOVA
Metode Wilks’ Lambda 1 - Lambda (η²) Interpretasi
MANOVA (Wilks’ Lambda) 0.66636 0.3336 Efek Besar (≥ 0.14)

6 ANCOVA (One-Way, Univariat + Covariate)

Tujuan: Menguji perbedaan rata-rata per DV antara Malignant dan Benign setelah mengontrol pengaruh covariate concavity_mean. ANCOVA “menetralkan” variasi yang disebabkan oleh perbedaan nilai covariate antar observasi.

Perbedaan dengan ANOVA: ANCOVA menambahkan covariate sebagai prediktor kontinu, sehingga estimasi perbedaan antar grup menjadi lebih akurat (adjusted means).

Model: DV ~ diagnosis + concavity_mean

6.1 Model & Hasil ANCOVA

# ANCOVA per DV. The covariate is entered BEFORE diagnosis in the formula,
# and F / p are read for both terms from the aov summary table.
ancova_results <- map_df(DVS, function(dv) {
  model_formula <- as.formula(paste(dv, "~ concavity_mean + diagnosis"))
  aov_tab       <- summary(aov(model_formula, data = df))[[1]]
  p_diag        <- aov_tab["diagnosis", "Pr(>F)"]
  tibble(
    DV              = dv,
    `F (COV)`       = round(aov_tab["concavity_mean", "F value"], 4),
    `p (COV)`       = round(aov_tab["concavity_mean", "Pr(>F)"], 6),
    `F (diagnosis)` = round(aov_tab["diagnosis", "F value"], 4),
    `p (diagnosis)` = round(p_diag, 6),
    Keputusan       = ifelse(p_diag < 0.05,
                             "Tolak H₀ ✔", "Gagal Tolak H₀ ✘")
  )
})

# Styled table: coloured decision column and a dark header row.
row_spec(
  column_spec(
    kable_styling(
      kable(ancova_results,
            caption = "Hasil ANCOVA per DV (setelah kontrol concavity_mean)"),
      bootstrap_options = c("striped", "hover"),
      full_width = FALSE
    ),
    6,
    bold  = TRUE,
    color = ifelse(ancova_results$`p (diagnosis)` < 0.05,
                   "#1e8449", "#c0392b")
  ),
  0,
  bold = TRUE, background = "#1a5276", color = "white"
)
Hasil ANCOVA per DV (setelah kontrol concavity_mean)
DV F (COV) p (COV) F (diagnosis) p (diagnosis) Keputusan
texture_mean 8.4532 0.005546 4.6651 0.035915 Tolak H₀ ✔
smoothness_mean 12.2263 0.001040 0.2373 0.628403 Gagal Tolak H₀ ✘
symmetry_mean 11.7691 0.001264 1.0354 0.314115 Gagal Tolak H₀ ✘

6.2 Tabel ANCOVA Lengkap per DV

# Emit, for every DV, a level-3 markdown heading followed by the full ANCOVA
# table (rounded to 4 decimals). Everything is written with cat() so the
# heading and the table markup go straight into the output stream.
for (v in DVS) {
  fit <- aov(as.formula(sprintf("%s ~ concavity_mean + diagnosis", v)),
             data = df)
  cat("\n\n### ", v, "\n\n", sep = "")
  cat(
    kable_styling(
      kable(round(as.data.frame(summary(fit)[[1]]), 4),
            caption = paste("ANCOVA Table —", v)),
      bootstrap_options = c("striped", "hover"),
      full_width = FALSE
    )
  )
  cat("\n\n")
}

6.2.1 texture_mean

ANCOVA Table — texture_mean
Df Sum Sq Mean Sq F value Pr(>F)
concavity_mean 1 111.3604 111.3604 8.4532 0.0055
diagnosis 1 61.4571 61.4571 4.6651 0.0359
Residuals 47 619.1685 13.1738 NA NA

6.2.2 smoothness_mean

ANCOVA Table — smoothness_mean
Df Sum Sq Mean Sq F value Pr(>F)
concavity_mean 1 0.0017 0.0017 12.2263 0.0010
diagnosis 1 0.0000 0.0000 0.2373 0.6284
Residuals 47 0.0067 0.0001 NA NA

6.2.3 symmetry_mean

ANCOVA Table — symmetry_mean
Df Sum Sq Mean Sq F value Pr(>F)
concavity_mean 1 0.0052 0.0052 11.7691 0.0013
diagnosis 1 0.0005 0.0005 1.0354 0.3141
Residuals 47 0.0207 0.0004 NA NA

6.3 Adjusted Means (Estimated Marginal Means)

# Estimated marginal means of `diagnosis` from each ANCOVA model, one data
# frame per DV tagged with the DV name.
emm_list <- lapply(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  mutate(as.data.frame(emmeans(fit, ~ diagnosis)), DV = v)
})

# Stack the per-DV results into one data frame.
emm_df <- bind_rows(emm_list)

# Table of the adjusted means, SE and confidence limits (4 decimals).
kable_styling(
  kable(
    mutate(
      dplyr::select(emm_df, DV, diagnosis, emmean, SE, lower.CL, upper.CL),
      across(where(is.numeric), ~round(., 4))
    ),
    caption = "Estimated Marginal Means (Adjusted Means) — ANCOVA"
  ),
  bootstrap_options = c("striped", "hover"),
  full_width = FALSE
)
Estimated Marginal Means (Adjusted Means) — ANCOVA
DV diagnosis emmean SE lower.CL upper.CL
texture_mean Benign 17.8423 0.8926 16.0466 19.6380
texture_mean Malignant 20.9969 0.8926 19.2012 22.7926
smoothness_mean Benign 0.0994 0.0029 0.0935 0.1053
smoothness_mean Malignant 0.0970 0.0029 0.0911 0.1029
symmetry_mean Benign 0.1865 0.0052 0.1761 0.1969
symmetry_mean Malignant 0.1779 0.0052 0.1675 0.1883
# Adjusted means as points with their confidence limits, one facet per DV.
ggplot(
  emm_df,
  aes(x = diagnosis, y = emmean, color = diagnosis, group = diagnosis)
) +
  geom_point(size = 4) +
  geom_errorbar(
    aes(ymin = lower.CL, ymax = upper.CL),
    width     = 0.2,
    linewidth = 1
  ) +
  scale_color_manual(values = COL) +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  theme(legend.position = "bottom") +
  labs(
    title    = "ANCOVA: Estimated Marginal Means ± 95% CI",
    subtitle = "Rata-rata yang telah disesuaikan setelah mengontrol concavity_mean",
    x        = "Diagnosis",
    y        = "Adjusted Mean",
    color    = "Diagnosis"
  )

6.4 Homogenitas Slope (Uji Interaksi)

# Uji apakah slope regresi covariate sama di kedua grup (asumsi ANCOVA)
# Homogeneity of regression slopes (an ANCOVA assumption): for each DV, fit
# the model with the diagnosis × covariate interaction and read the
# interaction p-value. The label is decided on the UNROUNDED p-value.
slope_results <- map_df(DVS, function(v) {
  fit_int <- aov(as.formula(paste(v, "~ diagnosis * concavity_mean")), data = df)
  s       <- summary(fit_int)[[1]]
  intx_p  <- s["diagnosis:concavity_mean","Pr(>F)"]
  tibble(
    DV              = v,
    `p (interaksi)` = round(intx_p, 4),
    `Homogenitas Slope` = ifelse(intx_p >= 0.05,
                                 "Terpenuhi ✔ (slope paralel)",
                                 "Tidak Terpenuhi ✘ (slope berbeda)")
  )
})

# Colour follows the label. Comparing the 4-decimal rounded p-value with 0.05
# could paint a row green while its label says the assumption failed
# (e.g. p = 0.04996 rounds to 0.05).
slope_ok <- startsWith(slope_results$`Homogenitas Slope`, "Terpenuhi")

kable(slope_results,
      caption = "Uji Homogenitas Slope Regresi (Asumsi ANCOVA)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(3, bold = TRUE,
              color = ifelse(slope_ok, "#1e8449", "#c0392b"))
Uji Homogenitas Slope Regresi (Asumsi ANCOVA)
DV p (interaksi) Homogenitas Slope
texture_mean 0.1587 Terpenuhi ✔ (slope paralel)
smoothness_mean 0.3841 Terpenuhi ✔ (slope paralel)
symmetry_mean 0.6863 Terpenuhi ✔ (slope paralel)
# Separate linear fit per diagnosis group (grouping comes from the colour
# aesthetic), one facet per DV.
ggplot(
  pivot_longer(df, all_of(DVS), names_to = "DV", values_to = "Value"),
  aes(x = concavity_mean, y = Value, color = diagnosis)
) +
  geom_point(alpha = 0.6, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
  scale_color_manual(values = COL) +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  theme(legend.position = "bottom") +
  labs(
    title    = "ANCOVA: Garis Regresi per Grup",
    subtitle = "Slope yang paralel mengindikasikan homogenitas slope terpenuhi",
    x        = "concavity_mean (Covariate)",
    y        = "DV",
    color    = "Diagnosis"
  )

6.5 Effect Size ANCOVA

# Partial eta-squared of the `diagnosis` term in each ANCOVA model, with a
# size label using the 0.01 / 0.06 / 0.14 cut-offs.
eta_ancova <- map_df(DVS, function(dv) {
  aov_fit  <- aov(as.formula(paste(dv, "~ concavity_mean + diagnosis")),
                  data = df)
  eta_tab  <- eta_squared(aov_fit, partial = TRUE)
  diag_row <- eta_tab[eta_tab$Parameter == "diagnosis", ]
  eta_p    <- diag_row$Eta2_partial
  tibble(
    DV                       = dv,
    `Partial η² (diagnosis)` = round(eta_p, 4),
    Interpretasi             = case_when(
      eta_p >= 0.14 ~ "Besar (≥ 0.14)",
      eta_p >= 0.06 ~ "Sedang (0.06–0.14)",
      eta_p >= 0.01 ~ "Kecil (0.01–0.06)",
      TRUE          ~ "Sangat Kecil (< 0.01)"
    )
  )
})

kable_styling(
  kable(eta_ancova, caption = "Partial Eta-Squared (Effect Size) — ANCOVA"),
  bootstrap_options = c("striped", "hover"),
  full_width = FALSE
)
Partial Eta-Squared (Effect Size) — ANCOVA
DV Partial η² (diagnosis) Interpretasi
texture_mean 0.0903 Sedang (0.06–0.14)
smoothness_mean 0.0050 Sangat Kecil (< 0.01)
symmetry_mean 0.0216 Kecil (0.01–0.06)

7 MANCOVA (One-Way, Multivariat + Covariate)

Tujuan: Menguji perbedaan vektor rata-rata semua DV secara simultan antara Malignant dan Benign, setelah mengontrol pengaruh covariate concavity_mean.

MANCOVA = MANOVA + Covariate

Model: cbind(DV1, DV2, DV3) ~ concavity_mean + diagnosis

7.1 Model MANCOVA

# One-way MANCOVA: same response matrix as the MANOVA, with the covariate
# `concavity_mean` entered before `diagnosis` in the formula.
mancova_model <- manova(
  cbind(texture_mean, smoothness_mean, symmetry_mean) ~ concavity_mean + diagnosis,
  data = df
)

# Wilks' Lambda test for each term of the model.
summary(mancova_model, test = "Wilks")
##                Df   Wilks approx F num Df den Df    Pr(>F)    
## concavity_mean  1 0.56604  11.5001      3     45 1.012e-05 ***
## diagnosis       1 0.89301   1.7972      3     45    0.1613    
## Residuals      47                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.2 Statistik Uji Multivariat MANCOVA

# The four multivariate statistics for the `diagnosis` term of the MANCOVA.
# The row is selected by name: row 2 = diagnosis (row 1 = covariate).
mancova_stats <- map_df(tests, function(test_name) {
  stat_tab <- summary(mancova_model, test = test_name)$stats
  tibble(
    `Statistik Uji` = test_name,
    Nilai           = round(stat_tab["diagnosis", 2], 5),
    `F approx`      = round(stat_tab["diagnosis", 3], 4),
    `num df`        = round(stat_tab["diagnosis", 4], 0),
    `den df`        = round(stat_tab["diagnosis", 5], 0),
    `p-value`       = round(stat_tab["diagnosis", 6], 6),
    Keputusan       = ifelse(stat_tab["diagnosis", 6] < 0.05,
                             "Tolak H₀ ✔", "Gagal Tolak H₀")
  )
})

# Styled table: coloured decision column and a dark header row.
row_spec(
  column_spec(
    kable_styling(
      kable(
        mancova_stats,
        caption = "Hasil MANCOVA — Efek diagnosis (setelah kontrol concavity_mean)"
      ),
      bootstrap_options = c("striped", "hover"),
      full_width = FALSE
    ),
    7,
    bold  = TRUE,
    color = ifelse(mancova_stats$`p-value` < 0.05, "#1e8449", "#c0392b")
  ),
  0,
  bold = TRUE, background = "#1a5276", color = "white"
)
Hasil MANCOVA — Efek diagnosis (setelah kontrol concavity_mean)
Statistik Uji Nilai F approx num df den df p-value Keputusan
Wilks 0.89301 1.7972 3 45 0.161295 Gagal Tolak H₀
Pillai 0.10699 1.7972 3 45 0.161295 Gagal Tolak H₀
Hotelling-Lawley 0.11981 1.7972 3 45 0.161295 Gagal Tolak H₀
Roy 0.11981 1.7972 3 45 0.161295 Gagal Tolak H₀

7.3 Statistik Uji untuk Covariate

# The four multivariate statistics for the covariate term of the MANCOVA,
# read from the `concavity_mean` row of each summary's `stats` matrix.
mancova_cov_stats <- map_df(tests, function(test_name) {
  stat_tab <- summary(mancova_model, test = test_name)$stats
  tibble(
    `Statistik Uji` = test_name,
    Nilai           = round(stat_tab["concavity_mean", 2], 5),
    `F approx`      = round(stat_tab["concavity_mean", 3], 4),
    `num df`        = round(stat_tab["concavity_mean", 4], 0),
    `den df`        = round(stat_tab["concavity_mean", 5], 0),
    `p-value`       = round(stat_tab["concavity_mean", 6], 6),
    Keputusan       = ifelse(stat_tab["concavity_mean", 6] < 0.05,
                             "COV Signifikan ✔", "COV Tidak Signifikan")
  )
})

# Styled table: coloured decision column and a dark header row.
row_spec(
  column_spec(
    kable_styling(
      kable(mancova_cov_stats,
            caption = "MANCOVA — Efek Covariate (concavity_mean)"),
      bootstrap_options = c("striped", "hover"),
      full_width = FALSE
    ),
    7,
    bold  = TRUE,
    color = ifelse(mancova_cov_stats$`p-value` < 0.05,
                   "#1e8449", "#c0392b")
  ),
  0,
  bold = TRUE, background = "#1a5276", color = "white"
)
MANCOVA — Efek Covariate (concavity_mean)
Statistik Uji Nilai F approx num df den df p-value Keputusan
Wilks 0.56604 11.5001 3 45 1e-05 COV Signifikan ✔
Pillai 0.43396 11.5001 3 45 1e-05 COV Signifikan ✔
Hotelling-Lawley 0.76667 11.5001 3 45 1e-05 COV Signifikan ✔
Roy 0.76667 11.5001 3 45 1e-05 COV Signifikan ✔

7.4 Interpretasi MANCOVA

# Pull the Wilks row out of both result tables; these scalars feed the
# inline numbers in the interpretation text that follows.
mancova_wilks_val <- mancova_stats %>%
  filter(`Statistik Uji` == "Wilks") %>%
  pull(Nilai)
mancova_wilks_p <- mancova_stats %>%
  filter(`Statistik Uji` == "Wilks") %>%
  pull(`p-value`)
mancova_cov_p <- mancova_cov_stats %>%
  filter(`Statistik Uji` == "Wilks") %>%
  pull(`p-value`)

Efek Diagnosis (setelah kontrol covariate): Wilks’ Lambda = 0.89301, p = 0.161295 > 0.05 → H₀ gagal ditolak → Tidak ada perbedaan signifikan setelah kontrol covariate.

Efek Covariate (concavity_mean): p = 1.01 × 10^{-5} < 0.05 → Covariate berpengaruh signifikan terhadap kombinasi DV. Penggunaan MANCOVA (vs MANOVA) sudah tepat.

7.5 Follow-up ANCOVA

# Follow-up univariate tests: calling summary.aov() on the multivariate model
# prints one ANOVA table per response variable, each listing the covariate
# (concavity_mean) and diagnosis terms.
summary.aov(mancova_model)
##  Response texture_mean :
##                Df Sum Sq Mean Sq F value   Pr(>F)   
## concavity_mean  1 111.36 111.360  8.4532 0.005546 **
## diagnosis       1  61.46  61.457  4.6651 0.035915 * 
## Residuals      47 619.17  13.174                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response smoothness_mean :
##                Df    Sum Sq    Mean Sq F value  Pr(>F)   
## concavity_mean  1 0.0017368 0.00173680 12.2263 0.00104 **
## diagnosis       1 0.0000337 0.00003371  0.2373 0.62840   
## Residuals      47 0.0066766 0.00014205                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response symmetry_mean :
##                Df    Sum Sq   Mean Sq F value   Pr(>F)   
## concavity_mean  1 0.0051762 0.0051762 11.7691 0.001264 **
## diagnosis       1 0.0004554 0.0004554  1.0354 0.314115   
## Residuals      47 0.0206712 0.0004398                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.6 HE Plot MANCOVA

# HE plot for the MANCOVA model, drawn with semi-transparent filled ellipses.
# NOTE(review): the three colours are matched to the plotted ellipses in the
# order heplot() assigns them; confirm the mapping against the heplots docs.
heplot(
  mancova_model,
  main       = "HE Plot — MANCOVA\n(Setelah Kontrol concavity_mean)",
  col        = c("#8e44ad", "#27ae60", "#c0392b"),
  fill       = TRUE,
  fill.alpha = 0.2
)

7.7 Effect Size MANCOVA

# Effect size for the diagnosis term, reported as 1 - Wilks' Lambda.
wilks_mancova   <- summary(mancova_model, test = "Wilks")$stats
eta_mancova_val <- 1 - wilks_mancova["diagnosis", 2]

# One-row table: the raw Lambda, the derived effect size, and a verbal label
# assigned by the 0.01 / 0.06 / 0.14 cut-offs (first matching rule wins).
tibble(
  Metode          = "MANCOVA (Wilks' Lambda — diagnosis)",
  `Wilks' Lambda` = round(wilks_mancova["diagnosis", 2], 5),
  `1-Lambda (η²)` = round(eta_mancova_val, 4)
) %>%
  mutate(
    Interpretasi = case_when(
      eta_mancova_val >= 0.14 ~ "Efek Besar (≥ 0.14)",
      eta_mancova_val >= 0.06 ~ "Efek Sedang (0.06–0.14)",
      eta_mancova_val >= 0.01 ~ "Efek Kecil (0.01–0.06)",
      TRUE                    ~ "Efek Sangat Kecil"
    )
  ) %>%
  kable(caption = "Effect Size MANCOVA") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Effect Size MANCOVA
Metode Wilks’ Lambda 1-Lambda (η²) Interpretasi
MANCOVA (Wilks’ Lambda — diagnosis) 0.89301 0.107 Efek Sedang (0.06–0.14)

8 Perbandingan MANOVA vs MANCOVA

Tujuan Perbandingan: Mengevaluasi dampak penambahan covariate (concavity_mean) terhadap hasil pengujian. Perbandingan ini menjawab pertanyaan:

“Apakah mengontrol concavity_mean mengubah kesimpulan tentang perbedaan antar grup? Seberapa besar perubahan effect size dan nilai p?”

8.1 Tabel Perbandingan Wilks’ Lambda

# Wilks statistics of both models. The MANOVA effect is read from row 1 of its
# table, the MANCOVA effect from the row named "diagnosis".
wl_manova  <- summary(manova_model,  test = "Wilks")$stats
wl_mancova <- summary(mancova_model, test = "Wilks")$stats

# Side-by-side comparison, one row per model. Each column is built from a
# two-element vector ordered (MANOVA, MANCOVA).
comp_df <- tibble(
  Metode          = c("MANOVA", "MANCOVA"),
  `Covariate`     = c("Tidak Ada", "concavity_mean"),
  `Wilks' Lambda` = round(c(wl_manova[1, 2], wl_mancova["diagnosis", 2]), 5),
  `F approx`      = round(c(wl_manova[1, 3], wl_mancova["diagnosis", 3]), 4),
  `p-value`       = round(c(wl_manova[1, 6], wl_mancova["diagnosis", 6]), 6),
  `η² (1-Lambda)` = round(1 - c(wl_manova[1, 2], wl_mancova["diagnosis", 2]), 4),
  # The decision uses the unrounded p-values.
  Keputusan       = ifelse(
    c(wl_manova[1, 6], wl_mancova["diagnosis", 6]) < 0.05,
    "Tolak H₀ ✔", "Gagal Tolak H₀"
  )
)

# Render with a dark header and a distinct tint for each model's row.
comp_df %>%
  kable(caption = "Perbandingan Wilks' Lambda: MANOVA vs MANCOVA") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white") %>%
  row_spec(1, background = "#eaf4fb") %>%
  row_spec(2, background = "#f5eef8")
Perbandingan Wilks’ Lambda: MANOVA vs MANCOVA
Metode Covariate Wilks’ Lambda F approx p-value η² (1-Lambda) Keputusan
MANOVA Tidak Ada 0.66636 7.6771 0.000291 0.3336 Tolak H₀ ✔
MANCOVA concavity_mean 0.89301 1.7972 0.161295 0.1070 Gagal Tolak H₀

8.2 Perbandingan Semua Statistik Uji

# For every test statistic in `tests`, put the MANOVA and MANCOVA results for
# the diagnosis effect side by side (p-values, their difference, statistics).
all_tests_df <- map_df(tests, function(test_name) {
  # First row of the MANOVA table; "diagnosis" row of the MANCOVA table.
  plain    <- summary(manova_model,  test = test_name)$stats[1, ]
  adjusted <- summary(mancova_model, test = test_name)$stats["diagnosis", ]

  # Positions 2 and 6 of each row hold the statistic and the p-value.
  p_plain    <- plain[[6]]
  p_adjusted <- adjusted[[6]]

  tibble(
    `Statistik Uji`   = test_name,
    `MANOVA — p`      = round(p_plain, 6),
    `MANCOVA — p`     = round(p_adjusted, 6),
    `Perubahan p`     = round(p_adjusted - p_plain, 6),
    `MANOVA — nilai`  = round(plain[[2]], 5),
    `MANCOVA — nilai` = round(adjusted[[2]], 5)
  )
})

all_tests_df %>%
  kable(caption = "Perbandingan Seluruh Statistik Uji: MANOVA vs MANCOVA") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
Perbandingan Seluruh Statistik Uji: MANOVA vs MANCOVA
Statistik Uji MANOVA — p MANCOVA — p Perubahan p MANOVA — nilai MANCOVA — nilai
Wilks 0.000291 0.161295 0.161004 0.66636 0.89301
Pillai 0.000291 0.161295 0.161004 0.33364 0.10699
Hotelling-Lawley 0.000291 0.161295 0.161004 0.50068 0.11981
Roy 0.000291 0.161295 0.161004 0.50068 0.11981

8.3 Perbandingan ANOVA vs ANCOVA per DV

# Per-DV comparison of the diagnosis effect without (ANOVA) and with (ANCOVA)
# the concavity_mean covariate: F, p and effect size for each model, plus the
# change in F and in effect size.
comp_uni <- map_df(DVS, function(v) {
  # ANOVA: DV ~ diagnosis
  fit_anova <- aov(as.formula(paste(v, "~ diagnosis")), data = df)
  s_anova   <- summary(fit_anova)[[1]]
  f_anova   <- s_anova["diagnosis", "F value"]
  p_anova   <- s_anova["diagnosis", "Pr(>F)"]
  # Single-term model, so the first (only) effect row is diagnosis.
  e_anova   <- eta_squared(fit_anova, partial = FALSE)$Eta2[1]

  # ANCOVA: DV ~ concavity_mean + diagnosis (covariate entered first)
  fit_ancova <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  s_ancova   <- summary(fit_ancova)[[1]]
  f_ancova   <- s_ancova["diagnosis", "F value"]
  p_ancova   <- s_ancova["diagnosis", "Pr(>F)"]
  # Compute the effect-size table once and pick the diagnosis row by name
  # (previously eta_squared() was evaluated twice per DV).
  es_ancova  <- eta_squared(fit_ancova, partial = TRUE)
  e_ancova   <- es_ancova$Eta2_partial[es_ancova$Parameter == "diagnosis"]

  # In a one-factor ANOVA eta squared equals partial eta squared, so the
  # difference below compares like with like.
  tibble(
    DV              = v,
    `F (ANOVA)`     = round(f_anova, 4),
    `p (ANOVA)`     = round(p_anova, 6),
    `η² (ANOVA)`    = round(e_anova, 4),
    `F (ANCOVA)`    = round(f_ancova, 4),
    `p (ANCOVA)`    = round(p_ancova, 6),
    `η²p (ANCOVA)`  = round(e_ancova, 4),
    `Δ F`           = round(f_ancova - f_anova, 4),
    `Δ η²`          = round(e_ancova - e_anova, 4)
  )
})

kable(comp_uni,
      caption = "Perbandingan ANOVA vs ANCOVA per DV (Efek Kontrol Covariate)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
Perbandingan ANOVA vs ANCOVA per DV (Efek Kontrol Covariate)
DV F (ANOVA) p (ANOVA) η² (ANOVA) F (ANCOVA) p (ANCOVA) η²p (ANCOVA) Δ F Δ η²
texture_mean 13.0630 0.000720 0.2139 4.6651 0.035915 0.0903 -8.3979 -0.1236
smoothness_mean 4.0243 0.050503 0.0774 0.2373 0.628403 0.0050 -3.7869 -0.0723
symmetry_mean 2.5132 0.119460 0.0498 1.0354 0.314115 0.0216 -1.4779 -0.0282

8.4 Visualisasi Perbandingan Effect Size

# Long-format data for the effect-size comparison plot: one row per
# (DV, method), ANOVA rows first, then ANCOVA rows.
effect_comp <- bind_rows(
  # ANOVA: eta squared of the single diagnosis term
  map(DVS, function(dv) {
    model <- aov(as.formula(paste(dv, "~ diagnosis")), data = df)
    es    <- eta_squared(model, partial = FALSE)
    tibble(DV = dv, Metode = "ANOVA", eta2 = es$Eta2[1])
  }),
  # ANCOVA: partial eta squared of diagnosis after the covariate
  map(DVS, function(dv) {
    model <- aov(as.formula(paste(dv, "~ concavity_mean + diagnosis")), data = df)
    es    <- eta_squared(model, partial = TRUE)
    tibble(DV = dv, Metode = "ANCOVA",
           eta2 = es$Eta2_partial[es$Parameter == "diagnosis"])
  })
)

# Dodged bars per DV with the value printed above each bar; the y-axis is
# extended by 25% so the labels have headroom.
ggplot(effect_comp, aes(x = DV, y = eta2, fill = Metode)) +
  geom_col(position = "dodge", alpha = 0.85, width = 0.6) +
  geom_text(
    aes(label = round(eta2, 3)),
    position = position_dodge(width = 0.6),
    vjust = -0.4,
    size = 3.5
  ) +
  scale_fill_manual(values = c("ANOVA" = "#2e86c1", "ANCOVA" = "#8e44ad")) +
  labs(
    title    = "Perbandingan Effect Size: ANOVA vs ANCOVA",
    subtitle = "η² (ANOVA) vs Partial η² (ANCOVA) — semakin tinggi semakin besar efek",
    x        = "Dependent Variable",
    y        = "Effect Size (η²)",
    fill     = "Metode"
  ) +
  theme(legend.position = "bottom") +
  ylim(0, max(effect_comp$eta2) * 1.25)

# Visualise how Wilks' Lambda for the diagnosis effect changes between the
# MANOVA and the MANCOVA model.
wilks_comp <- tibble(
  Metode         = c("MANOVA", "MANCOVA"),
  `Wilks Lambda` = c(wl_manova[1, 2], wl_mancova["diagnosis", 2]),
  `p-value`      = c(wl_manova[1, 6], wl_mancova["diagnosis", 6])
)

# One bar per model, annotated with Lambda and p (both rounded to 4 digits);
# the y-axis is fixed to the 0-1 range.
ggplot(wilks_comp, aes(x = Metode, y = `Wilks Lambda`, fill = Metode)) +
  geom_col(alpha = 0.85, width = 0.5) +
  geom_text(
    aes(label = paste0("Λ=", round(`Wilks Lambda`, 4),
                       "\np=", round(`p-value`, 4))),
    vjust    = -0.4,
    size     = 4,
    fontface = "bold"
  ) +
  scale_fill_manual(values = c("MANOVA" = "#2e86c1", "MANCOVA" = "#8e44ad")) +
  labs(
    title    = "Perbandingan Wilks' Lambda: MANOVA vs MANCOVA",
    subtitle = "Nilai Lambda lebih kecil = efek diagnosis lebih kuat",
    x        = NULL,
    y        = "Wilks' Lambda"
  ) +
  theme(legend.position = "none") +
  ylim(0, 1)

8.5 Interpretasi Perbandingan

8.5.1 Apa yang Berubah Setelah Menambahkan Covariate?

1. Wilks’ Lambda - MANOVA: Λ = 0.66636 - MANCOVA: Λ = 0.89301 - Lambda MANCOVA lebih besar atau sama → kontrol covariate tidak meningkatkan deteksi efek diagnosis.

2. p-value - MANOVA: p = 2.91 × 10^{-4} - MANCOVA: p = 0.161295 - p-value MANCOVA lebih besar → efek diagnosis melemah setelah koreksi covariate, mengindikasikan covariate berkorelasi dengan IV.

3. Signifikansi Covariate: covariate concavity_mean sendiri memiliki p = 1.01 × 10^{-5} — signifikan → covariate memang berpengaruh terhadap DV dan tepat dimasukkan dalam model. Penggunaan MANCOVA lebih tepat daripada MANOVA.

4. Kesimpulan Utama MANOVA signifikan namun MANCOVA tidak → perbedaan antar grup sebagian disebabkan oleh perbedaan nilai covariate, bukan murni efek diagnosis.


9 Ringkasan Akhir

9.1 Tabel Ringkasan Semua Analisis

# Summary of every test in the report: three ANOVAs, the MANOVA, three ANCOVAs
# and the MANCOVA, in that order. The unrounded p-values are gathered once in a
# temporary column, used for both the rounded display value and the decision,
# and then dropped.
final_summary <- tibble(
  Metode    = c("ANOVA — texture_mean",
                "ANOVA — smoothness_mean",
                "ANOVA — symmetry_mean",
                "MANOVA (Wilks)",
                "ANCOVA — texture_mean",
                "ANCOVA — smoothness_mean",
                "ANCOVA — symmetry_mean",
                "MANCOVA — diagnosis (Wilks)"),
  Covariate = c(rep("Tidak Ada", 4), rep("concavity_mean", 4)),
  p_raw     = c(anova_results$`p-value`,
                wl_manova[1, 6],
                ancova_results$`p (diagnosis)`,
                wl_mancova["diagnosis", 6])
) %>%
  mutate(
    `p-value` = round(p_raw, 5),
    Keputusan = ifelse(p_raw < 0.05, "Tolak H₀ ✔", "Gagal Tolak H₀ ✘")
  ) %>%
  select(-p_raw)

# Render: the decision column is green for rejections and red otherwise;
# rows 4 and 8 (the two multivariate tests) get a tinted background.
final_summary %>%
  kable(caption = "Ringkasan Seluruh Hasil Analisis") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(
    4,
    bold  = TRUE,
    color = ifelse(final_summary$Keputusan == "Tolak H₀ ✔",
                   "#1e8449", "#c0392b")
  ) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white") %>%
  row_spec(4, background = "#eaf4fb") %>%
  row_spec(8, background = "#f5eef8")
Ringkasan Seluruh Hasil Analisis
Metode Covariate p-value Keputusan
ANOVA — texture_mean Tidak Ada 0.00072 Tolak H₀ ✔
ANOVA — smoothness_mean Tidak Ada 0.05050 Gagal Tolak H₀ ✘
ANOVA — symmetry_mean Tidak Ada 0.11946 Gagal Tolak H₀ ✘
MANOVA (Wilks) Tidak Ada 0.00029 Tolak H₀ ✔
ANCOVA — texture_mean concavity_mean 0.03592 Tolak H₀ ✔
ANCOVA — smoothness_mean concavity_mean 0.62840 Gagal Tolak H₀ ✘
ANCOVA — symmetry_mean concavity_mean 0.31411 Gagal Tolak H₀ ✘
MANCOVA — diagnosis (Wilks) concavity_mean 0.16130 Gagal Tolak H₀ ✘

9.2 Kesimpulan Naratif

9.2.1 Kesimpulan Lengkap

1. Uji Asumsi Kelima asumsi MANCOVA terpenuhi pada sampel 50 observasi seimbang (25 Malignant, 25 Benign): dependensi antar DV, homogenitas kovarians, normalitas multivariat, linearitas covariate–DV, dan independensi observasi.

2. ANOVA Pengujian univariat menunjukkan bahwa texture_mean merupakan satu-satunya DV yang berbeda secara signifikan antara Malignant dan Benign (p < 0.05). smoothness_mean dan symmetry_mean tidak menunjukkan perbedaan yang signifikan secara terpisah.

3. MANOVA Secara simultan, kombinasi ketiga DV menunjukkan perbedaan signifikan antara kedua kelompok (Wilks’ Λ = 0.6664, p = 2.91 × 10^{-4}). Ini mengkonfirmasi bahwa meskipun tidak semua DV signifikan secara individual, kombinasi multivariat mereka mampu membedakan Malignant dari Benign.

4. ANCOVA Setelah mengontrol concavity_mean, hasil per-DV menunjukkan 1 dari 3 DV yang signifikan. Kontrol covariate memberikan estimasi perbedaan antar grup yang lebih akurat melalui adjusted means.

5. MANCOVA Secara multivariat dengan kontrol covariate, efek diagnosis memberikan Wilks’ Λ = 0.893, p = 0.161295. Perbedaan antar grup tidak lagi signifikan setelah mengontrol concavity_mean — mengindikasikan covariate memediasi sebagian dari perbedaan tersebut. Covariate concavity_mean itu sendiri signifikan (p = 1.01 × 10^{-5}), mengkonfirmasi bahwa penggunaan MANCOVA lebih tepat daripada MANOVA.

6. Perbandingan MANOVA vs MANCOVA Penambahan covariate mengubah nilai Wilks’ Lambda dari 0.6664 (MANOVA) menjadi 0.893 (MANCOVA), menunjukkan bahwa concavity_mean memiliki peran dalam menjelaskan variasi pada kombinasi DV. MANCOVA memberikan gambaran yang lebih akurat tentang perbedaan intrinsik antara sel Malignant dan Benign setelah mengeliminasi pengaruh cekungan kontur sel.


Laporan ini dibuat untuk keperluan Tugas Mata Kuliah Analisis Multivariate. Data: Wisconsin Breast Cancer Dataset — UCI Machine Learning Repository. Analisis: Uji Asumsi · ANOVA · MANOVA · ANCOVA · MANCOVA · Perbandingan Efek Covariate