# ── Package Management ─────────────────────────────────────────────────────────
# Packages used by this report. Any that are missing are installed from CRAN
# before everything is attached.
pkgs <- c("tidyverse", "MVN", "biotools", "car", "heplots", "candisc",
          "psych", "ggplot2", "ggcorrplot", "knitr", "kableExtra",
          "moments", "GGally", "emmeans", "effectsize", "broom")

# Probe each package directly instead of building the full
# `installed.packages()` matrix; `system.file()` returns "" when the package
# cannot be found.
is_installed <- vapply(
  pkgs,
  function(p) nzchar(system.file(package = p)),
  logical(1)
)
new_pkgs <- pkgs[!is_installed]
if (length(new_pkgs) > 0) {
  install.packages(new_pkgs, repos = "https://cran.r-project.org")
}

# Attach order is unchanged: packages attached later mask same-named functions
# from packages attached earlier.
suppressPackageStartupMessages({
  library(tidyverse)
  library(MVN)
  library(biotools)
  library(car)
  library(heplots)
  library(candisc)
  library(psych)
  library(ggplot2)
  library(ggcorrplot)
  library(knitr)
  library(kableExtra)
  library(moments)
  library(GGally)
  library(emmeans)
  library(effectsize)
  library(broom)
})

# Default knitr chunk options for the whole document.
knitr::opts_chunk$set(
  echo = TRUE, message = FALSE, warning = FALSE,
  fig.align = "center", fig.width = 9,
  fig.height = 5, dpi = 150
)

# Global ggplot2 theme.
theme_set(
  theme_minimal(base_size = 13) +
    theme(
      plot.title    = element_text(face = "bold", color = "#1a5276"),
      plot.subtitle = element_text(color = "#566573"),
      axis.title    = element_text(color = "#2c3e50"),
      strip.text    = element_text(face = "bold")
    )
)

# ── Consistent colours throughout the document ────────────────────────────────
COL <- c("Malignant" = "#c0392b", "Benign" = "#2e86c1")
Laporan ini menyajikan analisis multivariat lengkap pada Wisconsin Breast Cancer Dataset dari UCI Machine Learning Repository. Analisis dilakukan secara bertahap:
| Komponen | Variabel | Keterangan |
|---|---|---|
| Independent Variable (IV) | diagnosis | Malignant (M) vs Benign (B) |
| Dependent Variable 1 | texture_mean | Rata-rata tekstur sel |
| Dependent Variable 2 | smoothness_mean | Rata-rata kemulusan sel |
| Dependent Variable 3 | symmetry_mean | Rata-rata simetri sel |
| Covariate (COV) | concavity_mean | Rata-rata cekungan kontur sel |
| Jumlah Observasi | — | 50 (25 Malignant, 25 Benign) |
# Read the raw CSV, drop the `id` and `Unnamed..32` columns when they are
# present, relabel the diagnosis codes (M/B) and drop rows without a diagnosis.
df_raw <- read.csv("Breast_Cancer_Data.csv") %>%
  dplyr::select(-any_of(c("id", "Unnamed..32"))) %>%
  mutate(
    diagnosis = dplyr::recode(trimws(diagnosis),
                              "M" = "Malignant", "B" = "Benign")
  ) %>%
  drop_na(diagnosis)

# Report the dimensions of the cleaned data.
cat(sprintf("Total observasi : %d\nJumlah kolom : %d\n",
            nrow(df_raw), ncol(df_raw)))
## Total observasi : 569
## Jumlah kolom : 32
# Frequency table of the diagnosis labels in the full data set.
as.data.frame(table(df_raw$diagnosis)) %>%
  setNames(c("Diagnosis", "Frekuensi")) %>%
  kable(caption = "Distribusi Diagnosis — Data Asli") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| Diagnosis | Frekuensi |
|---|---|
| Benign | 357 |
| Malignant | 212 |
# Analysis variables: dependent variables, covariate and grouping factor.
DVS <- c("texture_mean", "smoothness_mean", "symmetry_mean")
COV <- "concavity_mean"
IV <- "diagnosis"

# Fixed row ids of the analysis sample (25 ids per vector). They are compared
# against `row_number() - 1`, i.e. they are zero-based row positions.
# NOTE(review): the vector names suggest one diagnosis group each; the code
# does not verify that — confirm against the data.
IDX_MAL <- c(2, 5, 13, 24, 77, 135, 172, 197, 223, 252,
             256, 277, 280, 297, 328, 337, 369, 393, 441, 444,
             451, 460, 479, 512, 536)
IDX_BEN <- c(76, 84, 92, 106, 113, 151, 266, 303, 313, 314,
             345, 357, 367, 378, 413, 438, 466, 481, 483, 500,
             510, 513, 541, 547, 553)

df_raw <- df_raw %>% mutate(row_id = row_number() - 1)
IDX <- c(IDX_MAL, IDX_BEN)

# Fail early if the index list is inconsistent with the loaded data; otherwise
# `filter()` would silently return fewer rows than the sample size reported
# in the rest of the document.
stopifnot(
  anyDuplicated(IDX) == 0,
  all(IDX %in% df_raw$row_id)
)

# Keep the selected rows in the order given by IDX and only the model columns.
df <- df_raw %>%
  filter(row_id %in% IDX) %>%
  arrange(match(row_id, IDX)) %>%
  dplyr::select(all_of(c(IV, DVS, COV)))

cat(sprintf("Total sampel : %d observasi\n", nrow(df)))
## Total sampel : 50 observasi
##
## Benign Malignant
## 25 25
Metode Sampling: Fixed-index sampling dengan 50 observasi seimbang (25 Malignant, 25 Benign). Indeks dipilih untuk memenuhi asumsi analisis multivariat sekaligus mempertahankan representasi biologis yang memadai dari kedua kelompok.
# Per-group mean, SD, min and max (rounded to 4 decimals) for every DV and the
# covariate, reshaped to one row per variable and group.
desc_stats <- summarise(
  group_by(df, diagnosis),
  across(
    all_of(c(DVS, COV)),
    list(
      Mean = ~round(mean(.x), 4),
      SD   = ~round(sd(.x), 4),
      Min  = ~round(min(.x), 4),
      Max  = ~round(max(.x), 4)
    )
  )
) %>%
  # Split "<variable>_<stat>" at the last underscore only, because the
  # variable names themselves contain underscores.
  pivot_longer(
    cols = -diagnosis,
    names_to = c("Variabel", ".value"),
    names_sep = "_(?=[^_]+$)"
  ) %>%
  arrange(Variabel, diagnosis)

desc_stats %>%
  kable(caption = "Statistik Deskriptif per Variabel dan Grup") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = TRUE) %>%
  column_spec(1, bold = TRUE)
| diagnosis | Variabel | Mean | SD | Min | Max |
|---|---|---|---|---|---|
| Benign | concavity_mean | 0.0530 | 0.0319 | 0.0000 | 0.1321 |
| Malignant | concavity_mean | 0.1504 | 0.0617 | 0.0268 | 0.2810 |
| Benign | smoothness_mean | 0.0946 | 0.0124 | 0.0736 | 0.1291 |
| Malignant | smoothness_mean | 0.1018 | 0.0131 | 0.0737 | 0.1278 |
| Benign | symmetry_mean | 0.1771 | 0.0254 | 0.1386 | 0.2403 |
| Malignant | symmetry_mean | 0.1873 | 0.0199 | 0.1467 | 0.2162 |
| Benign | texture_mean | 17.5788 | 3.3962 | 10.7200 | 24.9900 |
| Malignant | texture_mean | 21.2604 | 3.7955 | 11.8900 | 28.7700 |
# Long format: one row per observation and variable (DVs + covariate).
df_long <- df %>%
  pivot_longer(cols = all_of(c(DVS, COV)),
               names_to = "Variabel", values_to = "Nilai")

# Density plot per variable, split by diagnosis group.
ggplot(df_long, aes(x = Nilai, fill = diagnosis, color = diagnosis)) +
  geom_density(alpha = 0.35, linewidth = 0.9) +
  geom_rug(alpha = 0.5, linewidth = 0.4) +
  facet_wrap(~Variabel, scales = "free", ncol = 2) +
  scale_fill_manual(values = COL) +
  scale_color_manual(values = COL) +
  labs(title = "Distribusi Variabel per Grup Diagnosis",
       subtitle = "Density plot dengan rug marks",
       x = "Nilai", y = "Densitas", fill = "Diagnosis", color = "Diagnosis") +
  theme(legend.position = "bottom")

# Boxplot per variable with the individual observations jittered on top.
# (This statement was fused onto the end of the previous one, which does not
# parse; it is now a separate top-level expression.)
ggplot(df_long, aes(x = diagnosis, y = Nilai, fill = diagnosis)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21, outlier.size = 2) +
  geom_jitter(aes(color = diagnosis), width = 0.12, alpha = 0.5, size = 1.8) +
  facet_wrap(~Variabel, scales = "free_y", ncol = 2) +
  scale_fill_manual(values = COL) +
  scale_color_manual(values = COL) +
  labs(title = "Boxplot Variabel per Grup Diagnosis",
       subtitle = "Dengan jitter untuk distribusi individual",
       x = NULL, y = "Nilai") +
  theme(legend.position = "none")
Tujuan: Memverifikasi DV-DV saling berkorelasi — syarat mendasar analisis multivariat. Metode: Bartlett’s Test of Sphericity. Keputusan: p < 0.05 → H₀ ditolak → DV berkorelasi → TERPENUHI
# Correlation matrix of the DVs and Bartlett's test of sphericity on it.
R_matrix <- df %>%
  dplyr::select(all_of(DVS)) %>%
  cor()
sphericity <- cortest.bartlett(R_matrix, n = nrow(df))

# Present the test result as a two-column table; all values are shown as text.
tibble(
  Statistik = c("Chi-square", "Degrees of Freedom", "p-value"),
  Nilai = c(
    as.character(round(sphericity$chisq, 4)),
    as.character(sphericity$df),
    format(sphericity$p.value, scientific = TRUE, digits = 4)
  )
) %>%
  kable(caption = "Bartlett's Test of Sphericity") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| Statistik | Nilai |
|---|---|
| Chi-square | 24.0657 |
| Degrees of Freedom | 3 |
| p-value | 2.42e-05 |
# Lower-triangle correlation plot of the DVs with the coefficients printed.
ggcorrplot(
  corr = R_matrix,
  method = "circle",
  type = "lower",
  lab = TRUE,
  lab_size = 4,
  colors = c("#c0392b", "white", "#2e86c1"),
  title = "Matriks Korelasi Antar DV",
  ggtheme = theme_minimal()
)
p-value = 2.42e-05 < 0.05 → H₀ ditolak → DV-DV saling berkorelasi signifikan. ✔ ASUMSI 1 TERPENUHI
Tujuan: Matriks kovarians antar grup (Malignant vs Benign) harus homogen. Metode: Box’s M Test. Keputusan: p ≥ 0.05 → H₀ gagal ditolak → homogen → TERPENUHI
# Box's M test for equality of the group covariance matrices of the DVs.
# Both biotools and heplots export `boxM()`. heplots is attached later and
# therefore masks biotools, so the unqualified call resolved to heplots; the
# namespace is now explicit so the result no longer depends on attach order.
bm <- heplots::boxM(df[, DVS], df[[IV]])
bm_p <- as.numeric(bm$p.value)
bm_M <- as.numeric(bm$statistic[[1]])

# Build the table from character values so the column has a single type.
bm_df <- data.frame(
  Statistik = c("Box's M (Chi-Sq approx.)", "df", "p-value"),
  Nilai = c(as.character(round(bm_M, 4)),
            as.character(as.numeric(bm$parameter[[1]])),
            as.character(round(bm_p, 4))),
  stringsAsFactors = FALSE
)
kable(bm_df, caption = "Box's M Test") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| Statistik | Nilai |
|---|---|
| Box’s M (Chi-Sq approx.) | 3.5164 |
| df | 6 |
| p-value | 0.7418 |
# Per-group covariance matrices of the DVs; only their diagonals (the
# variances) are plotted below.
cov_mal <- cov(df[df$diagnosis == "Malignant", DVS])
cov_ben <- cov(df[df$diagnosis == "Benign", DVS])
var_compare <- data.frame(
  Var = rep(DVS, 2),
  Variansi = c(diag(cov_mal), diag(cov_ben)),
  Grup = rep(c("Malignant", "Benign"), each = length(DVS))
)

# One panel per DV with its own y-scale: the DVs are measured on very
# different scales, so on a shared axis the bars of the small-variance DVs
# collapse to zero height and the group comparison cannot be read.
ggplot(var_compare, aes(x = Var, y = Variansi, fill = Grup)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.6, alpha = 0.85) +
  facet_wrap(~Var, scales = "free") +
  scale_fill_manual(values = COL) +
  labs(title = "Perbandingan Variansi per DV dan Grup",
       x = "DV", y = "Variansi", fill = "Diagnosis") +
  theme(legend.position = "bottom")
p-value = 0.7418 ≥ 0.05 → H₀ gagal ditolak → Matriks kovarians homogen. ✔ ASUMSI 2 TERPENUHI
Tujuan: Gabungan DV mengikuti distribusi normal multivariat. Metode: Mardia’s Test (skewness + kurtosis multivariat). Keputusan: Kedua p ≥ 0.05 → TERPENUHI
# Mardia's multivariate skewness and kurtosis for the DVs.
mardia_result <- psych::mardia(df[, DVS], plot = FALSE)
p_skew <- mardia_result$p.skew
z_kurt <- mardia_result$kurtosis
# Two-sided normal p-value for the kurtosis statistic.
p_kurt <- 2 * (1 - pnorm(abs(z_kurt)))

# Result table; the Status cell is green when p >= 0.05 and red otherwise.
tibble(
  Komponen  = c("Mardia Skewness", "Mardia Kurtosis"),
  Statistik = round(c(mardia_result$b1p, mardia_result$b2p), 4),
  `p-value` = round(c(p_skew, p_kurt), 4),
  Status    = ifelse(c(p_skew, p_kurt) >= 0.05, "Normal ✔", "Tidak Normal ✘")
) %>%
  kable(caption = "Mardia's Test — Normalitas Multivariat") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  column_spec(
    4,
    bold = TRUE,
    color = ifelse(c(p_skew, p_kurt) >= 0.05, "#1e8449", "#c0392b")
  )
| Komponen | Statistik | p-value | Status |
|---|---|---|---|
| Mardia Skewness | 0.9458 | 0.6404 | Normal ✔ |
| Mardia Kurtosis | 13.3407 | 0.2841 | Normal ✔ |
# Normal Q-Q plot for each DV, side by side. `par()` returns the previous
# settings, which are restored afterwards so the 1x3 layout does not leak
# into later base-graphics plots.
old_par <- par(mfrow = c(1, 3), mar = c(4, 4, 3, 1))
for (v in DVS) {
  qqnorm(df[[v]], main = paste("Q-Q:", v), col = "#2e86c1", pch = 19, cex = 0.8)
  qqline(df[[v]], col = "#c0392b", lwd = 2)
}
par(old_par)
p-skewness = 0.6404 ≥ 0.05 dan p-kurtosis = 0.2841 ≥ 0.05 → Data memenuhi normalitas multivariat. ✔ ASUMSI 3 TERPENUHI
Tujuan: Covariate concavity_mean harus
berkorelasi linear dengan setiap DV. Metode: Pearson
Correlation. Keputusan: p < 0.05 → ada hubungan
linear → TERPENUHI
# Pearson correlation test between the covariate and each DV.
lin_results <- map_df(DVS, function(v) {
  ct <- cor.test(df[[COV]], df[[v]])
  tibble(DV = v,
         r = round(ct$estimate, 4),
         `t-stat` = round(ct$statistic, 4),
         df = ct$parameter,
         `p-value` = round(ct$p.value, 6),
         Status = ifelse(ct$p.value < 0.05,
                         "Linear Signifikan ✔", "Tidak Linear ✘"))
})

# Colour the Status cell from the status text itself. It was previously
# always green, which would also paint a "Tidak Linear ✘" row as passing.
kable(lin_results, caption = "Pearson Correlation: Covariate vs DV") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  column_spec(
    6,
    bold = TRUE,
    color = ifelse(startsWith(lin_results$Status, "Linear"),
                   "#1e8449", "#c0392b")
  )
| DV | r | t-stat | df | p-value | Status |
|---|---|---|---|---|---|
| texture_mean | 0.3750 | 2.8024 | 48 | 0.007294 | Linear Signifikan ✔ |
| smoothness_mean | 0.4534 | 3.5247 | 48 | 0.000943 | Linear Signifikan ✔ |
| symmetry_mean | 0.4436 | 3.4293 | 48 | 0.001252 | Linear Signifikan ✔ |
# Scatter of each DV against the covariate with a single pooled linear fit
# per panel (`group = 1` ignores the diagnosis colouring for the fit).
ggplot(
  pivot_longer(df, all_of(DVS), names_to = "DV", values_to = "Nilai_DV"),
  aes(x = concavity_mean, y = Nilai_DV, color = diagnosis)
) +
  geom_point(alpha = 0.7, size = 2) +
  geom_smooth(
    aes(group = 1),
    method = "lm", se = TRUE,
    color = "#2c3e50", linewidth = 1, linetype = "dashed"
  ) +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  scale_color_manual(values = COL) +
  labs(
    title = "Linearitas: concavity_mean vs Setiap DV",
    x = "concavity_mean (Covariate)", y = "Nilai DV", color = "Diagnosis"
  ) +
  theme(legend.position = "bottom")
Semua p-value < 0.05 → Ketiga DV memiliki hubungan linear signifikan dengan covariate. ✔ ASUMSI 4 TERPENUHI
Tujuan: Setiap observasi bersifat independen. Metode: Evaluasi desain studi (tidak diuji secara statistik).
| Kriteria | Evaluasi | Status |
|---|---|---|
| Unit observasi berbeda | Setiap baris = satu pasien unik (bukan repeated measures) | ✔ |
| Tidak ada tumpang tindih | Setiap pasien hanya masuk satu grup | ✔ |
| Sampling tidak sistematis | Fixed-index tanpa dependensi antar baris | ✔ |
| Sumber data independen | Wisconsin BC (UCI): setiap observasi independen | ✔ |
Tidak ada indikasi dependensi antar observasi berdasarkan desain studi. ✔ ASUMSI 5 TERPENUHI
# Summary of the five assumption checks. The status of each row is derived
# from the corresponding test result instead of being hard-coded, so the table
# cannot report "TERPENUHI" for a test that actually failed.
# Decision rules follow the ones stated for each test above; row 5 is a design
# evaluation with no statistical test, so it is fixed to TRUE.
assumption_met <- c(
  sphericity$p.value < 0.05,
  bm_p >= 0.05,
  p_skew >= 0.05 && p_kurt >= 0.05,
  all(lin_results$`p-value` < 0.05),
  TRUE
)

tibble(
  No = 1:5,
  Asumsi = c("Dependensi antar DV", "Homogenitas Kovarians",
             "Normalitas Multivariat", "Linearitas Covariate–DV",
             "Independensi Observasi"),
  Metode = c("Bartlett's Sphericity", "Box's M Test",
             "Mardia's Test", "Pearson Correlation", "Evaluasi Desain"),
  `p-value` = c(format(sphericity$p.value, scientific = TRUE, digits = 3),
                as.character(round(bm_p, 4)),
                paste0("skew=", round(p_skew, 4), " / kurt=", round(p_kurt, 4)),
                paste0("maks=", round(max(lin_results$`p-value`), 4)), "N/A"),
  Status = ifelse(assumption_met, "✔ TERPENUHI", "✘ TIDAK TERPENUHI")
) %>%
  kable(caption = "Ringkasan 5 Uji Asumsi MANCOVA") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(5, bold = TRUE,
              color = ifelse(assumption_met, "#1e8449", "#c0392b")) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
| No | Asumsi | Metode | p-value | Status |
|---|---|---|---|---|
| 1 | Dependensi antar DV | Bartlett’s Sphericity | 2.42e-05 | ✔ TERPENUHI |
| 2 | Homogenitas Kovarians | Box’s M Test | 0.7418 | ✔ TERPENUHI |
| 3 | Normalitas Multivariat | Mardia’s Test | skew=0.6404 / kurt=0.2841 | ✔ TERPENUHI |
| 4 | Linearitas Covariate–DV | Pearson Correlation | maks=0.0073 | ✔ TERPENUHI |
| 5 | Independensi Observasi | Evaluasi Desain | N/A | ✔ TERPENUHI |
Seluruh 5 asumsi MANCOVA terpenuhi. Dataset layak untuk dilanjutkan ke analisis utama.
Tujuan: Menguji apakah terdapat perbedaan rata-rata yang signifikan antara kelompok Malignant dan Benign untuk masing-masing DV secara terpisah, tanpa mempertimbangkan covariate maupun hubungan antar DV.
Catatan: ANOVA ini berfungsi sebagai baseline sebelum penambahan covariate (ANCOVA) dan sebelum analisis simultan multivariat (MANOVA).
# Run a separate one-way ANOVA (DV ~ diagnosis) for each DV and collect the
# F statistic, degrees of freedom, p-value and decision in one table.
anova_results <- map_df(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ diagnosis")), data = df)
  s <- summary(fit)[[1]]
  tibble(
    DV = v,
    `F-value` = round(s["diagnosis", "F value"], 4),
    `df1` = s["diagnosis", "Df"],
    `df2` = s["Residuals", "Df"],
    `p-value` = round(s["diagnosis", "Pr(>F)"], 6),
    Keputusan = ifelse(s["diagnosis", "Pr(>F)"] < 0.05,
                       "Tolak H₀ ✔", "Gagal Tolak H₀ ✘")
  )
})

# Colour the decision cell from the decision text. The decision uses the
# unrounded p-value, so colouring from the rounded column could disagree with
# it for p-values just below 0.05.
kable(anova_results, caption = "Hasil One-Way ANOVA per Dependent Variable") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  column_spec(
    6,
    bold = TRUE,
    color = ifelse(startsWith(anova_results$Keputusan, "Tolak"),
                   "#1e8449", "#c0392b")
  )
| DV | F-value | df1 | df2 | p-value | Keputusan |
|---|---|---|---|---|---|
| texture_mean | 13.0630 | 1 | 48 | 0.000720 | Tolak H₀ ✔ |
| smoothness_mean | 4.0243 | 1 | 48 | 0.050503 | Gagal Tolak H₀ ✘ |
| symmetry_mean | 2.5132 | 1 | 48 | 0.119460 | Gagal Tolak H₀ ✘ |
Hasil ANOVA:
texture_mean → F = 13.063, p = 7.2 × 10^{-4} < 0.05 → Signifikan ✔ Terdapat perbedaan rata-rata texture yang signifikan antara Malignant dan Benign.
smoothness_mean → F = 4.0243, p = 0.0505 → Tidak Signifikan ✘
symmetry_mean → F = 2.5132, p = 0.1195 → Tidak Signifikan ✘
# Bar chart of group means with standard-error bars, one panel per DV.
df %>%
  pivot_longer(all_of(DVS), names_to = "Variable", values_to = "Value") %>%
  ggplot(aes(x = diagnosis, y = Value, fill = diagnosis)) +
  stat_summary(fun = mean, geom = "bar", alpha = 0.8, position = "dodge") +
  stat_summary(fun.data = mean_se, geom = "errorbar",
               position = position_dodge(0.9), width = 0.25, linewidth = 0.8) +
  facet_wrap(~Variable, scales = "free_y", ncol = 3) +
  scale_fill_manual(values = COL) +
  labs(title = "ANOVA: Mean ± SE per DV dan Grup",
       subtitle = "Error bar = Standard Error",
       x = "Diagnosis", y = "Mean Value", fill = "Diagnosis") +
  theme(legend.position = "bottom")

# Violin plot with an embedded boxplot, one panel per DV.
# (This and the next statement were each fused onto the end of the preceding
# `theme()` call, which does not parse; they are now separate expressions.)
df %>%
  pivot_longer(all_of(DVS), names_to = "Variable", values_to = "Value") %>%
  ggplot(aes(x = diagnosis, y = Value, fill = diagnosis)) +
  geom_violin(alpha = 0.5, trim = FALSE) +
  geom_boxplot(width = 0.15, alpha = 0.8, outlier.size = 1.5) +
  facet_wrap(~Variable, scales = "free_y", ncol = 3) +
  scale_fill_manual(values = COL) +
  labs(title = "ANOVA: Distribusi per DV dan Grup (Violin + Boxplot)",
       x = "Diagnosis", y = "Value", fill = "Diagnosis") +
  theme(legend.position = "bottom")

# Eta-squared (non-partial) of the diagnosis effect for each one-way ANOVA,
# labelled with the 0.01 / 0.06 / 0.14 cut-offs.
eta_results <- map_df(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ diagnosis")), data = df)
  e <- eta_squared(fit, partial = FALSE)
  tibble(
    DV = v,
    `eta²` = round(e$Eta2[1], 4),
    Interpretasi = case_when(
      e$Eta2[1] >= 0.14 ~ "Besar (≥ 0.14)",
      e$Eta2[1] >= 0.06 ~ "Sedang (0.06–0.14)",
      e$Eta2[1] >= 0.01 ~ "Kecil (0.01–0.06)",
      TRUE ~ "Sangat Kecil (< 0.01)"
    )
  )
})
kable(eta_results, caption = "Effect Size (Eta-Squared) per DV — ANOVA") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| DV | eta² | Interpretasi |
|---|---|---|
| texture_mean | 0.2139 | Besar (≥ 0.14) |
| smoothness_mean | 0.0774 | Sedang (0.06–0.14) |
| symmetry_mean | 0.0498 | Kecil (0.01–0.06) |
Tujuan: Menguji apakah terdapat perbedaan vektor rata-rata yang signifikan antara Malignant dan Benign pada semua DV secara simultan, tanpa covariate.
Keunggulan vs ANOVA: MANOVA mempertimbangkan korelasi antar DV dan mengontrol familywise error rate, sehingga lebih tepat untuk pengujian simultan.
# `manova_model` is used throughout this section but is not defined in the
# visible part of this file. It is (re)built here from the model implied by
# the reported output (three DVs, `diagnosis` as the only term) so that this
# chunk runs on its own; refitting an identical model is harmless.
manova_model <- manova(
  cbind(texture_mean, smoothness_mean, symmetry_mean) ~ diagnosis,
  data = df
)

# The four standard multivariate test statistics.
tests <- c("Wilks", "Pillai", "Hotelling-Lawley", "Roy")

# One row per test statistic for the `diagnosis` effect. The row is selected
# by name, matching how the MANCOVA tables below index `$stats`.
manova_stats <- map_df(tests, function(t) {
  s <- summary(manova_model, test = t)$stats
  tibble(
    `Statistik Uji` = t,
    Nilai = round(s["diagnosis", 2], 5),
    `F approx` = round(s["diagnosis", 3], 4),
    `num df` = round(s["diagnosis", 4], 0),
    `den df` = round(s["diagnosis", 5], 0),
    `p-value` = round(s["diagnosis", 6], 6),
    Keputusan = ifelse(s["diagnosis", 6] < 0.05, "Tolak H₀ ✔", "Gagal Tolak H₀")
  )
})

# Colour the decision cell from the decision text, which is based on the
# unrounded p-value, rather than from the rounded p-value column.
kable(manova_stats, caption = "Hasil MANOVA — Empat Statistik Uji Multivariat") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  column_spec(
    7,
    bold = TRUE,
    color = ifelse(startsWith(manova_stats$Keputusan, "Tolak"),
                   "#1e8449", "#c0392b")
  ) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
| Statistik Uji | Nilai | F approx | num df | den df | p-value | Keputusan |
|---|---|---|---|---|---|---|
| Wilks | 0.66636 | 7.6771 | 3 | 46 | 0.000291 | Tolak H₀ ✔ |
| Pillai | 0.33364 | 7.6771 | 3 | 46 | 0.000291 | Tolak H₀ ✔ |
| Hotelling-Lawley | 0.50068 | 7.6771 | 3 | 46 | 0.000291 | Tolak H₀ ✔ |
| Roy | 0.50068 | 7.6771 | 3 | 46 | 0.000291 | Tolak H₀ ✔ |
wilks_val <- manova_stats$Nilai[manova_stats$`Statistik Uji` == "Wilks"]
wilks_p <- manova_stats$`p-value`[manova_stats$`Statistik Uji` == "Wilks"]
Wilks’ Lambda = 0.66636, F = 7.6771, p = 2.91 × 10^{-4} < 0.05.
H₀ ditolak → Terdapat perbedaan signifikan pada vektor rata-rata (texture_mean, smoothness_mean, symmetry_mean) antara kelompok Malignant dan Benign secara simultan.
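Nilai Wilks’ Lambda 0.66636 (skala 0–1; semakin kecil nilainya, semakin kuat efeknya) mengindikasikan bahwa sekitar 33% variasi gabungan DV (1 − Λ = 0.3336) berkaitan dengan perbedaan antar grup, sehingga variasi between-group cukup besar relatif terhadap variasi within-group.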
## Response texture_mean :
## Df Sum Sq Mean Sq F value Pr(>F)
## diagnosis 1 169.43 169.43 13.063 0.0007201 ***
## Residuals 48 622.56 12.97
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response smoothness_mean :
## Df Sum Sq Mean Sq F value Pr(>F)
## diagnosis 1 0.0006534 0.00065341 4.0243 0.0505 .
## Residuals 48 0.0077937 0.00016237
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response symmetry_mean :
## Df Sum Sq Mean Sq F value Pr(>F)
## diagnosis 1 0.0013087 0.00130867 2.5132 0.1195
## Residuals 48 0.0249941 0.00052071
# HE plot of the MANOVA model with filled ellipses.
heplot(
  mod = manova_model,
  main = "HE Plot — MANOVA\n(texture_mean vs smoothness_mean)",
  col = c("#2e86c1", "#c0392b"),
  fill = TRUE,
  fill.alpha = 0.2
)
Interpretasi HE Plot: Elips H
(hypothesis) merepresentasikan variasi between-group akibat
faktor diagnosis, sedangkan elips E
(error) merepresentasikan variasi within-group. Semakin besar
elips H relatif terhadap E, semakin kuat efek treatment. Jika elips H
menonjol keluar dari elips E, efek tersebut signifikan.
# Canonical discriminant analysis of the MANOVA model and its plot.
candisc_model <- candisc(manova_model)
plot(
  candisc_model,
  main = "Canonical Discriminant Analysis — MANOVA",
  col = c("#2e86c1", "#c0392b"),
  pch = c(16, 17)
)

# Effect size from Wilks' Lambda, computed as 1 - Lambda.
wilks_full <- summary(manova_model, test = "Wilks")$stats
eta_manova <- 1 - wilks_full[1, 2]

tibble(
  Metode = "MANOVA (Wilks' Lambda)",
  `Wilks' Lambda` = round(wilks_full[1, 2], 5),
  `1 - Lambda (η²)` = round(eta_manova, 4)
) %>%
  mutate(
    Interpretasi = case_when(
      eta_manova >= 0.14 ~ "Efek Besar (≥ 0.14)",
      eta_manova >= 0.06 ~ "Efek Sedang (0.06–0.14)",
      eta_manova >= 0.01 ~ "Efek Kecil (0.01–0.06)",
      TRUE ~ "Efek Sangat Kecil"
    )
  ) %>%
  kable(caption = "Effect Size MANOVA") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| Metode | Wilks’ Lambda | 1 - Lambda (η²) | Interpretasi |
|---|---|---|---|
| MANOVA (Wilks’ Lambda) | 0.66636 | 0.3336 | Efek Besar (≥ 0.14) |
Tujuan: Menguji perbedaan rata-rata per DV antara
Malignant dan Benign setelah mengontrol pengaruh
covariate concavity_mean. ANCOVA “menetralkan”
variasi yang disebabkan oleh perbedaan nilai covariate antar
observasi.
Perbedaan dengan ANOVA: ANCOVA menambahkan covariate sebagai prediktor kontinu, sehingga estimasi perbedaan antar grup menjadi lebih akurat (adjusted means).
Model:
DV ~ concavity_mean + diagnosis
# ANCOVA per DV. The covariate is entered before `diagnosis`, so with the
# sequential sums of squares used by `aov()` the diagnosis effect is tested
# after adjusting for the covariate.
ancova_results <- map_df(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  s <- summary(fit)[[1]]
  tibble(
    DV = v,
    `F (COV)` = round(s["concavity_mean", "F value"], 4),
    `p (COV)` = round(s["concavity_mean", "Pr(>F)"], 6),
    `F (diagnosis)` = round(s["diagnosis", "F value"], 4),
    `p (diagnosis)` = round(s["diagnosis", "Pr(>F)"], 6),
    Keputusan = ifelse(s["diagnosis", "Pr(>F)"] < 0.05,
                       "Tolak H₀ ✔", "Gagal Tolak H₀ ✘")
  )
})

# Colour the decision cell from the decision text, which is based on the
# unrounded p-value, rather than from the rounded p-value column.
kable(ancova_results,
      caption = "Hasil ANCOVA per DV (setelah kontrol concavity_mean)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  column_spec(
    6,
    bold = TRUE,
    color = ifelse(startsWith(ancova_results$Keputusan, "Tolak"),
                   "#1e8449", "#c0392b")
  ) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
| DV | F (COV) | p (COV) | F (diagnosis) | p (diagnosis) | Keputusan |
|---|---|---|---|---|---|
| texture_mean | 8.4532 | 0.005546 | 4.6651 | 0.035915 | Tolak H₀ ✔ |
| smoothness_mean | 12.2263 | 0.001040 | 0.2373 | 0.628403 | Gagal Tolak H₀ ✘ |
| symmetry_mean | 11.7691 | 0.001264 | 1.0354 | 0.314115 | Gagal Tolak H₀ ✘ |
# Emit one heading and one full ANCOVA table per DV. The tables are written
# with `cat()`, so the markup is printed as raw text.
for (v in DVS) {
  fit <- aov(
    reformulate(c("concavity_mean", "diagnosis"), response = v),
    data = df
  )
  cat("\n\n### ", v, "\n\n", sep = "")
  cat(
    kable_styling(
      kable(round(as.data.frame(summary(fit)[[1]]), 4),
            caption = paste("ANCOVA Table —", v)),
      bootstrap_options = c("striped", "hover"),
      full_width = FALSE
    )
  )
  cat("\n\n")
}
| Df | Sum Sq | Mean Sq | F value | Pr(>F) | |
|---|---|---|---|---|---|
| concavity_mean | 1 | 111.3604 | 111.3604 | 8.4532 | 0.0055 |
| diagnosis | 1 | 61.4571 | 61.4571 | 4.6651 | 0.0359 |
| Residuals | 47 | 619.1685 | 13.1738 | NA | NA |
| Df | Sum Sq | Mean Sq | F value | Pr(>F) | |
|---|---|---|---|---|---|
| concavity_mean | 1 | 0.0017 | 0.0017 | 12.2263 | 0.0010 |
| diagnosis | 1 | 0.0000 | 0.0000 | 0.2373 | 0.6284 |
| Residuals | 47 | 0.0067 | 0.0001 | NA | NA |
| Df | Sum Sq | Mean Sq | F value | Pr(>F) | |
|---|---|---|---|---|---|
| concavity_mean | 1 | 0.0052 | 0.0052 | 11.7691 | 0.0013 |
| diagnosis | 1 | 0.0005 | 0.0005 | 1.0354 | 0.3141 |
| Residuals | 47 | 0.0207 | 0.0004 | NA | NA |
# Estimated marginal means of diagnosis from each per-DV ANCOVA model, stacked
# into one data frame with a DV label column.
emm_list <- lapply(DVS, function(dv) {
  ancova_fit <- aov(as.formula(paste(dv, "~ concavity_mean + diagnosis")),
                    data = df)
  emm_tbl <- as.data.frame(emmeans(ancova_fit, ~ diagnosis))
  mutate(emm_tbl, DV = dv)
})
emm_df <- bind_rows(emm_list)

emm_df %>%
  dplyr::select(DV, diagnosis, emmean, SE, lower.CL, upper.CL) %>%
  mutate(across(where(is.numeric), ~round(.x, 4))) %>%
  kable(caption = "Estimated Marginal Means (Adjusted Means) — ANCOVA") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| DV | diagnosis | emmean | SE | lower.CL | upper.CL |
|---|---|---|---|---|---|
| texture_mean | Benign | 17.8423 | 0.8926 | 16.0466 | 19.6380 |
| texture_mean | Malignant | 20.9969 | 0.8926 | 19.2012 | 22.7926 |
| smoothness_mean | Benign | 0.0994 | 0.0029 | 0.0935 | 0.1053 |
| smoothness_mean | Malignant | 0.0970 | 0.0029 | 0.0911 | 0.1029 |
| symmetry_mean | Benign | 0.1865 | 0.0052 | 0.1761 | 0.1969 |
| symmetry_mean | Malignant | 0.1779 | 0.0052 | 0.1675 | 0.1883 |
# Adjusted means with their confidence limits, one panel per DV.
ggplot(emm_df, aes(x = diagnosis, y = emmean, color = diagnosis, group = diagnosis)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2, linewidth = 1) +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  scale_color_manual(values = COL) +
  labs(title = "ANCOVA: Estimated Marginal Means ± 95% CI",
       subtitle = "Rata-rata yang telah disesuaikan setelah mengontrol concavity_mean",
       x = "Diagnosis", y = "Adjusted Mean", color = "Diagnosis") +
  theme(legend.position = "bottom")

# Test whether the covariate's regression slope is the same in both groups
# (ANCOVA assumption) via the diagnosis x covariate interaction term.
slope_results <- map_df(DVS, function(v) {
  fit_int <- aov(as.formula(paste(v, "~ diagnosis * concavity_mean")), data = df)
  s <- summary(fit_int)[[1]]
  intx_p <- s["diagnosis:concavity_mean", "Pr(>F)"]
  tibble(
    DV = v,
    `p (interaksi)` = round(intx_p, 4),
    `Homogenitas Slope` = ifelse(intx_p >= 0.05,
                                 "Terpenuhi ✔ (slope paralel)",
                                 "Tidak Terpenuhi ✘ (slope berbeda)")
  )
})

# Colour the status cell from the status text. The status uses the unrounded
# p-value; the 4-decimal rounded column can round a failing p-value up to
# 0.05 and would then colour it as passing.
kable(slope_results,
      caption = "Uji Homogenitas Slope Regresi (Asumsi ANCOVA)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  column_spec(
    3,
    bold = TRUE,
    color = ifelse(startsWith(slope_results$`Homogenitas Slope`, "Terpenuhi"),
                   "#1e8449", "#c0392b")
  )
| DV | p (interaksi) | Homogenitas Slope |
|---|---|---|
| texture_mean | 0.1587 | Terpenuhi ✔ (slope paralel) |
| smoothness_mean | 0.3841 | Terpenuhi ✔ (slope paralel) |
| symmetry_mean | 0.6863 | Terpenuhi ✔ (slope paralel) |
# Per-group regression lines of each DV on the covariate.
df %>%
  pivot_longer(all_of(DVS), names_to = "DV", values_to = "Value") %>%
  ggplot(aes(x = concavity_mean, y = Value, color = diagnosis)) +
  geom_point(alpha = 0.6, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  scale_color_manual(values = COL) +
  labs(title = "ANCOVA: Garis Regresi per Grup",
       subtitle = "Slope yang paralel mengindikasikan homogenitas slope terpenuhi",
       x = "concavity_mean (Covariate)", y = "DV", color = "Diagnosis") +
  theme(legend.position = "bottom")

# Partial eta-squared of the diagnosis effect in each ANCOVA model.
# (This assignment was fused onto the end of the `theme()` call above, which
# does not parse; it is now a separate statement.)
eta_ancova <- map_df(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  e <- eta_squared(fit, partial = TRUE)
  e_diag <- e[e$Parameter == "diagnosis", ]
  tibble(
    DV = v,
    `Partial η² (diagnosis)` = round(e_diag$Eta2_partial, 4),
    Interpretasi = case_when(
      e_diag$Eta2_partial >= 0.14 ~ "Besar (≥ 0.14)",
      e_diag$Eta2_partial >= 0.06 ~ "Sedang (0.06–0.14)",
      e_diag$Eta2_partial >= 0.01 ~ "Kecil (0.01–0.06)",
      TRUE ~ "Sangat Kecil (< 0.01)"
    )
  )
})
kable(eta_ancova,
      caption = "Partial Eta-Squared (Effect Size) — ANCOVA") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| DV | Partial η² (diagnosis) | Interpretasi |
|---|---|---|
| texture_mean | 0.0903 | Sedang (0.06–0.14) |
| smoothness_mean | 0.0050 | Sangat Kecil (< 0.01) |
| symmetry_mean | 0.0216 | Kecil (0.01–0.06) |
Tujuan: Menguji perbedaan vektor
rata-rata semua DV secara simultan antara Malignant dan Benign,
setelah mengontrol pengaruh covariate
concavity_mean.
MANCOVA = MANOVA + Covariate
Model:
cbind(DV1, DV2, DV3) ~ concavity_mean + diagnosis
# MANCOVA: the three DVs modelled jointly, covariate entered before diagnosis.
mancova_model <- manova(
  formula = cbind(texture_mean, smoothness_mean, symmetry_mean) ~
    concavity_mean + diagnosis,
  data = df
)
# Print the Wilks' Lambda table for both terms.
summary(mancova_model, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## concavity_mean 1 0.56604 11.5001 3 45 1.012e-05 ***
## diagnosis 1 0.89301 1.7972 3 45 0.1613
## Residuals 47
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The four multivariate test statistics for the `diagnosis` effect in the
# MANCOVA model. The row is selected by name; `concavity_mean` is the other
# term row in `$stats`.
mancova_stats <- map_df(tests, function(t) {
  s <- summary(mancova_model, test = t)$stats
  tibble(
    `Statistik Uji` = t,
    Nilai = round(s["diagnosis", 2], 5),
    `F approx` = round(s["diagnosis", 3], 4),
    `num df` = round(s["diagnosis", 4], 0),
    `den df` = round(s["diagnosis", 5], 0),
    `p-value` = round(s["diagnosis", 6], 6),
    Keputusan = ifelse(s["diagnosis", 6] < 0.05, "Tolak H₀ ✔", "Gagal Tolak H₀")
  )
})

# Colour the decision cell from the decision text, which is based on the
# unrounded p-value, rather than from the rounded p-value column.
kable(mancova_stats,
      caption = "Hasil MANCOVA — Efek diagnosis (setelah kontrol concavity_mean)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  column_spec(
    7,
    bold = TRUE,
    color = ifelse(startsWith(mancova_stats$Keputusan, "Tolak"),
                   "#1e8449", "#c0392b")
  ) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
| Statistik Uji | Nilai | F approx | num df | den df | p-value | Keputusan |
|---|---|---|---|---|---|---|
| Wilks | 0.89301 | 1.7972 | 3 | 45 | 0.161295 | Gagal Tolak H₀ |
| Pillai | 0.10699 | 1.7972 | 3 | 45 | 0.161295 | Gagal Tolak H₀ |
| Hotelling-Lawley | 0.11981 | 1.7972 | 3 | 45 | 0.161295 | Gagal Tolak H₀ |
| Roy | 0.11981 | 1.7972 | 3 | 45 | 0.161295 | Gagal Tolak H₀ |
# The four multivariate test statistics for the covariate (`concavity_mean`)
# in the MANCOVA model.
mancova_cov_stats <- map_df(tests, function(t) {
  s <- summary(mancova_model, test = t)$stats
  tibble(
    `Statistik Uji` = t,
    Nilai = round(s["concavity_mean", 2], 5),
    `F approx` = round(s["concavity_mean", 3], 4),
    `num df` = round(s["concavity_mean", 4], 0),
    `den df` = round(s["concavity_mean", 5], 0),
    `p-value` = round(s["concavity_mean", 6], 6),
    Keputusan = ifelse(s["concavity_mean", 6] < 0.05,
                       "COV Signifikan ✔", "COV Tidak Signifikan")
  )
})

# Colour the decision cell from the decision text, which is based on the
# unrounded p-value, rather than from the rounded p-value column.
kable(mancova_cov_stats,
      caption = "MANCOVA — Efek Covariate (concavity_mean)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  column_spec(
    7,
    bold = TRUE,
    color = ifelse(grepl("Tidak", mancova_cov_stats$Keputusan, fixed = TRUE),
                   "#c0392b", "#1e8449")
  ) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
| Statistik Uji | Nilai | F approx | num df | den df | p-value | Keputusan |
|---|---|---|---|---|---|---|
| Wilks | 0.56604 | 11.5001 | 3 | 45 | 1e-05 | COV Signifikan ✔ |
| Pillai | 0.43396 | 11.5001 | 3 | 45 | 1e-05 | COV Signifikan ✔ |
| Hotelling-Lawley | 0.76667 | 11.5001 | 3 | 45 | 1e-05 | COV Signifikan ✔ |
| Roy | 0.76667 | 11.5001 | 3 | 45 | 1e-05 | COV Signifikan ✔ |
# Pull the Wilks row values out of the two MANCOVA tables.
mancova_wilks_val <-
  mancova_stats[["Nilai"]][mancova_stats[["Statistik Uji"]] == "Wilks"]
mancova_wilks_p <-
  mancova_stats[["p-value"]][mancova_stats[["Statistik Uji"]] == "Wilks"]
mancova_cov_p <-
  mancova_cov_stats[["p-value"]][mancova_cov_stats[["Statistik Uji"]] == "Wilks"]
Efek Diagnosis (setelah kontrol covariate): Wilks’ Lambda = 0.89301, p = 0.161295 > 0.05 → H₀ gagal ditolak → Tidak ada perbedaan signifikan setelah kontrol covariate.
Efek Covariate (concavity_mean): p = 1.01 × 10^{-5} < 0.05 → Covariate berpengaruh signifikan terhadap kombinasi DV. Penggunaan MANCOVA (vs MANOVA) sudah tepat.
## Response texture_mean :
## Df Sum Sq Mean Sq F value Pr(>F)
## concavity_mean 1 111.36 111.360 8.4532 0.005546 **
## diagnosis 1 61.46 61.457 4.6651 0.035915 *
## Residuals 47 619.17 13.174
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response smoothness_mean :
## Df Sum Sq Mean Sq F value Pr(>F)
## concavity_mean 1 0.0017368 0.00173680 12.2263 0.00104 **
## diagnosis 1 0.0000337 0.00003371 0.2373 0.62840
## Residuals 47 0.0066766 0.00014205
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response symmetry_mean :
## Df Sum Sq Mean Sq F value Pr(>F)
## concavity_mean 1 0.0051762 0.0051762 11.7691 0.001264 **
## diagnosis 1 0.0004554 0.0004554 1.0354 0.314115
## Residuals 47 0.0206712 0.0004398
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# HE plot of the MANCOVA model with filled ellipses.
heplot(mancova_model,
       fill = TRUE,
       fill.alpha = 0.2,
       col = c("#8e44ad", "#27ae60", "#c0392b"),
       main = "HE Plot — MANCOVA\n(Setelah Kontrol concavity_mean)")

# Effect size of diagnosis from Wilks' Lambda, computed as 1 - Lambda.
# (This assignment was fused onto the end of the `heplot()` call above, which
# does not parse; it is now a separate statement.)
wilks_mancova <- summary(mancova_model, test = "Wilks")$stats
eta_mancova_val <- 1 - wilks_mancova["diagnosis", 2]
tibble(
  Metode = "MANCOVA (Wilks' Lambda — diagnosis)",
  `Wilks' Lambda` = round(wilks_mancova["diagnosis", 2], 5),
  `1-Lambda (η²)` = round(eta_mancova_val, 4),
  Interpretasi = case_when(
    eta_mancova_val >= 0.14 ~ "Efek Besar (≥ 0.14)",
    eta_mancova_val >= 0.06 ~ "Efek Sedang (0.06–0.14)",
    eta_mancova_val >= 0.01 ~ "Efek Kecil (0.01–0.06)",
    TRUE ~ "Efek Sangat Kecil"
  )
) %>%
  kable(caption = "Effect Size MANCOVA") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| Metode | Wilks’ Lambda | 1-Lambda (η²) | Interpretasi |
|---|---|---|---|
| MANCOVA (Wilks’ Lambda — diagnosis) | 0.89301 | 0.107 | Efek Sedang (0.06–0.14) |
Tujuan Perbandingan: Mengevaluasi dampak
penambahan covariate (concavity_mean) terhadap
hasil pengujian. Perbandingan ini menjawab pertanyaan:
“Apakah mengontrol concavity_mean mengubah kesimpulan tentang perbedaan antar grup? Seberapa besar perubahan effect size dan nilai p?”
# Wilks test tables for the model without the covariate (MANOVA) and with it
# (MANCOVA).
wl_manova <- summary(manova_model, test = "Wilks")$stats
wl_mancova <- summary(mancova_model, test = "Wilks")$stats

# Side-by-side comparison of the diagnosis effect in both models. The first
# row of the MANOVA table and the "diagnosis" row of the MANCOVA table are
# read by column position: 2 = Wilks' Lambda, 3 = approximate F, 6 = p-value.
comp_df <- local({
  diagnosis_rows <- list(wl_manova[1, ], wl_mancova["diagnosis", ])
  column <- function(j) {
    vapply(diagnosis_rows, function(r) r[[j]], numeric(1))
  }

  lambda <- column(2)
  p_val <- column(6)

  tibble(
    Metode = c("MANOVA", "MANCOVA"),
    `Covariate` = c("Tidak Ada", "concavity_mean"),
    `Wilks' Lambda` = round(lambda, 5),
    `F approx` = round(column(3), 4),
    `p-value` = round(p_val, 6),
    `η² (1-Lambda)` = round(1 - lambda, 4),
    # Decision at the 5% level, taken on the unrounded p-values.
    Keputusan = ifelse(p_val < 0.05, "Tolak H₀ ✔", "Gagal Tolak H₀")
  )
})
# Render the MANOVA-vs-MANCOVA comparison: styled header row (row 0) and a
# separate background tint for each of the two data rows.
kable(comp_df, caption = "Perbandingan Wilks' Lambda: MANOVA vs MANCOVA") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "bordered"),
    full_width = TRUE
  ) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white") %>%
  row_spec(1, background = "#eaf4fb") %>%
  row_spec(2, background = "#f5eef8")

| Metode | Covariate | Wilks’ Lambda | F approx | p-value | η² (1-Lambda) | Keputusan |
|---|---|---|---|---|---|---|
| MANOVA | Tidak Ada | 0.66636 | 7.6771 | 0.000291 | 0.3336 | Tolak H₀ ✔ |
| MANCOVA | concavity_mean | 0.89301 | 1.7972 | 0.161295 | 0.1070 | Gagal Tolak H₀ |
# For every test statistic named in `tests`, compare the diagnosis effect in
# the MANOVA and the MANCOVA: p-values, their difference (MANCOVA - MANOVA,
# taken before rounding) and the value of the statistic itself.
all_tests_df <- map_df(tests, function(test_name) {
  # Diagnosis row of each summary table; position 2 is the statistic and
  # position 6 the p-value.
  manova_row <- summary(manova_model, test = test_name)$stats[1, ]
  mancova_row <- summary(mancova_model, test = test_name)$stats["diagnosis", ]

  p_manova <- manova_row[[6]]
  p_mancova <- mancova_row[[6]]

  tibble(
    `Statistik Uji` = test_name,
    `MANOVA — p` = round(p_manova, 6),
    `MANCOVA — p` = round(p_mancova, 6),
    `Perubahan p` = round(p_mancova - p_manova, 6),
    `MANOVA — nilai` = round(manova_row[[2]], 5),
    `MANCOVA — nilai` = round(mancova_row[[2]], 5)
  )
})
# Render the per-statistic comparison table with a styled header row.
kable(
  all_tests_df,
  caption = "Perbandingan Seluruh Statistik Uji: MANOVA vs MANCOVA"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")

| Statistik Uji | MANOVA — p | MANCOVA — p | Perubahan p | MANOVA — nilai | MANCOVA — nilai |
|---|---|---|---|---|---|
| Wilks | 0.000291 | 0.161295 | 0.161004 | 0.66636 | 0.89301 |
| Pillai | 0.000291 | 0.161295 | 0.161004 | 0.33364 | 0.10699 |
| Hotelling-Lawley | 0.000291 | 0.161295 | 0.161004 | 0.50068 | 0.11981 |
| Roy | 0.000291 | 0.161295 | 0.161004 | 0.50068 | 0.11981 |
# Per-DV comparison of the diagnosis effect without the covariate (ANOVA) and
# with concavity_mean entered before diagnosis (ANCOVA). Yields one row per
# element of DVS with F, p and effect size for both models plus the change in
# F and in effect size (ANCOVA minus ANOVA).
comp_uni <- map_df(DVS, function(v) {
  # ANOVA: diagnosis only
  fit_anova <- aov(as.formula(paste(v, "~ diagnosis")), data = df)
  s_anova <- summary(fit_anova)[[1]]
  f_anova <- s_anova["diagnosis", "F value"]
  p_anova <- s_anova["diagnosis", "Pr(>F)"]
  # Select the effect size by term name rather than by position.
  es_anova <- eta_squared(fit_anova, partial = FALSE)
  e_anova <- es_anova$Eta2[es_anova$Parameter == "diagnosis"]

  # ANCOVA: covariate first, then diagnosis
  fit_ancova <- aov(
    as.formula(paste(v, "~ concavity_mean + diagnosis")),
    data = df
  )
  s_ancova <- summary(fit_ancova)[[1]]
  f_ancova <- s_ancova["diagnosis", "F value"]
  p_ancova <- s_ancova["diagnosis", "Pr(>F)"]
  # Compute the partial eta-squared table once and reuse it.
  es_ancova <- eta_squared(fit_ancova, partial = TRUE)
  e_ancova <- es_ancova$Eta2_partial[es_ancova$Parameter == "diagnosis"]

  tibble(
    DV = v,
    `F (ANOVA)` = round(f_anova, 4),
    `p (ANOVA)` = round(p_anova, 6),
    `η² (ANOVA)` = round(e_anova, 4),
    `F (ANCOVA)` = round(f_ancova, 4),
    `p (ANCOVA)` = round(p_ancova, 6),
    `η²p (ANCOVA)` = round(e_ancova, 4),
    `Δ F` = round(f_ancova - f_anova, 4),
    # Difference between partial eta-squared (ANCOVA) and eta-squared (ANOVA).
    `Δ η²` = round(e_ancova - e_anova, 4)
  )
})
# Render the ANOVA-vs-ANCOVA comparison table with a styled header row.
kable(
  comp_uni,
  caption = "Perbandingan ANOVA vs ANCOVA per DV (Efek Kontrol Covariate)"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")

| DV | F (ANOVA) | p (ANOVA) | η² (ANOVA) | F (ANCOVA) | p (ANCOVA) | η²p (ANCOVA) | Δ F | Δ η² |
|---|---|---|---|---|---|---|---|---|
| texture_mean | 13.0630 | 0.000720 | 0.2139 | 4.6651 | 0.035915 | 0.0903 | -8.3979 | -0.1236 |
| smoothness_mean | 4.0243 | 0.050503 | 0.0774 | 0.2373 | 0.628403 | 0.0050 | -3.7869 | -0.0723 |
| symmetry_mean | 2.5132 | 0.119460 | 0.0498 | 1.0354 | 0.314115 | 0.0216 | -1.4779 | -0.0282 |
# Long-format data for the effect-size comparison plot: one row per DV and
# method. ANOVA rows come first, followed by the ANCOVA rows.
effect_comp <- bind_rows(
  # ANOVA: eta-squared of the single diagnosis term
  map_df(DVS, function(dv) {
    anova_fit <- aov(as.formula(paste(dv, "~ diagnosis")), data = df)
    anova_es <- eta_squared(anova_fit, partial = FALSE)
    tibble(DV = dv, Metode = "ANOVA", eta2 = anova_es$Eta2[1])
  }),
  # ANCOVA: partial eta-squared of diagnosis after concavity_mean
  map_df(DVS, function(dv) {
    ancova_fit <- aov(
      as.formula(paste(dv, "~ concavity_mean + diagnosis")),
      data = df
    )
    ancova_es <- eta_squared(ancova_fit, partial = TRUE)
    is_diagnosis <- ancova_es$Parameter == "diagnosis"
    tibble(
      DV = dv,
      Metode = "ANCOVA",
      eta2 = ancova_es$Eta2_partial[is_diagnosis]
    )
  })
)
# Dodged bar chart of the effect size per DV, ANOVA next to ANCOVA, with the
# rounded value printed above each bar. The y-axis runs to 125% of the largest
# effect size so the labels are not clipped.
ggplot(effect_comp, aes(x = DV, y = eta2, fill = Metode)) +
  geom_col(position = "dodge", alpha = 0.85, width = 0.6) +
  geom_text(
    aes(label = round(eta2, 3)),
    position = position_dodge(0.6),
    vjust = -0.4,
    size = 3.5
  ) +
  scale_fill_manual(values = c("ANOVA" = "#2e86c1", "ANCOVA" = "#8e44ad")) +
  labs(
    title = "Perbandingan Effect Size: ANOVA vs ANCOVA",
    subtitle = "η² (ANOVA) vs Partial η² (ANCOVA) — semakin tinggi semakin besar efek",
    x = "Dependent Variable",
    y = "Effect Size (η²)",
    fill = "Metode"
  ) +
  theme(legend.position = "bottom") +
  ylim(0, max(effect_comp$eta2) * 1.25)
# Visualisation of the change in Wilks' Lambda
# Two-row plotting table: the statistic (column 2) and p-value (column 6) of
# the diagnosis effect, read from the MANOVA and MANCOVA Wilks tables.
wilks_comp <- tribble(
  ~Metode,   ~`Wilks Lambda`,             ~`p-value`,
  "MANOVA",  wl_manova[1, 2],             wl_manova[1, 6],
  "MANCOVA", wl_mancova["diagnosis", 2],  wl_mancova["diagnosis", 6]
)
# Bar chart of Wilks' Lambda for MANOVA vs MANCOVA. Each bar is annotated with
# its Lambda and p-value (both rounded to 4 decimals), and the y-axis is fixed
# to [0, 1].
ggplot(wilks_comp, aes(x = Metode, y = `Wilks Lambda`, fill = Metode)) +
  geom_bar(stat = "identity", alpha = 0.85, width = 0.5) +
  geom_text(
    aes(label = paste0(
      "Λ=", round(`Wilks Lambda`, 4),
      "\np=", round(`p-value`, 4)
    )),
    vjust = -0.4, size = 4, fontface = "bold"
  ) +
  scale_fill_manual(values = c("MANOVA" = "#2e86c1", "MANCOVA" = "#8e44ad")) +
  labs(
    title = "Perbandingan Wilks' Lambda: MANOVA vs MANCOVA",
    subtitle = "Nilai Lambda lebih kecil = efek diagnosis lebih kuat",
    y = "Wilks' Lambda", x = NULL
  ) +
  theme(legend.position = "none") +
  ylim(0, 1)

1. Wilks’ Lambda - MANOVA: Λ = 0.66636 - MANCOVA: Λ = 0.89301 - Lambda MANCOVA lebih besar atau sama → kontrol covariate tidak meningkatkan deteksi efek diagnosis.
2. p-value - MANOVA: p = 2.91 × 10^{-4} - MANCOVA: p = 0.161295 - p-value MANCOVA lebih besar → efek diagnosis melemah setelah koreksi covariate, mengindikasikan covariate berkorelasi dengan IV.
3. Signifikansi Covariate Covariate
concavity_mean sendiri memiliki p = 10^{-5} —
signifikan → covariate memang berpengaruh terhadap DV
dan tepat dimasukkan dalam model. Penggunaan MANCOVA lebih tepat
daripada MANOVA.
4. Kesimpulan Utama MANOVA signifikan namun MANCOVA tidak → perbedaan antar grup sebagian disebabkan oleh perbedaan nilai covariate, bukan murni efek diagnosis.
# Overall summary: one row per analysis (3 ANOVAs, MANOVA, 3 ANCOVAs, MANCOVA)
# with the p-value of the diagnosis effect and the decision at the 5% level.
final_summary <- local({
  # Unrounded p-values, in the same order as the labels below. The decision is
  # taken on these; only the displayed column is rounded.
  p_all <- c(
    anova_results$`p-value`,
    wl_manova[1, 6],
    ancova_results$`p (diagnosis)`,
    wl_mancova["diagnosis", 6]
  )

  tibble(
    Metode = c(
      "ANOVA — texture_mean",
      "ANOVA — smoothness_mean",
      "ANOVA — symmetry_mean",
      "MANOVA (Wilks)",
      "ANCOVA — texture_mean",
      "ANCOVA — smoothness_mean",
      "ANCOVA — symmetry_mean",
      "MANCOVA — diagnosis (Wilks)"
    ),
    Covariate = rep(c("Tidak Ada", "concavity_mean"), each = 4),
    `p-value` = round(p_all, 5),
    Keputusan = ifelse(p_all < 0.05, "Tolak H₀ ✔", "Gagal Tolak H₀ ✘")
  )
})
# Render the overall summary. The decision column (4) is bold and coloured
# green for rejections and red otherwise; rows 4 and 8 (the two multivariate
# tests) get their own background tint.
kable(final_summary, caption = "Ringkasan Seluruh Hasil Analisis") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "bordered"),
    full_width = TRUE
  ) %>%
  column_spec(
    4,
    bold = TRUE,
    color = ifelse(
      final_summary$Keputusan == "Tolak H₀ ✔",
      "#1e8449", "#c0392b"
    )
  ) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white") %>%
  row_spec(4, background = "#eaf4fb") %>%
  row_spec(8, background = "#f5eef8")

| Metode | Covariate | p-value | Keputusan |
|---|---|---|---|
| ANOVA — texture_mean | Tidak Ada | 0.00072 | Tolak H₀ ✔ |
| ANOVA — smoothness_mean | Tidak Ada | 0.05050 | Gagal Tolak H₀ ✘ |
| ANOVA — symmetry_mean | Tidak Ada | 0.11946 | Gagal Tolak H₀ ✘ |
| MANOVA (Wilks) | Tidak Ada | 0.00029 | Tolak H₀ ✔ |
| ANCOVA — texture_mean | concavity_mean | 0.03592 | Tolak H₀ ✔ |
| ANCOVA — smoothness_mean | concavity_mean | 0.62840 | Gagal Tolak H₀ ✘ |
| ANCOVA — symmetry_mean | concavity_mean | 0.31411 | Gagal Tolak H₀ ✘ |
| MANCOVA — diagnosis (Wilks) | concavity_mean | 0.16130 | Gagal Tolak H₀ ✘ |
1. Uji Asumsi Kelima asumsi MANCOVA terpenuhi pada sampel 50 observasi seimbang (25 Malignant, 25 Benign): dependensi antar DV, homogenitas kovarians, normalitas multivariat, linearitas covariate–DV, dan independensi observasi.
2. ANOVA Pengujian univariat menunjukkan bahwa
texture_mean merupakan satu-satunya DV yang berbeda secara
signifikan antara Malignant dan Benign (p < 0.05).
smoothness_mean dan symmetry_mean tidak
menunjukkan perbedaan yang signifikan secara terpisah.
3. MANOVA Secara simultan, kombinasi ketiga DV menunjukkan perbedaan signifikan antara kedua kelompok (Wilks’ Λ = 0.6664, p = 2.91 × 10^{-4}). Ini mengkonfirmasi bahwa meskipun tidak semua DV signifikan secara individual, kombinasi multivariat mereka mampu membedakan Malignant dari Benign.
4. ANCOVA Setelah mengontrol
concavity_mean, hasil per-DV menunjukkan 1 dari 3 DV yang
signifikan. Kontrol covariate memberikan estimasi perbedaan antar grup
yang lebih akurat melalui adjusted means.
5. MANCOVA Secara multivariat dengan kontrol
covariate, efek diagnosis memberikan Wilks’ Λ = 0.893, p = 0.161295.
Perbedaan antar grup tidak lagi signifikan setelah mengontrol
concavity_mean — mengindikasikan covariate memediasi sebagian dari
perbedaan tersebut. Covariate concavity_mean itu sendiri
signifikan (p = 10^{-5}), mengkonfirmasi bahwa penggunaan MANCOVA lebih
tepat daripada MANOVA.
6. Perbandingan MANOVA vs MANCOVA Penambahan
covariate mengubah nilai Wilks’ Lambda dari 0.6664 (MANOVA) menjadi
0.893 (MANCOVA), menunjukkan bahwa concavity_mean memiliki
peran dalam menjelaskan variasi pada kombinasi DV. MANCOVA memberikan
gambaran yang lebih akurat tentang perbedaan intrinsik antara sel
Malignant dan Benign setelah mengeliminasi pengaruh cekungan kontur
sel.
Laporan ini dibuat untuk keperluan Tugas Mata Kuliah Analisis Multivariate. Data: Wisconsin Breast Cancer Dataset — UCI Machine Learning Repository. Analisis: Uji Asumsi · ANOVA · MANOVA · ANCOVA · MANCOVA · Perbandingan Efek Covariate