# Package Management
# Packages this report depends on; any that are missing are installed from CRAN.
packages <- c(
  "tidyverse", "MVN", "biotools", "car", "heplots",
  "corrplot", "psych", "ggplot2", "ggcorrplot",
  "knitr", "kableExtra", "moments", "GGally"
)
# system.file(package = ) returns "" when a package is not installed. This is
# the check recommended by ?installed.packages, which itself has to read
# metadata for every installed package and is slow on large libraries.
is_installed <- vapply(
  packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
to_install <- packages[!is_installed]
if (length(to_install) > 0) {
  install.packages(to_install, repos = "https://cran.r-project.org")
}
suppressPackageStartupMessages({
library(tidyverse)
library(MVN)
library(biotools)
library(car)
library(psych)
library(ggplot2)
library(ggcorrplot)
library(knitr)
library(kableExtra)
library(moments)
library(GGally)
})
# Report-wide knitr chunk defaults: echo the code, silence messages and
# warnings, and use one common figure geometry / resolution.
knitr::opts_chunk$set(
  # What gets printed alongside each chunk
  echo    = TRUE,
  message = FALSE,
  warning = FALSE,
  # Figure layout
  fig.width  = 8,
  fig.height = 5,
  fig.align  = "center",
  dpi        = 150
)
# Global ggplot theme: theme_minimal() at base size 13, with bold coloured
# titles, muted subtitles, dark axis titles and bold facet-strip labels.
theme_set(
  theme_minimal(base_size = 13) +
    theme(
      plot.title    = element_text(face = "bold", color = "#1a5276"),
      plot.subtitle = element_text(color = "#566573"),
      axis.title    = element_text(color = "#2c3e50"),
      strip.text    = element_text(face = "bold")
    )
)
Analisis MANCOVA (Multivariate Analysis of Covariance) merupakan perluasan dari MANOVA yang menyertakan satu atau lebih covariate (kovariat) untuk mengontrol variansi yang tidak relevan. Sebelum menjalankan MANCOVA, terdapat 5 asumsi yang harus dipenuhi agar hasil analisis valid dan dapat dipercaya.
Dataset yang digunakan adalah Wisconsin Breast Cancer Dataset dari UCI Machine Learning Repository. Dataset ini berisi fitur-fitur yang diekstrak dari gambar sel tumor payudara dan umum digunakan dalam penelitian biomedis.
| Komponen | Keterangan |
|---|---|
| Independent Variable (IV) | diagnosis — Malignant (M) / Benign (B) |
| Dependent Variables (DVs) | texture_mean, smoothness_mean, symmetry_mean |
| Covariate (COV) | concavity_mean |
| Metode Analisis | MANCOVA |
| Jumlah Observasi | 50 (25 Malignant, 25 Benign) |
Berikut alur pengujian asumsi secara berurutan:
# Read the raw CSV (path is relative to the working directory).
df_raw <- read.csv("Breast_Cancer_Data.csv")
# Clean-up: drop the identifier / placeholder columns if they exist, trim and
# expand the diagnosis codes (M -> Malignant, B -> Benign), and remove rows
# that have no diagnosis.
# NOTE(review): read.csv() may give an empty trailing column a different name
# than "Unnamed..32" (e.g. "X"); verify that the placeholder column is really
# dropped here -- TODO confirm against the column count reported below.
df_raw <- df_raw %>%
  dplyr::select(-any_of(c("id", "Unnamed..32"))) %>%
  mutate(
    diagnosis = dplyr::recode(
      trimws(diagnosis),
      "M" = "Malignant",
      "B" = "Benign"
    )
  ) %>%
  drop_na(diagnosis)
# Print the dimensions of the cleaned data.
cat(
  sprintf(
    "Jumlah baris : %d\nJumlah kolom : %d\n",
    nrow(df_raw), ncol(df_raw)
  )
)
## Jumlah baris : 569
## Jumlah kolom : 32
Distribusi kelas pada data mentah:
# Frequency of each diagnosis class in the raw data, rendered as a styled table.
# setNames() relabels the two columns produced by as.data.frame() positionally.
table(df_raw$diagnosis) %>%
  as.data.frame() %>%
  setNames(c("Diagnosis", "Frekuensi")) %>%
  kable(caption = "Distribusi Diagnosis (Data Mentah)") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  )
| Diagnosis | Frekuensi |
|---|---|
| Benign | 357 |
| Malignant | 212 |
# Variable roles used throughout the analysis.
DVS <- c("texture_mean", "smoothness_mean", "symmetry_mean")  # dependent variables
COV <- "concavity_mean"                                       # covariate
IV  <- "diagnosis"                                            # grouping factor

# Fixed sample: 25 row indices per group. They are matched against the
# 0-based row counter `row_id` added to df_raw below.
IDX_MAL <- c(
  2, 5, 13, 24, 77, 135, 172, 197, 223, 252,
  256, 277, 280, 297, 328, 337, 369, 393, 441, 444,
  451, 460, 479, 512, 536
)
IDX_BEN <- c(
  76, 84, 92, 106, 113, 151, 266, 303, 313, 314,
  345, 357, 367, 378, 413, 438, 466, 481, 483, 500,
  510, 513, 541, 547, 553
)
IDX <- c(IDX_MAL, IDX_BEN)

# Keep the 0-based row counter on df_raw.
df_raw <- mutate(df_raw, row_id = row_number() - 1)

# row_id == position - 1, so picking positions IDX + 1 returns the sampled
# rows in the order given by IDX; only the analysis columns are kept.
df <- df_raw %>%
  slice(IDX + 1) %>%
  dplyr::select(all_of(c(IV, DVS, COV)))
cat(sprintf("Total sampel : %d observasi\n", nrow(df)))
## Total sampel : 50 observasi
##
## Benign Malignant
## 25 25
Metode Sampling: Fixed-index sampling berdasarkan indeks baris yang telah ditentukan sebelumnya. Sampling menghasilkan 50 observasi seimbang — 25 Malignant dan 25 Benign — untuk meminimalkan bias dan meningkatkan stabilitas analisis.
# Descriptive statistics (mean, SD, min, max; rounded to 5 decimals) of every
# DV and the covariate within each diagnosis group.
desc_stats <- df %>%
  group_by(diagnosis) %>%
  summarise(
    across(
      all_of(c(DVS, COV)),
      list(
        Mean = function(x) round(mean(x), 5),
        SD   = function(x) round(sd(x), 5),
        Min  = function(x) round(min(x), 5),
        Max  = function(x) round(max(x), 5)
      )
    )
  ) %>%
  # Columns are named <variable>_<statistic>; split on the LAST underscore so
  # variable names that contain underscores stay intact.
  pivot_longer(
    cols      = -diagnosis,
    names_to  = c("Variabel", ".value"),
    names_sep = "_(?=[^_]+$)"
  ) %>%
  arrange(Variabel, diagnosis)

desc_stats %>%
  kable(caption = "Statistik Deskriptif per Variabel dan Grup") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = TRUE
  ) %>%
  column_spec(1, bold = TRUE)
| diagnosis | Variabel | Mean | SD | Min | Max |
|---|---|---|---|---|---|
| Benign | concavity_mean | 0.05301 | 0.03194 | 0.00000 | 0.1321 |
| Malignant | concavity_mean | 0.15039 | 0.06166 | 0.02685 | 0.2810 |
| Benign | smoothness_mean | 0.09458 | 0.01240 | 0.07355 | 0.1291 |
| Malignant | smoothness_mean | 0.10181 | 0.01308 | 0.07371 | 0.1278 |
| Benign | symmetry_mean | 0.17707 | 0.02542 | 0.13860 | 0.2403 |
| Malignant | symmetry_mean | 0.18730 | 0.01989 | 0.14670 | 0.2162 |
| Benign | texture_mean | 17.57880 | 3.39616 | 10.72000 | 24.9900 |
| Malignant | texture_mean | 21.26040 | 3.79553 | 11.89000 | 28.7700 |
# Long format: one row per observation and variable, so each variable can be
# drawn in its own facet.
df_long <- pivot_longer(
  df,
  cols      = all_of(c(DVS, COV)),
  names_to  = "Variabel",
  values_to = "Nilai"
)

# Colour per diagnosis group, shared by the fill and colour scales below.
diagnosis_colors <- c("Malignant" = "#c0392b", "Benign" = "#2e86c1")

# Density curves with rug marks, one facet per variable (free x and y scales).
ggplot(df_long, aes(x = Nilai, fill = diagnosis, color = diagnosis)) +
  geom_density(alpha = 0.35, linewidth = 0.9) +
  geom_rug(alpha = 0.5, linewidth = 0.5) +
  facet_wrap(~Variabel, scales = "free", ncol = 2) +
  scale_fill_manual(values = diagnosis_colors) +
  scale_color_manual(values = diagnosis_colors) +
  labs(
    title    = "Distribusi Variabel per Grup Diagnosis",
    subtitle = "Density plot dengan rug marks",
    x        = "Nilai",
    y        = "Densitas",
    fill     = "Diagnosis",
    color    = "Diagnosis"
  ) +
  theme(legend.position = "bottom")

# Boxplots with jittered raw points, one facet per variable (free y scale).
ggplot(df_long, aes(x = diagnosis, y = Nilai, fill = diagnosis)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21, outlier.size = 2) +
  geom_jitter(aes(color = diagnosis), width = 0.12, alpha = 0.5, size = 1.8) +
  facet_wrap(~Variabel, scales = "free_y", ncol = 2) +
  scale_fill_manual(values = diagnosis_colors) +
  scale_color_manual(values = diagnosis_colors) +
  labs(
    title    = "Boxplot Variabel per Grup Diagnosis",
    subtitle = "Dengan jitter untuk menampilkan distribusi individual",
    x        = NULL,
    y        = "Nilai"
  ) +
  theme(legend.position = "none")
Tujuan: Memverifikasi bahwa variabel-variabel dependen (DV) tidak independen satu sama lain — artinya mereka saling berkorelasi. Ini merupakan syarat mendasar MANCOVA: jika DV tidak berkorelasi sama sekali, analisis multivariat tidak memberikan keuntungan dibanding analisis univariat terpisah.
Metode: Bartlett’s Test of Sphericity menguji H₀: matriks korelasi = matriks identitas (DV-DV tidak berkorelasi). Kita ingin menolak H₀ untuk membuktikan adanya korelasi.
Keputusan: Jika p-value < 0.05, maka H₀ ditolak → DV-DV saling berkorelasi → asumsi terpenuhi.
# Correlation matrix of the DVs and Bartlett's test of sphericity on it,
# using the number of rows in df as the sample size.
R_matrix   <- cor(df[, DVS])
sphericity <- cortest.bartlett(R_matrix, n = nrow(df))

# Two-column result table. The values are stored as text because the p-value
# is formatted in scientific notation.
tibble(
  Statistik = c("Chi-square", "Degrees of Freedom", "p-value"),
  Nilai = c(
    as.character(round(sphericity$chisq, 4)),
    as.character(sphericity$df),
    format(sphericity$p.value, scientific = TRUE, digits = 4)
  )
) %>%
  kable(caption = "Hasil Bartlett's Test of Sphericity") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  )
| Statistik | Nilai |
|---|---|
| Chi-square | 24.0657 |
| Degrees of Freedom | 3 |
| p-value | 2.42e-05 |
Interpretasi: p-value = 2.42e-05 < 0.05.
H₀ ditolak → DV-DV saling berkorelasi secara
signifikan.
✔ ASUMSI 1 TERPENUHI
# DV correlation matrix rounded to 4 decimals, rendered as a styled table.
R_round <- round(R_matrix, 4)
R_round %>%
  kable(caption = "Matriks Korelasi antar DV") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  )
| texture_mean | smoothness_mean | symmetry_mean | |
|---|---|---|---|
| texture_mean | 1.0000 | -0.1386 | -0.0124 |
| smoothness_mean | -0.1386 | 1.0000 | 0.6184 |
| symmetry_mean | -0.0124 | 0.6184 | 1.0000 |
# Lower-triangle correlation plot of the DVs with the coefficients printed
# as labels.
ggcorrplot(
  R_matrix,
  type     = "lower",
  method   = "circle",
  colors   = c("#c0392b", "white", "#2e86c1"),
  lab      = TRUE,
  lab_size = 4,
  title    = "Matriks Korelasi Antar DV",
  ggtheme  = theme_minimal()
)
Tujuan: Memastikan bahwa matriks kovarians antar kelompok IV (Malignant vs Benign) adalah sama (homogen). Pelanggaran asumsi ini mempengaruhi ketepatan uji F pada MANCOVA.
Metode: Box’s M Test. H₀: semua grup memiliki matriks kovarians yang sama.
Keputusan: Jika p-value ≥ 0.05, maka H₀ gagal ditolak → matriks kovarians homogen → asumsi terpenuhi.
Box’s M sangat sensitif terhadap pelanggaran normalitas. p-value > 0.001 umumnya sudah dianggap aman dalam praktik.
# Box's M test on the DVs, grouped by the diagnosis column.
bm <- boxM(df[, DVS], df[[IV]])

# Statistic, degrees of freedom and p-value as plain numbers rounded to 4
# decimals (the degrees of freedom are a whole number, so rounding leaves
# them unchanged).
tibble(
  Statistik = c("Box's M (Chi-Sq approx.)", "df", "p-value"),
  Nilai = round(
    as.numeric(c(bm$statistic, bm$parameter, bm$p.value)),
    4
  )
) %>%
  kable(caption = "Hasil Box's M Test") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  )
| Statistik | Nilai |
|---|---|
| Box’s M (Chi-Sq approx.) | 3.5164 |
| df | 6.0000 |
| p-value | 0.7418 |
Interpretasi: p-value = 0.7418 ≥ 0.05.
H₀ gagal ditolak → Matriks kovarians antar grup
homogen.
✔ ASUMSI 2 TERPENUHI
# Covariance matrix of the DVs within each diagnosis group.
cov_mal <- df %>%
  filter(diagnosis == "Malignant") %>%
  dplyr::select(all_of(DVS)) %>%
  cov()
cov_ben <- df %>%
  filter(diagnosis == "Benign") %>%
  dplyr::select(all_of(DVS)) %>%
  cov()

# Malignant-group covariance matrix, rounded to 6 decimals.
round(cov_mal, 6) %>%
  kable(caption = "Matriks Kovarians — Malignant") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  )
| texture_mean | smoothness_mean | symmetry_mean | |
|---|---|---|---|
| texture_mean | 14.406037 | -0.023032 | -0.020564 |
| smoothness_mean | -0.023032 | 0.000171 | 0.000160 |
| symmetry_mean | -0.020564 | 0.000160 | 0.000395 |
# Benign-group covariance matrix, rounded to 6 decimals.
round(cov_ben, 6) %>%
  kable(caption = "Matriks Kovarians — Benign") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  )
| texture_mean | smoothness_mean | symmetry_mean | |
|---|---|---|---|
| texture_mean | 11.533911 | -0.005765 | -0.001420 |
| smoothness_mean | -0.005765 | 0.000154 | 0.000185 |
| symmetry_mean | -0.001420 | 0.000185 | 0.000646 |
# Visualise how the variance of each DV differs between the two groups.
# The variances are the diagonal of each group's covariance matrix; diag()
# reads them directly instead of looking up a column by name with get()
# inside a rowwise() mutate, which depends on scoping rules.
var_compare <- bind_rows(
  tibble(
    Var      = rownames(cov_mal),
    Grup     = "Malignant",
    Variansi = unname(diag(cov_mal))
  ),
  tibble(
    Var      = rownames(cov_ben),
    Grup     = "Benign",
    Variansi = unname(diag(cov_ben))
  )
)

# Side-by-side bars: one pair of bars per DV, coloured by group.
ggplot(var_compare, aes(x = Var, y = Variansi, fill = Grup)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.6, alpha = 0.85) +
  scale_fill_manual(values = c("Malignant" = "#c0392b", "Benign" = "#2e86c1")) +
  labs(
    title    = "Perbandingan Variansi per DV dan Grup",
    subtitle = "Diagonal matriks kovarians (variansi masing-masing DV)",
    x        = "Variabel Dependen",
    y        = "Variansi",
    fill     = "Diagnosis"
  ) +
  theme(legend.position = "bottom")
Tujuan: Memastikan bahwa gabungan DV mengikuti distribusi normal multivariat. MANCOVA mengasumsikan residual berdistribusi normal secara multivariat.
Metode: Mardia’s Test — menguji skewness (kemencengan) dan kurtosis (keruncingan) multivariat secara terpisah.
Keputusan: Kedua p-value (skewness dan kurtosis) harus ≥ 0.05 untuk menyatakan normalitas multivariat terpenuhi.
# Mardia's multivariate skewness / kurtosis test on the pooled DVs.
mardia_result <- psych::mardia(df[, DVS], plot = FALSE)
p_skew <- mardia_result$p.skew
# The kurtosis component is a z statistic; turn it into a two-sided p-value.
z_kurt <- mardia_result$kurtosis
p_kurt <- 2 * (1 - pnorm(abs(z_kurt)))

# One row per component; each is flagged as normal when its p-value is at
# least 0.05, and the Status column is coloured green or red accordingly.
tibble(
  Komponen   = c("Mardia Skewness", "Mardia Kurtosis"),
  Statistik  = round(c(mardia_result$b1p, mardia_result$b2p), 4),
  `Z / Chi²` = c("—", round(z_kurt, 4)),
  `p-value`  = round(c(p_skew, p_kurt), 4),
  Status     = ifelse(
    c(p_skew, p_kurt) >= 0.05,
    "Normal ✔",
    "Tidak Normal ✘"
  )
) %>%
  kable(caption = "Hasil Mardia's Test — Normalitas Multivariat") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ) %>%
  column_spec(
    5,
    bold  = TRUE,
    color = ifelse(c(p_skew, p_kurt) >= 0.05, "#1e8449", "#c0392b")
  )
| Komponen | Statistik | Z / Chi² | p-value | Status |
|---|---|---|---|---|
| Mardia Skewness | 0.9458 | — | 0.6404 | Normal ✔ |
| Mardia Kurtosis | 13.3407 | -1.0711 | 0.2841 | Normal ✔ |
Interpretasi: p-skewness = 0.6404 ≥ 0.05 dan
p-kurtosis = 0.2841 ≥ 0.05.
Keduanya tidak signifikan → Data memenuhi normalitas
multivariat.
✔ ASUMSI 3 TERPENUHI
Sebagai verifikasi tambahan, uji normalitas univariat dilakukan per variabel:
# Shapiro-Wilk normality test for each DV, collected as one row per variable.
sw_results <- bind_rows(
  lapply(DVS, function(dv_name) {
    test <- shapiro.test(df[[dv_name]])
    tibble(
      Variabel  = dv_name,
      W         = round(test$statistic, 5),
      `p-value` = round(test$p.value, 5),
      Status    = ifelse(test$p.value >= 0.05, "Normal ✔", "Tidak Normal ✘")
    )
  })
)
sw_results %>%
  kable(caption = "Shapiro-Wilk Test per Variabel Dependen") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  )
| Variabel | W | p-value | Status |
|---|---|---|---|
| texture_mean | 0.98849 | 0.90475 | Normal ✔ |
| smoothness_mean | 0.97732 | 0.44547 | Normal ✔ |
| symmetry_mean | 0.98450 | 0.75012 | Normal ✔ |
Tujuan: Memastikan bahwa covariate
(concavity_mean) memiliki hubungan linear
dengan setiap DV. Jika hubungannya tidak linear, memasukkan covariate
sebagai kontrol linear tidak akan efektif.
Metode: Uji korelasi Pearson antara covariate dan masing-masing DV.
Keputusan: Jika p-value < 0.05, maka ada hubungan linear yang signifikan → asumsi terpenuhi.
# Pearson correlation test between the covariate and each DV (computed on
# the pooled sample), collected as one row per DV.
lin_results <- map_df(DVS, function(v) {
  ct <- cor.test(df[[COV]], df[[v]])
  tibble(
    DV        = v,
    r         = round(ct$estimate, 4),
    `t-stat`  = round(ct$statistic, 4),
    df        = ct$parameter,
    `p-value` = round(ct$p.value, 6),
    Status    = ifelse(ct$p.value < 0.05,
                       "Linear Signifikan ✔",
                       "Tidak Linear ✘")
  )
})
# Colour the Status column by the actual outcome (green = significant,
# red = not significant) so a failing row is never shown in green; this
# matches how the Mardia table colours its Status column.
kable(lin_results, caption = "Hasil Uji Linearitas: Covariate vs DV") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ) %>%
  column_spec(
    6,
    bold  = TRUE,
    color = ifelse(lin_results$Status == "Linear Signifikan ✔",
                   "#1e8449", "#c0392b")
  )
| DV | r | t-stat | df | p-value | Status |
|---|---|---|---|---|---|
| texture_mean | 0.3750 | 2.8024 | 48 | 0.007294 | Linear Signifikan ✔ |
| smoothness_mean | 0.4534 | 3.5247 | 48 | 0.000943 | Linear Signifikan ✔ |
| symmetry_mean | 0.4436 | 3.4293 | 48 | 0.001252 | Linear Signifikan ✔ |
Interpretasi: Semua p-value < 0.05 → Ketiga DV
memiliki hubungan linear signifikan dengan covariate
concavity_mean.
✔ ASUMSI 4 TERPENUHI
# Long format over the DVs only: the covariate stays as its own column so it
# can be used as the x axis in every facet.
df_scatter <- pivot_longer(
  df,
  cols      = all_of(DVS),
  names_to  = "DV",
  values_to = "Nilai_DV"
)

# Covariate vs. each DV, points coloured by diagnosis. `group = 1` makes
# geom_smooth() fit one overall regression line per facet rather than one
# per diagnosis group.
ggplot(df_scatter, aes(x = concavity_mean, y = Nilai_DV, color = diagnosis)) +
  geom_point(alpha = 0.7, size = 2.2) +
  geom_smooth(
    aes(group = 1),
    method    = "lm",
    se        = TRUE,
    color     = "#2c3e50",
    linewidth = 1,
    linetype  = "dashed"
  ) +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  scale_color_manual(values = c("Malignant" = "#c0392b", "Benign" = "#2e86c1")) +
  labs(
    title    = "Linearitas: concavity_mean vs Setiap DV",
    subtitle = "Garis regresi linear keseluruhan ditampilkan",
    x        = "concavity_mean (Covariate)",
    y        = "Nilai DV",
    color    = "Diagnosis"
  ) +
  theme(legend.position = "bottom")
Tujuan: Memastikan bahwa setiap observasi bersifat independen — nilai satu observasi tidak mempengaruhi atau dipengaruhi oleh observasi lainnya.
Catatan: Asumsi ini tidak diuji secara statistik, melainkan dievaluasi berdasarkan desain pengumpulan data.
Evaluasi independensi observasi berdasarkan struktur dataset:
| Kriteria | Evaluasi | Status |
|---|---|---|
| Unit observasi berbeda | Setiap baris mewakili pasien yang berbeda (bukan repeated measures) | ✔ |
| Tidak ada tumpang tindih antar grup | Setiap pasien hanya termasuk satu grup (Malignant ATAU Benign) | ✔ |
| Proses sampling tidak sistematis | Fixed-index sampling tanpa dependensi antar baris | ✔ |
| Sumber data independen | Data Wisconsin BC dari UCI: setiap observasi dikumpulkan secara independen | ✔ |
Kesimpulan: Berdasarkan desain studi dan sumber
data, tidak ada indikasi dependensi antar observasi. Setiap entri data
mewakili satu pasien unik, tanpa pengukuran berulang maupun
pengelompokan bersarang.
✔ ASUMSI 5 TERPENUHI
# Summary of the five assumption checks. The decision and status of each row
# are derived from the p-values computed earlier instead of being hard-coded,
# so the table cannot report "terpenuhi" for a test that actually failed.
alpha_level <- 0.05
p_lin_max   <- max(lin_results[["p-value"]])

# TRUE = H0 rejected at alpha_level; NA = no formal test (design evaluation).
h0_rejected <- c(
  sphericity$p.value < alpha_level,
  as.numeric(bm$p.value) < alpha_level,
  p_skew < alpha_level || p_kurt < alpha_level,
  p_lin_max < alpha_level,
  NA
)
# Assumptions 1 and 4 are met when H0 is rejected, assumptions 2 and 3 when
# H0 is NOT rejected; assumption 5 is judged from the study design.
assumption_met <- c(
  h0_rejected[1],
  !h0_rejected[2],
  !h0_rejected[3],
  h0_rejected[4],
  TRUE
)

summary_df <- tibble(
  No = 1:5,
  Asumsi = c("Dependensi antar DV",
             "Homogenitas Matriks Kovarians",
             "Normalitas Multivariat",
             "Linearitas Covariate–DV",
             "Independensi Observasi"),
  Metode = c("Bartlett's Test of Sphericity",
             "Box's M Test",
             "Mardia's Test",
             "Pearson Correlation",
             "Evaluasi Desain"),
  `p-value` = c(
    format(sphericity$p.value, scientific = TRUE, digits = 3),
    as.character(round(bm$p.value, 4)),
    paste0("skew=", round(p_skew, 4), " / kurt=", round(p_kurt, 4)),
    paste0("maks=", round(p_lin_max, 4)),
    "N/A"
  ),
  Keputusan = ifelse(
    is.na(h0_rejected),
    "—",
    ifelse(h0_rejected, "H₀ Ditolak", "H₀ Gagal Ditolak")
  ),
  Status = ifelse(assumption_met, "✔ TERPENUHI", "✘ TIDAK TERPENUHI")
)
kable(summary_df,
      caption = "Ringkasan Uji Asumsi MANCOVA — Breast Cancer Dataset") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "bordered"),
    full_width = TRUE
  ) %>%
  # Green for a met assumption, red otherwise.
  column_spec(
    6,
    bold  = TRUE,
    color = ifelse(assumption_met, "#1e8449", "#c0392b")
  ) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
| No | Asumsi | Metode | p-value | Keputusan | Status |
|---|---|---|---|---|---|
| 1 | Dependensi antar DV | Bartlett’s Test of Sphericity | 2.42e-05 | H₀ Ditolak | ✔ TERPENUHI |
| 2 | Homogenitas Matriks Kovarians | Box’s M Test | 0.7418 | H₀ Gagal Ditolak | ✔ TERPENUHI |
| 3 | Normalitas Multivariat | Mardia’s Test | skew=0.6404 / kurt=0.2841 | H₀ Gagal Ditolak | ✔ TERPENUHI |
| 4 | Linearitas Covariate–DV | Pearson Correlation | maks=0.0073 | H₀ Ditolak | ✔ TERPENUHI |
| 5 | Independensi Observasi | Evaluasi Desain | N/A | — | ✔ TERPENUHI |
Seluruh 5 asumsi MANCOVA telah terpenuhi berdasarkan pengujian yang dilakukan:
concavity_mean memiliki hubungan linear dengan semua DV (semua p < 0.05).
Dataset dan variabel yang dipilih layak untuk dianalisis menggunakan MANCOVA.