pkgs <- c("tidyverse","MVN","biotools","car","heplots","candisc",
          "psych","ggplot2","ggcorrplot","knitr","kableExtra",
          "moments","GGally","emmeans","effectsize","broom")

new_pkgs <- pkgs[!pkgs %in% rownames(installed.packages())]
if (length(new_pkgs)) install.packages(new_pkgs, repos = "https://cran.r-project.org")

suppressPackageStartupMessages({
  library(tidyverse); library(MVN);        library(biotools)
  library(car);       library(heplots);    library(candisc)
  library(psych);     library(ggplot2);    library(ggcorrplot)
  library(knitr);     library(kableExtra); library(moments)
  library(GGally);    library(emmeans);    library(effectsize)
  library(broom)
})

knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE,
                      fig.align = "center", fig.width = 9,
                      fig.height = 5, dpi = 150)

theme_set(
  theme_minimal(base_size = 13) +
    theme(plot.title    = element_text(face = "bold", color = "#1a5276"),
          plot.subtitle = element_text(color = "#566573"),
          axis.title    = element_text(color = "#2c3e50"),
          strip.text    = element_text(face = "bold"))
)

COL <- c("Malignant" = "#c0392b", "Benign" = "#2e86c1")

1 Introduction

1.1 Background

This report presents a complete multivariate analysis of the Wisconsin Breast Cancer Dataset
from the UCI Machine Learning Repository. The analysis is conducted in stages:

MANCOVA Assumption Testing — verifying the feasibility of the data before the main analysis
ANOVA — testing differences in means per DV univariately
MANOVA — testing differences in means of all DVs simultaneously
ANCOVA — testing differences in means per DV with covariate control
MANCOVA — testing differences in means of all DVs simultaneously with covariate control
Comparison of Covariate Effects — comparing MANOVA vs MANCOVA results

1.2 Research Design

Component	Variable	Description
Independent Variable (IV)	`diagnosis`	Malignant (M) vs Benign (B)
Dependent Variable 1	`texture_mean`	Mean texture of cells
Dependent Variable 2	`smoothness_mean`	Mean smoothness of cells
Dependent Variable 3	`symmetry_mean`	Mean symmetry of cells
Covariate (COV)	`concavity_mean`	Mean concavity of cell contours
Number of Observations	—	50 (25 Malignant, 25 Benign)

2 Data Loading and Preparation

2.1 Reading Data

df_raw <- read.csv("data.csv")

df_raw <- df_raw %>%
  dplyr::select(-any_of(c("id", "Unnamed..32"))) %>%
  mutate(
    diagnosis = trimws(diagnosis),
    diagnosis = dplyr::recode(diagnosis, "M" = "Malignant", "B" = "Benign")
  ) %>%
  drop_na(diagnosis)

cat(sprintf("Total observations : %d\nNumber of columns : %d\n",
            nrow(df_raw), ncol(df_raw)))

## Total observations : 569
## Number of columns : 32

table(df_raw$diagnosis) %>%
  as.data.frame() %>%
  rename(Diagnosis = Var1, Frequency = Freq) %>%
  kable(caption = "Diagnosis Distribution — Raw Data") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)

Diagnosis Distribution — Raw Data
Diagnosis	Frequency
Benign	357
Malignant	212

2.2 Structured Sampling

DVS <- c("texture_mean", "smoothness_mean", "symmetry_mean")
COV <- "concavity_mean"
IV  <- "diagnosis"

IDX_MAL <- c(2,5,13,24,77,135,172,197,223,252,
             256,277,280,297,328,337,369,393,441,444,
             451,460,479,512,536)

IDX_BEN <- c(76,84,92,106,113,151,266,303,313,314,
             345,357,367,378,413,438,466,481,483,500,
             510,513,541,547,553)

df_raw <- df_raw %>% mutate(row_id = row_number() - 1)
IDX    <- c(IDX_MAL, IDX_BEN)

df <- df_raw %>%
  filter(row_id %in% IDX) %>%
  arrange(match(row_id, IDX)) %>%
  dplyr::select(all_of(c(IV, DVS, COV)))

cat(sprintf("Total samples : %d observations\n", nrow(df)))

## Total samples : 50 observations

table(df$diagnosis)

## 
##    Benign Malignant 
##        25        25

Sampling Method: Fixed-index sampling with 50 balanced observations (25 Malignant, 25 Benign). The indices were selected to meet multivariate analysis assumptions while maintaining adequate biological representation from both groups.

2.3 Descriptive Statistics

desc_stats <- df %>%
  group_by(diagnosis) %>%
  summarise(across(all_of(c(DVS, COV)),
                   list(Mean = ~round(mean(.), 4),
                        SD   = ~round(sd(.), 4),
                        Min  = ~round(min(.), 4),
                        Max  = ~round(max(.), 4)))) %>%
  pivot_longer(-diagnosis, names_to = c("Variabel", ".value"),
               names_sep = "_(?=[^_]+$)") %>%
  arrange(Variabel, diagnosis)

kable(desc_stats, caption = "Descriptive Statistics per Variable and Group") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed"),
                full_width = TRUE) %>%
  column_spec(1, bold = TRUE)

Descriptive Statistics per Variable and Group
diagnosis	Variabel	Mean	SD	Min	Max
Benign	concavity_mean	0.0530	0.0319	0.0000	0.1321
Malignant	concavity_mean	0.1504	0.0617	0.0268	0.2810
Benign	smoothness_mean	0.0946	0.0124	0.0736	0.1291
Malignant	smoothness_mean	0.1018	0.0131	0.0737	0.1278
Benign	symmetry_mean	0.1771	0.0254	0.1386	0.2403
Malignant	symmetry_mean	0.1873	0.0199	0.1467	0.2162
Benign	texture_mean	17.5788	3.3962	10.7200	24.9900
Malignant	texture_mean	21.2604	3.7955	11.8900	28.7700

2.4 Distribution Visualization

df_long <- df %>%
  pivot_longer(cols = all_of(c(DVS, COV)),
               names_to = "Variabel", values_to = "Nilai")

ggplot(df_long, aes(x = Nilai, fill = diagnosis, color = diagnosis)) +
  geom_density(alpha = 0.35, linewidth = 0.9) +
  geom_rug(alpha = 0.5, linewidth = 0.4) +
  facet_wrap(~Variabel, scales = "free", ncol = 2) +
  scale_fill_manual(values = COL) +
  scale_color_manual(values = COL) +
  labs(title    = "Variable Distribution per Diagnosis Group",
       subtitle = "Density plot with rug marks",
       x = "Value", y = "Density", fill = "Diagnosis", color = "Diagnosis") +
  theme(legend.position = "bottom")

ggplot(df_long, aes(x = diagnosis, y = Nilai, fill = diagnosis)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21, outlier.size = 2) +
  geom_jitter(aes(color = diagnosis), width = 0.12, alpha = 0.5, size = 1.8) +
  facet_wrap(~Variabel, scales = "free_y", ncol = 2) +
  scale_fill_manual(values = COL) +
  scale_color_manual(values = COL) +
  labs(title = "Variable Boxplot per Diagnosis Group",
       subtitle = "With jitter for individual distribution",
       x = NULL, y = "Value") +
  theme(legend.position = "none")

3 MANCOVA Assumption Testing

3.1 Assumption 1 — Dependence Among DVs

Objective: Verify that the DVs are correlated — a fundamental requirement of multivariate analysis. Method: Bartlett’s Test of Sphericity. Decision: p < 0.05 → H₀ rejected → DVs are correlated → SATISFIED

R_matrix   <- cor(df[, DVS])
sphericity <- cortest.bartlett(R_matrix, n = nrow(df))

tibble(
  Statistik = c("Chi-square", "Degrees of Freedom", "p-value"),
  Nilai     = c(round(sphericity$chisq, 4),
                sphericity$df,
                format(sphericity$p.value, scientific = TRUE, digits = 4))
) %>%
  kable(caption = "Bartlett's Test of Sphericity") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)

Bartlett’s Test of Sphericity
Statistik	Nilai
Chi-square	24.0657
Degrees of Freedom	3
p-value	2.42e-05

ggcorrplot(R_matrix, method = "circle", type = "lower", lab = TRUE,
           lab_size = 4, colors = c("#c0392b","white","#2e86c1"),
           title = "Correlation Matrix Among DVs", ggtheme = theme_minimal())

p-value = 2.42e-05 < 0.05 → H₀ rejected → DVs are significantly correlated. ✔ ASSUMPTION 1 SATISFIED

3.2 Assumption 2 — Homogeneity of Covariance

Objective: The covariance matrices across groups (Malignant vs Benign) must be homogeneous. Method: Box’s M Test. Decision: p ≥ 0.05 → H₀ failed to be rejected → homogeneous → SATISFIED

bm   <- boxM(df[, DVS], df[[IV]])
bm_p <- as.numeric(bm$p.value)
bm_M <- as.numeric(bm$statistic[[1]])

bm_df <- data.frame(
  Statistik = c("Box's M (Chi-Sq approx.)", "df", "p-value"),
  Nilai     = c(as.character(round(bm_M, 4)),
                as.character(as.numeric(bm$parameter[[1]])),
                as.character(round(bm_p, 4))),
  stringsAsFactors = FALSE
)

kable(bm_df, caption = "Box's M Test") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)

Box’s M Test
Statistik	Nilai
Box’s M (Chi-Sq approx.)	3.5164
df	6
p-value	0.7418

cov_mal <- cov(df[df$diagnosis == "Malignant", DVS])
cov_ben <- cov(df[df$diagnosis == "Benign",    DVS])

var_compare <- data.frame(
  Var      = rep(DVS, 2),
  Variansi = c(diag(cov_mal), diag(cov_ben)),
  Grup     = rep(c("Malignant","Benign"), each = length(DVS))
)

ggplot(var_compare, aes(x = Var, y = Variansi, fill = Grup)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.6, alpha = 0.85) +
  scale_fill_manual(values = COL) +
  labs(title = "Variance Comparison per DV and Group",
       x = "DV", y = "Variance", fill = "Diagnosis") +
  theme(legend.position = "bottom")

p-value = 0.7418 ≥ 0.05 → H₀ failed to be rejected → Covariance matrices are homogeneous. ✔ ASSUMPTION 2 SATISFIED

3.3 Assumption 3 — Multivariate Normality

Objective: The combination of DVs follows a multivariate normal distribution.
Method: Mardia’s Test (multivariate skewness + kurtosis).
Decision: Both p ≥ 0.05 → SATISFIED

mardia_result <- psych::mardia(df[, DVS], plot = FALSE)

p_skew <- mardia_result$p.skew
z_kurt <- mardia_result$kurtosis
p_kurt <- 2 * (1 - pnorm(abs(z_kurt)))

tibble(
  Komponen  = c("Mardia Skewness", "Mardia Kurtosis"),
  Statistik = c(round(mardia_result$b1p, 4), round(mardia_result$b2p, 4)),
  `p-value` = c(round(p_skew, 4), round(p_kurt, 4)),
  Status    = c(ifelse(p_skew >= 0.05, "Normal ✔", "Not Normal ✘"),
                ifelse(p_kurt >= 0.05, "Normal ✔", "Not Normal ✘"))
) %>%
  kable(caption = "Mardia's Test — Multivariate Normality") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(4, bold = TRUE,
              color = ifelse(c(p_skew, p_kurt) >= 0.05, "#1e8449", "#c0392b"))

Mardia’s Test — Multivariate Normality
Komponen	Statistik	p-value	Status
Mardia Skewness	0.9458	0.6404	Normal ✔
Mardia Kurtosis	13.3407	0.2841	Normal ✔

par(mfrow = c(1, 3), mar = c(4, 4, 3, 1))
for (v in DVS) {
  qqnorm(df[[v]], main = paste("Q-Q:", v), col = "#2e86c1", pch = 19, cex = 0.8)
  qqline(df[[v]], col = "#c0392b", lwd = 2)
}

par(mfrow = c(1,1))

p-skewness = 0.6404 ≥ 0.05 and p-kurtosis = 0.2841 ≥ 0.05 → Data satisfies multivariate normality. ✔ ASSUMPTION 3 SATISFIED

3.4 Assumption 4 — Linearity of Covariate–DV

Objective: The covariate concavity_mean must have a linear correlation with each DV.
Method: Pearson Correlation.
Decision: p < 0.05 → linear relationship exists → SATISFIED

lin_results <- map_df(DVS, function(v) {
  ct <- cor.test(df[[COV]], df[[v]])
  tibble(DV       = v,
         r        = round(ct$estimate, 4),
         `t-stat` = round(ct$statistic, 4),
         df       = ct$parameter,
         `p-value`= round(ct$p.value, 6),
         Status   = ifelse(ct$p.value < 0.05,
                           "Significant Linear ✔", "Not Linear ✘"))
})

kable(lin_results, caption = "Pearson Correlation: Covariate vs DV") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(6, bold = TRUE, color = "#1e8449")

Pearson Correlation: Covariate vs DV
DV	r	t-stat	df	p-value	Status
texture_mean	0.3750	2.8024	48	0.007294	Significant Linear ✔
smoothness_mean	0.4534	3.5247	48	0.000943	Significant Linear ✔
symmetry_mean	0.4436	3.4293	48	0.001252	Significant Linear ✔

df %>%
  pivot_longer(all_of(DVS), names_to = "DV", values_to = "Nilai_DV") %>%
  ggplot(aes(x = concavity_mean, y = Nilai_DV, color = diagnosis)) +
  geom_point(alpha = 0.7, size = 2) +
  geom_smooth(method = "lm", se = TRUE, aes(group = 1),
              color = "#2c3e50", linewidth = 1, linetype = "dashed") +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  scale_color_manual(values = COL) +
  labs(title = "Linearity: concavity_mean vs Each DV",
       x = "concavity_mean (Covariate)", y = "DV Value", color = "Diagnosis") +
  theme(legend.position = "bottom")

All p-values < 0.05 → All three DVs have a significant linear relationship with the covariate. ✔ ASSUMPTION 4 SATISFIED

3.5 Assumption 5 — Independence of Observations

Objective: Each observation must be independent.
Method: Study design evaluation (not statistically tested).

Kriteria	Evaluasi	Status
Different observation units	Each row = one unique patient (not repeated measures)	✔
No overlap	Each patient belongs to only one group	✔
Non-systematic sampling	Fixed-index without dependency between rows	✔
Independent data source	Wisconsin BC (UCI): each observation is independent	✔

There is no indication of dependency among observations based on the study design. ✔ ASSUMPTION 5 SATISFIED

3.6 Assumption Summary

tibble(
  No       = 1:5,
  Asumsi   = c("Dependence among DVs", "Homogeneity of Covariance",
               "Multivariate Normality", "Linearity of Covariate–DV",
               "Independence of Observations"),
  Metode   = c("Bartlett's Sphericity", "Box's M Test",
               "Mardia's Test", "Pearson Correlation", "Design Evaluation"),
  `p-value`= c(format(sphericity$p.value, scientific=TRUE, digits=3),
               as.character(round(bm_p, 4)),
               paste0("skew=",round(p_skew,4)," / kurt=",round(p_kurt,4)),
               paste0("max=",round(max(lin_results$`p-value`),4)), "N/A"),
  Status   = rep("✔ SATISFIED", 5)
) %>%
  kable(caption = "Summary of 5 Assumption Tests") %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"),
                full_width = TRUE) %>%
  column_spec(5, bold = TRUE, color = "#1e8449") %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")

Summary of 5 Assumption Tests
No	Asumsi	Metode	p-value	Status
1	Dependence among DVs	Bartlett’s Sphericity	2.42e-05	✔ SATISFIED
2	Homogeneity of Covariance	Box’s M Test	0.7418	✔ SATISFIED
3	Multivariate Normality	Mardia’s Test	skew=0.6404 / kurt=0.2841	✔ SATISFIED
4	Linearity of Covariate–DV	Pearson Correlation	max=0.0073	✔ SATISFIED
5	Independence of Observations	Design Evaluation	N/A	✔ SATISFIED

All assumptions are satisfied The dataset is suitable to proceed to the main analysis.

4 ANOVA (One-Way, Univariate)

Objective: To test whether there are significant differences in means between the Malignant and Benign groups for each DV separately, without considering the covariate or relationships among DVs.

Note: This ANOVA serves as a baseline before adding covariates (ANCOVA) and before simultaneous multivariate analysis (MANOVA).

4.1 Model and ANOVA Results

anova_results <- map_df(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ diagnosis")), data = df)
  s   <- summary(fit)[[1]]
  tibble(
    DV        = v,
    `F-value` = round(s["diagnosis", "F value"], 4),
    `df1`     = s["diagnosis", "Df"],
    `df2`     = s["Residuals", "Df"],
    `p-value` = round(s["diagnosis", "Pr(>F)"], 6),
    Keputusan = ifelse(s["diagnosis","Pr(>F)"] < 0.05,
                       "Reject H₀ ✔", "Fail to Reject H₀ ✘")
  )
})

kable(anova_results, caption = "One-Way ANOVA Results per Dependent Variable") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(6, bold = TRUE,
              color = ifelse(anova_results$`p-value` < 0.05, "#1e8449", "#c0392b"))

One-Way ANOVA Results per Dependent Variable
DV	F-value	df1	df2	p-value	Keputusan
texture_mean	13.0630	1	48	0.000720	Reject H₀ ✔
smoothness_mean	4.0243	1	48	0.050503	Fail to Reject H₀ ✘
symmetry_mean	2.5132	1	48	0.119460	Fail to Reject H₀ ✘

4.2 ANOVA Interpretation

ANOVA Results:

texture_mean → F = 13.063, p = 7^{-4} < 0.05 → Significant ✔
There is a significant difference in mean texture between Malignant and Benign.
smoothness_mean → F = 4.0243, p = 0.0505 → Not Significant ✘
symmetry_mean → F = 2.5132, p = 0.1195 → Not Significant ✘

4.3 ANOVA Visualization

df %>%
  pivot_longer(all_of(DVS), names_to = "Variable", values_to = "Value") %>%
  ggplot(aes(x = diagnosis, y = Value, fill = diagnosis)) +
  stat_summary(fun = mean, geom = "bar", alpha = 0.8, position = "dodge") +
  stat_summary(fun.data = mean_se, geom = "errorbar",
               position = position_dodge(0.9), width = 0.25, linewidth = 0.8) +
  facet_wrap(~Variable, scales = "free_y", ncol = 3) +
  scale_fill_manual(values = COL) +
  labs(title    = "ANOVA: Mean ± SE per DV and Group",
       subtitle = "Error bar = Standard Error",
       x = "Diagnosis", y = "Mean Value", fill = "Diagnosis") +
  theme(legend.position = "bottom")

df %>%
  pivot_longer(all_of(DVS), names_to = "Variable", values_to = "Value") %>%
  ggplot(aes(x = diagnosis, y = Value, fill = diagnosis)) +
  geom_violin(alpha = 0.5, trim = FALSE) +
  geom_boxplot(width = 0.15, alpha = 0.8, outlier.size = 1.5) +
  facet_wrap(~Variable, scales = "free_y", ncol = 3) +
  scale_fill_manual(values = COL) +
  labs(title = "ANOVA: Distribution per DV and Group (Violin + Boxplot)",
       x = "Diagnosis", y = "Value", fill = "Diagnosis") +
  theme(legend.position = "bottom")

4.4 ANOVA Effect Size

eta_results <- map_df(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ diagnosis")), data = df)
  e   <- eta_squared(fit, partial = FALSE)
  tibble(
    DV           = v,
    `eta²`       = round(e$Eta2[1], 4),
    Interpretasi = case_when(
      e$Eta2[1] >= 0.14 ~ "Large (≥ 0.14)",
      e$Eta2[1] >= 0.06 ~ "Medium (0.06–0.14)",
      e$Eta2[1] >= 0.01 ~ "Small (0.01–0.06)",
      TRUE               ~ "Very Small (< 0.01)"
    )
  )
})

kable(eta_results, caption = "Effect Size (Eta-Squared) per DV — ANOVA") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)

Effect Size (Eta-Squared) per DV — ANOVA
DV	eta²	Interpretasi
texture_mean	0.2139	Large (≥ 0.14)
smoothness_mean	0.0774	Medium (0.06–0.14)
symmetry_mean	0.0498	Small (0.01–0.06)

5 MANOVA (One-Way, Multivariate)

Objective: To test whether there are significant differences in the mean vector between Malignant and Benign across all DVs simultaneously, without covariates.

Advantage vs ANOVA: MANOVA considers correlations among DVs and controls the familywise error rate, making it more appropriate for simultaneous testing.

5.1 MANOVA Model

manova_model <- manova(
  cbind(texture_mean, smoothness_mean, symmetry_mean) ~ diagnosis,
  data = df
)

5.2 Multivariate Test Statistics

tests <- c("Wilks", "Pillai", "Hotelling-Lawley", "Roy")

manova_stats <- map_df(tests, function(t) {
  s <- summary(manova_model, test = t)$stats
  tibble(
    `Statistik Uji` = t,
    Nilai           = round(s[1, 2], 5),
    `F approx`      = round(s[1, 3], 4),
    `num df`        = round(s[1, 4], 0),
    `den df`        = round(s[1, 5], 0),
    `p-value`       = round(s[1, 6], 6),
    Keputusan       = ifelse(s[1,6] < 0.05, "Reject H₀ ✔", "Fail to Reject H₀")
  )
})

kable(manova_stats, caption = "MANOVA Results — Four Multivariate Test Statistics") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(7, bold = TRUE,
              color = ifelse(manova_stats$`p-value` < 0.05, "#1e8449", "#c0392b")) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")

MANOVA Results — Four Multivariate Test Statistics
Statistik Uji	Nilai	F approx	num df	den df	p-value	Keputusan
Wilks	0.66636	7.6771	3	46	0.000291	Reject H₀ ✔
Pillai	0.33364	7.6771	3	46	0.000291	Reject H₀ ✔
Hotelling-Lawley	0.50068	7.6771	3	46	0.000291	Reject H₀ ✔
Roy	0.50068	7.6771	3	46	0.000291	Reject H₀ ✔

5.3 MANOVA Interpretation

wilks_val <- manova_stats$Nilai[manova_stats$`Statistik Uji` == "Wilks"]
wilks_p   <- manova_stats$`p-value`[manova_stats$`Statistik Uji` == "Wilks"]

Wilks’ Lambda = 0.66636, F = 7.6771, p = 2.91^{-4} < 0.05.

H₀ rejected → There is a significant difference in the mean vector (texture_mean, smoothness_mean, symmetry_mean) between the Malignant and Benign groups simultaneously.

Wilks’ Lambda value 0.66636 close to 0 indicates that the between-group variation is relatively large compared to the within-group variation.

5.4 Follow-up ANOVA

summary.aov(manova_model)

##  Response texture_mean :
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## diagnosis    1 169.43  169.43  13.063 0.0007201 ***
## Residuals   48 622.56   12.97                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response smoothness_mean :
##             Df    Sum Sq    Mean Sq F value Pr(>F)  
## diagnosis    1 0.0006534 0.00065341  4.0243 0.0505 .
## Residuals   48 0.0077937 0.00016237                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response symmetry_mean :
##             Df    Sum Sq    Mean Sq F value Pr(>F)
## diagnosis    1 0.0013087 0.00130867  2.5132 0.1195
## Residuals   48 0.0249941 0.00052071

5.5 HE Plot

heplot(manova_model,
       fill      = TRUE,
       fill.alpha = 0.2,
       col       = c("#2e86c1","#c0392b"),
       main      = "HE Plot — MANOVA\n(texture_mean vs smoothness_mean)")

HE Plot Interpretation: The H (hypothesis) ellipse represents between-group variation due to the diagnosis factor, while the E (error) ellipse represents within-group variation. The larger the H ellipse relative to E, the stronger the treatment effect. If the H ellipse extends beyond the E ellipse, the effect is significant.

5.6 Canonical Discriminant Plot

candisc_model <- candisc(manova_model)
plot(candisc_model,
     col    = c("#2e86c1","#c0392b"),
     pch    = c(16, 17),
     main   = "Canonical Discriminant Analysis — MANOVA")

5.7 Effect Size MANOVA

wilks_full <- summary(manova_model, test = "Wilks")$stats
eta_manova <- 1 - wilks_full[1,2]

tibble(
  Metode           = "MANOVA (Wilks' Lambda)",
  `Wilks' Lambda`  = round(wilks_full[1,2], 5),
  `1 - Lambda (η²)`= round(eta_manova, 4),
  Interpretasi     = case_when(
    eta_manova >= 0.14 ~ "Large Effect (≥ 0.14)",
    eta_manova >= 0.06 ~ "Medium Effect (0.06–0.14)",
    eta_manova >= 0.01 ~ "Small Effect (0.01–0.06)",
    TRUE               ~ "Very Small Effect"
  )
) %>%
  kable(caption = "MANOVA Effect Size") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)

MANOVA Effect Size
Metode	Wilks’ Lambda	1 - Lambda (η²)	Interpretasi
MANOVA (Wilks’ Lambda)	0.66636	0.3336	Large Effect (≥ 0.14)

6 ANCOVA (One-Way, Univariate + Covariate)

Objective: To test mean differences for each DV between Malignant and Benign after controlling for the effect of the covariate concavity_mean. ANCOVA “neutralizes” variation caused by differences in covariate values across observations.

Difference from ANOVA: ANCOVA includes a covariate as a continuous predictor, making the estimation of group differences more accurate (adjusted means).

Model: DV ~ diagnosis + concavity_mean

6.1 Model and ANCOVA Results

ancova_results <- map_df(DVS, function(v) {
  fit  <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  s    <- summary(fit)[[1]]
  tibble(
    DV              = v,
    `F (COV)`       = round(s["concavity_mean","F value"], 4),
    `p (COV)`       = round(s["concavity_mean","Pr(>F)"], 6),
    `F (diagnosis)` = round(s["diagnosis","F value"], 4),
    `p (diagnosis)` = round(s["diagnosis","Pr(>F)"], 6),
    Keputusan       = ifelse(s["diagnosis","Pr(>F)"] < 0.05,
                             "Reject H₀ ✔", "Fail to Reject H₀ ✘")
  )
})

kable(ancova_results,
      caption = "ANCOVA Results per DV (after controlling for concavity_mean)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(6, bold = TRUE,
              color = ifelse(ancova_results$`p (diagnosis)` < 0.05,
                             "#1e8449", "#c0392b")) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")

ANCOVA Results per DV (after controlling for concavity_mean)
DV	F (COV)	p (COV)	F (diagnosis)	p (diagnosis)	Keputusan
texture_mean	8.4532	0.005546	4.6651	0.035915	Reject H₀ ✔
smoothness_mean	12.2263	0.001040	0.2373	0.628403	Fail to Reject H₀ ✘
symmetry_mean	11.7691	0.001264	1.0354	0.314115	Fail to Reject H₀ ✘

6.2 ANCOVA Table per DV

for (v in DVS) {
  fit <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  cat(paste0("\n\n### ", v, "\n\n"))
  cat(
    kable(round(as.data.frame(summary(fit)[[1]]), 4),
          caption = paste("ANCOVA Table —", v)) %>%
      kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
  )
  cat("\n\n")
}

6.2.1 texture_mean

ANCOVA Table — texture_mean
	Df	Sum Sq	Mean Sq	F value	Pr(>F)
concavity_mean	1	111.3604	111.3604	8.4532	0.0055
diagnosis	1	61.4571	61.4571	4.6651	0.0359
Residuals	47	619.1685	13.1738	NA	NA

6.2.2 smoothness_mean

ANCOVA Table — smoothness_mean
	Df	Sum Sq	Mean Sq	F value	Pr(>F)
concavity_mean	1	0.0017	0.0017	12.2263	0.0010
diagnosis	1	0.0000	0.0000	0.2373	0.6284
Residuals	47	0.0067	0.0001	NA	NA

6.2.3 symmetry_mean

ANCOVA Table — symmetry_mean
	Df	Sum Sq	Mean Sq	F value	Pr(>F)
concavity_mean	1	0.0052	0.0052	11.7691	0.0013
diagnosis	1	0.0005	0.0005	1.0354	0.3141
Residuals	47	0.0207	0.0004	NA	NA

6.3 Adjusted Means (Estimated Marginal Means)

emm_list <- map(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  emm <- emmeans(fit, ~ diagnosis)
  as.data.frame(emm) %>% mutate(DV = v)
})

emm_df <- bind_rows(emm_list)

kable(emm_df %>%
        dplyr::select(DV, diagnosis, emmean, SE, lower.CL, upper.CL) %>%
        mutate(across(where(is.numeric), ~round(., 4))),
      caption = "Estimated Marginal Means (Adjusted Means) — ANCOVA") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)

Estimated Marginal Means (Adjusted Means) — ANCOVA
DV	diagnosis	emmean	SE	lower.CL	upper.CL
texture_mean	Benign	17.8423	0.8926	16.0466	19.6380
texture_mean	Malignant	20.9969	0.8926	19.2012	22.7926
smoothness_mean	Benign	0.0994	0.0029	0.0935	0.1053
smoothness_mean	Malignant	0.0970	0.0029	0.0911	0.1029
symmetry_mean	Benign	0.1865	0.0052	0.1761	0.1969
symmetry_mean	Malignant	0.1779	0.0052	0.1675	0.1883

ggplot(emm_df, aes(x = diagnosis, y = emmean, color = diagnosis, group = diagnosis)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2, linewidth = 1) +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  scale_color_manual(values = COL) +
  labs(title    = "ANCOVA: Estimated Marginal Means ± 95% CI",
       subtitle = "Adjusted means after controlling for concavity_mean",
       x = "Diagnosis", y = "Adjusted Mean", color = "Diagnosis") +
  theme(legend.position = "bottom")

6.4 Homogeneity of Slopes (Interaction Test)

slope_results <- map_df(DVS, function(v) {
  fit_int <- aov(as.formula(paste(v, "~ diagnosis * concavity_mean")), data = df)
  s       <- summary(fit_int)[[1]]
  intx_p  <- s["diagnosis:concavity_mean","Pr(>F)"]
  tibble(
    DV              = v,
    `p (interaction)` = round(intx_p, 4),
    `Homogeneity of Slopes` = ifelse(intx_p >= 0.05,
                                 "Satisfied ✔ (parallel slopes)",
                                 "Not Satisfied ✘ (different slopes)")
  )
})

kable(slope_results,
      caption = "Test of Homogeneity of Regression Slopes (ANCOVA Assumption)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(3, bold = TRUE,
              color = ifelse(slope_results$`p (interaction)` >= 0.05,
                             "#1e8449", "#c0392b"))

Test of Homogeneity of Regression Slopes (ANCOVA Assumption)
DV	p (interaction)	Homogeneity of Slopes
texture_mean	0.1587	Satisfied ✔ (parallel slopes)
smoothness_mean	0.3841	Satisfied ✔ (parallel slopes)
symmetry_mean	0.6863	Satisfied ✔ (parallel slopes)

df %>%
  pivot_longer(all_of(DVS), names_to = "DV", values_to = "Value") %>%
  ggplot(aes(x = concavity_mean, y = Value, color = diagnosis)) +
  geom_point(alpha = 0.6, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  scale_color_manual(values = COL) +
  labs(title    = "ANCOVA: Regression Lines per Group",
       subtitle = "Parallel slopes indicate that homogeneity of slopes is satisfied",
       x = "concavity_mean (Covariate)", y = "DV", color = "Diagnosis") +
  theme(legend.position = "bottom")

6.5 ANCOVA Effect Size

eta_ancova <- map_df(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  e   <- eta_squared(fit, partial = TRUE)
  e_diag <- e[e$Parameter == "diagnosis", ]
  tibble(
    DV               = v,
    `Partial η² (diagnosis)` = round(e_diag$Eta2_partial, 4),
    Interpretasi     = case_when(
      e_diag$Eta2_partial >= 0.14 ~ "Large (≥ 0.14)",
      e_diag$Eta2_partial >= 0.06 ~ "Medium (0.06–0.14)",
      e_diag$Eta2_partial >= 0.01 ~ "Small (0.01–0.06)",
      TRUE                         ~ "Very Small (< 0.01)"
    )
  )
})

kable(eta_ancova,
      caption = "Partial Eta-Squared (Effect Size) — ANCOVA") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)

Partial Eta-Squared (Effect Size) — ANCOVA
DV	Partial η² (diagnosis)	Interpretasi
texture_mean	0.0903	Medium (0.06–0.14)
smoothness_mean	0.0050	Very Small (< 0.01)
symmetry_mean	0.0216	Small (0.01–0.06)

7 MANCOVA (One-Way, Multivariate + Covariate)

Objective: To test differences in the mean vector of all DVs simultaneously between Malignant and Benign, after controlling for the effect of the covariate concavity_mean.

MANCOVA = MANOVA + Covariate

Model: cbind(DV1, DV2, DV3) ~ concavity_mean + diagnosis

7.1 MANCOVA Model

mancova_model <- manova(
  cbind(texture_mean, smoothness_mean, symmetry_mean) ~ concavity_mean + diagnosis,
  data = df
)

summary(mancova_model, test = "Wilks")

##                Df   Wilks approx F num Df den Df    Pr(>F)    
## concavity_mean  1 0.56604  11.5001      3     45 1.012e-05 ***
## diagnosis       1 0.89301   1.7972      3     45    0.1613    
## Residuals      47                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.2 MANCOVA Multivariate Test Statistics

mancova_stats <- map_df(tests, function(t) {
  s <- summary(mancova_model, test = t)$stats
  tibble(
    `Statistik Uji` = t,
    Nilai           = round(s["diagnosis", 2], 5),
    `F approx`      = round(s["diagnosis", 3], 4),
    `num df`        = round(s["diagnosis", 4], 0),
    `den df`        = round(s["diagnosis", 5], 0),
    `p-value`       = round(s["diagnosis", 6], 6),
    Keputusan       = ifelse(s["diagnosis",6] < 0.05, "Reject H₀ ✔", "Fail to Reject H₀")
  )
})

kable(mancova_stats,
      caption = "MANCOVA Results — Effect of diagnosis (after controlling for concavity_mean)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(7, bold = TRUE,
              color = ifelse(mancova_stats$`p-value` < 0.05, "#1e8449", "#c0392b")) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")

MANCOVA Results — Effect of diagnosis (after controlling for concavity_mean)
Statistik Uji	Nilai	F approx	num df	den df	p-value	Keputusan
Wilks	0.89301	1.7972	3	45	0.161295	Fail to Reject H₀
Pillai	0.10699	1.7972	3	45	0.161295	Fail to Reject H₀
Hotelling-Lawley	0.11981	1.7972	3	45	0.161295	Fail to Reject H₀
Roy	0.11981	1.7972	3	45	0.161295	Fail to Reject H₀

7.3 Covariate Test Statistics

mancova_cov_stats <- map_df(tests, function(t) {
  s <- summary(mancova_model, test = t)$stats
  tibble(
    `Statistik Uji` = t,
    Nilai           = round(s["concavity_mean", 2], 5),
    `F approx`      = round(s["concavity_mean", 3], 4),
    `num df`        = round(s["concavity_mean", 4], 0),
    `den df`        = round(s["concavity_mean", 5], 0),
    `p-value`       = round(s["concavity_mean", 6], 6),
    Keputusan       = ifelse(s["concavity_mean",6] < 0.05,
                             "Significant Covariate ✔", "Non-Significant Covariate")
  )
})

kable(mancova_cov_stats,
      caption = "MANCOVA — Covariate Effect (concavity_mean)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(7, bold = TRUE,
              color = ifelse(mancova_cov_stats$`p-value` < 0.05,
                             "#1e8449", "#c0392b")) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")

MANCOVA — Covariate Effect (concavity_mean)
Statistik Uji	Nilai	F approx	num df	den df	p-value	Keputusan
Wilks	0.56604	11.5001	3	45	1e-05	Significant Covariate ✔
Pillai	0.43396	11.5001	3	45	1e-05	Significant Covariate ✔
Hotelling-Lawley	0.76667	11.5001	3	45	1e-05	Significant Covariate ✔
Roy	0.76667	11.5001	3	45	1e-05	Significant Covariate ✔

7.4 MANCOVA Interpretation

mancova_wilks_val <- mancova_stats$Nilai[mancova_stats$`Statistik Uji`=="Wilks"]
mancova_wilks_p   <- mancova_stats$`p-value`[mancova_stats$`Statistik Uji`=="Wilks"]
mancova_cov_p     <- mancova_cov_stats$`p-value`[mancova_cov_stats$`Statistik Uji`=="Wilks"]

Diagnosis Effect (after covariate control):
Wilks’ Lambda = 0.89301,
p = 0.161295
> 0.05 → H₀ failed to be rejected → No significant difference after covariate control.

Covariate Effect (concavity_mean):
p = 10^{-5}
< 0.05 → Covariate has a significant effect on the combined DVs. Using MANCOVA (instead of MANOVA) is appropriate.

7.5 Follow-up ANCOVA

summary.aov(mancova_model)

##  Response texture_mean :
##                Df Sum Sq Mean Sq F value   Pr(>F)   
## concavity_mean  1 111.36 111.360  8.4532 0.005546 **
## diagnosis       1  61.46  61.457  4.6651 0.035915 * 
## Residuals      47 619.17  13.174                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response smoothness_mean :
##                Df    Sum Sq    Mean Sq F value  Pr(>F)   
## concavity_mean  1 0.0017368 0.00173680 12.2263 0.00104 **
## diagnosis       1 0.0000337 0.00003371  0.2373 0.62840   
## Residuals      47 0.0066766 0.00014205                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response symmetry_mean :
##                Df    Sum Sq   Mean Sq F value   Pr(>F)   
## concavity_mean  1 0.0051762 0.0051762 11.7691 0.001264 **
## diagnosis       1 0.0004554 0.0004554  1.0354 0.314115   
## Residuals      47 0.0206712 0.0004398                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.6 HE Plot MANCOVA

heplot(mancova_model,
       fill       = TRUE,
       fill.alpha = 0.2,
       col        = c("#8e44ad","#27ae60","#c0392b"),
       main       = "HE Plot — MANCOVA\n(After controlling concavity_mean)")

7.7 MANCOVA Effect Size

wilks_mancova <- summary(mancova_model, test = "Wilks")$stats
eta_mancova_val <- 1 - wilks_mancova["diagnosis", 2]

tibble(
  Metode           = "MANCOVA (Wilks' Lambda — diagnosis)",
  `Wilks' Lambda`  = round(wilks_mancova["diagnosis",2], 5),
  `1-Lambda (η²)`  = round(eta_mancova_val, 4),
  Interpretasi     = case_when(
    eta_mancova_val >= 0.14 ~ "Large Effect (≥ 0.14)",
    eta_mancova_val >= 0.06 ~ "Medium Effect (0.06–0.14)",
    eta_mancova_val >= 0.01 ~ "Small Effect (0.01–0.06)",
    TRUE                    ~ "Very Small Effect"
  )
) %>%
  kable(caption = "MANCOVA Effect Size") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)

MANCOVA Effect Size
Metode	Wilks’ Lambda	1-Lambda (η²)	Interpretasi
MANCOVA (Wilks’ Lambda — diagnosis)	0.89301	0.107	Medium Effect (0.06–0.14)

8 Comparison of MANOVA vs MANCOVA

Purpose of Comparison: To evaluate the impact of adding a covariate (concavity_mean) on the test results.

8.1 Wilks’ Lambda Comparison Table

wl_manova <- summary(manova_model,  test = "Wilks")$stats
wl_mancova <- summary(mancova_model, test = "Wilks")$stats

comp_df <- tibble(
  Metode           = c("MANOVA", "MANCOVA"),
  `Covariate`      = c("None", "concavity_mean"),
  `Wilks' Lambda`  = c(round(wl_manova[1,2], 5),
                       round(wl_mancova["diagnosis",2], 5)),
  `F approx`       = c(round(wl_manova[1,3], 4),
                       round(wl_mancova["diagnosis",3], 4)),
  `p-value`        = c(round(wl_manova[1,6], 6),
                       round(wl_mancova["diagnosis",6], 6)),
  `η² (1-Lambda)`  = c(round(1 - wl_manova[1,2], 4),
                       round(1 - wl_mancova["diagnosis",2], 4)),
  Keputusan        = c(
    ifelse(wl_manova[1,6] < 0.05, "Reject H₀ ✔", "Fail to Reject H₀"),
    ifelse(wl_mancova["diagnosis",6] < 0.05, "Reject H₀ ✔", "Fail to Reject H₀")
  )
)

kable(comp_df, caption = "Comparison of Wilks' Lambda: MANOVA vs MANCOVA") %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"),
                full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white") %>%
  row_spec(1, background = "#eaf4fb") %>%
  row_spec(2, background = "#f5eef8")

Comparison of Wilks’ Lambda: MANOVA vs MANCOVA
Metode	Covariate	Wilks’ Lambda	F approx	p-value	η² (1-Lambda)	Keputusan
MANOVA	None	0.66636	7.6771	0.000291	0.3336	Reject H₀ ✔
MANCOVA	concavity_mean	0.89301	1.7972	0.161295	0.1070	Fail to Reject H₀

8.2 Comparison of All Test Statistics

all_tests_df <- map_df(tests, function(t) {
  sm  <- summary(manova_model,  test = t)$stats
  smc <- summary(mancova_model, test = t)$stats
  tibble(
    `Statistical Test` = t,
    `MANOVA — p`    = round(sm[1,6], 6),
    `MANCOVA — p`   = round(smc["diagnosis",6], 6),
    `Change in p`   = round(smc["diagnosis",6] - sm[1,6], 6),
    `MANOVA — value` = round(sm[1,2], 5),
    `MANCOVA — value`= round(smc["diagnosis",2], 5)
  )
})

kable(all_tests_df, caption = "Comparison of All Test Statistics: MANOVA vs MANCOVA") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")

Comparison of All Test Statistics: MANOVA vs MANCOVA
Statistical Test	MANOVA — p	MANCOVA — p	Change in p	MANOVA — value	MANCOVA — value
Wilks	0.000291	0.161295	0.161004	0.66636	0.89301
Pillai	0.000291	0.161295	0.161004	0.33364	0.10699
Hotelling-Lawley	0.000291	0.161295	0.161004	0.50068	0.11981
Roy	0.000291	0.161295	0.161004	0.50068	0.11981

8.3 Comparison of ANOVA vs ANCOVA per DV

comp_uni <- map_df(DVS, function(v) {
  # ANOVA
  fit_anova  <- aov(as.formula(paste(v, "~ diagnosis")), data = df)
  s_anova    <- summary(fit_anova)[[1]]
  f_anova    <- s_anova["diagnosis","F value"]
  p_anova    <- s_anova["diagnosis","Pr(>F)"]
  e_anova    <- eta_squared(fit_anova, partial=FALSE)$Eta2[1]

  # ANCOVA
  fit_ancova <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  s_ancova   <- summary(fit_ancova)[[1]]
  f_ancova   <- s_ancova["diagnosis","F value"]
  p_ancova   <- s_ancova["diagnosis","Pr(>F)"]
  e_ancova   <- eta_squared(fit_ancova, partial=TRUE)$Eta2_partial[
                  which(eta_squared(fit_ancova, partial=TRUE)$Parameter == "diagnosis")]

  tibble(
    DV              = v,
    `F (ANOVA)`     = round(f_anova, 4),
    `p (ANOVA)`     = round(p_anova, 6),
    `η² (ANOVA)`    = round(e_anova, 4),
    `F (ANCOVA)`    = round(f_ancova, 4),
    `p (ANCOVA)`    = round(p_ancova, 6),
    `η²p (ANCOVA)`  = round(e_ancova, 4),
    `Δ F`           = round(f_ancova - f_anova, 4),
    `Δ η²`          = round(e_ancova - e_anova, 4)
  )
})

kable(comp_uni,
      caption = "Comparison of ANOVA vs ANCOVA per DV (Covariate Effect Control)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")

Comparison of ANOVA vs ANCOVA per DV (Covariate Effect Control)
DV	F (ANOVA)	p (ANOVA)	η² (ANOVA)	F (ANCOVA)	p (ANCOVA)	η²p (ANCOVA)	Δ F	Δ η²
texture_mean	13.0630	0.000720	0.2139	4.6651	0.035915	0.0903	-8.3979	-0.1236
smoothness_mean	4.0243	0.050503	0.0774	0.2373	0.628403	0.0050	-3.7869	-0.0723
symmetry_mean	2.5132	0.119460	0.0498	1.0354	0.314115	0.0216	-1.4779	-0.0282

8.4 Visualization of Effect Size Comparison

effect_comp <- bind_rows(
  # ANOVA
  map_df(DVS, function(v) {
    fit <- aov(as.formula(paste(v, "~ diagnosis")), data = df)
    tibble(DV = v, Metode = "ANOVA",
           eta2 = eta_squared(fit, partial=FALSE)$Eta2[1])
  }),
  # ANCOVA
  map_df(DVS, function(v) {
    fit <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
    e   <- eta_squared(fit, partial=TRUE)
    tibble(DV = v, Metode = "ANCOVA",
           eta2 = e$Eta2_partial[e$Parameter == "diagnosis"])
  })
)

ggplot(effect_comp, aes(x = DV, y = eta2, fill = Metode)) +
  geom_bar(stat = "identity", position = "dodge", alpha = 0.85, width = 0.6) +
  geom_text(aes(label = round(eta2, 3)),
            position = position_dodge(0.6), vjust = -0.4, size = 3.5) +
  scale_fill_manual(values = c("ANOVA" = "#2e86c1", "ANCOVA" = "#8e44ad")) +
  labs(title    = "Effect Size Comparison: ANOVA vs ANCOVA",
       subtitle = "η² (ANOVA) vs Partial η² (ANCOVA) — higher values indicate larger effects",
       x = "Dependent Variable", y = "Effect Size (η²)", fill = "Method") +
  theme(legend.position = "bottom") +
  ylim(0, max(effect_comp$eta2) * 1.25)

# Visualization of Wilks' Lambda changes
wilks_comp <- tibble(
  Metode         = c("MANOVA", "MANCOVA"),
  `Wilks Lambda` = c(wl_manova[1,2], wl_mancova["diagnosis",2]),
  `p-value`      = c(wl_manova[1,6], wl_mancova["diagnosis",6])
)

ggplot(wilks_comp, aes(x = Metode, y = `Wilks Lambda`, fill = Metode)) +
  geom_bar(stat = "identity", alpha = 0.85, width = 0.5) +
  geom_text(aes(label = paste0("Λ=", round(`Wilks Lambda`,4),
                               "\np=", round(`p-value`,4))),
            vjust = -0.4, size = 4, fontface = "bold") +
  scale_fill_manual(values = c("MANOVA" = "#2e86c1", "MANCOVA" = "#8e44ad")) +
  labs(title    = "Comparison of Wilks' Lambda: MANOVA vs MANCOVA",
       subtitle = "Lower Lambda values = stronger diagnosis effect",
       y = "Wilks' Lambda", x = NULL) +
  theme(legend.position = "none") +
  ylim(0, 1)

8.5 Interpretation of Comparison

1. Wilks’ Lambda - MANOVA: Λ = 0.66636 - MANCOVA: Λ = 0.89301 - MANCOVA Lambda is larger or equal → controlling the covariate does not improve detection of the diagnosis effect.

2. p-value - MANOVA: p = 2.91^{-4} - MANCOVA: p = 0.161295 - MANCOVA p-value is larger → the diagnosis effect weakens after covariate adjustment, indicating the covariate is related to the IV.

3. Covariate Significance Covariate concavity_mean has p = 10^{-5} — significant → the covariate does affect the DVs and is appropriate to include in the model. MANCOVA is more appropriate than MANOVA.

4. Main Conclusion MANOVA is significant but MANCOVA is not → group differences are partly driven by the covariate rather than a pure diagnosis effect.

9 Final Summary

9.1 Summary Table of All Analyses

final_summary <- tibble(
  Method      = c("ANOVA — texture_mean",
                  "ANOVA — smoothness_mean",
                  "ANOVA — symmetry_mean",
                  "MANOVA (Wilks)",
                  "ANCOVA — texture_mean",
                  "ANCOVA — smoothness_mean",
                  "ANCOVA — symmetry_mean",
                  "MANCOVA — diagnosis (Wilks)"),
  Covariate   = c(rep("None", 4), rep("concavity_mean", 4)),
  `p-value`   = c(
    round(anova_results$`p-value`, 5),
    round(wl_manova[1,6], 5),
    round(ancova_results$`p (diagnosis)`, 5),
    round(wl_mancova["diagnosis",6], 5)
  ),
  Decision   = c(
    ifelse(anova_results$`p-value` < 0.05, "Reject H₀ ✔", "Fail to Reject H₀ ✘"),
    ifelse(wl_manova[1,6] < 0.05,          "Reject H₀ ✔", "Fail to Reject H₀ ✘"),
    ifelse(ancova_results$`p (diagnosis)` < 0.05, "Reject H₀ ✔", "Fail to Reject H₀ ✘"),
    ifelse(wl_mancova["diagnosis",6] < 0.05, "Reject H₀ ✔", "Fail to Reject H₀ ✘")
  )
)

kable(final_summary, caption = "Summary of All Analytical Results") %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"),
                full_width = TRUE) %>%
  column_spec(4, bold = TRUE,
              color = ifelse(final_summary$Decision == "Reject H₀ ✔",
                             "#1e8449", "#c0392b")) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white") %>%
  row_spec(4, background = "#eaf4fb") %>%
  row_spec(8, background = "#f5eef8")

Summary of All Analytical Results
Method	Covariate	p-value	Decision
ANOVA — texture_mean	None	0.00072	Reject H₀ ✔
ANOVA — smoothness_mean	None	0.05050	Fail to Reject H₀ ✘
ANOVA — symmetry_mean	None	0.11946	Fail to Reject H₀ ✘
MANOVA (Wilks)	None	0.00029	Reject H₀ ✔
ANCOVA — texture_mean	concavity_mean	0.03592	Reject H₀ ✔
ANCOVA — smoothness_mean	concavity_mean	0.62840	Fail to Reject H₀ ✘
ANCOVA — symmetry_mean	concavity_mean	0.31411	Fail to Reject H₀ ✘
MANCOVA — diagnosis (Wilks)	concavity_mean	0.16130	Fail to Reject H₀ ✘

9.2 Narrative Conclusion

9.2.1 Complete Conclusion

1. Assumption Testing
All five MANCOVA assumptions were satisfied in a balanced sample of 50 observations (25 Malignant, 25 Benign): dependence among DVs, homogeneity of covariance matrices, multivariate normality, linearity between covariate and DVs, and independence of observations.

2. ANOVA
Univariate tests showed that texture_mean was the only DV with a significant difference between Malignant and Benign (p < 0.05). smoothness_mean and symmetry_mean did not show significant differences when tested individually.

3. MANOVA
Multivariately, the combined set of three DVs showed a significant difference between the two groups (Wilks’ Λ = 0.6664, p = 2.91^{-4}). This confirms that although not all individual DVs are significant, their combined multivariate structure can distinguish Malignant from Benign.

4. ANCOVA
After controlling for concavity_mean, the univariate results show that 1 out of 3 DVs are significant. Adjusting for the covariate improves estimation accuracy through adjusted means.

5. MANCOVA
At the multivariate level with covariate adjustment, the diagnosis effect yields Wilks’ Λ = 0.893, p = 0.161295.
Group differences are no longer significant after controlling for concavity_mean — indicating the covariate partially explains the group differences.
The covariate concavity_mean is itself significant (p = 10^{-5}), confirming that MANCOVA is more appropriate than MANOVA.

6. MANOVA vs MANCOVA Comparison
Including the covariate changes Wilks’ Lambda from 0.6664 (MANOVA) to 0.893 (MANCOVA), indicating that concavity_mean contributes to explaining variation in the DVs. MANCOVA provides a more refined understanding of the intrinsic group differences after removing the effect of nuclear concavity.

This report was prepared for the Multivariate Analysis course assignment.
Dataset: Wisconsin Breast Cancer Dataset — UCI Machine Learning Repository.
Analysis: Assumption Testing · ANOVA · MANOVA · ANCOVA · MANCOVA · Covariate Effect Comparison

Multivariate Analysis — Wisconsin Breast Cancer Dataset

Assumption Testing · ANOVA · MANOVA · ANCOVA · MANCOVA · Covariate Effect Comparison

Halilatunnisa (130) & Faishal Muflih Irfanu Tsaqib (170)

20 April 2026