pkgs <- c("tidyverse","MVN","biotools","car","heplots","candisc",
          "psych","ggplot2","ggcorrplot","knitr","kableExtra",
          "moments","GGally","emmeans","effectsize","broom")

new_pkgs <- pkgs[!pkgs %in% rownames(installed.packages())]
if (length(new_pkgs)) install.packages(new_pkgs, repos = "https://cran.r-project.org")

suppressPackageStartupMessages({
  library(tidyverse); library(MVN);        library(biotools)
  library(car);       library(heplots);    library(candisc)
  library(psych);     library(ggplot2);    library(ggcorrplot)
  library(knitr);     library(kableExtra); library(moments)
  library(GGally);    library(emmeans);    library(effectsize)
  library(broom)
})

knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE,
                      fig.align = "center", fig.width = 9,
                      fig.height = 5, dpi = 150)

theme_set(
  theme_minimal(base_size = 13) +
    theme(plot.title    = element_text(face = "bold", color = "#1a5276"),
          plot.subtitle = element_text(color = "#566573"),
          axis.title    = element_text(color = "#2c3e50"),
          strip.text    = element_text(face = "bold"))
)

COL <- c("Malignant" = "#c0392b", "Benign" = "#2e86c1")

1 Introduction

1.1 Background

This report presents a complete multivariate analysis of the Wisconsin Breast Cancer Dataset
from the UCI Machine Learning Repository. The analysis is conducted in stages:

  1. MANCOVA Assumption Testing — verifying the feasibility of the data before the main analysis
  2. ANOVA — testing differences in means per DV univariately
  3. MANOVA — testing differences in means of all DVs simultaneously
  4. ANCOVA — testing differences in means per DV with covariate control
  5. MANCOVA — testing differences in means of all DVs simultaneously with covariate control
  6. Comparison of Covariate Effects — comparing MANOVA vs MANCOVA results

1.2 Research Design

Component Variable Description
Independent Variable (IV) diagnosis Malignant (M) vs Benign (B)
Dependent Variable 1 texture_mean Mean texture of cells
Dependent Variable 2 smoothness_mean Mean smoothness of cells
Dependent Variable 3 symmetry_mean Mean symmetry of cells
Covariate (COV) concavity_mean Mean concavity of cell contours
Number of Observations 50 (25 Malignant, 25 Benign)

2 Data Loading and Preparation

2.1 Reading Data

df_raw <- read.csv("data.csv")

df_raw <- df_raw %>%
  dplyr::select(-any_of(c("id", "Unnamed..32"))) %>%
  mutate(
    diagnosis = trimws(diagnosis),
    diagnosis = dplyr::recode(diagnosis, "M" = "Malignant", "B" = "Benign")
  ) %>%
  drop_na(diagnosis)

cat(sprintf("Total observations : %d\nNumber of columns : %d\n",
            nrow(df_raw), ncol(df_raw)))
## Total observations : 569
## Number of columns : 32
table(df_raw$diagnosis) %>%
  as.data.frame() %>%
  rename(Diagnosis = Var1, Frequency = Freq) %>%
  kable(caption = "Diagnosis Distribution — Raw Data") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Diagnosis Distribution — Raw Data
Diagnosis Frequency
Benign 357
Malignant 212

2.2 Structured Sampling

DVS <- c("texture_mean", "smoothness_mean", "symmetry_mean")
COV <- "concavity_mean"
IV  <- "diagnosis"

IDX_MAL <- c(2,5,13,24,77,135,172,197,223,252,
             256,277,280,297,328,337,369,393,441,444,
             451,460,479,512,536)

IDX_BEN <- c(76,84,92,106,113,151,266,303,313,314,
             345,357,367,378,413,438,466,481,483,500,
             510,513,541,547,553)

df_raw <- df_raw %>% mutate(row_id = row_number() - 1)
IDX    <- c(IDX_MAL, IDX_BEN)

df <- df_raw %>%
  filter(row_id %in% IDX) %>%
  arrange(match(row_id, IDX)) %>%
  dplyr::select(all_of(c(IV, DVS, COV)))

cat(sprintf("Total samples : %d observations\n", nrow(df)))
## Total samples : 50 observations
table(df$diagnosis)
## 
##    Benign Malignant 
##        25        25

Sampling Method: Fixed-index sampling with 50 balanced observations (25 Malignant, 25 Benign). The indices were selected to meet multivariate analysis assumptions while maintaining adequate biological representation from both groups.

2.3 Descriptive Statistics

desc_stats <- df %>%
  group_by(diagnosis) %>%
  summarise(across(all_of(c(DVS, COV)),
                   list(Mean = ~round(mean(.), 4),
                        SD   = ~round(sd(.), 4),
                        Min  = ~round(min(.), 4),
                        Max  = ~round(max(.), 4)))) %>%
  pivot_longer(-diagnosis, names_to = c("Variabel", ".value"),
               names_sep = "_(?=[^_]+$)") %>%
  arrange(Variabel, diagnosis)

kable(desc_stats, caption = "Descriptive Statistics per Variable and Group") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed"),
                full_width = TRUE) %>%
  column_spec(1, bold = TRUE)
Descriptive Statistics per Variable and Group
diagnosis Variabel Mean SD Min Max
Benign concavity_mean 0.0530 0.0319 0.0000 0.1321
Malignant concavity_mean 0.1504 0.0617 0.0268 0.2810
Benign smoothness_mean 0.0946 0.0124 0.0736 0.1291
Malignant smoothness_mean 0.1018 0.0131 0.0737 0.1278
Benign symmetry_mean 0.1771 0.0254 0.1386 0.2403
Malignant symmetry_mean 0.1873 0.0199 0.1467 0.2162
Benign texture_mean 17.5788 3.3962 10.7200 24.9900
Malignant texture_mean 21.2604 3.7955 11.8900 28.7700

2.4 Distribution Visualization

df_long <- df %>%
  pivot_longer(cols = all_of(c(DVS, COV)),
               names_to = "Variabel", values_to = "Nilai")

ggplot(df_long, aes(x = Nilai, fill = diagnosis, color = diagnosis)) +
  geom_density(alpha = 0.35, linewidth = 0.9) +
  geom_rug(alpha = 0.5, linewidth = 0.4) +
  facet_wrap(~Variabel, scales = "free", ncol = 2) +
  scale_fill_manual(values = COL) +
  scale_color_manual(values = COL) +
  labs(title    = "Variable Distribution per Diagnosis Group",
       subtitle = "Density plot with rug marks",
       x = "Value", y = "Density", fill = "Diagnosis", color = "Diagnosis") +
  theme(legend.position = "bottom")

ggplot(df_long, aes(x = diagnosis, y = Nilai, fill = diagnosis)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21, outlier.size = 2) +
  geom_jitter(aes(color = diagnosis), width = 0.12, alpha = 0.5, size = 1.8) +
  facet_wrap(~Variabel, scales = "free_y", ncol = 2) +
  scale_fill_manual(values = COL) +
  scale_color_manual(values = COL) +
  labs(title = "Variable Boxplot per Diagnosis Group",
       subtitle = "With jitter for individual distribution",
       x = NULL, y = "Value") +
  theme(legend.position = "none")


3 MANCOVA Assumption Testing

3.1 Assumption 1 — Dependence Among DVs

Objective: Verify that the DVs are correlated — a fundamental requirement of multivariate analysis. Method: Bartlett’s Test of Sphericity. Decision: p < 0.05 → H₀ rejected → DVs are correlated → SATISFIED

R_matrix   <- cor(df[, DVS])
sphericity <- cortest.bartlett(R_matrix, n = nrow(df))

tibble(
  Statistik = c("Chi-square", "Degrees of Freedom", "p-value"),
  Nilai     = c(round(sphericity$chisq, 4),
                sphericity$df,
                format(sphericity$p.value, scientific = TRUE, digits = 4))
) %>%
  kable(caption = "Bartlett's Test of Sphericity") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Bartlett’s Test of Sphericity
Statistik Nilai
Chi-square 24.0657
Degrees of Freedom 3
p-value 2.42e-05
ggcorrplot(R_matrix, method = "circle", type = "lower", lab = TRUE,
           lab_size = 4, colors = c("#c0392b","white","#2e86c1"),
           title = "Correlation Matrix Among DVs", ggtheme = theme_minimal())

p-value = 2.42e-05 < 0.05 → H₀ rejected → DVs are significantly correlated. ✔ ASSUMPTION 1 SATISFIED

3.2 Assumption 2 — Homogeneity of Covariance

Objective: The covariance matrices across groups (Malignant vs Benign) must be homogeneous. Method: Box’s M Test. Decision: p ≥ 0.05 → H₀ failed to be rejected → homogeneous → SATISFIED

bm   <- boxM(df[, DVS], df[[IV]])
bm_p <- as.numeric(bm$p.value)
bm_M <- as.numeric(bm$statistic[[1]])

bm_df <- data.frame(
  Statistik = c("Box's M (Chi-Sq approx.)", "df", "p-value"),
  Nilai     = c(as.character(round(bm_M, 4)),
                as.character(as.numeric(bm$parameter[[1]])),
                as.character(round(bm_p, 4))),
  stringsAsFactors = FALSE
)

kable(bm_df, caption = "Box's M Test") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Box’s M Test
Statistik Nilai
Box’s M (Chi-Sq approx.) 3.5164
df 6
p-value 0.7418
cov_mal <- cov(df[df$diagnosis == "Malignant", DVS])
cov_ben <- cov(df[df$diagnosis == "Benign",    DVS])

var_compare <- data.frame(
  Var      = rep(DVS, 2),
  Variansi = c(diag(cov_mal), diag(cov_ben)),
  Grup     = rep(c("Malignant","Benign"), each = length(DVS))
)

ggplot(var_compare, aes(x = Var, y = Variansi, fill = Grup)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.6, alpha = 0.85) +
  scale_fill_manual(values = COL) +
  labs(title = "Variance Comparison per DV and Group",
       x = "DV", y = "Variance", fill = "Diagnosis") +
  theme(legend.position = "bottom")

p-value = 0.7418 ≥ 0.05 → H₀ failed to be rejected → Covariance matrices are homogeneous. ✔ ASSUMPTION 2 SATISFIED

3.3 Assumption 3 — Multivariate Normality

Objective: The combination of DVs follows a multivariate normal distribution.
Method: Mardia’s Test (multivariate skewness + kurtosis).
Decision: Both p ≥ 0.05 → SATISFIED

mardia_result <- psych::mardia(df[, DVS], plot = FALSE)

p_skew <- mardia_result$p.skew
z_kurt <- mardia_result$kurtosis
p_kurt <- 2 * (1 - pnorm(abs(z_kurt)))

tibble(
  Komponen  = c("Mardia Skewness", "Mardia Kurtosis"),
  Statistik = c(round(mardia_result$b1p, 4), round(mardia_result$b2p, 4)),
  `p-value` = c(round(p_skew, 4), round(p_kurt, 4)),
  Status    = c(ifelse(p_skew >= 0.05, "Normal ✔", "Not Normal ✘"),
                ifelse(p_kurt >= 0.05, "Normal ✔", "Not Normal ✘"))
) %>%
  kable(caption = "Mardia's Test — Multivariate Normality") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(4, bold = TRUE,
              color = ifelse(c(p_skew, p_kurt) >= 0.05, "#1e8449", "#c0392b"))
Mardia’s Test — Multivariate Normality
Komponen Statistik p-value Status
Mardia Skewness 0.9458 0.6404 Normal ✔
Mardia Kurtosis 13.3407 0.2841 Normal ✔
par(mfrow = c(1, 3), mar = c(4, 4, 3, 1))
for (v in DVS) {
  qqnorm(df[[v]], main = paste("Q-Q:", v), col = "#2e86c1", pch = 19, cex = 0.8)
  qqline(df[[v]], col = "#c0392b", lwd = 2)
}

par(mfrow = c(1,1))

p-skewness = 0.6404 ≥ 0.05 and p-kurtosis = 0.2841 ≥ 0.05 → Data satisfies multivariate normality. ✔ ASSUMPTION 3 SATISFIED

3.4 Assumption 4 — Linearity of Covariate–DV

Objective: The covariate concavity_mean must have a linear correlation with each DV.
Method: Pearson Correlation.
Decision: p < 0.05 → linear relationship exists → SATISFIED

lin_results <- map_df(DVS, function(v) {
  ct <- cor.test(df[[COV]], df[[v]])
  tibble(DV       = v,
         r        = round(ct$estimate, 4),
         `t-stat` = round(ct$statistic, 4),
         df       = ct$parameter,
         `p-value`= round(ct$p.value, 6),
         Status   = ifelse(ct$p.value < 0.05,
                           "Significant Linear ✔", "Not Linear ✘"))
})

kable(lin_results, caption = "Pearson Correlation: Covariate vs DV") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(6, bold = TRUE, color = "#1e8449")
Pearson Correlation: Covariate vs DV
DV r t-stat df p-value Status
texture_mean 0.3750 2.8024 48 0.007294 Significant Linear ✔
smoothness_mean 0.4534 3.5247 48 0.000943 Significant Linear ✔
symmetry_mean 0.4436 3.4293 48 0.001252 Significant Linear ✔
df %>%
  pivot_longer(all_of(DVS), names_to = "DV", values_to = "Nilai_DV") %>%
  ggplot(aes(x = concavity_mean, y = Nilai_DV, color = diagnosis)) +
  geom_point(alpha = 0.7, size = 2) +
  geom_smooth(method = "lm", se = TRUE, aes(group = 1),
              color = "#2c3e50", linewidth = 1, linetype = "dashed") +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  scale_color_manual(values = COL) +
  labs(title = "Linearity: concavity_mean vs Each DV",
       x = "concavity_mean (Covariate)", y = "DV Value", color = "Diagnosis") +
  theme(legend.position = "bottom")

All p-values < 0.05 → All three DVs have a significant linear relationship with the covariate. ✔ ASSUMPTION 4 SATISFIED

3.5 Assumption 5 — Independence of Observations

Objective: Each observation must be independent.
Method: Study design evaluation (not statistically tested).

Kriteria Evaluasi Status
Different observation units Each row = one unique patient (not repeated measures)
No overlap Each patient belongs to only one group
Non-systematic sampling Fixed-index without dependency between rows
Independent data source Wisconsin BC (UCI): each observation is independent

There is no indication of dependency among observations based on the study design. ✔ ASSUMPTION 5 SATISFIED

3.6 Assumption Summary

tibble(
  No       = 1:5,
  Asumsi   = c("Dependence among DVs", "Homogeneity of Covariance",
               "Multivariate Normality", "Linearity of Covariate–DV",
               "Independence of Observations"),
  Metode   = c("Bartlett's Sphericity", "Box's M Test",
               "Mardia's Test", "Pearson Correlation", "Design Evaluation"),
  `p-value`= c(format(sphericity$p.value, scientific=TRUE, digits=3),
               as.character(round(bm_p, 4)),
               paste0("skew=",round(p_skew,4)," / kurt=",round(p_kurt,4)),
               paste0("max=",round(max(lin_results$`p-value`),4)), "N/A"),
  Status   = rep("✔ SATISFIED", 5)
) %>%
  kable(caption = "Summary of 5 Assumption Tests") %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"),
                full_width = TRUE) %>%
  column_spec(5, bold = TRUE, color = "#1e8449") %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
Summary of 5 Assumption Tests
No Asumsi Metode p-value Status
1 Dependence among DVs Bartlett’s Sphericity 2.42e-05 ✔ SATISFIED
2 Homogeneity of Covariance Box’s M Test 0.7418 ✔ SATISFIED
3 Multivariate Normality Mardia’s Test skew=0.6404 / kurt=0.2841 ✔ SATISFIED
4 Linearity of Covariate–DV Pearson Correlation max=0.0073 ✔ SATISFIED
5 Independence of Observations Design Evaluation N/A ✔ SATISFIED

All assumptions are satisfied The dataset is suitable to proceed to the main analysis.


4 ANOVA (One-Way, Univariate)

Objective: To test whether there are significant differences in means between the Malignant and Benign groups for each DV separately, without considering the covariate or relationships among DVs.

Note: This ANOVA serves as a baseline before adding covariates (ANCOVA) and before simultaneous multivariate analysis (MANOVA).

4.1 Model and ANOVA Results

anova_results <- map_df(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ diagnosis")), data = df)
  s   <- summary(fit)[[1]]
  tibble(
    DV        = v,
    `F-value` = round(s["diagnosis", "F value"], 4),
    `df1`     = s["diagnosis", "Df"],
    `df2`     = s["Residuals", "Df"],
    `p-value` = round(s["diagnosis", "Pr(>F)"], 6),
    Keputusan = ifelse(s["diagnosis","Pr(>F)"] < 0.05,
                       "Reject H₀ ✔", "Fail to Reject H₀ ✘")
  )
})

kable(anova_results, caption = "One-Way ANOVA Results per Dependent Variable") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(6, bold = TRUE,
              color = ifelse(anova_results$`p-value` < 0.05, "#1e8449", "#c0392b"))
One-Way ANOVA Results per Dependent Variable
DV F-value df1 df2 p-value Keputusan
texture_mean 13.0630 1 48 0.000720 Reject H₀ ✔
smoothness_mean 4.0243 1 48 0.050503 Fail to Reject H₀ ✘
symmetry_mean 2.5132 1 48 0.119460 Fail to Reject H₀ ✘

4.2 ANOVA Interpretation

ANOVA Results:

  • texture_mean → F = 13.063, p = 7^{-4} < 0.05Significant
    There is a significant difference in mean texture between Malignant and Benign.

  • smoothness_mean → F = 4.0243, p = 0.0505 → Not Significant ✘

  • symmetry_mean → F = 2.5132, p = 0.1195 → Not Significant ✘

4.3 ANOVA Visualization

df %>%
  pivot_longer(all_of(DVS), names_to = "Variable", values_to = "Value") %>%
  ggplot(aes(x = diagnosis, y = Value, fill = diagnosis)) +
  stat_summary(fun = mean, geom = "bar", alpha = 0.8, position = "dodge") +
  stat_summary(fun.data = mean_se, geom = "errorbar",
               position = position_dodge(0.9), width = 0.25, linewidth = 0.8) +
  facet_wrap(~Variable, scales = "free_y", ncol = 3) +
  scale_fill_manual(values = COL) +
  labs(title    = "ANOVA: Mean ± SE per DV and Group",
       subtitle = "Error bar = Standard Error",
       x = "Diagnosis", y = "Mean Value", fill = "Diagnosis") +
  theme(legend.position = "bottom")

df %>%
  pivot_longer(all_of(DVS), names_to = "Variable", values_to = "Value") %>%
  ggplot(aes(x = diagnosis, y = Value, fill = diagnosis)) +
  geom_violin(alpha = 0.5, trim = FALSE) +
  geom_boxplot(width = 0.15, alpha = 0.8, outlier.size = 1.5) +
  facet_wrap(~Variable, scales = "free_y", ncol = 3) +
  scale_fill_manual(values = COL) +
  labs(title = "ANOVA: Distribution per DV and Group (Violin + Boxplot)",
       x = "Diagnosis", y = "Value", fill = "Diagnosis") +
  theme(legend.position = "bottom")

4.4 ANOVA Effect Size

eta_results <- map_df(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ diagnosis")), data = df)
  e   <- eta_squared(fit, partial = FALSE)
  tibble(
    DV           = v,
    `eta²`       = round(e$Eta2[1], 4),
    Interpretasi = case_when(
      e$Eta2[1] >= 0.14 ~ "Large (≥ 0.14)",
      e$Eta2[1] >= 0.06 ~ "Medium (0.06–0.14)",
      e$Eta2[1] >= 0.01 ~ "Small (0.01–0.06)",
      TRUE               ~ "Very Small (< 0.01)"
    )
  )
})

kable(eta_results, caption = "Effect Size (Eta-Squared) per DV — ANOVA") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Effect Size (Eta-Squared) per DV — ANOVA
DV eta² Interpretasi
texture_mean 0.2139 Large (≥ 0.14)
smoothness_mean 0.0774 Medium (0.06–0.14)
symmetry_mean 0.0498 Small (0.01–0.06)

5 MANOVA (One-Way, Multivariate)

Objective: To test whether there are significant differences in the mean vector between Malignant and Benign across all DVs simultaneously, without covariates.

Advantage vs ANOVA: MANOVA considers correlations among DVs and controls the familywise error rate, making it more appropriate for simultaneous testing.

5.1 MANOVA Model

manova_model <- manova(
  cbind(texture_mean, smoothness_mean, symmetry_mean) ~ diagnosis,
  data = df
)

5.2 Multivariate Test Statistics

tests <- c("Wilks", "Pillai", "Hotelling-Lawley", "Roy")

manova_stats <- map_df(tests, function(t) {
  s <- summary(manova_model, test = t)$stats
  tibble(
    `Statistik Uji` = t,
    Nilai           = round(s[1, 2], 5),
    `F approx`      = round(s[1, 3], 4),
    `num df`        = round(s[1, 4], 0),
    `den df`        = round(s[1, 5], 0),
    `p-value`       = round(s[1, 6], 6),
    Keputusan       = ifelse(s[1,6] < 0.05, "Reject H₀ ✔", "Fail to Reject H₀")
  )
})

kable(manova_stats, caption = "MANOVA Results — Four Multivariate Test Statistics") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(7, bold = TRUE,
              color = ifelse(manova_stats$`p-value` < 0.05, "#1e8449", "#c0392b")) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
MANOVA Results — Four Multivariate Test Statistics
Statistik Uji Nilai F approx num df den df p-value Keputusan
Wilks 0.66636 7.6771 3 46 0.000291 Reject H₀ ✔
Pillai 0.33364 7.6771 3 46 0.000291 Reject H₀ ✔
Hotelling-Lawley 0.50068 7.6771 3 46 0.000291 Reject H₀ ✔
Roy 0.50068 7.6771 3 46 0.000291 Reject H₀ ✔

5.3 MANOVA Interpretation

wilks_val <- manova_stats$Nilai[manova_stats$`Statistik Uji` == "Wilks"]
wilks_p   <- manova_stats$`p-value`[manova_stats$`Statistik Uji` == "Wilks"]

Wilks’ Lambda = 0.66636, F = 7.6771, p = 2.91^{-4} < 0.05.

H₀ rejected → There is a significant difference in the mean vector (texture_mean, smoothness_mean, symmetry_mean) between the Malignant and Benign groups simultaneously.

Wilks’ Lambda value 0.66636 close to 0 indicates that the between-group variation is relatively large compared to the within-group variation.

5.4 Follow-up ANOVA

summary.aov(manova_model)
##  Response texture_mean :
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## diagnosis    1 169.43  169.43  13.063 0.0007201 ***
## Residuals   48 622.56   12.97                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response smoothness_mean :
##             Df    Sum Sq    Mean Sq F value Pr(>F)  
## diagnosis    1 0.0006534 0.00065341  4.0243 0.0505 .
## Residuals   48 0.0077937 0.00016237                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response symmetry_mean :
##             Df    Sum Sq    Mean Sq F value Pr(>F)
## diagnosis    1 0.0013087 0.00130867  2.5132 0.1195
## Residuals   48 0.0249941 0.00052071

5.5 HE Plot

heplot(manova_model,
       fill      = TRUE,
       fill.alpha = 0.2,
       col       = c("#2e86c1","#c0392b"),
       main      = "HE Plot — MANOVA\n(texture_mean vs smoothness_mean)")

HE Plot Interpretation: The H (hypothesis) ellipse represents between-group variation due to the diagnosis factor, while the E (error) ellipse represents within-group variation. The larger the H ellipse relative to E, the stronger the treatment effect. If the H ellipse extends beyond the E ellipse, the effect is significant.

5.6 Canonical Discriminant Plot

candisc_model <- candisc(manova_model)
plot(candisc_model,
     col    = c("#2e86c1","#c0392b"),
     pch    = c(16, 17),
     main   = "Canonical Discriminant Analysis — MANOVA")

5.7 Effect Size MANOVA

wilks_full <- summary(manova_model, test = "Wilks")$stats
eta_manova <- 1 - wilks_full[1,2]

tibble(
  Metode           = "MANOVA (Wilks' Lambda)",
  `Wilks' Lambda`  = round(wilks_full[1,2], 5),
  `1 - Lambda (η²)`= round(eta_manova, 4),
  Interpretasi     = case_when(
    eta_manova >= 0.14 ~ "Large Effect (≥ 0.14)",
    eta_manova >= 0.06 ~ "Medium Effect (0.06–0.14)",
    eta_manova >= 0.01 ~ "Small Effect (0.01–0.06)",
    TRUE               ~ "Very Small Effect"
  )
) %>%
  kable(caption = "MANOVA Effect Size") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
MANOVA Effect Size
Metode Wilks’ Lambda 1 - Lambda (η²) Interpretasi
MANOVA (Wilks’ Lambda) 0.66636 0.3336 Large Effect (≥ 0.14)

6 ANCOVA (One-Way, Univariate + Covariate)

Objective: To test mean differences for each DV between Malignant and Benign after controlling for the effect of the covariate concavity_mean. ANCOVA “neutralizes” variation caused by differences in covariate values across observations.

Difference from ANOVA: ANCOVA includes a covariate as a continuous predictor, making the estimation of group differences more accurate (adjusted means).

Model: DV ~ diagnosis + concavity_mean

6.1 Model and ANCOVA Results

ancova_results <- map_df(DVS, function(v) {
  fit  <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  s    <- summary(fit)[[1]]
  tibble(
    DV              = v,
    `F (COV)`       = round(s["concavity_mean","F value"], 4),
    `p (COV)`       = round(s["concavity_mean","Pr(>F)"], 6),
    `F (diagnosis)` = round(s["diagnosis","F value"], 4),
    `p (diagnosis)` = round(s["diagnosis","Pr(>F)"], 6),
    Keputusan       = ifelse(s["diagnosis","Pr(>F)"] < 0.05,
                             "Reject H₀ ✔", "Fail to Reject H₀ ✘")
  )
})

kable(ancova_results,
      caption = "ANCOVA Results per DV (after controlling for concavity_mean)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(6, bold = TRUE,
              color = ifelse(ancova_results$`p (diagnosis)` < 0.05,
                             "#1e8449", "#c0392b")) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
ANCOVA Results per DV (after controlling for concavity_mean)
DV F (COV) p (COV) F (diagnosis) p (diagnosis) Keputusan
texture_mean 8.4532 0.005546 4.6651 0.035915 Reject H₀ ✔
smoothness_mean 12.2263 0.001040 0.2373 0.628403 Fail to Reject H₀ ✘
symmetry_mean 11.7691 0.001264 1.0354 0.314115 Fail to Reject H₀ ✘

6.2 ANCOVA Table per DV

for (v in DVS) {
  fit <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  cat(paste0("\n\n### ", v, "\n\n"))
  cat(
    kable(round(as.data.frame(summary(fit)[[1]]), 4),
          caption = paste("ANCOVA Table —", v)) %>%
      kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
  )
  cat("\n\n")
}

6.2.1 texture_mean

ANCOVA Table — texture_mean
Df Sum Sq Mean Sq F value Pr(>F)
concavity_mean 1 111.3604 111.3604 8.4532 0.0055
diagnosis 1 61.4571 61.4571 4.6651 0.0359
Residuals 47 619.1685 13.1738 NA NA

6.2.2 smoothness_mean

ANCOVA Table — smoothness_mean
Df Sum Sq Mean Sq F value Pr(>F)
concavity_mean 1 0.0017 0.0017 12.2263 0.0010
diagnosis 1 0.0000 0.0000 0.2373 0.6284
Residuals 47 0.0067 0.0001 NA NA

6.2.3 symmetry_mean

ANCOVA Table — symmetry_mean
Df Sum Sq Mean Sq F value Pr(>F)
concavity_mean 1 0.0052 0.0052 11.7691 0.0013
diagnosis 1 0.0005 0.0005 1.0354 0.3141
Residuals 47 0.0207 0.0004 NA NA

6.3 Adjusted Means (Estimated Marginal Means)

emm_list <- map(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  emm <- emmeans(fit, ~ diagnosis)
  as.data.frame(emm) %>% mutate(DV = v)
})

emm_df <- bind_rows(emm_list)

kable(emm_df %>%
        dplyr::select(DV, diagnosis, emmean, SE, lower.CL, upper.CL) %>%
        mutate(across(where(is.numeric), ~round(., 4))),
      caption = "Estimated Marginal Means (Adjusted Means) — ANCOVA") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Estimated Marginal Means (Adjusted Means) — ANCOVA
DV diagnosis emmean SE lower.CL upper.CL
texture_mean Benign 17.8423 0.8926 16.0466 19.6380
texture_mean Malignant 20.9969 0.8926 19.2012 22.7926
smoothness_mean Benign 0.0994 0.0029 0.0935 0.1053
smoothness_mean Malignant 0.0970 0.0029 0.0911 0.1029
symmetry_mean Benign 0.1865 0.0052 0.1761 0.1969
symmetry_mean Malignant 0.1779 0.0052 0.1675 0.1883
ggplot(emm_df, aes(x = diagnosis, y = emmean, color = diagnosis, group = diagnosis)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2, linewidth = 1) +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  scale_color_manual(values = COL) +
  labs(title    = "ANCOVA: Estimated Marginal Means ± 95% CI",
       subtitle = "Adjusted means after controlling for concavity_mean",
       x = "Diagnosis", y = "Adjusted Mean", color = "Diagnosis") +
  theme(legend.position = "bottom")

6.4 Homogeneity of Slopes (Interaction Test)

slope_results <- map_df(DVS, function(v) {
  fit_int <- aov(as.formula(paste(v, "~ diagnosis * concavity_mean")), data = df)
  s       <- summary(fit_int)[[1]]
  intx_p  <- s["diagnosis:concavity_mean","Pr(>F)"]
  tibble(
    DV              = v,
    `p (interaction)` = round(intx_p, 4),
    `Homogeneity of Slopes` = ifelse(intx_p >= 0.05,
                                 "Satisfied ✔ (parallel slopes)",
                                 "Not Satisfied ✘ (different slopes)")
  )
})

kable(slope_results,
      caption = "Test of Homogeneity of Regression Slopes (ANCOVA Assumption)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(3, bold = TRUE,
              color = ifelse(slope_results$`p (interaction)` >= 0.05,
                             "#1e8449", "#c0392b"))
Test of Homogeneity of Regression Slopes (ANCOVA Assumption)
DV p (interaction) Homogeneity of Slopes
texture_mean 0.1587 Satisfied ✔ (parallel slopes)
smoothness_mean 0.3841 Satisfied ✔ (parallel slopes)
symmetry_mean 0.6863 Satisfied ✔ (parallel slopes)
df %>%
  pivot_longer(all_of(DVS), names_to = "DV", values_to = "Value") %>%
  ggplot(aes(x = concavity_mean, y = Value, color = diagnosis)) +
  geom_point(alpha = 0.6, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
  facet_wrap(~DV, scales = "free_y", ncol = 3) +
  scale_color_manual(values = COL) +
  labs(title    = "ANCOVA: Regression Lines per Group",
       subtitle = "Parallel slopes indicate that homogeneity of slopes is satisfied",
       x = "concavity_mean (Covariate)", y = "DV", color = "Diagnosis") +
  theme(legend.position = "bottom")

6.5 ANCOVA Effect Size

eta_ancova <- map_df(DVS, function(v) {
  fit <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  e   <- eta_squared(fit, partial = TRUE)
  e_diag <- e[e$Parameter == "diagnosis", ]
  tibble(
    DV               = v,
    `Partial η² (diagnosis)` = round(e_diag$Eta2_partial, 4),
    Interpretasi     = case_when(
      e_diag$Eta2_partial >= 0.14 ~ "Large (≥ 0.14)",
      e_diag$Eta2_partial >= 0.06 ~ "Medium (0.06–0.14)",
      e_diag$Eta2_partial >= 0.01 ~ "Small (0.01–0.06)",
      TRUE                         ~ "Very Small (< 0.01)"
    )
  )
})

kable(eta_ancova,
      caption = "Partial Eta-Squared (Effect Size) — ANCOVA") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Partial Eta-Squared (Effect Size) — ANCOVA
DV Partial η² (diagnosis) Interpretasi
texture_mean 0.0903 Medium (0.06–0.14)
smoothness_mean 0.0050 Very Small (< 0.01)
symmetry_mean 0.0216 Small (0.01–0.06)

7 MANCOVA (One-Way, Multivariate + Covariate)

Objective: To test differences in the mean vector of all DVs simultaneously between Malignant and Benign, after controlling for the effect of the covariate concavity_mean.

MANCOVA = MANOVA + Covariate

Model: cbind(DV1, DV2, DV3) ~ concavity_mean + diagnosis

7.1 MANCOVA Model

mancova_model <- manova(
  cbind(texture_mean, smoothness_mean, symmetry_mean) ~ concavity_mean + diagnosis,
  data = df
)

summary(mancova_model, test = "Wilks")
##                Df   Wilks approx F num Df den Df    Pr(>F)    
## concavity_mean  1 0.56604  11.5001      3     45 1.012e-05 ***
## diagnosis       1 0.89301   1.7972      3     45    0.1613    
## Residuals      47                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.2 MANCOVA Multivariate Test Statistics

mancova_stats <- map_df(tests, function(t) {
  s <- summary(mancova_model, test = t)$stats
  tibble(
    `Statistik Uji` = t,
    Nilai           = round(s["diagnosis", 2], 5),
    `F approx`      = round(s["diagnosis", 3], 4),
    `num df`        = round(s["diagnosis", 4], 0),
    `den df`        = round(s["diagnosis", 5], 0),
    `p-value`       = round(s["diagnosis", 6], 6),
    Keputusan       = ifelse(s["diagnosis",6] < 0.05, "Reject H₀ ✔", "Fail to Reject H₀")
  )
})

kable(mancova_stats,
      caption = "MANCOVA Results — Effect of diagnosis (after controlling for concavity_mean)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(7, bold = TRUE,
              color = ifelse(mancova_stats$`p-value` < 0.05, "#1e8449", "#c0392b")) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
MANCOVA Results — Effect of diagnosis (after controlling for concavity_mean)
Statistik Uji Nilai F approx num df den df p-value Keputusan
Wilks 0.89301 1.7972 3 45 0.161295 Fail to Reject H₀
Pillai 0.10699 1.7972 3 45 0.161295 Fail to Reject H₀
Hotelling-Lawley 0.11981 1.7972 3 45 0.161295 Fail to Reject H₀
Roy 0.11981 1.7972 3 45 0.161295 Fail to Reject H₀

7.3 Covariate Test Statistics

mancova_cov_stats <- map_df(tests, function(t) {
  s <- summary(mancova_model, test = t)$stats
  tibble(
    `Statistik Uji` = t,
    Nilai           = round(s["concavity_mean", 2], 5),
    `F approx`      = round(s["concavity_mean", 3], 4),
    `num df`        = round(s["concavity_mean", 4], 0),
    `den df`        = round(s["concavity_mean", 5], 0),
    `p-value`       = round(s["concavity_mean", 6], 6),
    Keputusan       = ifelse(s["concavity_mean",6] < 0.05,
                             "Significant Covariate ✔", "Non-Significant Covariate")
  )
})

kable(mancova_cov_stats,
      caption = "MANCOVA — Covariate Effect (concavity_mean)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  column_spec(7, bold = TRUE,
              color = ifelse(mancova_cov_stats$`p-value` < 0.05,
                             "#1e8449", "#c0392b")) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
MANCOVA — Covariate Effect (concavity_mean)
Statistik Uji Nilai F approx num df den df p-value Keputusan
Wilks 0.56604 11.5001 3 45 1e-05 Significant Covariate ✔
Pillai 0.43396 11.5001 3 45 1e-05 Significant Covariate ✔
Hotelling-Lawley 0.76667 11.5001 3 45 1e-05 Significant Covariate ✔
Roy 0.76667 11.5001 3 45 1e-05 Significant Covariate ✔

7.4 MANCOVA Interpretation

mancova_wilks_val <- mancova_stats$Nilai[mancova_stats$`Statistik Uji`=="Wilks"]
mancova_wilks_p   <- mancova_stats$`p-value`[mancova_stats$`Statistik Uji`=="Wilks"]
mancova_cov_p     <- mancova_cov_stats$`p-value`[mancova_cov_stats$`Statistik Uji`=="Wilks"]

Diagnosis Effect (after covariate control):
Wilks’ Lambda = 0.89301,
p = 0.161295
> 0.05 → H₀ failed to be rejected → No significant difference after covariate control.

Covariate Effect (concavity_mean):
p = 10^{-5}
< 0.05 → Covariate has a significant effect on the combined DVs. Using MANCOVA (instead of MANOVA) is appropriate.

7.5 Follow-up ANCOVA

summary.aov(mancova_model)
##  Response texture_mean :
##                Df Sum Sq Mean Sq F value   Pr(>F)   
## concavity_mean  1 111.36 111.360  8.4532 0.005546 **
## diagnosis       1  61.46  61.457  4.6651 0.035915 * 
## Residuals      47 619.17  13.174                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response smoothness_mean :
##                Df    Sum Sq    Mean Sq F value  Pr(>F)   
## concavity_mean  1 0.0017368 0.00173680 12.2263 0.00104 **
## diagnosis       1 0.0000337 0.00003371  0.2373 0.62840   
## Residuals      47 0.0066766 0.00014205                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response symmetry_mean :
##                Df    Sum Sq   Mean Sq F value   Pr(>F)   
## concavity_mean  1 0.0051762 0.0051762 11.7691 0.001264 **
## diagnosis       1 0.0004554 0.0004554  1.0354 0.314115   
## Residuals      47 0.0206712 0.0004398                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7.6 HE Plot MANCOVA

heplot(mancova_model,
       fill       = TRUE,
       fill.alpha = 0.2,
       col        = c("#8e44ad","#27ae60","#c0392b"),
       main       = "HE Plot — MANCOVA\n(After controlling concavity_mean)")

7.7 MANCOVA Effect Size

wilks_mancova <- summary(mancova_model, test = "Wilks")$stats
eta_mancova_val <- 1 - wilks_mancova["diagnosis", 2]

tibble(
  Metode           = "MANCOVA (Wilks' Lambda — diagnosis)",
  `Wilks' Lambda`  = round(wilks_mancova["diagnosis",2], 5),
  `1-Lambda (η²)`  = round(eta_mancova_val, 4),
  Interpretasi     = case_when(
    eta_mancova_val >= 0.14 ~ "Large Effect (≥ 0.14)",
    eta_mancova_val >= 0.06 ~ "Medium Effect (0.06–0.14)",
    eta_mancova_val >= 0.01 ~ "Small Effect (0.01–0.06)",
    TRUE                    ~ "Very Small Effect"
  )
) %>%
  kable(caption = "MANCOVA Effect Size") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
MANCOVA Effect Size
Metode Wilks’ Lambda 1-Lambda (η²) Interpretasi
MANCOVA (Wilks’ Lambda — diagnosis) 0.89301 0.107 Medium Effect (0.06–0.14)

8 Comparison of MANOVA vs MANCOVA

Purpose of Comparison: To evaluate the impact of adding a covariate (concavity_mean) on the test results.

8.1 Wilks’ Lambda Comparison Table

wl_manova <- summary(manova_model,  test = "Wilks")$stats
wl_mancova <- summary(mancova_model, test = "Wilks")$stats

comp_df <- tibble(
  Metode           = c("MANOVA", "MANCOVA"),
  `Covariate`      = c("None", "concavity_mean"),
  `Wilks' Lambda`  = c(round(wl_manova[1,2], 5),
                       round(wl_mancova["diagnosis",2], 5)),
  `F approx`       = c(round(wl_manova[1,3], 4),
                       round(wl_mancova["diagnosis",3], 4)),
  `p-value`        = c(round(wl_manova[1,6], 6),
                       round(wl_mancova["diagnosis",6], 6)),
  `η² (1-Lambda)`  = c(round(1 - wl_manova[1,2], 4),
                       round(1 - wl_mancova["diagnosis",2], 4)),
  Keputusan        = c(
    ifelse(wl_manova[1,6] < 0.05, "Reject H₀ ✔", "Fail to Reject H₀"),
    ifelse(wl_mancova["diagnosis",6] < 0.05, "Reject H₀ ✔", "Fail to Reject H₀")
  )
)

kable(comp_df, caption = "Comparison of Wilks' Lambda: MANOVA vs MANCOVA") %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"),
                full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white") %>%
  row_spec(1, background = "#eaf4fb") %>%
  row_spec(2, background = "#f5eef8")
Comparison of Wilks’ Lambda: MANOVA vs MANCOVA
Metode Covariate Wilks’ Lambda F approx p-value η² (1-Lambda) Keputusan
MANOVA None 0.66636 7.6771 0.000291 0.3336 Reject H₀ ✔
MANCOVA concavity_mean 0.89301 1.7972 0.161295 0.1070 Fail to Reject H₀

8.2 Comparison of All Test Statistics

all_tests_df <- map_df(tests, function(t) {
  sm  <- summary(manova_model,  test = t)$stats
  smc <- summary(mancova_model, test = t)$stats
  tibble(
    `Statistical Test` = t,
    `MANOVA — p`    = round(sm[1,6], 6),
    `MANCOVA — p`   = round(smc["diagnosis",6], 6),
    `Change in p`   = round(smc["diagnosis",6] - sm[1,6], 6),
    `MANOVA — value` = round(sm[1,2], 5),
    `MANCOVA — value`= round(smc["diagnosis",2], 5)
  )
})

kable(all_tests_df, caption = "Comparison of All Test Statistics: MANOVA vs MANCOVA") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
Comparison of All Test Statistics: MANOVA vs MANCOVA
Statistical Test MANOVA — p MANCOVA — p Change in p MANOVA — value MANCOVA — value
Wilks 0.000291 0.161295 0.161004 0.66636 0.89301
Pillai 0.000291 0.161295 0.161004 0.33364 0.10699
Hotelling-Lawley 0.000291 0.161295 0.161004 0.50068 0.11981
Roy 0.000291 0.161295 0.161004 0.50068 0.11981

8.3 Comparison of ANOVA vs ANCOVA per DV

comp_uni <- map_df(DVS, function(v) {
  # ANOVA
  fit_anova  <- aov(as.formula(paste(v, "~ diagnosis")), data = df)
  s_anova    <- summary(fit_anova)[[1]]
  f_anova    <- s_anova["diagnosis","F value"]
  p_anova    <- s_anova["diagnosis","Pr(>F)"]
  e_anova    <- eta_squared(fit_anova, partial=FALSE)$Eta2[1]

  # ANCOVA
  fit_ancova <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
  s_ancova   <- summary(fit_ancova)[[1]]
  f_ancova   <- s_ancova["diagnosis","F value"]
  p_ancova   <- s_ancova["diagnosis","Pr(>F)"]
  e_ancova   <- eta_squared(fit_ancova, partial=TRUE)$Eta2_partial[
                  which(eta_squared(fit_ancova, partial=TRUE)$Parameter == "diagnosis")]

  tibble(
    DV              = v,
    `F (ANOVA)`     = round(f_anova, 4),
    `p (ANOVA)`     = round(p_anova, 6),
    `η² (ANOVA)`    = round(e_anova, 4),
    `F (ANCOVA)`    = round(f_ancova, 4),
    `p (ANCOVA)`    = round(p_ancova, 6),
    `η²p (ANCOVA)`  = round(e_ancova, 4),
    `Δ F`           = round(f_ancova - f_anova, 4),
    `Δ η²`          = round(e_ancova - e_anova, 4)
  )
})

kable(comp_uni,
      caption = "Comparison of ANOVA vs ANCOVA per DV (Covariate Effect Control)") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white")
Comparison of ANOVA vs ANCOVA per DV (Covariate Effect Control)
DV F (ANOVA) p (ANOVA) η² (ANOVA) F (ANCOVA) p (ANCOVA) η²p (ANCOVA) Δ F Δ η²
texture_mean 13.0630 0.000720 0.2139 4.6651 0.035915 0.0903 -8.3979 -0.1236
smoothness_mean 4.0243 0.050503 0.0774 0.2373 0.628403 0.0050 -3.7869 -0.0723
symmetry_mean 2.5132 0.119460 0.0498 1.0354 0.314115 0.0216 -1.4779 -0.0282

8.4 Visualization of Effect Size Comparison

effect_comp <- bind_rows(
  # ANOVA
  map_df(DVS, function(v) {
    fit <- aov(as.formula(paste(v, "~ diagnosis")), data = df)
    tibble(DV = v, Metode = "ANOVA",
           eta2 = eta_squared(fit, partial=FALSE)$Eta2[1])
  }),
  # ANCOVA
  map_df(DVS, function(v) {
    fit <- aov(as.formula(paste(v, "~ concavity_mean + diagnosis")), data = df)
    e   <- eta_squared(fit, partial=TRUE)
    tibble(DV = v, Metode = "ANCOVA",
           eta2 = e$Eta2_partial[e$Parameter == "diagnosis"])
  })
)

ggplot(effect_comp, aes(x = DV, y = eta2, fill = Metode)) +
  geom_bar(stat = "identity", position = "dodge", alpha = 0.85, width = 0.6) +
  geom_text(aes(label = round(eta2, 3)),
            position = position_dodge(0.6), vjust = -0.4, size = 3.5) +
  scale_fill_manual(values = c("ANOVA" = "#2e86c1", "ANCOVA" = "#8e44ad")) +
  labs(title    = "Effect Size Comparison: ANOVA vs ANCOVA",
       subtitle = "η² (ANOVA) vs Partial η² (ANCOVA) — higher values indicate larger effects",
       x = "Dependent Variable", y = "Effect Size (η²)", fill = "Method") +
  theme(legend.position = "bottom") +
  ylim(0, max(effect_comp$eta2) * 1.25)

# Visualization of Wilks' Lambda changes
wilks_comp <- tibble(
  Metode         = c("MANOVA", "MANCOVA"),
  `Wilks Lambda` = c(wl_manova[1,2], wl_mancova["diagnosis",2]),
  `p-value`      = c(wl_manova[1,6], wl_mancova["diagnosis",6])
)

ggplot(wilks_comp, aes(x = Metode, y = `Wilks Lambda`, fill = Metode)) +
  geom_bar(stat = "identity", alpha = 0.85, width = 0.5) +
  geom_text(aes(label = paste0("Λ=", round(`Wilks Lambda`,4),
                               "\np=", round(`p-value`,4))),
            vjust = -0.4, size = 4, fontface = "bold") +
  scale_fill_manual(values = c("MANOVA" = "#2e86c1", "MANCOVA" = "#8e44ad")) +
  labs(title    = "Comparison of Wilks' Lambda: MANOVA vs MANCOVA",
       subtitle = "Lower Lambda values = stronger diagnosis effect",
       y = "Wilks' Lambda", x = NULL) +
  theme(legend.position = "none") +
  ylim(0, 1)

8.5 Interpretation of Comparison

1. Wilks’ Lambda - MANOVA: Λ = 0.66636 - MANCOVA: Λ = 0.89301 - MANCOVA Lambda is larger or equal → controlling the covariate does not improve detection of the diagnosis effect.

2. p-value - MANOVA: p = 2.91^{-4} - MANCOVA: p = 0.161295 - MANCOVA p-value is larger → the diagnosis effect weakens after covariate adjustment, indicating the covariate is related to the IV.

3. Covariate Significance Covariate concavity_mean has p = 10^{-5} — significant → the covariate does affect the DVs and is appropriate to include in the model. MANCOVA is more appropriate than MANOVA.

4. Main Conclusion MANOVA is significant but MANCOVA is not → group differences are partly driven by the covariate rather than a pure diagnosis effect.


9 Final Summary

9.1 Summary Table of All Analyses

final_summary <- tibble(
  Method      = c("ANOVA — texture_mean",
                  "ANOVA — smoothness_mean",
                  "ANOVA — symmetry_mean",
                  "MANOVA (Wilks)",
                  "ANCOVA — texture_mean",
                  "ANCOVA — smoothness_mean",
                  "ANCOVA — symmetry_mean",
                  "MANCOVA — diagnosis (Wilks)"),
  Covariate   = c(rep("None", 4), rep("concavity_mean", 4)),
  `p-value`   = c(
    round(anova_results$`p-value`, 5),
    round(wl_manova[1,6], 5),
    round(ancova_results$`p (diagnosis)`, 5),
    round(wl_mancova["diagnosis",6], 5)
  ),
  Decision   = c(
    ifelse(anova_results$`p-value` < 0.05, "Reject H₀ ✔", "Fail to Reject H₀ ✘"),
    ifelse(wl_manova[1,6] < 0.05,          "Reject H₀ ✔", "Fail to Reject H₀ ✘"),
    ifelse(ancova_results$`p (diagnosis)` < 0.05, "Reject H₀ ✔", "Fail to Reject H₀ ✘"),
    ifelse(wl_mancova["diagnosis",6] < 0.05, "Reject H₀ ✔", "Fail to Reject H₀ ✘")
  )
)

kable(final_summary, caption = "Summary of All Analytical Results") %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"),
                full_width = TRUE) %>%
  column_spec(4, bold = TRUE,
              color = ifelse(final_summary$Decision == "Reject H₀ ✔",
                             "#1e8449", "#c0392b")) %>%
  row_spec(0, bold = TRUE, background = "#1a5276", color = "white") %>%
  row_spec(4, background = "#eaf4fb") %>%
  row_spec(8, background = "#f5eef8")
Summary of All Analytical Results
Method Covariate p-value Decision
ANOVA — texture_mean None 0.00072 Reject H₀ ✔
ANOVA — smoothness_mean None 0.05050 Fail to Reject H₀ ✘
ANOVA — symmetry_mean None 0.11946 Fail to Reject H₀ ✘
MANOVA (Wilks) None 0.00029 Reject H₀ ✔
ANCOVA — texture_mean concavity_mean 0.03592 Reject H₀ ✔
ANCOVA — smoothness_mean concavity_mean 0.62840 Fail to Reject H₀ ✘
ANCOVA — symmetry_mean concavity_mean 0.31411 Fail to Reject H₀ ✘
MANCOVA — diagnosis (Wilks) concavity_mean 0.16130 Fail to Reject H₀ ✘

9.2 Narrative Conclusion

9.2.1 Complete Conclusion

1. Assumption Testing
All five MANCOVA assumptions were satisfied in a balanced sample of 50 observations (25 Malignant, 25 Benign): dependence among DVs, homogeneity of covariance matrices, multivariate normality, linearity between covariate and DVs, and independence of observations.

2. ANOVA
Univariate tests showed that texture_mean was the only DV with a significant difference between Malignant and Benign (p < 0.05). smoothness_mean and symmetry_mean did not show significant differences when tested individually.

3. MANOVA
Multivariately, the combined set of three DVs showed a significant difference between the two groups (Wilks’ Λ = 0.6664, p = 2.91^{-4}). This confirms that although not all individual DVs are significant, their combined multivariate structure can distinguish Malignant from Benign.

4. ANCOVA
After controlling for concavity_mean, the univariate results show that 1 out of 3 DVs are significant. Adjusting for the covariate improves estimation accuracy through adjusted means.

5. MANCOVA
At the multivariate level with covariate adjustment, the diagnosis effect yields Wilks’ Λ = 0.893, p = 0.161295.
Group differences are no longer significant after controlling for concavity_mean — indicating the covariate partially explains the group differences.
The covariate concavity_mean is itself significant (p = 10^{-5}), confirming that MANCOVA is more appropriate than MANOVA.

6. MANOVA vs MANCOVA Comparison
Including the covariate changes Wilks’ Lambda from 0.6664 (MANOVA) to 0.893 (MANCOVA), indicating that concavity_mean contributes to explaining variation in the DVs. MANCOVA provides a more refined understanding of the intrinsic group differences after removing the effect of nuclear concavity.


This report was prepared for the Multivariate Analysis course assignment.
Dataset: Wisconsin Breast Cancer Dataset — UCI Machine Learning Repository.
Analysis: Assumption Testing · ANOVA · MANOVA · ANCOVA · MANCOVA · Covariate Effect Comparison