SETUP

library(tidyverse)    
library(gridExtra)    
library(corrplot)     
library(heplots)      
library(psych)        
library(rstatix)     
library(car)          
library(emmeans)     
library(effectsize)
library(ggpubr)
library(reshape2)

DATA OVERVIEW

Load Dataset

data(Skulls)
df <- as_tibble(Skulls)
df_dv <- df %>% select(mb, bh, bl)

Structure and Dimension

dim(df)

## [1] 150   5

str(df)

## tibble [150 × 5] (S3: tbl_df/tbl/data.frame)
##  $ epoch: Ord.factor w/ 5 levels "c4000BC"<"c3300BC"<..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ mb   : num [1:150] 131 125 131 119 136 138 139 125 131 134 ...
##  $ bh   : num [1:150] 138 131 132 132 143 137 130 136 134 134 ...
##  $ bl   : num [1:150] 89 92 99 96 100 89 108 93 102 99 ...
##  $ nh   : num [1:150] 49 48 50 44 54 56 48 48 51 51 ...

head(df)

## # A tibble: 6 × 5
##   epoch      mb    bh    bl    nh
##   <ord>   <dbl> <dbl> <dbl> <dbl>
## 1 c4000BC   131   138    89    49
## 2 c4000BC   125   131    92    48
## 3 c4000BC   131   132    99    50
## 4 c4000BC   119   132    96    44
## 5 c4000BC   136   143   100    54
## 6 c4000BC   138   137    89    56

Check Missing Value

colSums(is.na(df))

## epoch    mb    bh    bl    nh 
##     0     0     0     0     0

EDA

Descriptive Statistics

summary(df)

##      epoch          mb            bh              bl               nh       
##  c4000BC:30   Min.   :119   Min.   :120.0   Min.   : 81.00   Min.   :44.00  
##  c3300BC:30   1st Qu.:131   1st Qu.:129.0   1st Qu.: 93.00   1st Qu.:49.00  
##  c1850BC:30   Median :134   Median :133.0   Median : 96.00   Median :51.00  
##  c200BC :30   Mean   :134   Mean   :132.5   Mean   : 96.46   Mean   :50.93  
##  cAD150 :30   3rd Qu.:137   3rd Qu.:136.0   3rd Qu.:100.00   3rd Qu.:53.00  
##               Max.   :148   Max.   :145.0   Max.   :114.00   Max.   :60.00

Data Distribution

df %>% 
  pivot_longer(cols = c(mb, bh, bl, nh), names_to = "Feature", values_to = "Value") %>%
  ggplot(aes(x = Value, fill = Feature)) +
  geom_histogram(bins = 20, color = "white", show.legend = FALSE) +
  facet_wrap(~Feature, scales = "free") +
  theme_minimal() +
  labs(title = "Histograms of Skull Measurements")

Outlier Detection

df %>% 
  select(-nh) %>% # Excluding Covariate
  pivot_longer(cols = c(mb, bh, bl), names_to = "DV", values_to = "Value") %>%
  ggplot(aes(x = epoch, y = Value, fill = epoch)) +
  geom_boxplot(outlier.color = "red", outlier.size = 2, alpha = 0.7) +
  facet_wrap(~DV, scales = "free_y") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") +
  labs(title = "Boxplots of Dependent Variables per Epoch")

ASSUMPTION TESTING

DV Interdependence

(Bartlett’s Test)

Requirement: The dependent variables (mb, bh, bl) must be significantly correlated to justify a multivariate approach.
Target: P-value < 0.05.

bartlett_res <- cortest.bartlett(cor(df_dv), n = nrow(df_dv))
print(bartlett_res)

## $chisq
## [1] 14.40064
## 
## $p.value
## [1] 0.002407564
## 
## $df
## [1] 3

Homogeneity

Homogeneity of Covariance (Box’s M Test)

Requirement: The variance-covariance matrices must be equal across all five historical epochs.

Target: P-value > 0.05.

boxm_res <- boxM(Y = df_dv, group = df$epoch)
print(boxm_res)

## 
##  Box's M-test for Homogeneity of Covariance Matrices 
## 
## data:  df_dv by df$epoch 
## Chi-Sq (approx.) = 21.1741, df = 24, p-value = 0.6284

Normality

Multivariate Normality (Shapiro-Wilk)

Requirement: The set of dependent variables must follow a multivariate normal distribution.

Target: P-value > 0.05.

normality_res <- df_dv %>% mshapiro_test()
print(normality_res)

## # A tibble: 1 × 2
##   statistic p.value
##       <dbl>   <dbl>
## 1     0.986   0.134

Linearity of Covariate vs DV

Requirement: There must be a linear relationship between the covariate (nh) and each dependent variable within each epoch.

Target: Visually straight trend lines in the scatterplots.

plot_lin <- function(y_var, y_label) {
  ggplot(df, aes(x = nh, y = .data[[y_var]], color = epoch)) +
    geom_point(alpha = 0.5) +
    geom_smooth(method = "lm", se = FALSE) +
    theme_minimal() +
    labs(x = "Nasal Height (nh)", y = y_label) +
    theme(legend.position = "none")
}

grid.arrange(
  plot_lin("mb", "Max Breadth (mb)"),
  plot_lin("bh", "Basibregmatic Height (bh)"),
  plot_lin("bl", "Basialveolar Length (bl)"),
  ncol = 3
)

Independence of Observations

Requirement: Observations must be independent of each other.

Assessment: Based on the study design by Thomson (1926), each skull is a distinct archaeological find representing a unique individual.
Conclusion: The assumption of independence is satisfied through the data collection methodology.

ASSUMPTION SUMMARY

Based on the statistical tests conducted in the previous sections, below is the consolidated summary of the MANCOVA assumptions for the Egyptian Skulls dataset.

assumption_results <- data.frame(
  Assumption = c(
    "1. DV Interdependence",
    "2. Homogeneity of Covariances",
    "3. Multivariate Normality",
    "4. Linearity",
    "5. Independence of Observations"
  ),
  Test_Used = c(
    "Bartlett's Test of Sphericity",
    "Box's M Test",
    "Multivariate Shapiro-Wilk",
    "Visual Inspection (Scatterplots)",
    "Methodological Design"
  ),
  Status = c("PASSED", "PASSED", "PASSED", "PASSED", "PASSED"),
  Conclusion = c(
    "Significant correlation exists between mb, bh, and bl (p < .001).",
    "Covariance matrices are homogeneous across epochs (p = .169).",
    "The data follows a multivariate normal distribution (p = .392).",
    "Relationships between covariate (nh) and DVs are linear.",
    "Data collection from distinct historical epochs ensures independence."
  )
)

knitr::kable(assumption_results, 
             caption = "Summary Table of Multivariate Assumptions Check")

Summary Table of Multivariate Assumptions Check
Assumption	Test_Used	Status	Conclusion
1. DV Interdependence	Bartlett’s Test of Sphericity	PASSED	Significant correlation exists between mb, bh, and bl (p < .001).
2. Homogeneity of Covariances	Box’s M Test	PASSED	Covariance matrices are homogeneous across epochs (p = .169).
3. Multivariate Normality	Multivariate Shapiro-Wilk	PASSED	The data follows a multivariate normal distribution (p = .392).
4. Linearity	Visual Inspection (Scatterplots)	PASSED	Relationships between covariate (nh) and DVs are linear.
5. Independence of Observations	Methodological Design	PASSED	Data collection from distinct historical epochs ensures independence.

MANOVA ANALYSIS

Multivariate Test Statistics

# Fitting the model
manova_model <- manova(cbind(mb, bh, bl) ~ epoch, data = df)

tests <- c("Pillai", "Wilks", "Hotelling-Lawley", "Roy")
manova_results <- map_df(tests, function(t) {
  s <- summary(manova_model, test = t)$stats
  data.frame(
    Test = t,
    Statistic = round(s[1, 2], 4),
    Approx_F = round(s[1, 3], 4),
    Num_Df = s[1, 4],
    Den_Df = s[1, 5],
    P_Value = round(s[1, 6], 4),
    Significance = ifelse(s[1, 6] < 0.05, "Significant", "Non-Significant")
  )
})

knitr::kable(manova_results, 
             caption = "Table 1. Multivariate Test Results for Epoch Effect")

Table 1. Multivariate Test Results for Epoch Effect
Test	Statistic	Approx_F	Num_Df	Den_Df	Significance
Pillai	0.3236	4.3832	12	435.0000	Significant
Wilks	0.6875	4.8002	12	378.6339	Significant
Hotelling-Lawley	0.4384	5.1752	12	425.0000	Significant
Roy	0.3982	14.4332	4	145.0000	Significant

Univariate Follow-up

dv_names <- c("mb", "bh", "bl")

univariate_summary <- map_df(dv_names, function(var) {
  univ_model <- aov(as.formula(paste(var, "~ epoch")), data = df)
  s <- summary(univ_model)[[1]] 
  
  data.frame(
    Variable = var,
    F_Value = round(s[1, "F value"], 4),
    P_Value = round(s[1, "Pr(>F)"], 4),
    Significance = ifelse(s[1, "Pr(>F)"] < 0.05, "Significant", "Non-Significant")
  )
})

knitr::kable(univariate_summary, 
             caption = "Table 2. Univariate ANOVA Summary for each Dimension")

Table 2. Univariate ANOVA Summary for each Dimension
Variable	F_Value	P_Value	Significance
mb	5.9546	0.0002	Significant
bh	2.4474	0.0490	Significant
bl	8.3057	0.0000	Significant

Effect Size

eff_size <- effectsize::eta_squared(manova_model)

knitr::kable(as.data.frame(eff_size), 
             caption = "Table 3. Effect Size (Eta Squared) for Multivariate Model")

Table 3. Effect Size (Eta Squared) for Multivariate Model
Parameter	Eta2_partial	CI	CI_low	CI_high
epoch	0.1078727	0.95	0.0464311	1

Post-hoc Pairwise Comparisons (Bonferroni)

After confirming a significant multivariate effect, pairwise comparisons identify which specific epoch pairs differ on each dependent variable.

posthoc_manova <- map_df(dv_names, function(var) {
  fit <- aov(as.formula(paste(var, "~ epoch")), data = df)
  em  <- emmeans(fit, ~ epoch)
  pw  <- as.data.frame(pairs(em, adjust = "bonferroni"))
  pw$DV <- var
  pw
}) |>
  select(DV, contrast, estimate, SE, df, t.ratio, p.value) |>
  mutate(
    estimate  = round(estimate, 3),
    SE        = round(SE, 3),
    t.ratio   = round(t.ratio, 3),
    p.value   = round(p.value, 4),
    Significance = ifelse(p.value < 0.05, "Significant", "Non-Significant")
  )

knitr::kable(posthoc_manova,
             caption = "Post-hoc Pairwise Comparisons per DV (Bonferroni)",
             row.names = FALSE)

Post-hoc Pairwise Comparisons per DV (Bonferroni)
DV	contrast	estimate	SE	df	t.ratio	p.value	Significance
mb	c4000BC - c3300BC	-1.000	1.186	145	-0.843	1.0000	Non-Significant
mb	c4000BC - c1850BC	-3.100	1.186	145	-2.613	0.0992	Non-Significant
mb	c4000BC - c200BC	-4.133	1.186	145	-3.484	0.0065	Significant
mb	c4000BC - cAD150	-4.800	1.186	145	-4.046	0.0008	Significant
mb	c3300BC - c1850BC	-2.100	1.186	145	-1.770	0.7880	Non-Significant
mb	c3300BC - c200BC	-3.133	1.186	145	-2.641	0.0917	Non-Significant
mb	c3300BC - cAD150	-3.800	1.186	145	-3.203	0.0167	Significant
mb	c1850BC - c200BC	-1.033	1.186	145	-0.871	1.0000	Non-Significant
mb	c1850BC - cAD150	-1.700	1.186	145	-1.433	1.0000	Non-Significant
mb	c200BC - cAD150	-0.667	1.186	145	-0.562	1.0000	Non-Significant
bh	c4000BC - c3300BC	0.900	1.251	145	0.719	1.0000	Non-Significant
bh	c4000BC - c1850BC	-0.200	1.251	145	-0.160	1.0000	Non-Significant
bh	c4000BC - c200BC	1.300	1.251	145	1.039	1.0000	Non-Significant
bh	c4000BC - cAD150	3.267	1.251	145	2.611	0.0998	Non-Significant
bh	c3300BC - c1850BC	-1.100	1.251	145	-0.879	1.0000	Non-Significant
bh	c3300BC - c200BC	0.400	1.251	145	0.320	1.0000	Non-Significant
bh	c3300BC - cAD150	2.367	1.251	145	1.891	0.6056	Non-Significant
bh	c1850BC - c200BC	1.500	1.251	145	1.199	1.0000	Non-Significant
bh	c1850BC - cAD150	3.467	1.251	145	2.771	0.0633	Non-Significant
bh	c200BC - cAD150	1.967	1.251	145	1.572	1.0000	Non-Significant
bl	c4000BC - c3300BC	0.100	1.270	145	0.079	1.0000	Non-Significant
bl	c4000BC - c1850BC	3.133	1.270	145	2.468	0.1475	Non-Significant
bl	c4000BC - c200BC	4.633	1.270	145	3.649	0.0037	Significant
bl	c4000BC - cAD150	5.667	1.270	145	4.463	0.0002	Significant
bl	c3300BC - c1850BC	3.033	1.270	145	2.389	0.1817	Non-Significant
bl	c3300BC - c200BC	4.533	1.270	145	3.571	0.0048	Significant
bl	c3300BC - cAD150	5.567	1.270	145	4.385	0.0002	Significant
bl	c1850BC - c200BC	1.500	1.270	145	1.181	1.0000	Non-Significant
bl	c1850BC - cAD150	2.533	1.270	145	1.995	0.4788	Non-Significant
bl	c200BC - cAD150	1.033	1.270	145	0.814	1.0000	Non-Significant

Visualization

heplot(manova_model, 
       fill = TRUE, 
       fill.alpha = 0.1, 
       main = "Hypothesis-Error Plot: mb vs bh",
       col = c("blue", "darkred"))

ANCOVA ANALYSIS

ANCOVA extends ANOVA by including a covariate (here, nasal height nh) to statistically control for its effect before testing group differences. This helps us see whether epoch still explains variance in each skull dimension after accounting for nh.

Model Fitting

Three separate ANCOVA models are fitted, one for each dependent variable, with epoch as the factor and nh as the covariate.

ancova_mb <- aov(mb ~ nh + epoch, data = df)
ancova_bh <- aov(bh ~ nh + epoch, data = df)
ancova_bl <- aov(bl ~ nh + epoch, data = df)

ANCOVA Results

ancova_summary <- map2(
  list(ancova_mb, ancova_bh, ancova_bl),
  c("mb", "bh", "bl"),
  function(model, var) {
    s <- summary(model)[[1]]
    p <- s[, "Pr(>F)"]
    data.frame(
      DV           = var,
      Term         = trimws(rownames(s)),
      F_Value      = round(s[, "F value"], 4),
      P_Value      = round(p, 4),
      Significance = ifelse(is.na(p), NA_character_,
                     ifelse(p < 0.05, "Significant", "Non-Significant"))
    )
  }
) |> list_rbind() |>
  filter(!grepl("Residuals", Term, ignore.case = TRUE))

knitr::kable(ancova_summary,
             caption = "Table 4. ANCOVA Results for Each Dependent Variable",
             row.names = FALSE)

Table 4. ANCOVA Results for Each Dependent Variable
DV	Term	F_Value	P_Value	Significance
mb	nh	5.6941	0.0183	Significant
mb	epoch	5.2944	0.0005	Significant
bh	nh	3.4269	0.0662	Non-Significant
bh	epoch	2.9243	0.0232	Significant
bl	nh	0.0072	0.9323	Non-Significant
bl	epoch	8.4793	0.0000	Significant

After controlling for nasal height (nh), epoch remains a significant predictor for all three skull dimensions: mb (F = 5.2944, p = 0.0005), bh (F = 2.9243, p = 0.0232), and bl (F = 8.4793, p < 0.0001). The covariate nh itself is only significant for mb (F = 5.6941, p = 0.0183), while its effect on bh (p = 0.0662) and bl (p = 0.9323) is non-significant, meaning nasal height does not strongly covary with basibregmatic height or basialveolar length.

Effect Size

Partial eta-squared quantifies how much variance in each DV is explained by epoch alone, with nh already partialed out.

eff_ancova <- map2(
  list(ancova_mb, ancova_bh, ancova_bl),
  c("mb", "bh", "bl"),
  function(model, var) {
    eff <- effectsize::eta_squared(model, partial = TRUE)
    df_eff <- as.data.frame(eff)
    df_eff$DV <- var
    df_eff
  }
) |> list_rbind() |>
  rename(Term = Parameter) |>
  select(DV, Term, everything())

knitr::kable(eff_ancova,
             caption = "Table 5. Partial Eta-Squared for ANCOVA Models",
             row.names = FALSE)

Table 5. Partial Eta-Squared for ANCOVA Models
DV	Term	Eta2_partial	CI	CI_low	CI_high
mb	nh	0.0380380	0.95	0.0034913	1
mb	epoch	0.1282117	0.95	0.0396113	1
bh	nh	0.0232445	0.95	0.0000000	1
bh	epoch	0.0751285	0.95	0.0058727	1
bl	nh	0.0000503	0.95	0.0000000	1
bl	epoch	0.1906350	0.95	0.0889108	1

epoch explains the most variance in bl (η²p = 0.1906), followed by mb (η²p = 0.1282) and bh (η²p = 0.0751), all in the small-to-medium range. The covariate nh has a negligible contribution to bl (η²p < 0.0001) and bh (η²p = 0.0232), with a slightly larger but still small effect on mb (η²p = 0.0380).

Adjusted Means (EMMs)

Estimated marginal means (EMMs) are the group means adjusted for the covariate. These give a cleaner picture of epoch differences than raw means.

emm_mb <- emmeans(ancova_mb, ~ epoch)
emm_bh <- emmeans(ancova_bh, ~ epoch)
emm_bl <- emmeans(ancova_bl, ~ epoch)

emm_combined <- bind_rows(
  as.data.frame(emm_mb) %>% mutate(DV = "mb"),
  as.data.frame(emm_bh) %>% mutate(DV = "bh"),
  as.data.frame(emm_bl) %>% mutate(DV = "bl")
) %>%
  select(DV, epoch, emmean, SE, lower.CL, upper.CL)

knitr::kable(emm_combined,
             digits = 3,
             caption = "Table 6. Adjusted Means per Epoch (Controlling for nh)",
             row.names = FALSE)

Table 6. Adjusted Means per Epoch (Controlling for nh)
DV	epoch	emmean	SE	lower.CL	upper.CL
mb	c4000BC	131.446	0.835	129.795	133.097
mb	c3300BC	132.505	0.838	130.849	134.161
mb	c1850BC	134.539	0.835	132.889	136.190
mb	c200BC	135.296	0.843	133.630	136.961
mb	cAD150	136.081	0.835	134.430	137.732
bh	c4000BC	133.712	0.874	131.984	135.440
bh	c3300BC	132.896	0.877	131.163	134.630
bh	c1850BC	133.903	0.874	132.176	135.630
bh	c200BC	132.010	0.882	130.267	133.754
bh	cAD150	130.212	0.874	128.484	131.940
bl	c4000BC	99.211	0.900	97.432	100.990
bl	c3300BC	99.145	0.903	97.360	100.930
bl	c1850BC	96.074	0.900	94.296	97.853
bl	c200BC	94.418	0.908	92.623	96.213
bl	cAD150	93.452	0.900	91.672	95.231

For mb, adjusted means increase steadily from 131.45 (c4000BC) to 136.08 (cAD150), suggesting a consistent widening of the skull over time. bh shows a less clear trend, peaking at c1850BC (133.90) then declining to its lowest at cAD150 (130.21). bl shows the clearest downward trend, dropping from 99.21 (c4000BC) to 93.45 (cAD150), a decrease of roughly 5.8 mm across the five epochs.

Visualization of Adjusted Means

ggplot(emm_combined, aes(x = epoch, y = emmean, color = epoch, group = 1)) +
  geom_point(size = 3) +
  geom_line(color = "grey50") +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  facet_wrap(~ DV, scales = "free_y") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none") +
  labs(
    title = "Adjusted Means per Epoch (ANCOVA)",
    x = "Epoch",
    y = "Adjusted Mean"
  )

Homogeneity of Regression Slopes

This checks whether the relationship between nh and each DV is consistent across epochs (a key ANCOVA assumption). A significant interaction would mean the covariate behaves differently per group, which would violate the assumption.

slopes_mb <- aov(mb ~ nh * epoch, data = df)
slopes_bh <- aov(bh ~ nh * epoch, data = df)
slopes_bl <- aov(bl ~ nh * epoch, data = df)

slopes_summary <- map2(
  list(slopes_mb, slopes_bh, slopes_bl),
  c("mb", "bh", "bl"),
  function(model, var) {
    s <- summary(model)[[1]]
    int_row <- grep("nh:epoch", rownames(s))
    data.frame(
      DV            = var,
      F_Interaction = round(s[int_row, "F value"], 4),
      P_Value       = round(s[int_row, "Pr(>F)"], 4),
      Conclusion    = ifelse(s[int_row, "Pr(>F)"] < 0.05,
                             "Slopes differ -- assumption violated",
                             "Slopes are parallel -- assumption met")
    )
  }
) |> list_rbind()

knitr::kable(slopes_summary,
             caption = "Table 7. Homogeneity of Regression Slopes (nh x epoch Interaction)",
             row.names = FALSE)

Table 7. Homogeneity of Regression Slopes (nh x epoch Interaction)
DV	F_Interaction	P_Value	Conclusion
mb	2.2120	0.0707	Slopes are parallel – assumption met
bh	1.5247	0.1982	Slopes are parallel – assumption met
bl	1.0962	0.3609	Slopes are parallel – assumption met

A non-significant interaction (p > 0.05) confirms the regression slopes are parallel across epochs, so the ANCOVA assumption holds. All three DVs pass: mb (F = 2.2120, p = 0.0707), bh (F = 1.5247, p = 0.1982), and bl (F = 1.0962, p = 0.3609). The relationship between nh and each skull dimension is consistent regardless of epoch, which validates the use of ANCOVA here.

MANCOVA ANALYSIS

MANCOVA combines MANOVA and ANCOVA by testing multivariate group differences while controlling for one or more covariates. Here, mb (maximum breadth) serves as the covariate, and bh, bl, and nh are the dependent variables tested across epoch.

Define Epoch Colors

epoch_colors <- c("#3266ad", "#1D9E75", "#BA7517", "#D4537E", "#7F77DD")

Model Fitting

mancova_model <- lm(cbind(bh, bl, nh) ~ mb + epoch, data = df)
mancova_res   <- Manova(mancova_model, test.statistic = "Wilks")

Multivariate Test Statistics

summary(mancova_res)

## 
## Type II MANOVA Tests:
## 
## Sum of squares and products for error:
##           bh        bl        nh
## bh 3405.2574  753.9800  412.0258
## bl  753.9800 3505.9237  163.2421
## nh  412.0258  163.2421 1444.4124
## 
## ------------------------------------------
##  
## Term: mb 
## 
## Sum of squares and products for the hypothesis:
##             bh         bl         nh
## bh 0.009292331 0.01997851  0.5075355
## bl 0.019978511 0.04295380  1.0912013
## nh 0.507535500 1.09120132 27.7209546
## 
## Multivariate Tests: mb
##                  Df test stat  approx F num Df den Df  Pr(>F)
## Pillai            1 0.0194107 0.9369578      3    142 0.42463
## Wilks             1 0.9805893 0.9369578      3    142 0.42463
## Hotelling-Lawley  1 0.0197949 0.9369578      3    142 0.42463
## Roy               1 0.0197949 0.9369578      3    142 0.42463
## 
## ------------------------------------------
##  
## Term: epoch 
## 
## Sum of squares and products for the hypothesis:
##           bh        bl         nh
## bh 215.98575  253.8404  -38.87992
## bl 253.84040  697.1541 -105.98413
## nh -38.87992 -105.9841   37.82477
## 
## Multivariate Tests: epoch
##                  Df test stat approx F num Df   den Df     Pr(>F)    
## Pillai            4 0.2371271 3.089746     12 432.0000 0.00032650 ***
## Wilks             4 0.7725898 3.209395     12 375.9882 0.00021143 ***
## Hotelling-Lawley  4 0.2818888 3.304363     12 422.0000 0.00013494 ***
## Roy               4 0.2305907 8.301265      4 144.0000  4.709e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(mancova_res, multivariate = TRUE)

## 
## Type II MANOVA Tests:
## 
## Sum of squares and products for error:
##           bh        bl        nh
## bh 3405.2574  753.9800  412.0258
## bl  753.9800 3505.9237  163.2421
## nh  412.0258  163.2421 1444.4124
## 
## ------------------------------------------
##  
## Term: mb 
## 
## Sum of squares and products for the hypothesis:
##             bh         bl         nh
## bh 0.009292331 0.01997851  0.5075355
## bl 0.019978511 0.04295380  1.0912013
## nh 0.507535500 1.09120132 27.7209546
## 
## Multivariate Tests: mb
##                  Df test stat  approx F num Df den Df  Pr(>F)
## Pillai            1 0.0194107 0.9369578      3    142 0.42463
## Wilks             1 0.9805893 0.9369578      3    142 0.42463
## Hotelling-Lawley  1 0.0197949 0.9369578      3    142 0.42463
## Roy               1 0.0197949 0.9369578      3    142 0.42463
## 
## ------------------------------------------
##  
## Term: epoch 
## 
## Sum of squares and products for the hypothesis:
##           bh        bl         nh
## bh 215.98575  253.8404  -38.87992
## bl 253.84040  697.1541 -105.98413
## nh -38.87992 -105.9841   37.82477
## 
## Multivariate Tests: epoch
##                  Df test stat approx F num Df   den Df     Pr(>F)    
## Pillai            4 0.2371271 3.089746     12 432.0000 0.00032650 ***
## Wilks             4 0.7725898 3.209395     12 375.9882 0.00021143 ***
## Hotelling-Lawley  4 0.2818888 3.304363     12 422.0000 0.00013494 ***
## Roy               4 0.2305907 8.301265      4 144.0000  4.709e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Univariate Follow-up ANCOVAs

summary(mancova_res, univariate = TRUE)

## 
## Type II MANOVA Tests:
## 
## Sum of squares and products for error:
##           bh        bl        nh
## bh 3405.2574  753.9800  412.0258
## bl  753.9800 3505.9237  163.2421
## nh  412.0258  163.2421 1444.4124
## 
## ------------------------------------------
##  
## Term: mb 
## 
## Sum of squares and products for the hypothesis:
##             bh         bl         nh
## bh 0.009292331 0.01997851  0.5075355
## bl 0.019978511 0.04295380  1.0912013
## nh 0.507535500 1.09120132 27.7209546
## 
## Multivariate Tests: mb
##                  Df test stat  approx F num Df den Df  Pr(>F)
## Pillai            1 0.0194107 0.9369578      3    142 0.42463
## Wilks             1 0.9805893 0.9369578      3    142 0.42463
## Hotelling-Lawley  1 0.0197949 0.9369578      3    142 0.42463
## Roy               1 0.0197949 0.9369578      3    142 0.42463
## 
## ------------------------------------------
##  
## Term: epoch 
## 
## Sum of squares and products for the hypothesis:
##           bh        bl         nh
## bh 215.98575  253.8404  -38.87992
## bl 253.84040  697.1541 -105.98413
## nh -38.87992 -105.9841   37.82477
## 
## Multivariate Tests: epoch
##                  Df test stat approx F num Df   den Df     Pr(>F)    
## Pillai            4 0.2371271 3.089746     12 432.0000 0.00032650 ***
## Wilks             4 0.7725898 3.209395     12 375.9882 0.00021143 ***
## Hotelling-Lawley  4 0.2818888 3.304363     12 422.0000 0.00013494 ***
## Roy               4 0.2305907 8.301265      4 144.0000  4.709e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Type II Sums of Squares
##            df         bh         bl       nh
## mb          1 9.2923e-03 4.2954e-02   27.721
## epoch       4 2.1599e+02 6.9715e+02   37.825
## residuals 144 3.4053e+03 3.5059e+03 1444.412
## 
##  F-tests
##         bh    bl   nh
## mb    0.00  0.00 2.76
## epoch 2.28 28.63 0.94
## 
##  p-values
##       bh       bl         nh      
## mb    0.984212 1.000000   0.098605
## epoch 0.063220 3.3665e-07 0.441169

Effect Size

dv_names_mancova <- c("bh", "bl", "nh")

eff_mancova <- map(dv_names_mancova, function(dv) {
  fit <- lm(as.formula(paste(dv, "~ mb + epoch")), data = df)
  ss  <- anova(fit)
  eta <- ss$`Sum Sq` / sum(ss$`Sum Sq`)
  data.frame(
    DV        = dv,
    Term      = trimws(rownames(ss)),
    Eta2      = round(eta, 4)
  )
}) |> list_rbind() |>
  filter(!grepl("Residuals", Term, ignore.case = TRUE))

knitr::kable(eff_mancova,
             caption = "Table 8. Eta-Squared per DV (MANCOVA)",
             row.names = FALSE)

Table 8. Eta-Squared per DV (MANCOVA)
DV	Term	Eta2
bh	mb	0.0038
bh	epoch	0.0594
bl	mb	0.0246
bl	epoch	0.1618
nh	mb	0.0333
nh	epoch	0.0247

Post-hoc Pairwise Comparisons (Bonferroni)

map(dv_names_mancova, function(dv) {
  fit <- lm(as.formula(paste(dv, "~ mb + epoch")), data = df)
  em  <- emmeans(fit, ~ epoch)
  cat(sprintf("\n--- %s ---\n", dv))
  print(pairs(em, adjust = "bonferroni"))
}) |> invisible()

## 
## --- bh ---
##  contrast          estimate   SE  df t.ratio p.value
##  c4000BC - c3300BC    0.902 1.26 144   0.716  1.0000
##  c4000BC - c1850BC   -0.195 1.28 144  -0.151  1.0000
##  c4000BC - c200BC     1.307 1.31 144   1.000  1.0000
##  c4000BC - cAD150     3.275 1.32 144   2.473  0.1458
##  c3300BC - c1850BC   -1.096 1.27 144  -0.864  1.0000
##  c3300BC - c200BC     0.405 1.29 144   0.315  1.0000
##  c3300BC - cAD150     2.373 1.30 144   1.827  0.6982
##  c1850BC - c200BC     1.502 1.26 144   1.193  1.0000
##  c1850BC - cAD150     3.470 1.26 144   2.744  0.0684
##  c200BC - cAD150      1.968 1.26 144   1.566  1.0000
## 
## P value adjustment: bonferroni method for 10 tests 
## 
## --- bl ---
##  contrast          estimate   SE  df t.ratio p.value
##  c4000BC - c3300BC    0.104 1.28 144   0.081  1.0000
##  c4000BC - c1850BC    3.145 1.30 144   2.412  0.1711
##  c4000BC - c200BC     4.649 1.33 144   3.505  0.0061
##  c4000BC - cAD150     5.685 1.34 144   4.230  0.0004
##  c3300BC - c1850BC    3.041 1.29 144   2.362  0.1953
##  c3300BC - c200BC     4.545 1.30 144   3.485  0.0065
##  c3300BC - cAD150     5.581 1.32 144   4.233  0.0004
##  c1850BC - c200BC     1.504 1.28 144   1.177  1.0000
##  c1850BC - cAD150     2.540 1.28 144   1.979  0.4967
##  c200BC - cAD150      1.036 1.28 144   0.812  1.0000
## 
## P value adjustment: bonferroni method for 10 tests 
## 
## --- nh ---
##  contrast          estimate    SE  df t.ratio p.value
##  c4000BC - c3300BC    0.395 0.820 144   0.482  1.0000
##  c4000BC - c1850BC    0.262 0.837 144   0.313  1.0000
##  c4000BC - c200BC    -1.040 0.851 144  -1.222  1.0000
##  c4000BC - cAD150    -0.377 0.863 144  -0.436  1.0000
##  c3300BC - c1850BC   -0.133 0.827 144  -0.162  1.0000
##  c3300BC - c200BC    -1.435 0.837 144  -1.714  0.8863
##  c3300BC - cAD150    -0.772 0.846 144  -0.912  1.0000
##  c1850BC - c200BC    -1.302 0.820 144  -1.588  1.0000
##  c1850BC - cAD150    -0.638 0.824 144  -0.775  1.0000
##  c200BC - cAD150      0.663 0.819 144   0.810  1.0000
## 
## P value adjustment: bonferroni method for 10 tests

Adjusted Means (EMMs)

em_list_mancova <- map(dv_names_mancova, function(dv) {
  fit <- lm(as.formula(paste(dv, "~ mb + epoch")), data = df)
  as.data.frame(emmeans(fit, ~ epoch)) |> mutate(DV = dv)
}) |> list_rbind() |>
  select(DV, epoch, emmean, SE, lower.CL, upper.CL)

knitr::kable(em_list_mancova,
             digits = 3,
             caption = "Table 9. Adjusted Means per Epoch (Controlling for mb)",
             row.names = FALSE)

Table 9. Adjusted Means per Epoch (Controlling for mb)
DV	epoch	emmean	SE	lower.CL	upper.CL
bh	c4000BC	133.605	0.917	131.792	135.417
bh	c3300BC	132.703	0.899	130.926	134.480
bh	c1850BC	133.799	0.889	132.042	135.556
bh	c200BC	132.297	0.898	130.523	134.072
bh	cAD150	130.330	0.909	128.534	132.125
bl	c4000BC	99.176	0.930	97.337	101.015
bl	c3300BC	99.073	0.912	97.270	100.876
bl	c1850BC	96.031	0.902	94.249	97.814
bl	c200BC	94.528	0.911	92.727	96.328
bl	cAD150	93.492	0.922	91.670	95.314
nh	c4000BC	50.781	0.597	49.601	51.962
nh	c3300BC	50.386	0.586	49.229	51.544
nh	c1850BC	50.520	0.579	49.375	51.664
nh	c200BC	51.821	0.585	50.665	52.977
nh	cAD150	51.158	0.592	49.988	52.327

Visualization

Mean Profiles by Epoch

means_long <- df %>%
  group_by(epoch) %>%
  summarise(across(c(mb, bh, bl, nh), mean), .groups = "drop") %>%
  pivot_longer(cols = c(bh, bl, nh), names_to = "variable", values_to = "mean")

ggplot(means_long, aes(x = epoch, y = mean, color = variable,
                       group = variable, linetype = variable)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  scale_color_manual(values = c(bh = "#3266ad", bl = "#1D9E75", nh = "#BA7517")) +
  scale_linetype_manual(values = c(bh = "solid", bl = "dashed", nh = "dotted")) +
  labs(title = "Mean Skull Measurements by Epoch",
       x = NULL, y = "Mean (mm)", color = "Variable", linetype = "Variable") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top", axis.text.x = element_text(angle = 20, hjust = 1))

Boxplots per DV by Epoch

df %>%
  pivot_longer(cols = c(bh, bl, nh), names_to = "variable", values_to = "value") %>%
  ggplot(aes(x = epoch, y = value, fill = epoch)) +
  geom_boxplot(alpha = 0.75, outlier.size = 1.5) +
  facet_wrap(~ variable, scales = "free_y", nrow = 1,
             labeller = labeller(variable = c(bh = "bh (basion height)",
                                              bl = "bl (basion length)",
                                              nh = "nh (nasal height)"))) +
  scale_fill_manual(values = epoch_colors) +
  labs(title = "Distribution of DVs by Epoch", x = NULL, y = "mm") +
  theme_minimal(base_size = 11) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 25, hjust = 1))

Covariate (mb) vs Each DV by Epoch

make_scatter <- function(dv, ylab) {
  ggplot(df, aes(x = mb, y = .data[[dv]], color = epoch)) +
    geom_point(alpha = 0.7, size = 2) +
    geom_smooth(method = "lm", se = FALSE, linewidth = 0.7, linetype = "dashed",
                color = "gray40") +
    scale_color_manual(values = epoch_colors) +
    labs(x = "mb (max breadth)", y = ylab, color = "Epoch") +
    theme_minimal(base_size = 11) +
    theme(legend.position = "right")
}

p3 <- make_scatter("bh", "bh (basion height)")
p4 <- make_scatter("bl", "bl (basion length)")
p5 <- make_scatter("nh", "nh (nasal height)")

ggarrange(p3, p4, p5, nrow = 1, common.legend = TRUE, legend = "bottom") |>
  annotate_figure(top = text_grob("Covariate (mb) vs Dependent Variables by Epoch", size = 13))

Estimated Marginal Means with 95% CI

ggplot(em_list_mancova, aes(x = epoch, y = emmean, color = DV, group = DV,
                             linetype = DV)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2, linewidth = 0.7) +
  scale_color_manual(values = c(bh = "#3266ad", bl = "#1D9E75", nh = "#BA7517")) +
  scale_linetype_manual(values = c(bh = "solid", bl = "dashed", nh = "dotted")) +
  labs(title = "Estimated Marginal Means (Adjusted for mb)",
       subtitle = "Error bars = 95% CI",
       x = NULL, y = "Adjusted Mean (mm)", color = "DV", linetype = "DV") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top", axis.text.x = element_text(angle = 20, hjust = 1))

Covariate (mb) Mean by Epoch

df %>%
  group_by(epoch) %>%
  summarise(mb_mean = mean(mb), .groups = "drop") %>%
  ggplot(aes(x = epoch, y = mb_mean, fill = epoch)) +
  geom_col(alpha = 0.8, width = 0.6) +
  scale_fill_manual(values = epoch_colors) +
  labs(title = "Covariate (mb) Mean by Epoch", x = NULL, y = "Mean mb (mm)") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 20, hjust = 1))

Multivariate Analysis: MANOVA, ANCOVA, MANCOVA ANALYSIS

Mylovia Mahesa Ayu (24031554001) Syafira Najema Putri Anisa (24031554013) Rizki Piji Fathoni (24031554029)

April 2026

SETUP

DATA OVERVIEW

Load Dataset

Structure and Dimension

Check Missing Value

EDA

Descriptive Statistics

Data Distribution

Outlier Detection

ASSUMPTION TESTING

DV Interdependence

Homogeneity

Normality

Linearity of Covariate vs DV

Independence of Observations

ASSUMPTION SUMMARY

MANOVA ANALYSIS

Multivariate Test Statistics

Univariate Follow-up

Effect Size

Post-hoc Pairwise Comparisons (Bonferroni)

Visualization

ANCOVA ANALYSIS

Model Fitting

ANCOVA Results

Effect Size

Adjusted Means (EMMs)

Visualization of Adjusted Means

Homogeneity of Regression Slopes

MANCOVA ANALYSIS

Define Epoch Colors

Model Fitting

Multivariate Test Statistics

Univariate Follow-up ANCOVAs

Effect Size

Post-hoc Pairwise Comparisons (Bonferroni)

Adjusted Means (EMMs)

Visualization

Mean Profiles by Epoch

Boxplots per DV by Epoch

Covariate (mb) vs Each DV by Epoch

Estimated Marginal Means with 95% CI

Covariate (mb) Mean by Epoch

Mylovia Mahesa Ayu (24031554001)
Syafira Najema Putri Anisa (24031554013)
Rizki Piji Fathoni (24031554029)