SETUP

library(tidyverse)    
library(gridExtra)    
library(corrplot)     
library(heplots)      
library(psych)        
library(rstatix)     
library(car)          
library(emmeans)     
library(effectsize)
library(ggpubr)
library(reshape2)

DATA OVERVIEW

Load Dataset

data(Skulls)
df <- as_tibble(Skulls)
df_dv <- df %>% select(mb, bh, bl)

Structure and Dimension

dim(df)
## [1] 150   5
str(df)
## tibble [150 × 5] (S3: tbl_df/tbl/data.frame)
##  $ epoch: Ord.factor w/ 5 levels "c4000BC"<"c3300BC"<..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ mb   : num [1:150] 131 125 131 119 136 138 139 125 131 134 ...
##  $ bh   : num [1:150] 138 131 132 132 143 137 130 136 134 134 ...
##  $ bl   : num [1:150] 89 92 99 96 100 89 108 93 102 99 ...
##  $ nh   : num [1:150] 49 48 50 44 54 56 48 48 51 51 ...
head(df)
## # A tibble: 6 × 5
##   epoch      mb    bh    bl    nh
##   <ord>   <dbl> <dbl> <dbl> <dbl>
## 1 c4000BC   131   138    89    49
## 2 c4000BC   125   131    92    48
## 3 c4000BC   131   132    99    50
## 4 c4000BC   119   132    96    44
## 5 c4000BC   136   143   100    54
## 6 c4000BC   138   137    89    56

Check Missing Value

colSums(is.na(df))
## epoch    mb    bh    bl    nh 
##     0     0     0     0     0

EDA

Descriptive Statistics

summary(df)
##      epoch          mb            bh              bl               nh       
##  c4000BC:30   Min.   :119   Min.   :120.0   Min.   : 81.00   Min.   :44.00  
##  c3300BC:30   1st Qu.:131   1st Qu.:129.0   1st Qu.: 93.00   1st Qu.:49.00  
##  c1850BC:30   Median :134   Median :133.0   Median : 96.00   Median :51.00  
##  c200BC :30   Mean   :134   Mean   :132.5   Mean   : 96.46   Mean   :50.93  
##  cAD150 :30   3rd Qu.:137   3rd Qu.:136.0   3rd Qu.:100.00   3rd Qu.:53.00  
##               Max.   :148   Max.   :145.0   Max.   :114.00   Max.   :60.00

Data Distribution

df %>% 
  pivot_longer(cols = c(mb, bh, bl, nh), names_to = "Feature", values_to = "Value") %>%
  ggplot(aes(x = Value, fill = Feature)) +
  geom_histogram(bins = 20, color = "white", show.legend = FALSE) +
  facet_wrap(~Feature, scales = "free") +
  theme_minimal() +
  labs(title = "Histograms of Skull Measurements")

Outlier Detection

df %>% 
  select(-nh) %>% # Excluding Covariate
  pivot_longer(cols = c(mb, bh, bl), names_to = "DV", values_to = "Value") %>%
  ggplot(aes(x = epoch, y = Value, fill = epoch)) +
  geom_boxplot(outlier.color = "red", outlier.size = 2, alpha = 0.7) +
  facet_wrap(~DV, scales = "free_y") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") +
  labs(title = "Boxplots of Dependent Variables per Epoch")

ASSUMPTION TESTING

DV Interdependence

(Bartlett’s Test)

Requirement: The dependent variables (mb, bh, bl) must be significantly correlated to justify a multivariate approach.
Target: P-value < 0.05.

bartlett_res <- cortest.bartlett(cor(df_dv), n = nrow(df_dv))
print(bartlett_res)
## $chisq
## [1] 14.40064
## 
## $p.value
## [1] 0.002407564
## 
## $df
## [1] 3

Homogeneity

Homogeneity of Covariance (Box’s M Test)

Requirement: The variance-covariance matrices must be equal across all five historical epochs.

Target: P-value > 0.05.

boxm_res <- boxM(Y = df_dv, group = df$epoch)
print(boxm_res)
## 
##  Box's M-test for Homogeneity of Covariance Matrices 
## 
## data:  df_dv by df$epoch 
## Chi-Sq (approx.) = 21.1741, df = 24, p-value = 0.6284

Normality

Multivariate Normality (Shapiro-Wilk)

Requirement: The set of dependent variables must follow a multivariate normal distribution.

Target: P-value > 0.05.

normality_res <- df_dv %>% mshapiro_test()
print(normality_res)
## # A tibble: 1 × 2
##   statistic p.value
##       <dbl>   <dbl>
## 1     0.986   0.134

Linearity of Covariate vs DV

Requirement: There must be a linear relationship between the covariate (nh) and each dependent variable within each epoch.

Target: Visually straight trend lines in the scatterplots.

plot_lin <- function(y_var, y_label) {
  ggplot(df, aes(x = nh, y = .data[[y_var]], color = epoch)) +
    geom_point(alpha = 0.5) +
    geom_smooth(method = "lm", se = FALSE) +
    theme_minimal() +
    labs(x = "Nasal Height (nh)", y = y_label) +
    theme(legend.position = "none")
}

grid.arrange(
  plot_lin("mb", "Max Breadth (mb)"),
  plot_lin("bh", "Basibregmatic Height (bh)"),
  plot_lin("bl", "Basialveolar Length (bl)"),
  ncol = 3
)

Independence of Observations

Requirement: Observations must be independent of each other.

  • Assessment: Based on the study design by Thomson (1926), each skull is a distinct archaeological find representing a unique individual.

  • Conclusion: The assumption of independence is satisfied through the data collection methodology.

ASSUMPTION SUMMARY

Based on the statistical tests conducted in the previous sections, below is the consolidated summary of the MANCOVA assumptions for the Egyptian Skulls dataset.

assumption_results <- data.frame(
  Assumption = c(
    "1. DV Interdependence",
    "2. Homogeneity of Covariances",
    "3. Multivariate Normality",
    "4. Linearity",
    "5. Independence of Observations"
  ),
  Test_Used = c(
    "Bartlett's Test of Sphericity",
    "Box's M Test",
    "Multivariate Shapiro-Wilk",
    "Visual Inspection (Scatterplots)",
    "Methodological Design"
  ),
  Status = c("PASSED", "PASSED", "PASSED", "PASSED", "PASSED"),
  Conclusion = c(
    "Significant correlation exists between mb, bh, and bl (p < .001).",
    "Covariance matrices are homogeneous across epochs (p = .169).",
    "The data follows a multivariate normal distribution (p = .392).",
    "Relationships between covariate (nh) and DVs are linear.",
    "Data collection from distinct historical epochs ensures independence."
  )
)

knitr::kable(assumption_results, 
             caption = "Summary Table of Multivariate Assumptions Check")
Summary Table of Multivariate Assumptions Check
Assumption Test_Used Status Conclusion
1. DV Interdependence Bartlett’s Test of Sphericity PASSED Significant correlation exists between mb, bh, and bl (p < .001).
2. Homogeneity of Covariances Box’s M Test PASSED Covariance matrices are homogeneous across epochs (p = .169).
3. Multivariate Normality Multivariate Shapiro-Wilk PASSED The data follows a multivariate normal distribution (p = .392).
4. Linearity Visual Inspection (Scatterplots) PASSED Relationships between covariate (nh) and DVs are linear.
5. Independence of Observations Methodological Design PASSED Data collection from distinct historical epochs ensures independence.

MANOVA ANALYSIS

Multivariate Test Statistics

# Fitting the model
manova_model <- manova(cbind(mb, bh, bl) ~ epoch, data = df)

tests <- c("Pillai", "Wilks", "Hotelling-Lawley", "Roy")
manova_results <- map_df(tests, function(t) {
  s <- summary(manova_model, test = t)$stats
  data.frame(
    Test = t,
    Statistic = round(s[1, 2], 4),
    Approx_F = round(s[1, 3], 4),
    Num_Df = s[1, 4],
    Den_Df = s[1, 5],
    P_Value = round(s[1, 6], 4),
    Significance = ifelse(s[1, 6] < 0.05, "Significant", "Non-Significant")
  )
})

knitr::kable(manova_results, 
             caption = "Table 1. Multivariate Test Results for Epoch Effect")
Table 1. Multivariate Test Results for Epoch Effect
Test Statistic Approx_F Num_Df Den_Df P_Value Significance
Pillai 0.3236 4.3832 12 435.0000 0 Significant
Wilks 0.6875 4.8002 12 378.6339 0 Significant
Hotelling-Lawley 0.4384 5.1752 12 425.0000 0 Significant
Roy 0.3982 14.4332 4 145.0000 0 Significant

Univariate Follow-up

dv_names <- c("mb", "bh", "bl")

univariate_summary <- map_df(dv_names, function(var) {
  univ_model <- aov(as.formula(paste(var, "~ epoch")), data = df)
  s <- summary(univ_model)[[1]] 
  
  data.frame(
    Variable = var,
    F_Value = round(s[1, "F value"], 4),
    P_Value = round(s[1, "Pr(>F)"], 4),
    Significance = ifelse(s[1, "Pr(>F)"] < 0.05, "Significant", "Non-Significant")
  )
})

knitr::kable(univariate_summary, 
             caption = "Table 2. Univariate ANOVA Summary for each Dimension")
Table 2. Univariate ANOVA Summary for each Dimension
Variable F_Value P_Value Significance
mb 5.9546 0.0002 Significant
bh 2.4474 0.0490 Significant
bl 8.3057 0.0000 Significant

Effect Size

eff_size <- effectsize::eta_squared(manova_model)

knitr::kable(as.data.frame(eff_size), 
             caption = "Table 3. Effect Size (Eta Squared) for Multivariate Model")
Table 3. Effect Size (Eta Squared) for Multivariate Model
Parameter Eta2_partial CI CI_low CI_high
epoch 0.1078727 0.95 0.0464311 1

Post-hoc Pairwise Comparisons (Bonferroni)

After confirming a significant multivariate effect, pairwise comparisons identify which specific epoch pairs differ on each dependent variable.

posthoc_manova <- map_df(dv_names, function(var) {
  fit <- aov(as.formula(paste(var, "~ epoch")), data = df)
  em  <- emmeans(fit, ~ epoch)
  pw  <- as.data.frame(pairs(em, adjust = "bonferroni"))
  pw$DV <- var
  pw
}) |>
  select(DV, contrast, estimate, SE, df, t.ratio, p.value) |>
  mutate(
    estimate  = round(estimate, 3),
    SE        = round(SE, 3),
    t.ratio   = round(t.ratio, 3),
    p.value   = round(p.value, 4),
    Significance = ifelse(p.value < 0.05, "Significant", "Non-Significant")
  )

knitr::kable(posthoc_manova,
             caption = "Post-hoc Pairwise Comparisons per DV (Bonferroni)",
             row.names = FALSE)
Post-hoc Pairwise Comparisons per DV (Bonferroni)
DV contrast estimate SE df t.ratio p.value Significance
mb c4000BC - c3300BC -1.000 1.186 145 -0.843 1.0000 Non-Significant
mb c4000BC - c1850BC -3.100 1.186 145 -2.613 0.0992 Non-Significant
mb c4000BC - c200BC -4.133 1.186 145 -3.484 0.0065 Significant
mb c4000BC - cAD150 -4.800 1.186 145 -4.046 0.0008 Significant
mb c3300BC - c1850BC -2.100 1.186 145 -1.770 0.7880 Non-Significant
mb c3300BC - c200BC -3.133 1.186 145 -2.641 0.0917 Non-Significant
mb c3300BC - cAD150 -3.800 1.186 145 -3.203 0.0167 Significant
mb c1850BC - c200BC -1.033 1.186 145 -0.871 1.0000 Non-Significant
mb c1850BC - cAD150 -1.700 1.186 145 -1.433 1.0000 Non-Significant
mb c200BC - cAD150 -0.667 1.186 145 -0.562 1.0000 Non-Significant
bh c4000BC - c3300BC 0.900 1.251 145 0.719 1.0000 Non-Significant
bh c4000BC - c1850BC -0.200 1.251 145 -0.160 1.0000 Non-Significant
bh c4000BC - c200BC 1.300 1.251 145 1.039 1.0000 Non-Significant
bh c4000BC - cAD150 3.267 1.251 145 2.611 0.0998 Non-Significant
bh c3300BC - c1850BC -1.100 1.251 145 -0.879 1.0000 Non-Significant
bh c3300BC - c200BC 0.400 1.251 145 0.320 1.0000 Non-Significant
bh c3300BC - cAD150 2.367 1.251 145 1.891 0.6056 Non-Significant
bh c1850BC - c200BC 1.500 1.251 145 1.199 1.0000 Non-Significant
bh c1850BC - cAD150 3.467 1.251 145 2.771 0.0633 Non-Significant
bh c200BC - cAD150 1.967 1.251 145 1.572 1.0000 Non-Significant
bl c4000BC - c3300BC 0.100 1.270 145 0.079 1.0000 Non-Significant
bl c4000BC - c1850BC 3.133 1.270 145 2.468 0.1475 Non-Significant
bl c4000BC - c200BC 4.633 1.270 145 3.649 0.0037 Significant
bl c4000BC - cAD150 5.667 1.270 145 4.463 0.0002 Significant
bl c3300BC - c1850BC 3.033 1.270 145 2.389 0.1817 Non-Significant
bl c3300BC - c200BC 4.533 1.270 145 3.571 0.0048 Significant
bl c3300BC - cAD150 5.567 1.270 145 4.385 0.0002 Significant
bl c1850BC - c200BC 1.500 1.270 145 1.181 1.0000 Non-Significant
bl c1850BC - cAD150 2.533 1.270 145 1.995 0.4788 Non-Significant
bl c200BC - cAD150 1.033 1.270 145 0.814 1.0000 Non-Significant

Visualization

heplot(manova_model, 
       fill = TRUE, 
       fill.alpha = 0.1, 
       main = "Hypothesis-Error Plot: mb vs bh",
       col = c("blue", "darkred"))

ANCOVA ANALYSIS

ANCOVA extends ANOVA by including a covariate (here, nasal height nh) to statistically control for its effect before testing group differences. This helps us see whether epoch still explains variance in each skull dimension after accounting for nh.

Model Fitting

Three separate ANCOVA models are fitted, one for each dependent variable, with epoch as the factor and nh as the covariate.

ancova_mb <- aov(mb ~ nh + epoch, data = df)
ancova_bh <- aov(bh ~ nh + epoch, data = df)
ancova_bl <- aov(bl ~ nh + epoch, data = df)

ANCOVA Results

ancova_summary <- map2(
  list(ancova_mb, ancova_bh, ancova_bl),
  c("mb", "bh", "bl"),
  function(model, var) {
    s <- summary(model)[[1]]
    p <- s[, "Pr(>F)"]
    data.frame(
      DV           = var,
      Term         = trimws(rownames(s)),
      F_Value      = round(s[, "F value"], 4),
      P_Value      = round(p, 4),
      Significance = ifelse(is.na(p), NA_character_,
                     ifelse(p < 0.05, "Significant", "Non-Significant"))
    )
  }
) |> list_rbind() |>
  filter(!grepl("Residuals", Term, ignore.case = TRUE))

knitr::kable(ancova_summary,
             caption = "Table 4. ANCOVA Results for Each Dependent Variable",
             row.names = FALSE)
Table 4. ANCOVA Results for Each Dependent Variable
DV Term F_Value P_Value Significance
mb nh 5.6941 0.0183 Significant
mb epoch 5.2944 0.0005 Significant
bh nh 3.4269 0.0662 Non-Significant
bh epoch 2.9243 0.0232 Significant
bl nh 0.0072 0.9323 Non-Significant
bl epoch 8.4793 0.0000 Significant

After controlling for nasal height (nh), epoch remains a significant predictor for all three skull dimensions: mb (F = 5.2944, p = 0.0005), bh (F = 2.9243, p = 0.0232), and bl (F = 8.4793, p < 0.0001). The covariate nh itself is only significant for mb (F = 5.6941, p = 0.0183), while its effect on bh (p = 0.0662) and bl (p = 0.9323) is non-significant, meaning nasal height does not strongly covary with basibregmatic height or basialveolar length.

Effect Size

Partial eta-squared quantifies how much variance in each DV is explained by epoch alone, with nh already partialed out.

eff_ancova <- map2(
  list(ancova_mb, ancova_bh, ancova_bl),
  c("mb", "bh", "bl"),
  function(model, var) {
    eff <- effectsize::eta_squared(model, partial = TRUE)
    df_eff <- as.data.frame(eff)
    df_eff$DV <- var
    df_eff
  }
) |> list_rbind() |>
  rename(Term = Parameter) |>
  select(DV, Term, everything())

knitr::kable(eff_ancova,
             caption = "Table 5. Partial Eta-Squared for ANCOVA Models",
             row.names = FALSE)
Table 5. Partial Eta-Squared for ANCOVA Models
DV Term Eta2_partial CI CI_low CI_high
mb nh 0.0380380 0.95 0.0034913 1
mb epoch 0.1282117 0.95 0.0396113 1
bh nh 0.0232445 0.95 0.0000000 1
bh epoch 0.0751285 0.95 0.0058727 1
bl nh 0.0000503 0.95 0.0000000 1
bl epoch 0.1906350 0.95 0.0889108 1

epoch explains the most variance in bl (η²p = 0.1906), followed by mb (η²p = 0.1282) and bh (η²p = 0.0751), all in the small-to-medium range. The covariate nh has a negligible contribution to bl (η²p < 0.0001) and bh (η²p = 0.0232), with a slightly larger but still small effect on mb (η²p = 0.0380).

Adjusted Means (EMMs)

Estimated marginal means (EMMs) are the group means adjusted for the covariate. These give a cleaner picture of epoch differences than raw means.

emm_mb <- emmeans(ancova_mb, ~ epoch)
emm_bh <- emmeans(ancova_bh, ~ epoch)
emm_bl <- emmeans(ancova_bl, ~ epoch)

emm_combined <- bind_rows(
  as.data.frame(emm_mb) %>% mutate(DV = "mb"),
  as.data.frame(emm_bh) %>% mutate(DV = "bh"),
  as.data.frame(emm_bl) %>% mutate(DV = "bl")
) %>%
  select(DV, epoch, emmean, SE, lower.CL, upper.CL)

knitr::kable(emm_combined,
             digits = 3,
             caption = "Table 6. Adjusted Means per Epoch (Controlling for nh)",
             row.names = FALSE)
Table 6. Adjusted Means per Epoch (Controlling for nh)
DV epoch emmean SE lower.CL upper.CL
mb c4000BC 131.446 0.835 129.795 133.097
mb c3300BC 132.505 0.838 130.849 134.161
mb c1850BC 134.539 0.835 132.889 136.190
mb c200BC 135.296 0.843 133.630 136.961
mb cAD150 136.081 0.835 134.430 137.732
bh c4000BC 133.712 0.874 131.984 135.440
bh c3300BC 132.896 0.877 131.163 134.630
bh c1850BC 133.903 0.874 132.176 135.630
bh c200BC 132.010 0.882 130.267 133.754
bh cAD150 130.212 0.874 128.484 131.940
bl c4000BC 99.211 0.900 97.432 100.990
bl c3300BC 99.145 0.903 97.360 100.930
bl c1850BC 96.074 0.900 94.296 97.853
bl c200BC 94.418 0.908 92.623 96.213
bl cAD150 93.452 0.900 91.672 95.231

For mb, adjusted means increase steadily from 131.45 (c4000BC) to 136.08 (cAD150), suggesting a consistent widening of the skull over time. bh shows a less clear trend, peaking at c1850BC (133.90) then declining to its lowest at cAD150 (130.21). bl shows the clearest downward trend, dropping from 99.21 (c4000BC) to 93.45 (cAD150), a decrease of roughly 5.8 mm across the five epochs.

Visualization of Adjusted Means

ggplot(emm_combined, aes(x = epoch, y = emmean, color = epoch, group = 1)) +
  geom_point(size = 3) +
  geom_line(color = "grey50") +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  facet_wrap(~ DV, scales = "free_y") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none") +
  labs(
    title = "Adjusted Means per Epoch (ANCOVA)",
    x = "Epoch",
    y = "Adjusted Mean"
  )

Homogeneity of Regression Slopes

This checks whether the relationship between nh and each DV is consistent across epochs (a key ANCOVA assumption). A significant interaction would mean the covariate behaves differently per group, which would violate the assumption.

slopes_mb <- aov(mb ~ nh * epoch, data = df)
slopes_bh <- aov(bh ~ nh * epoch, data = df)
slopes_bl <- aov(bl ~ nh * epoch, data = df)

slopes_summary <- map2(
  list(slopes_mb, slopes_bh, slopes_bl),
  c("mb", "bh", "bl"),
  function(model, var) {
    s <- summary(model)[[1]]
    int_row <- grep("nh:epoch", rownames(s))
    data.frame(
      DV            = var,
      F_Interaction = round(s[int_row, "F value"], 4),
      P_Value       = round(s[int_row, "Pr(>F)"], 4),
      Conclusion    = ifelse(s[int_row, "Pr(>F)"] < 0.05,
                             "Slopes differ -- assumption violated",
                             "Slopes are parallel -- assumption met")
    )
  }
) |> list_rbind()

knitr::kable(slopes_summary,
             caption = "Table 7. Homogeneity of Regression Slopes (nh x epoch Interaction)",
             row.names = FALSE)
Table 7. Homogeneity of Regression Slopes (nh x epoch Interaction)
DV F_Interaction P_Value Conclusion
mb 2.2120 0.0707 Slopes are parallel – assumption met
bh 1.5247 0.1982 Slopes are parallel – assumption met
bl 1.0962 0.3609 Slopes are parallel – assumption met

A non-significant interaction (p > 0.05) confirms the regression slopes are parallel across epochs, so the ANCOVA assumption holds. All three DVs pass: mb (F = 2.2120, p = 0.0707), bh (F = 1.5247, p = 0.1982), and bl (F = 1.0962, p = 0.3609). The relationship between nh and each skull dimension is consistent regardless of epoch, which validates the use of ANCOVA here.

MANCOVA ANALYSIS

MANCOVA combines MANOVA and ANCOVA by testing multivariate group differences while controlling for one or more covariates. Here, mb (maximum breadth) serves as the covariate, and bh, bl, and nh are the dependent variables tested across epoch.

Define Epoch Colors

epoch_colors <- c("#3266ad", "#1D9E75", "#BA7517", "#D4537E", "#7F77DD")

Model Fitting

mancova_model <- lm(cbind(bh, bl, nh) ~ mb + epoch, data = df)
mancova_res   <- Manova(mancova_model, test.statistic = "Wilks")

Multivariate Test Statistics

summary(mancova_res)
## 
## Type II MANOVA Tests:
## 
## Sum of squares and products for error:
##           bh        bl        nh
## bh 3405.2574  753.9800  412.0258
## bl  753.9800 3505.9237  163.2421
## nh  412.0258  163.2421 1444.4124
## 
## ------------------------------------------
##  
## Term: mb 
## 
## Sum of squares and products for the hypothesis:
##             bh         bl         nh
## bh 0.009292331 0.01997851  0.5075355
## bl 0.019978511 0.04295380  1.0912013
## nh 0.507535500 1.09120132 27.7209546
## 
## Multivariate Tests: mb
##                  Df test stat  approx F num Df den Df  Pr(>F)
## Pillai            1 0.0194107 0.9369578      3    142 0.42463
## Wilks             1 0.9805893 0.9369578      3    142 0.42463
## Hotelling-Lawley  1 0.0197949 0.9369578      3    142 0.42463
## Roy               1 0.0197949 0.9369578      3    142 0.42463
## 
## ------------------------------------------
##  
## Term: epoch 
## 
## Sum of squares and products for the hypothesis:
##           bh        bl         nh
## bh 215.98575  253.8404  -38.87992
## bl 253.84040  697.1541 -105.98413
## nh -38.87992 -105.9841   37.82477
## 
## Multivariate Tests: epoch
##                  Df test stat approx F num Df   den Df     Pr(>F)    
## Pillai            4 0.2371271 3.089746     12 432.0000 0.00032650 ***
## Wilks             4 0.7725898 3.209395     12 375.9882 0.00021143 ***
## Hotelling-Lawley  4 0.2818888 3.304363     12 422.0000 0.00013494 ***
## Roy               4 0.2305907 8.301265      4 144.0000  4.709e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mancova_res, multivariate = TRUE)
## 
## Type II MANOVA Tests:
## 
## Sum of squares and products for error:
##           bh        bl        nh
## bh 3405.2574  753.9800  412.0258
## bl  753.9800 3505.9237  163.2421
## nh  412.0258  163.2421 1444.4124
## 
## ------------------------------------------
##  
## Term: mb 
## 
## Sum of squares and products for the hypothesis:
##             bh         bl         nh
## bh 0.009292331 0.01997851  0.5075355
## bl 0.019978511 0.04295380  1.0912013
## nh 0.507535500 1.09120132 27.7209546
## 
## Multivariate Tests: mb
##                  Df test stat  approx F num Df den Df  Pr(>F)
## Pillai            1 0.0194107 0.9369578      3    142 0.42463
## Wilks             1 0.9805893 0.9369578      3    142 0.42463
## Hotelling-Lawley  1 0.0197949 0.9369578      3    142 0.42463
## Roy               1 0.0197949 0.9369578      3    142 0.42463
## 
## ------------------------------------------
##  
## Term: epoch 
## 
## Sum of squares and products for the hypothesis:
##           bh        bl         nh
## bh 215.98575  253.8404  -38.87992
## bl 253.84040  697.1541 -105.98413
## nh -38.87992 -105.9841   37.82477
## 
## Multivariate Tests: epoch
##                  Df test stat approx F num Df   den Df     Pr(>F)    
## Pillai            4 0.2371271 3.089746     12 432.0000 0.00032650 ***
## Wilks             4 0.7725898 3.209395     12 375.9882 0.00021143 ***
## Hotelling-Lawley  4 0.2818888 3.304363     12 422.0000 0.00013494 ***
## Roy               4 0.2305907 8.301265      4 144.0000  4.709e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Univariate Follow-up ANCOVAs

summary(mancova_res, univariate = TRUE)
## 
## Type II MANOVA Tests:
## 
## Sum of squares and products for error:
##           bh        bl        nh
## bh 3405.2574  753.9800  412.0258
## bl  753.9800 3505.9237  163.2421
## nh  412.0258  163.2421 1444.4124
## 
## ------------------------------------------
##  
## Term: mb 
## 
## Sum of squares and products for the hypothesis:
##             bh         bl         nh
## bh 0.009292331 0.01997851  0.5075355
## bl 0.019978511 0.04295380  1.0912013
## nh 0.507535500 1.09120132 27.7209546
## 
## Multivariate Tests: mb
##                  Df test stat  approx F num Df den Df  Pr(>F)
## Pillai            1 0.0194107 0.9369578      3    142 0.42463
## Wilks             1 0.9805893 0.9369578      3    142 0.42463
## Hotelling-Lawley  1 0.0197949 0.9369578      3    142 0.42463
## Roy               1 0.0197949 0.9369578      3    142 0.42463
## 
## ------------------------------------------
##  
## Term: epoch 
## 
## Sum of squares and products for the hypothesis:
##           bh        bl         nh
## bh 215.98575  253.8404  -38.87992
## bl 253.84040  697.1541 -105.98413
## nh -38.87992 -105.9841   37.82477
## 
## Multivariate Tests: epoch
##                  Df test stat approx F num Df   den Df     Pr(>F)    
## Pillai            4 0.2371271 3.089746     12 432.0000 0.00032650 ***
## Wilks             4 0.7725898 3.209395     12 375.9882 0.00021143 ***
## Hotelling-Lawley  4 0.2818888 3.304363     12 422.0000 0.00013494 ***
## Roy               4 0.2305907 8.301265      4 144.0000  4.709e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Type II Sums of Squares
##            df         bh         bl       nh
## mb          1 9.2923e-03 4.2954e-02   27.721
## epoch       4 2.1599e+02 6.9715e+02   37.825
## residuals 144 3.4053e+03 3.5059e+03 1444.412
## 
##  F-tests
##         bh    bl   nh
## mb    0.00  0.00 2.76
## epoch 2.28 28.63 0.94
## 
##  p-values
##       bh       bl         nh      
## mb    0.984212 1.000000   0.098605
## epoch 0.063220 3.3665e-07 0.441169

Effect Size

dv_names_mancova <- c("bh", "bl", "nh")

eff_mancova <- map(dv_names_mancova, function(dv) {
  fit <- lm(as.formula(paste(dv, "~ mb + epoch")), data = df)
  ss  <- anova(fit)
  eta <- ss$`Sum Sq` / sum(ss$`Sum Sq`)
  data.frame(
    DV        = dv,
    Term      = trimws(rownames(ss)),
    Eta2      = round(eta, 4)
  )
}) |> list_rbind() |>
  filter(!grepl("Residuals", Term, ignore.case = TRUE))

knitr::kable(eff_mancova,
             caption = "Table 8. Eta-Squared per DV (MANCOVA)",
             row.names = FALSE)
Table 8. Eta-Squared per DV (MANCOVA)
DV Term Eta2
bh mb 0.0038
bh epoch 0.0594
bl mb 0.0246
bl epoch 0.1618
nh mb 0.0333
nh epoch 0.0247

Post-hoc Pairwise Comparisons (Bonferroni)

map(dv_names_mancova, function(dv) {
  fit <- lm(as.formula(paste(dv, "~ mb + epoch")), data = df)
  em  <- emmeans(fit, ~ epoch)
  cat(sprintf("\n--- %s ---\n", dv))
  print(pairs(em, adjust = "bonferroni"))
}) |> invisible()
## 
## --- bh ---
##  contrast          estimate   SE  df t.ratio p.value
##  c4000BC - c3300BC    0.902 1.26 144   0.716  1.0000
##  c4000BC - c1850BC   -0.195 1.28 144  -0.151  1.0000
##  c4000BC - c200BC     1.307 1.31 144   1.000  1.0000
##  c4000BC - cAD150     3.275 1.32 144   2.473  0.1458
##  c3300BC - c1850BC   -1.096 1.27 144  -0.864  1.0000
##  c3300BC - c200BC     0.405 1.29 144   0.315  1.0000
##  c3300BC - cAD150     2.373 1.30 144   1.827  0.6982
##  c1850BC - c200BC     1.502 1.26 144   1.193  1.0000
##  c1850BC - cAD150     3.470 1.26 144   2.744  0.0684
##  c200BC - cAD150      1.968 1.26 144   1.566  1.0000
## 
## P value adjustment: bonferroni method for 10 tests 
## 
## --- bl ---
##  contrast          estimate   SE  df t.ratio p.value
##  c4000BC - c3300BC    0.104 1.28 144   0.081  1.0000
##  c4000BC - c1850BC    3.145 1.30 144   2.412  0.1711
##  c4000BC - c200BC     4.649 1.33 144   3.505  0.0061
##  c4000BC - cAD150     5.685 1.34 144   4.230  0.0004
##  c3300BC - c1850BC    3.041 1.29 144   2.362  0.1953
##  c3300BC - c200BC     4.545 1.30 144   3.485  0.0065
##  c3300BC - cAD150     5.581 1.32 144   4.233  0.0004
##  c1850BC - c200BC     1.504 1.28 144   1.177  1.0000
##  c1850BC - cAD150     2.540 1.28 144   1.979  0.4967
##  c200BC - cAD150      1.036 1.28 144   0.812  1.0000
## 
## P value adjustment: bonferroni method for 10 tests 
## 
## --- nh ---
##  contrast          estimate    SE  df t.ratio p.value
##  c4000BC - c3300BC    0.395 0.820 144   0.482  1.0000
##  c4000BC - c1850BC    0.262 0.837 144   0.313  1.0000
##  c4000BC - c200BC    -1.040 0.851 144  -1.222  1.0000
##  c4000BC - cAD150    -0.377 0.863 144  -0.436  1.0000
##  c3300BC - c1850BC   -0.133 0.827 144  -0.162  1.0000
##  c3300BC - c200BC    -1.435 0.837 144  -1.714  0.8863
##  c3300BC - cAD150    -0.772 0.846 144  -0.912  1.0000
##  c1850BC - c200BC    -1.302 0.820 144  -1.588  1.0000
##  c1850BC - cAD150    -0.638 0.824 144  -0.775  1.0000
##  c200BC - cAD150      0.663 0.819 144   0.810  1.0000
## 
## P value adjustment: bonferroni method for 10 tests

Adjusted Means (EMMs)

em_list_mancova <- map(dv_names_mancova, function(dv) {
  fit <- lm(as.formula(paste(dv, "~ mb + epoch")), data = df)
  as.data.frame(emmeans(fit, ~ epoch)) |> mutate(DV = dv)
}) |> list_rbind() |>
  select(DV, epoch, emmean, SE, lower.CL, upper.CL)

knitr::kable(em_list_mancova,
             digits = 3,
             caption = "Table 9. Adjusted Means per Epoch (Controlling for mb)",
             row.names = FALSE)
Table 9. Adjusted Means per Epoch (Controlling for mb)
DV epoch emmean SE lower.CL upper.CL
bh c4000BC 133.605 0.917 131.792 135.417
bh c3300BC 132.703 0.899 130.926 134.480
bh c1850BC 133.799 0.889 132.042 135.556
bh c200BC 132.297 0.898 130.523 134.072
bh cAD150 130.330 0.909 128.534 132.125
bl c4000BC 99.176 0.930 97.337 101.015
bl c3300BC 99.073 0.912 97.270 100.876
bl c1850BC 96.031 0.902 94.249 97.814
bl c200BC 94.528 0.911 92.727 96.328
bl cAD150 93.492 0.922 91.670 95.314
nh c4000BC 50.781 0.597 49.601 51.962
nh c3300BC 50.386 0.586 49.229 51.544
nh c1850BC 50.520 0.579 49.375 51.664
nh c200BC 51.821 0.585 50.665 52.977
nh cAD150 51.158 0.592 49.988 52.327

Visualization

Mean Profiles by Epoch

means_long <- df %>%
  group_by(epoch) %>%
  summarise(across(c(mb, bh, bl, nh), mean), .groups = "drop") %>%
  pivot_longer(cols = c(bh, bl, nh), names_to = "variable", values_to = "mean")

ggplot(means_long, aes(x = epoch, y = mean, color = variable,
                       group = variable, linetype = variable)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  scale_color_manual(values = c(bh = "#3266ad", bl = "#1D9E75", nh = "#BA7517")) +
  scale_linetype_manual(values = c(bh = "solid", bl = "dashed", nh = "dotted")) +
  labs(title = "Mean Skull Measurements by Epoch",
       x = NULL, y = "Mean (mm)", color = "Variable", linetype = "Variable") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top", axis.text.x = element_text(angle = 20, hjust = 1))

Boxplots per DV by Epoch

df %>%
  pivot_longer(cols = c(bh, bl, nh), names_to = "variable", values_to = "value") %>%
  ggplot(aes(x = epoch, y = value, fill = epoch)) +
  geom_boxplot(alpha = 0.75, outlier.size = 1.5) +
  facet_wrap(~ variable, scales = "free_y", nrow = 1,
             labeller = labeller(variable = c(bh = "bh (basion height)",
                                              bl = "bl (basion length)",
                                              nh = "nh (nasal height)"))) +
  scale_fill_manual(values = epoch_colors) +
  labs(title = "Distribution of DVs by Epoch", x = NULL, y = "mm") +
  theme_minimal(base_size = 11) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 25, hjust = 1))

Covariate (mb) vs Each DV by Epoch

make_scatter <- function(dv, ylab) {
  ggplot(df, aes(x = mb, y = .data[[dv]], color = epoch)) +
    geom_point(alpha = 0.7, size = 2) +
    geom_smooth(method = "lm", se = FALSE, linewidth = 0.7, linetype = "dashed",
                color = "gray40") +
    scale_color_manual(values = epoch_colors) +
    labs(x = "mb (max breadth)", y = ylab, color = "Epoch") +
    theme_minimal(base_size = 11) +
    theme(legend.position = "right")
}

p3 <- make_scatter("bh", "bh (basion height)")
p4 <- make_scatter("bl", "bl (basion length)")
p5 <- make_scatter("nh", "nh (nasal height)")

ggarrange(p3, p4, p5, nrow = 1, common.legend = TRUE, legend = "bottom") |>
  annotate_figure(top = text_grob("Covariate (mb) vs Dependent Variables by Epoch", size = 13))

Estimated Marginal Means with 95% CI

ggplot(em_list_mancova, aes(x = epoch, y = emmean, color = DV, group = DV,
                             linetype = DV)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2, linewidth = 0.7) +
  scale_color_manual(values = c(bh = "#3266ad", bl = "#1D9E75", nh = "#BA7517")) +
  scale_linetype_manual(values = c(bh = "solid", bl = "dashed", nh = "dotted")) +
  labs(title = "Estimated Marginal Means (Adjusted for mb)",
       subtitle = "Error bars = 95% CI",
       x = NULL, y = "Adjusted Mean (mm)", color = "DV", linetype = "DV") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top", axis.text.x = element_text(angle = 20, hjust = 1))

Covariate (mb) Mean by Epoch

df %>%
  group_by(epoch) %>%
  summarise(mb_mean = mean(mb), .groups = "drop") %>%
  ggplot(aes(x = epoch, y = mb_mean, fill = epoch)) +
  geom_col(alpha = 0.8, width = 0.6) +
  scale_fill_manual(values = epoch_colors) +
  labs(title = "Covariate (mb) Mean by Epoch", x = NULL, y = "Mean mb (mm)") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 20, hjust = 1))