MODUL 2 : Analisis MANOVA, ANCOVA, dan MANCOVA

Install Package

required_packages <- c("Stat2Data", "psych", "heplots", "MVN", "ggplot2")
new_packages <- required_packages[!(required_packages %in% installed.packages()[,"Package"])]
if (length(new_packages)) install.packages(new_packages)

library(Stat2Data)
library(psych)
library(heplots)

## Warning: package 'heplots' was built under R version 4.5.3

## Loading required package: broom

library(MVN)

## Warning: package 'MVN' was built under R version 4.5.3

## 
## Attaching package: 'MVN'

## The following object is masked from 'package:psych':
## 
##     mardia

library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

library(emmeans)

## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'

library(effectsize)

## 
## Attaching package: 'effectsize'

## The following object is masked from 'package:psych':
## 
##     phi

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Load Dataset

data(BlueJays)
df <- na.omit(BlueJays[, c("KnownSex", "BillWidth", "BillDepth", "BillLength", "Mass")])
head(df, 5)

##   KnownSex BillWidth BillDepth BillLength  Mass
## 1        M      9.21      8.26      25.92 73.30
## 2        M      8.76      8.54      24.99 75.10
## 3        M      8.78      8.39      26.07 70.25
## 4        F      9.30      7.78      23.48 65.50
## 5        M      9.84      8.71      25.47 74.90

Data Structure

cat("Total observations:", nrow(df), "\n")

## Total observations: 123

cat("Group distribution (KnownSex):\n")

## Group distribution (KnownSex):

print(table(df$KnownSex))

## 
##  F  M 
## 60 63

cat("\nGroup means:\n")

## 
## Group means:

print(aggregate(cbind(BillWidth, BillDepth, BillLength, Mass) ~ KnownSex, data = df, FUN = mean))

##   KnownSex BillWidth BillDepth BillLength     Mass
## 1        F  9.118000  8.012333   24.18400 69.80633
## 2        M  9.312698  8.444603   25.46238 73.22508

dvs <- df[, c("BillWidth", "BillDepth", "BillLength")]

Assumptions Test

Assumptions 1 : Linearity of Regression

cat("\nASSUMPTION 1: Bartlett Test of Sphericity (DVs must be intercorrelated)\n")

## 
## ASSUMPTION 1: Bartlett Test of Sphericity (DVs must be intercorrelated)

result_bartlett <- cortest.bartlett(cor(dvs), n = nrow(df))
print(result_bartlett)

## $chisq
## [1] 56.92631
## 
## $p.value
## [1] 2.664665e-12
## 
## $df
## [1] 3

if (result_bartlett$p.value < 0.05) {
  cat("Conclusion: p < .05, correlation matrix is not an identity matrix, DVs are intercorrelated (MET)\n")
} else {
  cat("Conclusion: p > .05, DVs are not sufficiently correlated (NOT MET)\n")
}

## Conclusion: p < .05, correlation matrix is not an identity matrix, DVs are intercorrelated (MET)

Assumptions 2 : Homogeneity of Error Variances

cat("\nASSUMPTION 2: Box's M Test (Homogeneity of covariance matrix)\n")

## 
## ASSUMPTION 2: Box's M Test (Homogeneity of covariance matrix)

result_boxm <- boxM(dvs, df$KnownSex)
print(result_boxm)

## 
##  Box's M-test for Homogeneity of Covariance Matrices 
## 
## data:  dvs by df$KnownSex 
## Chi-Sq (approx.) = 8.5266, df = 6, p-value = 0.202

if (result_boxm$p.value > 0.05) {
  cat("Conclusion: p > .05, covariance matrix are homogeneous across groups (MET)\n")
} else {
  cat("Conclusion: p < .05, covariance matrix are not homogeneous (NOT MET)\n")
}

## Conclusion: p > .05, covariance matrix are homogeneous across groups (MET)

Assumptions 3 : Independence of Error Terms (Durbin-Watson)

cat("\nASSUMPTION 3: Multivariate Normality per group (MVN - Henze-Zirkler)\n")

## 
## ASSUMPTION 3: Multivariate Normality per group (MVN - Henze-Zirkler)

dvs_f <- dvs[df$KnownSex == "F", ]
dvs_m <- dvs[df$KnownSex == "M", ]

cat("Female group (n =", nrow(dvs_f), "):\n")

## Female group (n = 60 ):

result_mvn_f <- hz(dvs_f)
print(result_mvn_f)

##            Test Statistic  p.value     Method
## 1 Henze-Zirkler 0.6593133 0.511166 asymptotic

cat("\nMale group (n =", nrow(dvs_m), "):\n")

## 
## Male group (n = 63 ):

result_mvn_m <- hz(dvs_m)
print(result_mvn_m)

##            Test Statistic   p.value     Method
## 1 Henze-Zirkler 0.5396273 0.8418019 asymptotic

cat("Conclusion: If p > .05 in both groups, multivariate normality is met\n")

## Conclusion: If p > .05 in both groups, multivariate normality is met

Assumptions 4 : Multivariate Normality (Mardia)

cat("\nASSUMPTION 4: Linearity of covariate (Mass) with each DV\n")

## 
## ASSUMPTION 4: Linearity of covariate (Mass) with each DV

lm_bw <- lm(BillWidth  ~ Mass, data = df)
lm_bd <- lm(BillDepth  ~ Mass, data = df)
lm_bl <- lm(BillLength ~ Mass, data = df)

cat("Mass -> BillWidth:\n")

## Mass -> BillWidth:

print(summary(lm_bw)$coefficients)

##               Estimate  Std. Error  t value     Pr(>|t|)
## (Intercept) 6.89720006 0.698373703 9.876088 3.146236e-17
## Mass        0.03242884 0.009738223 3.330057 1.151394e-03

cat("R-squared:", round(summary(lm_bw)$r.squared, 4), "\n")

## R-squared: 0.084

cat("\nMass -> BillDepth:\n")

## 
## Mass -> BillDepth:

print(summary(lm_bd)$coefficients)

##               Estimate  Std. Error   t value     Pr(>|t|)
## (Intercept) 5.46400874 0.470240620 11.619602 2.013375e-21
## Mass        0.03870643 0.006557102  5.902977 3.341872e-08

cat("R-squared:", round(summary(lm_bd)$r.squared, 4), "\n")

## R-squared: 0.2236

cat("\nMass -> BillLength:\n")

## 
## Mass -> BillLength:

print(summary(lm_bl)$coefficients)

##               Estimate Std. Error   t value     Pr(>|t|)
## (Intercept) 17.6059460 1.46669551 12.003818 2.410827e-22
## Mass         0.1010774 0.02045181  4.942221 2.510571e-06

cat("R-squared:", round(summary(lm_bl)$r.squared, 4), "\n")

## R-squared: 0.168

cat("Conclusion: If p(Mass) < .05 in each model, linearity assumption is met\n")

## Conclusion: If p(Mass) < .05 in each model, linearity assumption is met

Assumptions 5 : Homogeneity of Regression Slopes

plot_data <- data.frame(
  Mass  = rep(df$Mass, 3),
  Value = c(df$BillWidth, df$BillDepth, df$BillLength),
  DV    = factor(rep(c("BillWidth", "BillDepth", "BillLength"), each = nrow(df)))
)

plot_data <- na.omit(plot_data)

p <- ggplot(plot_data, aes(x = Mass, y = Value)) +
  geom_point(alpha = 0.5, color = "steelblue") +
  geom_smooth(method = "lm", formula = y ~ x, color = "firebrick", se = TRUE) +
  facet_wrap(~ DV, scales = "free_y") +
  labs(
    title = "Assumption 5: Linearity of Mass (Covariate) with each DV",
    x     = "Mass (grams)",
    y     = "DV value (mm)"
  ) +
  theme_minimal()
print(p)

One Way ANOVA

Question:
Is the average bill length (BillLength) significantly different between male (M) and female (F) Blue Jays, without controlling for covariates?

Variables:

Factor (IV): KnownSex (F vs. M)
Response (DV): BillLength (a continuous variable)

Step 1: Descriptive Statistics + Boxplot per Group

cat("STEP 1: Descriptive Statistics + Boxplot per Group\n\n")

## STEP 1: Descriptive Statistics + Boxplot per Group

desc_anova <- df %>%
  group_by(KnownSex) %>%
  summarise(
    n        = n(),
    Mean     = round(mean(BillLength), 4),
    SD       = round(sd(BillLength), 4),
    SE       = round(sd(BillLength) / sqrt(n()), 4),
    Min      = round(min(BillLength), 4),
    Max      = round(max(BillLength), 4),
    Median   = round(median(BillLength), 4),
    .groups  = "drop"
  )

print(desc_anova)

## # A tibble: 2 × 8
##   KnownSex     n  Mean    SD    SE   Min   Max Median
##   <fct>    <int> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 F           60  24.2 0.965 0.125  22.2  26.3   24.0
## 2 M           63  25.5 1.01  0.127  23.6  30     25.5

grand_mean <- mean(df$BillLength)
cat(sprintf("\nGrand Mean (All BillLength) = %.4f mm\n", grand_mean))

## 
## Grand Mean (All BillLength) = 24.8388 mm

# Boxplot per Group
ggplot(df, aes(x = KnownSex, y = BillLength, fill = KnownSex)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21, outlier.size = 2) +
  geom_jitter(width = 0.15, alpha = 0.4, size = 1.5, color = "gray30") +
  stat_summary(fun = mean, geom = "point", shape = 23,
               size = 4, fill = "white", color = "black") +
  scale_fill_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
  labs(
    title    = "Figure 1. Distribution of BillLength per KnownSex",
    subtitle = "White diamond = group mean; dots = individual observations",
    x        = "KnownSex",
    y        = "BillLength (mm)",
    fill     = "Gender"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

Step 2: Test Ussumptions

Assumption 1 – Normality: Shapiro-Wilk per Group

cat("STEP 2a: Shapiro-Wilk Normality Test per Group\n\n")

## STEP 2a: Shapiro-Wilk Normality Test per Group

groups <- unique(df$KnownSex)
sw_results <- data.frame(Group = character(), W = numeric(),
                         p_value = numeric(), Conclusion = character(),
                         stringsAsFactors = FALSE)

for (g in groups) {
  x   <- df$BillLength[df$KnownSex == g]
  sw  <- shapiro.test(x)
  kes <- ifelse(sw$p.value > 0.05,
                "Normal (MET)", "Un Normal (NOT MET)")
  sw_results <- rbind(sw_results,
                      data.frame(Group      = g,
                                 W          = round(sw$statistic, 5),
                                 p_value    = round(sw$p.value, 5),
                                 Conclusion = kes))
}

print(sw_results)

##    Group       W p_value          Conclusion
## W      M 0.91363 0.00030 Un Normal (NOT MET)
## W1     F 0.97791 0.34651        Normal (MET)

cat("\nCriteria: p > 0.05 → normal distribution per group.\n")

## 
## Criteria: p > 0.05 → normal distribution per group.

par(mfrow = c(1, 2))
for (g in groups) {
  x <- df$BillLength[df$KnownSex == g]
  qqnorm(x, main = paste("QQ-Plot BillLength –", g),
         col = ifelse(g == "F", "#4E9AF1", "#F17A4E"), pch = 16)
  qqline(x, col = "black", lwd = 2)
}

par(mfrow = c(1, 1))

Assumption 2 – Homogeneity of Variance: Levene’s Test

cat("STEP 2b: Uji Homogenity Varians – Levene's Test\n\n")

## STEP 2b: Uji Homogenity Varians – Levene's Test

levene_result <- leveneTest(BillLength ~ KnownSex, data = df, center = mean)
print(levene_result)

## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.6141 0.4348
##       121

p_levene <- levene_result$`Pr(>F)`[1]
cat(sprintf("\np-value Levene = %.5f\n", p_levene))

## 
## p-value Levene = 0.43478

if (p_levene > 0.05) {
  cat("Conclusion: p > 0.05 → Homogeneous variance between groups (MET)\n")
} else {
  cat("Conclusion: p < 0.05 → Variance is NOT homogeneous between groups (NOT MET)\n")
}

## Conclusion: p > 0.05 → Homogeneous variance between groups (MET)

Step 3: Fit Model `aov()`, ANOVA Table, Calculate SS/MS/F Manually

cat("STEP 3: Fit ANOVA Model & Manually Calculate SS/MS/F\n\n")

## STEP 3: Fit ANOVA Model & Manually Calculate SS/MS/F

model_anova <- aov(BillLength ~ KnownSex, data = df)

cat("ANOVA table from aov()\n")

## ANOVA table from aov()

print(summary(model_anova))

##              Df Sum Sq Mean Sq F value   Pr(>F)    
## KnownSex      1  50.22   50.22   51.33 6.66e-11 ***
## Residuals   121 118.39    0.98                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\nManual Calculation of SS/MS/F\n\n")

## 
## Manual Calculation of SS/MS/F

grand_mean_bl <- mean(df$BillLength)
k  <- nlevels(df$KnownSex)      
N  <- nrow(df)                  

SS_between <- sum(
  tapply(df$BillLength, df$KnownSex, function(x)
    length(x) * (mean(x) - grand_mean_bl)^2)
)

SS_within <- sum(
  tapply(df$BillLength, df$KnownSex, function(x)
    sum((x - mean(x))^2))
)

SS_total <- sum((df$BillLength - grand_mean_bl)^2)

# Degrees of freedom
df_between <- k - 1
df_within  <- N - k
df_total   <- N - 1

# MS
MS_between <- SS_between / df_between
MS_within  <- SS_within  / df_within

# F hitung
F_hitung_anova <- MS_between / MS_within

# p-value
p_anova <- pf(F_hitung_anova, df_between, df_within, lower.tail = FALSE)

cat(sprintf("Grand Mean                 = %.4f\n", grand_mean_bl))

## Grand Mean                 = 24.8388

cat(sprintf("k (amount of group)        = %d\n",  k))

## k (amount of group)        = 2

cat(sprintf("N (total observation)      = %d\n\n", N))

## N (total observation)      = 123

cat(sprintf("SS_between (between group ) = %.4f  (df = %d)\n", SS_between, df_between))

## SS_between (between group ) = 50.2235  (df = 1)

cat(sprintf("SS_within  (in group)       = %.4f  (df = %d)\n", SS_within,  df_within))

## SS_within  (in group)       = 118.3926  (df = 121)

cat(sprintf("SS_total                    = %.4f  (df = %d)\n\n", SS_total, df_total))

## SS_total                    = 168.6161  (df = 122)

cat(sprintf("MS_between = SS_between / df_between = %.4f / %d = %.4f\n",
            SS_between, df_between, MS_between))

## MS_between = SS_between / df_between = 50.2235 / 1 = 50.2235

cat(sprintf("MS_within  = SS_within  / df_within  = %.4f / %d = %.4f\n\n",
            SS_within,  df_within,  MS_within))

## MS_within  = SS_within  / df_within  = 118.3926 / 121 = 0.9785

cat(sprintf("F_hitung   = MS_between / MS_within  = %.4f / %.4f = %.4f\n",
            MS_between, MS_within, F_hitung_anova))

## F_hitung   = MS_between / MS_within  = 50.2235 / 0.9785 = 51.3296

cat(sprintf("p-value    = P(F(%.0f, %.0f) > %.4f)  = %.6f\n\n",
            df_between, df_within, F_hitung_anova, p_anova))

## p-value    = P(F(1, 121) > 51.3296)  = 0.000000

tbl_aov <- summary(model_anova)[[1]]
cat("Verifikasi: SS manual vs aov()\n")

## Verifikasi: SS manual vs aov()

cat(sprintf("SS_between  manual = %.4f  |  aov() = %.4f  |  selisih = %.8f\n",
            SS_between,
            tbl_aov["KnownSex", "Sum Sq"],
            abs(SS_between - tbl_aov["KnownSex", "Sum Sq"])))

## SS_between  manual = 50.2235  |  aov() = 50.2235  |  selisih = 0.00000000

cat(sprintf("SS_within   manual = %.4f  |  aov() = %.4f  |  selisih = %.8f\n",
            SS_within,
            tbl_aov["Residuals", "Sum Sq"],
            abs(SS_within - tbl_aov["Residuals", "Sum Sq"])))

## SS_within   manual = 118.3926  |  aov() = 118.3926  |  selisih = 0.00000000

cat(sprintf("F_hitung    manual = %.4f  |  aov() = %.4f  |  selisih = %.8f\n",
            F_hitung_anova,
            tbl_aov["KnownSex", "F value"],
            abs(F_hitung_anova - tbl_aov["KnownSex", "F value"])))

## F_hitung    manual = 51.3296  |  aov() = 51.3296  |  selisih = 0.00000000

Step 4: Effect Size η² (Manual + `effectsize`)

cat("STEP 4: Effect Size η² (Eta-Squared)\n\n")

## STEP 4: Effect Size η² (Eta-Squared)

eta2_manual <- SS_between / SS_total

cat("Formula: η² = SS_between / SS_total\n")

## Formula: η² = SS_between / SS_total

cat(sprintf("η² (manual)  = %.4f / %.4f = %.4f\n\n",
            SS_between, SS_total, eta2_manual))

## η² (manual)  = 50.2235 / 168.6161 = 0.2979

eta2_pkg <- eta_squared(model_anova, partial = FALSE)
cat("η² dari package effectsize\n")

## η² dari package effectsize

print(eta2_pkg)

## # Effect Size for ANOVA (Type I)
## 
## Parameter | Eta2 |       95% CI
## -------------------------------
## KnownSex  | 0.30 | [0.19, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

cat("\n Interpretasi η²\n")

## 
##  Interpretasi η²

cat("  η² < 0.01  = small\n")

##   η² < 0.01  = small

cat("  η² < 0.06  = medium\n")

##   η² < 0.06  = medium

cat("  η² >= 0.14 = big\n")

##   η² >= 0.14 = big

eta2_val <- eta2_pkg$Eta2[1]
interp   <- ifelse(eta2_val >= 0.14, "large",
             ifelse(eta2_val >= 0.06, "medium",
              ifelse(eta2_val >= 0.01, "small",
                     "very small (negligible)")))
cat(sprintf("\nη² = %.4f = Ukuran efek: %s\n", eta2_val, interp))

## 
## η² = 0.2979 = Ukuran efek: large

Step 5: Conclusion + EMM Visualization

cat("STEP 5: Conclusion + EMM Visualization\n\n")

## STEP 5: Conclusion + EMM Visualization

cat(sprintf("F_count (%d, %d) = %.4f\n", df_between, df_within, F_hitung_anova))

## F_count (1, 121) = 51.3296

cat(sprintf("p-value           = %.6f\n", p_anova))

## p-value           = 0.000000

cat(sprintf("η²                = %.4f  (%s)\n\n", eta2_val, interp))

## η²                = 0.2979  (large)

if (p_anova < 0.05) {
  cat("Decision : REJECT H0 (p < 0.05)\n")
} else {
  cat("Decision : FAIL TO REJECT H0 (p ≥ 0.05)\n")
}

## Decision : REJECT H0 (p < 0.05)

emm_anova <- emmeans(model_anova, ~ KnownSex)
cat("\n Estimated Marginal Means ANOVA \n")

## 
##  Estimated Marginal Means ANOVA

print(emm_anova)

##  KnownSex emmean    SE  df lower.CL upper.CL
##  F          24.2 0.128 121     23.9     24.4
##  M          25.5 0.125 121     25.2     25.7
## 
## Confidence level used: 0.95

emm_df <- as.data.frame(emm_anova)

# Visualization 1: EMM Plot
ggplot(emm_df, aes(x = KnownSex, y = emmean, color = KnownSex)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
                width = 0.15, linewidth = 1.2) +
  scale_color_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
  labs(
    title    = "Figure 2. Estimated Marginal Means BillLength per KnownSex",
    subtitle = "Error bar = 95% Confidence Interval",
    x        = "KnownSex (Jenis Kelamin)",
    y        = "EMM BillLength (mm)",
    color    = "Gender"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

# Visualization 2: Boxplot + EMM overlay 
ggplot(df, aes(x = KnownSex, y = BillLength, fill = KnownSex)) +
  geom_boxplot(alpha = 0.4, outlier.shape = NA) +
  geom_jitter(aes(color = KnownSex), width = 0.15,
              alpha = 0.5, size = 1.8) +
  geom_point(data = emm_df,
             aes(x = KnownSex, y = emmean),
             size = 5, shape = 23,
             fill = "white", color = "black", stroke = 1.5) +
  geom_errorbar(data = emm_df,
                aes(x = KnownSex, y = emmean,
                    ymin = lower.CL, ymax = upper.CL),
                width = 0.12, linewidth = 1.2, color = "black",
                inherit.aes = FALSE) +
  scale_fill_manual(values  = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
  scale_color_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
  labs(
    title    = "Figure 3. Distribusi BillLength + EMM per KnownSex",
    subtitle = "Berlian hitam = EMM; error bar = 95% CI",
    x        = "KnownSex (Jenis Kelamin)",
    y        = "BillLength (mm)"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

One Way MANOVA

Question:
Are the mean vectors of bill measurements (BillWidth, BillDepth, BillLength) simultaneously different between male (M) and female (F) Blue Jays, without controlling for any covariate?

Variables:

Factor (IV): KnownSex (F vs. M)
Response (DVs): BillWidth, BillDepth, BillLength (3 continuous variables)

Step 1: Descriptive Statistics per Group

cat("STEP 1: Descriptive Statistics per Group (3 DVs)\n\n")

## STEP 1: Descriptive Statistics per Group (3 DVs)

dv_names_manova <- c("BillWidth", "BillDepth", "BillLength")

for (dv in dv_names_manova) {
  desc <- df %>%
    group_by(KnownSex) %>%
    summarise(
      n      = n(),
      Mean   = round(mean(.data[[dv]]), 4),
      SD     = round(sd(.data[[dv]]),   4),
      SE     = round(sd(.data[[dv]]) / sqrt(n()), 4),
      Min    = round(min(.data[[dv]]), 4),
      Max    = round(max(.data[[dv]]), 4),
      Median = round(median(.data[[dv]]), 4),
      .groups = "drop"
    )
  cat(sprintf("--- %s ---\n", dv))
  print(desc)
  grand_m <- mean(df[[dv]])
  cat(sprintf("Grand Mean = %.4f mm\n\n", grand_m))
}

## --- BillWidth ---
## # A tibble: 2 × 8
##   KnownSex     n  Mean    SD     SE   Min   Max Median
##   <fct>    <int> <dbl> <dbl>  <dbl> <dbl> <dbl>  <dbl>
## 1 F           60  9.12 0.562 0.0726   8    10.8   9.18
## 2 M           63  9.31 0.490 0.0617   8.1  10.3   9.28
## Grand Mean = 9.2177 mm
## 
## --- BillDepth ---
## # A tibble: 2 × 8
##   KnownSex     n  Mean    SD     SE   Min   Max Median
##   <fct>    <int> <dbl> <dbl>  <dbl> <dbl> <dbl>  <dbl>
## 1 F           60  8.01 0.372 0.048   7.21   8.8   8.01
## 2 M           63  8.44 0.274 0.0346  7.89   9     8.43
## Grand Mean = 8.2337 mm
## 
## --- BillLength ---
## # A tibble: 2 × 8
##   KnownSex     n  Mean    SD    SE   Min   Max Median
##   <fct>    <int> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 F           60  24.2 0.965 0.125  22.2  26.3   24.0
## 2 M           63  25.5 1.01  0.127  23.6  30     25.5
## Grand Mean = 24.8388 mm

# Side-by-side boxplots for all 3 DVs
plot_data_manova <- data.frame(
  Sex   = rep(df$KnownSex, 3),
  Value = c(df$BillWidth, df$BillDepth, df$BillLength),
  DV    = factor(rep(dv_names_manova, each = nrow(df)),
                 levels = dv_names_manova)
)

ggplot(plot_data_manova, aes(x = Sex, y = Value, fill = Sex)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21, outlier.size = 2) +
  geom_jitter(width = 0.15, alpha = 0.35, size = 1.2, color = "gray30") +
  stat_summary(fun = mean, geom = "point", shape = 23,
               size = 3.5, fill = "white", color = "black") +
  facet_wrap(~ DV, scales = "free_y") +
  scale_fill_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
  labs(
    title    = "Figure A. Distribution of 3 DVs per KnownSex (MANOVA)",
    subtitle = "White diamond = group mean",
    x        = "KnownSex",
    y        = "Value (mm)",
    fill     = "Gender"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

Step 2: Assumption Tests

Assumption 1 – Multivariate Normality: Henze-Zirkler per Group

cat("STEP 2a: Multivariate Normality – Henze-Zirkler Test per Group\n\n")

## STEP 2a: Multivariate Normality – Henze-Zirkler Test per Group

dvs_manova <- df[, dv_names_manova]
dvs_F <- dvs_manova[df$KnownSex == "F", ]
dvs_M <- dvs_manova[df$KnownSex == "M", ]

cat(sprintf("Female group (n = %d):\n", nrow(dvs_F)))

## Female group (n = 60):

result_mvn_F <- hz(dvs_F)
print(result_mvn_F)

##            Test Statistic  p.value     Method
## 1 Henze-Zirkler 0.6593133 0.511166 asymptotic

cat(sprintf("\nMale group (n = %d):\n", nrow(dvs_M)))

## 
## Male group (n = 63):

result_mvn_M <- hz(dvs_M)
print(result_mvn_M)

##            Test Statistic   p.value     Method
## 1 Henze-Zirkler 0.5396273 0.8418019 asymptotic

cat("\nCriteria: p > 0.05 in BOTH groups → multivariate normality is met (MET)\n")

## 
## Criteria: p > 0.05 in BOTH groups → multivariate normality is met (MET)

Assumption 2 – Homogeneity of Covariance Matrices: Box’s M Test

cat("STEP 2b: Homogeneity of Covariance Matrices – Box's M Test\n\n")

## STEP 2b: Homogeneity of Covariance Matrices – Box's M Test

boxm_manova <- boxM(dvs_manova, df$KnownSex)
print(boxm_manova)

## 
##  Box's M-test for Homogeneity of Covariance Matrices 
## 
## data:  dvs_manova by df$KnownSex 
## Chi-Sq (approx.) = 8.5266, df = 6, p-value = 0.202

if (boxm_manova$p.value > 0.05) {
  cat("Conclusion: p > 0.05 → Covariance matrices are homogeneous across groups (MET)\n")
} else {
  cat("Conclusion: p < 0.05 → Covariance matrices are NOT homogeneous (NOT MET)\n")
  cat("Note: Box's M is sensitive to normality violations; proceed with caution.\n")
}

## Conclusion: p > 0.05 → Covariance matrices are homogeneous across groups (MET)

Step 3: Fit MANOVA Model & SSCP Matrices

cat("STEP 3: Fit MANOVA Model\n")

## STEP 3: Fit MANOVA Model

cat("Model: cbind(BillWidth, BillDepth, BillLength) ~ KnownSex\n\n")

## Model: cbind(BillWidth, BillDepth, BillLength) ~ KnownSex

dv_matrix_manova <- as.matrix(df[, dv_names_manova])
model_manova      <- manova(dv_matrix_manova ~ KnownSex, data = df)

cat("--- MANOVA Summary (Wilks' Lambda) ---\n")

## --- MANOVA Summary (Wilks' Lambda) ---

print(summary(model_manova, test = "Wilks"))

##            Df  Wilks approx F num Df den Df    Pr(>F)    
## KnownSex    1 0.6085   25.521      3    119 8.083e-13 ***
## Residuals 121                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- MANOVA Summary (Pillai) ---\n")

## 
## --- MANOVA Summary (Pillai) ---

print(summary(model_manova, test = "Pillai"))

##            Df Pillai approx F num Df den Df    Pr(>F)    
## KnownSex    1 0.3915   25.521      3    119 8.083e-13 ***
## Residuals 121                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- MANOVA Summary (Hotelling-Lawley) ---\n")

## 
## --- MANOVA Summary (Hotelling-Lawley) ---

print(summary(model_manova, test = "Hotelling-Lawley"))

##            Df Hotelling-Lawley approx F num Df den Df    Pr(>F)    
## KnownSex    1           0.6434   25.521      3    119 8.083e-13 ***
## Residuals 121                                                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- MANOVA Summary (Roy) ---\n")

## 
## --- MANOVA Summary (Roy) ---

print(summary(model_manova, test = "Roy"))

##            Df    Roy approx F num Df den Df    Pr(>F)    
## KnownSex    1 0.6434   25.521      3    119 8.083e-13 ***
## Residuals 121                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Step 4: Wilks’ Lambda Manual Calculation

cat("STEP 4: Wilks' Lambda – Manual Calculation\n")

## STEP 4: Wilks' Lambda – Manual Calculation

cat("Formula: Λ = |E| / |E + H|\n\n")

## Formula: Λ = |E| / |E + H|

# --- Grand mean vector ---
grand_mean_vec <- colMeans(dv_matrix_manova)
N_m  <- nrow(df)
k_m  <- nlevels(df$KnownSex)    # = 2

# --- Total SSCP (T) ---
T_mat <- t(dv_matrix_manova - matrix(grand_mean_vec, nrow = N_m, ncol = 3, byrow = TRUE)) %*%
              (dv_matrix_manova - matrix(grand_mean_vec, nrow = N_m, ncol = 3, byrow = TRUE))

# --- Within-group (Error) SSCP (E) ---
E_mat <- matrix(0, 3, 3)
for (g in levels(df$KnownSex)) {
  X_g <- dv_matrix_manova[df$KnownSex == g, ]
  mu_g <- colMeans(X_g)
  diff_g <- X_g - matrix(mu_g, nrow = nrow(X_g), ncol = 3, byrow = TRUE)
  E_mat <- E_mat + t(diff_g) %*% diff_g
}
colnames(E_mat) <- rownames(E_mat) <- dv_names_manova

# --- Between-group (Hypothesis) SSCP (H) ---
H_mat <- T_mat - E_mat
colnames(H_mat) <- rownames(H_mat) <- dv_names_manova

df_H <- k_m - 1           # between-group df
df_E <- N_m - k_m         # within-group df

cat("--- Within-group SSCP Matrix (E) | df_E =", df_E, "---\n")

## --- Within-group SSCP Matrix (E) | df_E = 121 ---

print(round(E_mat, 4))

##            BillWidth BillDepth BillLength
## BillWidth    33.5584    4.2735    15.0361
## BillDepth     4.2735   12.8318    13.9195
## BillLength   15.0361   13.9195   118.3926

cat("\n--- Between-group SSCP Matrix (H) | df_H =", df_H, "---\n")

## 
## --- Between-group SSCP Matrix (H) | df_H = 1 ---

print(round(H_mat, 4))

##            BillWidth BillDepth BillLength
## BillWidth     1.1650    2.5864     7.6491
## BillDepth     2.5864    5.7424    16.9825
## BillLength    7.6491   16.9825    50.2235

# --- Wilks' Lambda ---
det_E       <- det(E_mat)
det_EplusH  <- det(E_mat + H_mat)
lambda_man  <- det_E / det_EplusH

cat(sprintf("\n|E|         = %.6f\n", det_E))

## 
## |E|         = 41205.269858

cat(sprintf("|E + H|     = %.6f\n", det_EplusH))

## |E + H|     = 67716.639330

cat(sprintf("Λ (Wilks')  = %.6f\n\n", lambda_man))

## Λ (Wilks')  = 0.608495

# --- F approximation (p = 3 DVs, v_H = k-1 = 1, exact formula) ---
p_dv   <- 3
v_H    <- df_H
v_E    <- df_E

F_man  <- ((1 - lambda_man) / lambda_man) * ((v_E - p_dv + v_H) / p_dv)
df1_man <- p_dv
df2_man <- v_E - p_dv + v_H
p_man   <- pf(F_man, df1_man, df2_man, lower.tail = FALSE)

cat("--- F-Approximation of Wilks' Λ ---\n")

## --- F-Approximation of Wilks' Λ ---

cat(sprintf("p  (# DVs)   = %d\n",    p_dv))

## p  (# DVs)   = 3

cat(sprintf("v_H (df_H)   = %d\n",    v_H))

## v_H (df_H)   = 1

cat(sprintf("v_E (df_E)   = %d\n",    v_E))

## v_E (df_E)   = 121

cat(sprintf("F_hitung     = %.4f  (df1 = %d, df2 = %d)\n", F_man, df1_man, df2_man))

## F_hitung     = 25.5214  (df1 = 3, df2 = 119)

cat(sprintf("p-value      = %.6f\n\n", p_man))

## p-value      = 0.000000

Step 5: Effect Size – η² Multivariate

cat("STEP 5: Effect Size – Eta-Squared (η²) from SSCP Matrices\n\n")

## STEP 5: Effect Size – Eta-Squared (η²) from SSCP Matrices

# Pillai's trace as effect size
pillai_res   <- summary(model_manova, test = "Pillai")$stats
pillai_val   <- pillai_res["KnownSex", "Pillai"]
cat(sprintf("Pillai's Trace (V) = %.4f\n", pillai_val))

## Pillai's Trace (V) = 0.3915

# Multivariate η² from Wilks' Λ
eta2_mv <- 1 - lambda_man
cat(sprintf("η²_mv = 1 − Λ = 1 − %.6f = %.4f\n\n", lambda_man, eta2_mv))

## η²_mv = 1 − Λ = 1 − 0.608495 = 0.3915

interp_mv <- ifelse(eta2_mv >= 0.14, "large",
              ifelse(eta2_mv >= 0.06, "medium",
               ifelse(eta2_mv >= 0.01, "small",
                      "very small (negligible)")))
cat(sprintf("Interpretation: η²_mv = %.4f → effect size is %s\n", eta2_mv, interp_mv))

## Interpretation: η²_mv = 0.3915 → effect size is large

cat("\nReference:\n  η² < 0.01 = small\n  η² < 0.06 = medium\n  η² ≥ 0.14 = large\n")

## 
## Reference:
##   η² < 0.01 = small
##   η² < 0.06 = medium
##   η² ≥ 0.14 = large

Step 6: HE Plot – Multivariate Visualization

cat("STEP 6: HE Plot – Hypothesis vs Error ellipsoids\n\n")

## STEP 6: HE Plot – Hypothesis vs Error ellipsoids

heplot(model_manova,
       variables = c(1, 3),   # BillWidth vs BillLength
       main      = "Figure B. HE Plot MANOVA: BillWidth vs BillLength by KnownSex",
       col       = c("blue", "red"),
       lwd       = 2)

heplot(model_manova,
       variables = c(1, 2),   # BillWidth vs BillDepth
       main      = "Figure C. HE Plot MANOVA: BillWidth vs BillDepth by KnownSex",
       col       = c("blue", "red"),
       lwd       = 2)

Step 7: Follow-up Univariate ANOVAs (Post-hoc per DV)

cat("STEP 7: Follow-Up Univariate ANOVAs per DV\n")

## STEP 7: Follow-Up Univariate ANOVAs per DV

cat("(Performed because MANOVA is significant)\n")

## (Performed because MANOVA is significant)

cat(sprintf("Bonferroni Correction: α* = 0.05 / 3 = %.4f\n\n", 0.05 / 3))

## Bonferroni Correction: α* = 0.05 / 3 = 0.0167

alpha_bonf_mv <- 0.05 / 3

cat("--- Univariate ANOVA Summary per DV ---\n")

## --- Univariate ANOVA Summary per DV ---

print(summary.aov(model_manova))

##  Response BillWidth :
##              Df Sum Sq Mean Sq F value  Pr(>F)  
## KnownSex      1  1.165 1.16496  4.2004 0.04257 *
## Residuals   121 33.558 0.27734                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response BillDepth :
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## KnownSex      1  5.7424  5.7424  54.149 2.451e-11 ***
## Residuals   121 12.8318  0.1060                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response BillLength :
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## KnownSex      1  50.224  50.224   51.33 6.659e-11 ***
## Residuals   121 118.393   0.978                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

results_fu_mv <- data.frame(
  DV         = character(),
  F_hitung   = numeric(),
  p_value    = numeric(),
  Signifikan = character(),
  eta2       = numeric(),
  stringsAsFactors = FALSE
)

for (dv in dv_names_manova) {
  m_uni <- aov(as.formula(paste(dv, "~ KnownSex")), data = df)
  tbl   <- summary(m_uni)[[1]]
  f_u   <- tbl["KnownSex", "F value"]
  p_u   <- tbl["KnownSex", "Pr(>F)"]
  ss_b  <- tbl["KnownSex", "Sum Sq"]
  ss_w  <- tbl["Residuals", "Sum Sq"]
  eta2_u <- ss_b / (ss_b + ss_w)
  sig_u  <- ifelse(p_u < alpha_bonf_mv, "SIGNIFICANT *", "not significant")

  results_fu_mv <- rbind(results_fu_mv,
                         data.frame(DV = dv,
                                    F_hitung   = round(f_u, 4),
                                    p_value    = round(p_u, 6),
                                    Signifikan = sig_u,
                                    eta2       = round(eta2_u, 4)))
}
print(results_fu_mv)

##           DV F_hitung  p_value      Signifikan   eta2
## 1  BillWidth   4.2004 0.042575 not significant 0.0335
## 2  BillDepth  54.1493 0.000000   SIGNIFICANT * 0.3092
## 3 BillLength  51.3296 0.000000   SIGNIFICANT * 0.2979

cat("\nSignificant DVs (p < α*) drive the multivariate group difference.\n")

## 
## Significant DVs (p < α*) drive the multivariate group difference.

Step 8: Estimated Marginal Means per DV

cat("STEP 8: Estimated Marginal Means per DV\n\n")

## STEP 8: Estimated Marginal Means per DV

for (dv in dv_names_manova) {
  m_emm_mv <- lm(as.formula(paste(dv, "~ KnownSex")), data = df)
  emm_mv   <- emmeans(m_emm_mv, ~ KnownSex)
  cat(sprintf("--- %s ---\n", dv))
  print(emm_mv)
  cat("\n")
}

## --- BillWidth ---
##  KnownSex emmean     SE  df lower.CL upper.CL
##  F          9.12 0.0680 121     8.98     9.25
##  M          9.31 0.0663 121     9.18     9.44
## 
## Confidence level used: 0.95 
## 
## --- BillDepth ---
##  KnownSex emmean    SE  df lower.CL upper.CL
##  F          8.01 0.042 121     7.93     8.10
##  M          8.44 0.041 121     8.36     8.53
## 
## Confidence level used: 0.95 
## 
## --- BillLength ---
##  KnownSex emmean    SE  df lower.CL upper.CL
##  F          24.2 0.128 121     23.9     24.4
##  M          25.5 0.125 121     25.2     25.7
## 
## Confidence level used: 0.95

Step 9: MANOVA Conclusion

cat("ONE-WAY MANOVA Conclusion\n\n")

## ONE-WAY MANOVA Conclusion

cat(sprintf("Wilks' Λ     = %.5f\n",                   lambda_man))

## Wilks' Λ     = 0.60850

cat(sprintf("F_calculated = %.4f (df1 = %d, df2 = %d)\n", F_man, df1_man, df2_man))

## F_calculated = 25.5214 (df1 = 3, df2 = 119)

cat(sprintf("p-value      = %.6f\n\n",                  p_man))

## p-value      = 0.000000

cat(sprintf("η²_mv        = %.4f (%s effect)\n\n",      eta2_mv, interp_mv))

## η²_mv        = 0.3915 (large effect)

if (p_man < 0.05) {
  cat("Decision: REJECT H0 (p < 0.05)\n\n")
  cat("Conclusion: There is a statistically significant multivariate difference\n")
  cat("in the combined mean vector of (BillWidth, BillDepth, BillLength)\n")
  cat("between male (M) and female (F) Blue Jays, WITHOUT controlling for\n")
  cat("any covariate (Wilks' Λ = ", round(lambda_man, 5), ", p < .05).\n")
  cat("Follow-up univariate ANOVAs (Bonferroni-corrected α* = 0.0167) reveal\n")
  cat("which specific DVs contribute to the group separation.\n")
} else {
  cat("Decision: FAIL TO REJECT H0 (p ≥ 0.05)\n\n")
  cat("Conclusion: There is NO statistically significant multivariate difference\n")
  cat("in the combined mean vector of (BillWidth, BillDepth, BillLength)\n")
  cat("between male and female Blue Jays (Wilks' Λ = ", round(lambda_man, 5), ", p ≥ .05).\n")
}

## Decision: REJECT H0 (p < 0.05)
## 
## Conclusion: There is a statistically significant multivariate difference
## in the combined mean vector of (BillWidth, BillDepth, BillLength)
## between male (M) and female (F) Blue Jays, WITHOUT controlling for
## any covariate (Wilks' Λ =  0.6085 , p < .05).
## Follow-up univariate ANOVAs (Bonferroni-corrected α* = 0.0167) reveal
## which specific DVs contribute to the group separation.

One Way ANCOVA

Question: After controlling for Mass (body weight) as a covariate, does the average BillLength still differ between males and females?

Variables: - Factor (IV): Known Sex (F vs. M) - Covariate (CV): Mass - Response (DV): Bill Length (one continuous variable)

Step 1 : Homogenity of Regression Slopes

cat("=== ANCOVA Assumptions : Homogenity of Regression Slopes\n")

## === ANCOVA Assumptions : Homogenity of Regression Slopes

cat("Test: Is the slope of the BillLength ~ Mass regression the same in both groups?\n")

## Test: Is the slope of the BillLength ~ Mass regression the same in both groups?

cat("Interaction Models : BillLength ~ Mass * KnownSex\n\n")

## Interaction Models : BillLength ~ Mass * KnownSex

model_interaction <- aov(BillLength ~ Mass * KnownSex, data = df)
print(summary(model_interaction))

##                Df Sum Sq Mean Sq F value   Pr(>F)    
## Mass            1  28.32  28.321  30.782 1.77e-07 ***
## KnownSex        1  30.72  30.722  33.393 6.16e-08 ***
## Mass:KnownSex   1   0.09   0.089   0.097    0.756    
## Residuals     119 109.48   0.920                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

p_interaction <- summary(model_interaction)[[1]]["Mass:KnownSex", "Pr(>F)"]
cat(sprintf("\np-value interaction (Mass:KnownSex) = %.4f\n", p_interaction))

## 
## p-value interaction (Mass:KnownSex) = 0.7564

if (p_interaction > 0.05) {
  cat("Conclusion: p > 0.05 --> Slope is homogeneous. ANCOVA can be continued.\n\n")
} else {
  cat("Conclusion: p < 0.05 --> Slope is NOT homogeneous.\n\n")
}

## Conclusion: p > 0.05 --> Slope is homogeneous. ANCOVA can be continued.

ggplot(df, aes(x = Mass, y = BillLength, color = KnownSex)) +
  geom_point(alpha = 0.6, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.2) +
  scale_color_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
  labs(
    title    = "Figure 4. Homogeneity of Slopes Test : BillLength vs Mass per KnownSex",
    subtitle = "Paralel Line = slopes homogen",
    x        = "Mass — Covariate (gram)",
    y        = "BillLength — DV (mm)",
    color    = "Gender"
  ) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

Step 2 : Fit Model ANCOVA

cat("Fit Model ANCOVA\n")

## Fit Model ANCOVA

cat("Model: BillLength ~ Mass + KnownSex\n")

## Model: BillLength ~ Mass + KnownSex

model_ancova <- aov(BillLength ~ Mass + KnownSex, data = df)
print(summary(model_ancova))

##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Mass          1  28.32  28.321   31.02 1.59e-07 ***
## KnownSex      1  30.72  30.722   33.65 5.48e-08 ***
## Residuals   120 109.57   0.913                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Step 3 : Tabel ANCOVA Adjusted

cat("Table ANCOVA\n")

## Table ANCOVA

model_ancova_lm <- lm(BillLength ~ Mass + KnownSex, data = df,
                      contrasts = list(KnownSex = contr.sum))
tabel_ancova <- Anova(model_ancova_lm, type = "III")
print(tabel_ancova)

## Anova Table (Type III tests)
## 
## Response: BillLength
##              Sum Sq  Df  F value    Pr(>F)    
## (Intercept) 197.489   1 216.2822 < 2.2e-16 ***
## Mass          8.819   1   9.6587  0.002353 ** 
## KnownSex     30.722   1  33.6459 5.482e-08 ***
## Residuals   109.573 120                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

SS_cov    <- tabel_ancova["Mass", "Sum Sq"]
SS_factor <- tabel_ancova["KnownSex", "Sum Sq"]
SS_error  <- tabel_ancova["Residuals", "Sum Sq"]
df_cov    <- tabel_ancova["Mass", "Df"]
df_factor <- tabel_ancova["KnownSex", "Df"]
df_error  <- tabel_ancova["Residuals", "Df"]
MS_factor <- SS_factor / df_factor
MS_error  <- SS_error  / df_error
F_ancova  <- MS_factor / MS_error
p_ancova  <- tabel_ancova["KnownSex", "Pr(>F)"]

cat("\n--- Summary of ANCOVA Components ---\n")

## 
## --- Summary of ANCOVA Components ---

cat(sprintf("SS_covariate (Mass)    = %.4f  (df = %d)\n", SS_cov, df_cov))

## SS_covariate (Mass)    = 8.8194  (df = 1)

cat(sprintf("SS_factor (KnownSex)   = %.4f  (df = %d)\n", SS_factor, df_factor))

## SS_factor (KnownSex)   = 30.7224  (df = 1)

cat(sprintf("SS_error               = %.4f  (df = %d)\n", SS_error, df_error))

## SS_error               = 109.5731  (df = 120)

cat(sprintf("MS_faktor              = %.4f\n", MS_factor))

## MS_faktor              = 30.7224

cat(sprintf("MS_error               = %.4f\n", MS_error))

## MS_error               = 0.9131

cat(sprintf("F_hitung               = %.4f\n", F_ancova))

## F_hitung               = 33.6459

cat(sprintf("p-value                = %.6f\n", p_ancova))

## p-value                = 0.000000

Step 4 : Adjusted Means (EMM)

cat("Adjustment Means (Estimated Marginal Means)\n")

## Adjustment Means (Estimated Marginal Means)

cat("Average of BillLength per Group after Mass Controlled\n\n")

## Average of BillLength per Group after Mass Controlled

emm_ancova <- emmeans(model_ancova, ~ KnownSex)
print(emm_ancova)

##  KnownSex emmean    SE  df lower.CL upper.CL
##  F          24.3 0.128 120     24.0     24.5
##  M          25.4 0.125 120     25.1     25.6
## 
## Confidence level used: 0.95

cat("\nUnadjusted vs Adjusted Means\n")

## 
## Unadjusted vs Adjusted Means

unadj <- aggregate(BillLength ~ KnownSex, data = df, FUN = mean)
adj   <- as.data.frame(emm_ancova)[, c("KnownSex", "emmean")]

compare_means <- merge(unadj, adj, by = "KnownSex")
colnames(compare_means) <- c("KnownSex", "Unadjusted_Mean", "Adjusted_Mean")

compare_means$Selisih <- compare_means$Adjusted_Mean - compare_means$Unadjusted_Mean

compare_means[, 2:4] <- round(compare_means[, 2:4], 4)

print(compare_means)

##   KnownSex Unadjusted_Mean Adjusted_Mean Selisih
## 1        F         24.1840       24.2899  0.1059
## 2        M         25.4624       25.3616 -0.1008

cat("\nDifference = shift due to control covariate Mass.\n")

## 
## Difference = shift due to control covariate Mass.

Step 5: Effect Size Partial η²

cat("Effect Size : Partial ETA Squared (η²_p)\n\n")

## Effect Size : Partial ETA Squared (η²_p)

eta2_ancova <- eta_squared(model_ancova_lm, partial = TRUE)
print(eta2_ancova)

## # Effect Size for ANOVA (Type I)
## 
## Parameter | Eta2 (partial) |       95% CI
## -----------------------------------------
## Mass      |           0.21 | [0.11, 1.00]
## KnownSex  |           0.22 | [0.12, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

eta2_p_manual <- SS_factor / (SS_factor + SS_error)

cat(sprintf("\nPartial η² manual: η²_p = SS_factor / (SS_factor + SS_error)\n"))

## 
## Partial η² manual: η²_p = SS_factor / (SS_factor + SS_error)

cat(sprintf("                         = %.4f / (%.4f + %.4f) = %.4f\n",
            SS_factor, SS_factor, SS_error, eta2_p_manual))

##                          = 30.7224 / (30.7224 + 109.5731) = 0.2190

cat("\nInterpretation:\n")

## 
## Interpretation:

cat("  η²_p ≈ 0.01 = small effect\n")

##   η²_p ≈ 0.01 = small effect

cat("  η²_p ≈ 0.06 = medium effect\n")

##   η²_p ≈ 0.06 = medium effect

cat("  η²_p ≈ 0.14 = big effect\n")

##   η²_p ≈ 0.14 = big effect

cat(sprintf("  η²_p = %.4f --> ", eta2_p_manual))

##   η²_p = 0.2190 -->

if (eta2_p_manual >= 0.14) cat("Big Effect\n") else
if (eta2_p_manual >= 0.06) cat("Medium Effect\n") else
  cat("Small Effect\n")

## Big Effect

Step 6: Comparison ANOVA & ANCOVA

cat("Comparison ANOVA & ANCOVA\n\n")

## Comparison ANOVA & ANCOVA

model_anova_ulang <- aov(BillLength ~ KnownSex, data = df)
ss_anova <- summary(model_anova_ulang)[[1]]
F_anova_ulang <- ss_anova["KnownSex", "F value"]
p_anova_ulang <- ss_anova["KnownSex", "Pr(>F)"]

cat("Model ANOVA (without covariate):\n")

## Model ANOVA (without covariate):

cat(sprintf("  F = %.4f | p = %.6f\n\n", F_anova_ulang, p_anova_ulang))

##   F = 51.3296 | p = 0.000000

cat("Model ANCOVA (with covariate Mass):\n")

## Model ANCOVA (with covariate Mass):

cat(sprintf("  F = %.4f | p = %.6f\n\n", F_ancova, p_ancova))

##   F = 33.6459 | p = 0.000000

cat("Interpretation:\n")

## Interpretation:

cat("The F difference indicates how much Mass (covariate)\n")

## The F difference indicates how much Mass (covariate)

cat(" affects the KnownSex → BillLength relationship.\n")

##  affects the KnownSex → BillLength relationship.

cat("Including covariates clarifies the true effect of the factor.\n\n")

## Including covariates clarifies the true effect of the factor.

ggplot(df, aes(x = Mass, y = BillLength, color = KnownSex)) +
  geom_point(alpha = 0.6, size = 2) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1.2) +
  geom_hline(data = as.data.frame(emm_ancova),
             aes(yintercept = emmean, color = KnownSex),
             linetype = "dashed", linewidth = 0.8) +
  scale_color_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
  labs(
    title    = "Figure 5. ANCOVA: BillLength vs Mass per KnownSex",
    subtitle = "Dashed line = Adjusted Mean; vertical distance between lines = factor effect",
    x = "Mass — Covariate (grams)",
    y = "BillLength — DV (mm)",
    color = "Gender"
  ) +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

Step 7: ANCOVA Conclusion

cat("Conclusion One Way ANCOVA\n\n")

## Conclusion One Way ANCOVA

cat(sprintf("F_hitung (KnownSex | dikontrol Mass) = %.4f\n", F_ancova))

## F_hitung (KnownSex | dikontrol Mass) = 33.6459

cat(sprintf("p-value = %.6f\n\n", p_ancova))

## p-value = 0.000000

if (p_ancova < 0.05) {
  cat("Keputusan: TOLAK H0 (p < 0.05)\n")
  cat("Kesimpulan: Setelah mengontrol berat badan (Mass), terdapat perbedaan\n")
  cat("            rata-rata panjang paruh (BillLength) yang signifikan antara\n")
  cat("            burung Blue Jay jantan (M) dan betina (F).\n")
} else {
  cat("Keputusan: GAGAL TOLAK H0 (p > 0.05)\n")
  cat("Kesimpulan: Setelah mengontrol berat badan (Mass), TIDAK terdapat\n")
  cat("            perbedaan rata-rata panjang paruh yang signifikan antara\n")
  cat("            burung Blue Jay jantan dan betina.\n")
}

## Keputusan: TOLAK H0 (p < 0.05)
## Kesimpulan: Setelah mengontrol berat badan (Mass), terdapat perbedaan
##             rata-rata panjang paruh (BillLength) yang signifikan antara
##             burung Blue Jay jantan (M) dan betina (F).

One Way MANCOVA

Question: After controlling for Mass as a covariate, are the mean vectors (BillWidth, BillDepth, BillLength) still SIMULTANEOUSLY different between males and females?

Variables: - Factor (IV): KnownSex (F vs. M) - Covariate (CV): Mass - Response (DV): BillWidth + BillDepth + BillLength (3 DVs)

Step 1: Fit Model MANCOVA

cat("Fit Model MANCOVA\n")

## Fit Model MANCOVA

cat("Model: cbind(BillWidth, BillDepth, BillLength) ~ Mass + KnownSex\n")

## Model: cbind(BillWidth, BillDepth, BillLength) ~ Mass + KnownSex

cat("PENTING: Covariate (Mass) masuk SEBELUM faktor\n\n")

## PENTING: Covariate (Mass) masuk SEBELUM faktor

dv_matrix     <- as.matrix(df[, c("BillWidth", "BillDepth", "BillLength")])
model_mancova <- lm(dv_matrix ~ Mass + KnownSex, data = df)

cat("--- Koefisien Model (per DV) ---\n")

## --- Koefisien Model (per DV) ---

print(summary(model_mancova))

## Response BillWidth :
## 
## Call:
## lm(formula = BillWidth ~ Mass + KnownSex, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.11125 -0.32492 -0.04305  0.33407  1.74679 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.10908    0.73180   9.714  < 2e-16 ***
## Mass         0.02878    0.01044   2.756  0.00676 ** 
## KnownSexM    0.09631    0.09916   0.971  0.33334    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5128 on 120 degrees of freedom
## Multiple R-squared:  0.0911, Adjusted R-squared:  0.07595 
## F-statistic: 6.014 on 2 and 120 DF,  p-value: 0.003243
## 
## 
## Response BillDepth :
## 
## Call:
## lm(formula = BillDepth ~ Mass + KnownSex, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72523 -0.22165  0.00835  0.20465  0.77759 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.222098   0.436822  14.244  < 2e-16 ***
## Mass        0.025646   0.006232   4.115 7.13e-05 ***
## KnownSexM   0.344594   0.059187   5.822 4.96e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3061 on 120 degrees of freedom
## Multiple R-squared:  0.3946, Adjusted R-squared:  0.3845 
## F-statistic: 39.11 on 2 and 120 DF,  p-value: 8.369e-14
## 
## 
## Response BillLength :
## 
## Call:
## lm(formula = BillLength ~ Mass + KnownSex, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6483 -0.6716 -0.0936  0.6105  4.2429 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 19.96361    1.36357  14.641  < 2e-16 ***
## Mass         0.06046    0.01945   3.108  0.00235 ** 
## KnownSexM    1.07169    0.18476   5.801 5.48e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9556 on 120 degrees of freedom
## Multiple R-squared:  0.3502, Adjusted R-squared:  0.3393 
## F-statistic: 32.33 on 2 and 120 DF,  p-value: 5.865e-12

Step 2: Wilks’ Lambda dan Statistik Multivariat

cat("Statistics Test Multivariate (Wilks' Lambda)\n\n")

## Statistics Test Multivariate (Wilks' Lambda)

tabel_mancova <- Anova(model_mancova, type = "III")
print(tabel_mancova)

## 
## Type III MANOVA Tests: Pillai test statistic
##             Df test stat approx F num Df den Df    Pr(>F)    
## (Intercept)  1   0.75155  118.982      3    118 < 2.2e-16 ***
## Mass         1   0.17086    8.105      3    118 5.925e-05 ***
## KnownSex     1   0.30465   17.233      3    118 2.402e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- Summary of Wilks' Lambda per Group ---\n")

## 
## --- Summary of Wilks' Lambda per Group ---

Step 3: Matriks SSCP (E_Y.X dan H_Y.X)

cat("Matrix SSCP Covariate\n\n")

## Matrix SSCP Covariate

residuals_mancova <- residuals(model_mancova)
E_YX <- t(residuals_mancova) %*% residuals_mancova
colnames(E_YX) <- rownames(E_YX) <- c("BillWidth", "BillDepth", "BillLength")

df_error_m <- nrow(df) - nlevels(df$KnownSex) - 1      # N - k - c
cat(sprintf("--- Matriks E_Y.X (Error SSCP terkoreksi covariate) | df = %d ---\n",
            df_error_m))

## --- Matriks E_Y.X (Error SSCP terkoreksi covariate) | df = 120 ---

print(round(E_YX, 4))

##            BillWidth BillDepth BillLength
## BillWidth    31.5601    2.4927    10.8380
## BillDepth     2.4927   11.2449    10.1785
## BillLength   10.8380   10.1785   109.5731

heplot_res <- heplot(model_mancova,
                     variables = c(1, 3),     # BillWidth vs BillLength
                     main      = "Gambar 6. HE Plot MANCOVA: BillWidth vs BillLength",
                     col       = c("blue", "red"),
                     lwd       = 2)

cat("\nCatatan: heplot_res$E dan $H hanya 2x2 (sesuai variabel yang diplot).\n")

## 
## Catatan: heplot_res$E dan $H hanya 2x2 (sesuai variabel yang diplot).

cat("Matriks 3x3 lengkap dihitung dari residuals model di atas.\n")

## Matriks 3x3 lengkap dihitung dari residuals model di atas.

Step 4: Wilks’ Lambda Manual

cat("Wilks' Lambda Manual for MANCOVA\n\n")

## Wilks' Lambda Manual for MANCOVA

cat("Formula: Λ_A = |E_Y.X| / |E_Y.X + H_A,Y.X|\n\n")

## Formula: Λ_A = |E_Y.X| / |E_Y.X + H_A,Y.X|

model_full    <- lm(dv_matrix ~ Mass + KnownSex, data = df)
model_reduced <- lm(dv_matrix ~ Mass, data = df)
RSS_full    <- t(residuals(model_full))    %*% residuals(model_full)
RSS_reduced <- t(residuals(model_reduced)) %*% residuals(model_reduced)

H_YX_sex <- RSS_reduced - RSS_full
colnames(H_YX_sex) <- rownames(H_YX_sex) <- c("BillWidth", "BillDepth", "BillLength")

cat("--- Matriks H_A,Y.X (Treatment SSCP terkoreksi covariate) ---\n")

## --- Matriks H_A,Y.X (Treatment SSCP terkoreksi covariate) ---

cat(sprintf("df = %d\n", nlevels(df$KnownSex) - 1))

## df = 1

print(round(H_YX_sex, 4))

##            BillWidth BillDepth BillLength
## BillWidth     0.2481    0.8878     2.7610
## BillDepth     0.8878    3.1764     9.8785
## BillLength    2.7610    9.8785    30.7224

cat("\n--- Verifikasi E_Y.X == RSS_full ---\n")

## 
## --- Verifikasi E_Y.X == RSS_full ---

cat("Perbedaan maksimum (harus mendekati 0):",
    round(max(abs(E_YX - RSS_full)), 8), "\n\n")

## Perbedaan maksimum (harus mendekati 0): 0

# Wilks' Lambda
det_EYX        <- det(E_YX)
det_EYX_HYXS   <- det(E_YX + H_YX_sex)
lambda_mancova  <- det_EYX / det_EYX_HYXS

cat(sprintf("|E_Y.X|             = %.6f\n", det_EYX))

## |E_Y.X|             = 34165.089641

cat(sprintf("|E_Y.X + H_A,Y.X|  = %.6f\n", det_EYX_HYXS))

## |E_Y.X + H_A,Y.X|  = 49133.688310

cat(sprintf("Λ_A (Wilks')        = %.6f\n\n", lambda_mancova))

## Λ_A (Wilks')        = 0.695350

p_m   <- 3
v_H_m <- nlevels(df$KnownSex) - 1          # = 1
v_E_m <- nrow(df) - nlevels(df$KnownSex) - 1   # N - k - c (1 covariate)

F_mancova        <- ((1 - lambda_mancova) / lambda_mancova) *
                    ((v_E_m + v_H_m - p_m) / p_m)
df1_m            <- p_m
df2_m            <- v_E_m + v_H_m - p_m
p_mancova_manual <- pf(F_mancova, df1_m, df2_m, lower.tail = FALSE)

cat("--- Transformastion Λ → F (baris: p≥1, v_H=1 dari tabel hal 33) ---\n")

## --- Transformastion Λ → F (baris: p≥1, v_H=1 dari tabel hal 33) ---

cat(sprintf("p   = %d  (jumlah DV)\n",             p_m))

## p   = 3  (jumlah DV)

cat(sprintf("v_H = %d  (df perlakuan = k-1)\n",    v_H_m))

## v_H = 1  (df perlakuan = k-1)

cat(sprintf("v_E = %d  (df error = N-k-c)\n",      v_E_m))

## v_E = 120  (df error = N-k-c)

cat(sprintf("F_hitung = %.4f  (df1=%d, df2=%d)\n", F_mancova, df1_m, df2_m))

## F_hitung = 17.2329  (df1=3, df2=118)

cat(sprintf("p-value  = %.6f\n\n",                 p_mancova_manual))

## p-value  = 0.000000

# Verification with car::Anova
cat("--- Verification with car::Anova() ---\n")

## --- Verification with car::Anova() ---

tabel_car <- Anova(model_full, type = "III")
print(tabel_car)

## 
## Type III MANOVA Tests: Pillai test statistic
##             Df test stat approx F num Df den Df    Pr(>F)    
## (Intercept)  1   0.75155  118.982      3    118 < 2.2e-16 ***
## Mass         1   0.17086    8.105      3    118 5.925e-05 ***
## KnownSex     1   0.30465   17.233      3    118 2.402e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Step 5: Follow-up ANCOVA per DV (Post-hoc)

cat("Follow-Up: ANCOVA Univariate per DV\n")

## Follow-Up: ANCOVA Univariate per DV

cat("(Dilakukan jika MANCOVA signifikan)\n")

## (Dilakukan jika MANCOVA signifikan)

cat(sprintf("Bonferroni Correction: α* = 0.05 / 3 = %.4f\n\n", 0.05 / 3))

## Bonferroni Correction: α* = 0.05 / 3 = 0.0167

alpha_bonf_m <- 0.05 / 3

dv_names <- c("BillWidth", "BillDepth", "BillLength")
results_fu <- data.frame(DV = character(), F_hitung = numeric(),
                         p_value = numeric(), Signifikan = character(),
                         eta2_p = numeric(), stringsAsFactors = FALSE)

for (dv in dv_names) {
  formula_fu <- as.formula(paste(dv, "~ Mass + KnownSex"))
  m_fu <- aov(formula_fu, data = df)
  tbl_fu <- summary(m_fu)[[1]]
  f_fu   <- tbl_fu["KnownSex", "F value"]
  p_fu   <- tbl_fu["KnownSex", "Pr(>F)"]
  ss_f   <- tbl_fu["KnownSex", "Sum Sq"]
  ss_e   <- tbl_fu["Residuals", "Sum Sq"]
  eta2_fu <- ss_f / (ss_f + ss_e)
  sig_fu <- ifelse(p_fu < alpha_bonf_m, "SIGNIFICANT *", "no significant")

  results_fu <- rbind(results_fu,
                      data.frame(DV = dv, F_hitung = round(f_fu, 4),
                                 p_value = round(p_fu, 6),
                                 Signifikan = sig_fu,
                                 eta2_p = round(eta2_fu, 4)))
}
print(results_fu)

##           DV F_hitung  p_value     Signifikan eta2_p
## 1  BillWidth   0.9435 0.333344 no significant 0.0078
## 2  BillDepth  33.8967 0.000000  SIGNIFICANT * 0.2203
## 3 BillLength  33.6459 0.000000  SIGNIFICANT * 0.2190

cat("\nSignificant DVs contribute to multivariate differences.\n")

## 
## Significant DVs contribute to multivariate differences.

Step 6: Adjusted Means per DV

cat("Adjusted Means per DV (after control Mass) ===\n\n")

## Adjusted Means per DV (after control Mass) ===

for (dv in dv_names) {
  formula_emm <- as.formula(paste(dv, "~ Mass + KnownSex"))
  m_emm <- lm(formula_emm, data = df)
  emm_out <- emmeans(m_emm, ~ KnownSex)
  cat(sprintf("--- %s ---\n", dv))
  print(emm_out)
  cat("\n")
}

## --- BillWidth ---
##  KnownSex emmean     SE  df lower.CL upper.CL
##  F          9.17 0.0687 120     9.03      9.3
##  M          9.26 0.0669 120     9.13      9.4
## 
## Confidence level used: 0.95 
## 
## --- BillDepth ---
##  KnownSex emmean     SE  df lower.CL upper.CL
##  F          8.06 0.0410 120     7.98     8.14
##  M          8.40 0.0399 120     8.32     8.48
## 
## Confidence level used: 0.95 
## 
## --- BillLength ---
##  KnownSex emmean    SE  df lower.CL upper.CL
##  F          24.3 0.128 120     24.0     24.5
##  M          25.4 0.125 120     25.1     25.6
## 
## Confidence level used: 0.95

Step 7: MANCOVA Conclusion

cat("ONE-WAY MANCOVA Conclusion\n\n")

## ONE-WAY MANCOVA Conclusion

cat(sprintf("Wilks' Λ = %.5f\n", lambda_mancova))

## Wilks' Λ = 0.69535

cat(sprintf("F_calculated = %.4f (df1=%d, df2=%d)\n", F_mancova, df1_m, df2_m))

## F_calculated = 17.2329 (df1=3, df2=118)

cat(sprintf("p-value = %.6f\n\n", p_mancova_manual))

## p-value = 0.000000

if (p_mancova_manual < 0.05) {
cat("Decision: REJECT H0 (p < 0.05)\n\n")
cat("Conclusion: After controlling for body weight (Mass) as a covariate,\n")
cat("there is a difference in the mean vectors (BillWidth, BillDepth, BillLength)\n")
cat("multivariately significant difference between male Blue Jays (M)\n")
cat("and females (F).\n")
} else {
cat("Decision: FAIL TO REJECT H0 (p > 0.05)\n\n")
cat("Conclusion: After controlling for body weight (Mass) as a covariate,\n")
cat("There is NO multivariately significant difference in mean vectors\n")
cat("between male and female Blue Jays.\n")
}

## Decision: REJECT H0 (p < 0.05)
## 
## Conclusion: After controlling for body weight (Mass) as a covariate,
## there is a difference in the mean vectors (BillWidth, BillDepth, BillLength)
## multivariately significant difference between male Blue Jays (M)
## and females (F).

cat("\nOverall Summary\n")

## 
## Overall Summary

cat("ANOVA: Effect of KnownSex on 1 DV (BillLength), without covariates\n")

## ANOVA: Effect of KnownSex on 1 DV (BillLength), without covariates

cat("MANOVA: Effect of KnownSex on 3 DVs simultaneously, without covariates\n")

## MANOVA: Effect of KnownSex on 3 DVs simultaneously, without covariates

cat("ANCOVA: Effect of KnownSex on 1 DV (BillLength), controlled Mass\n")

## ANCOVA: Effect of KnownSex on 1 DV (BillLength), controlled Mass

cat(" MANCOVA : Effect of KnownSex on 3 DVs simultaneously, controlled for Mass\n")

##  MANCOVA : Effect of KnownSex on 3 DVs simultaneously, controlled for Mass

Modul2_3OVA

Abdullah_AlFirdaus_Nuzula | Elvira Tiara Suci T | M Raffi Fahrezi

2026-04-15