required_packages <- c("Stat2Data", "psych", "heplots", "MVN", "ggplot2")
new_packages <- required_packages[!(required_packages %in% installed.packages()[,"Package"])]
if (length(new_packages)) install.packages(new_packages)
library(Stat2Data)
library(psych)
library(heplots)
## Warning: package 'heplots' was built under R version 4.5.3
## Loading required package: broom
library(MVN)
## Warning: package 'MVN' was built under R version 4.5.3
##
## Attaching package: 'MVN'
## The following object is masked from 'package:psych':
##
## mardia
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(effectsize)
##
## Attaching package: 'effectsize'
## The following object is masked from 'package:psych':
##
## phi
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data(BlueJays)
df <- na.omit(BlueJays[, c("KnownSex", "BillWidth", "BillDepth", "BillLength", "Mass")])
head(df, 5)
## KnownSex BillWidth BillDepth BillLength Mass
## 1 M 9.21 8.26 25.92 73.30
## 2 M 8.76 8.54 24.99 75.10
## 3 M 8.78 8.39 26.07 70.25
## 4 F 9.30 7.78 23.48 65.50
## 5 M 9.84 8.71 25.47 74.90
cat("Total observations:", nrow(df), "\n")
## Total observations: 123
cat("Group distribution (KnownSex):\n")
## Group distribution (KnownSex):
print(table(df$KnownSex))
##
## F M
## 60 63
cat("\nGroup means:\n")
##
## Group means:
print(aggregate(cbind(BillWidth, BillDepth, BillLength, Mass) ~ KnownSex, data = df, FUN = mean))
## KnownSex BillWidth BillDepth BillLength Mass
## 1 F 9.118000 8.012333 24.18400 69.80633
## 2 M 9.312698 8.444603 25.46238 73.22508
dvs <- df[, c("BillWidth", "BillDepth", "BillLength")]
cat("\nASSUMPTION 1: Bartlett Test of Sphericity (DVs must be intercorrelated)\n")
##
## ASSUMPTION 1: Bartlett Test of Sphericity (DVs must be intercorrelated)
result_bartlett <- cortest.bartlett(cor(dvs), n = nrow(df))
print(result_bartlett)
## $chisq
## [1] 56.92631
##
## $p.value
## [1] 2.664665e-12
##
## $df
## [1] 3
if (result_bartlett$p.value < 0.05) {
cat("Conclusion: p < .05, correlation matrix is not an identity matrix, DVs are intercorrelated (MET)\n")
} else {
cat("Conclusion: p > .05, DVs are not sufficiently correlated (NOT MET)\n")
}
## Conclusion: p < .05, correlation matrix is not an identity matrix, DVs are intercorrelated (MET)
cat("\nASSUMPTION 2: Box's M Test (Homogeneity of covariance matrix)\n")
##
## ASSUMPTION 2: Box's M Test (Homogeneity of covariance matrix)
result_boxm <- boxM(dvs, df$KnownSex)
print(result_boxm)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: dvs by df$KnownSex
## Chi-Sq (approx.) = 8.5266, df = 6, p-value = 0.202
if (result_boxm$p.value > 0.05) {
cat("Conclusion: p > .05, covariance matrix are homogeneous across groups (MET)\n")
} else {
cat("Conclusion: p < .05, covariance matrix are not homogeneous (NOT MET)\n")
}
## Conclusion: p > .05, covariance matrix are homogeneous across groups (MET)
cat("\nASSUMPTION 3: Multivariate Normality per group (MVN - Henze-Zirkler)\n")
##
## ASSUMPTION 3: Multivariate Normality per group (MVN - Henze-Zirkler)
dvs_f <- dvs[df$KnownSex == "F", ]
dvs_m <- dvs[df$KnownSex == "M", ]
cat("Female group (n =", nrow(dvs_f), "):\n")
## Female group (n = 60 ):
result_mvn_f <- hz(dvs_f)
print(result_mvn_f)
## Test Statistic p.value Method
## 1 Henze-Zirkler 0.6593133 0.511166 asymptotic
cat("\nMale group (n =", nrow(dvs_m), "):\n")
##
## Male group (n = 63 ):
result_mvn_m <- hz(dvs_m)
print(result_mvn_m)
## Test Statistic p.value Method
## 1 Henze-Zirkler 0.5396273 0.8418019 asymptotic
cat("Conclusion: If p > .05 in both groups, multivariate normality is met\n")
## Conclusion: If p > .05 in both groups, multivariate normality is met
cat("\nASSUMPTION 4: Linearity of covariate (Mass) with each DV\n")
##
## ASSUMPTION 4: Linearity of covariate (Mass) with each DV
lm_bw <- lm(BillWidth ~ Mass, data = df)
lm_bd <- lm(BillDepth ~ Mass, data = df)
lm_bl <- lm(BillLength ~ Mass, data = df)
cat("Mass -> BillWidth:\n")
## Mass -> BillWidth:
print(summary(lm_bw)$coefficients)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.89720006 0.698373703 9.876088 3.146236e-17
## Mass 0.03242884 0.009738223 3.330057 1.151394e-03
cat("R-squared:", round(summary(lm_bw)$r.squared, 4), "\n")
## R-squared: 0.084
cat("\nMass -> BillDepth:\n")
##
## Mass -> BillDepth:
print(summary(lm_bd)$coefficients)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.46400874 0.470240620 11.619602 2.013375e-21
## Mass 0.03870643 0.006557102 5.902977 3.341872e-08
cat("R-squared:", round(summary(lm_bd)$r.squared, 4), "\n")
## R-squared: 0.2236
cat("\nMass -> BillLength:\n")
##
## Mass -> BillLength:
print(summary(lm_bl)$coefficients)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.6059460 1.46669551 12.003818 2.410827e-22
## Mass 0.1010774 0.02045181 4.942221 2.510571e-06
cat("R-squared:", round(summary(lm_bl)$r.squared, 4), "\n")
## R-squared: 0.168
cat("Conclusion: If p(Mass) < .05 in each model, linearity assumption is met\n")
## Conclusion: If p(Mass) < .05 in each model, linearity assumption is met
plot_data <- data.frame(
Mass = rep(df$Mass, 3),
Value = c(df$BillWidth, df$BillDepth, df$BillLength),
DV = factor(rep(c("BillWidth", "BillDepth", "BillLength"), each = nrow(df)))
)
plot_data <- na.omit(plot_data)
p <- ggplot(plot_data, aes(x = Mass, y = Value)) +
geom_point(alpha = 0.5, color = "steelblue") +
geom_smooth(method = "lm", formula = y ~ x, color = "firebrick", se = TRUE) +
facet_wrap(~ DV, scales = "free_y") +
labs(
title = "Assumption 5: Linearity of Mass (Covariate) with each DV",
x = "Mass (grams)",
y = "DV value (mm)"
) +
theme_minimal()
print(p)
Question:
Is the average bill length (BillLength) significantly
different between male (M) and female (F) Blue Jays, without controlling
for covariates?
Variables:
KnownSex (F vs. M)BillLength (a continuous variable)cat("STEP 1: Descriptive Statistics + Boxplot per Group\n\n")
## STEP 1: Descriptive Statistics + Boxplot per Group
desc_anova <- df %>%
group_by(KnownSex) %>%
summarise(
n = n(),
Mean = round(mean(BillLength), 4),
SD = round(sd(BillLength), 4),
SE = round(sd(BillLength) / sqrt(n()), 4),
Min = round(min(BillLength), 4),
Max = round(max(BillLength), 4),
Median = round(median(BillLength), 4),
.groups = "drop"
)
print(desc_anova)
## # A tibble: 2 × 8
## KnownSex n Mean SD SE Min Max Median
## <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 F 60 24.2 0.965 0.125 22.2 26.3 24.0
## 2 M 63 25.5 1.01 0.127 23.6 30 25.5
grand_mean <- mean(df$BillLength)
cat(sprintf("\nGrand Mean (All BillLength) = %.4f mm\n", grand_mean))
##
## Grand Mean (All BillLength) = 24.8388 mm
# Boxplot per Group
ggplot(df, aes(x = KnownSex, y = BillLength, fill = KnownSex)) +
geom_boxplot(alpha = 0.7, outlier.shape = 21, outlier.size = 2) +
geom_jitter(width = 0.15, alpha = 0.4, size = 1.5, color = "gray30") +
stat_summary(fun = mean, geom = "point", shape = 23,
size = 4, fill = "white", color = "black") +
scale_fill_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
labs(
title = "Figure 1. Distribution of BillLength per KnownSex",
subtitle = "White diamond = group mean; dots = individual observations",
x = "KnownSex",
y = "BillLength (mm)",
fill = "Gender"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
cat("STEP 2a: Shapiro-Wilk Normality Test per Group\n\n")
## STEP 2a: Shapiro-Wilk Normality Test per Group
groups <- unique(df$KnownSex)
sw_results <- data.frame(Group = character(), W = numeric(),
p_value = numeric(), Conclusion = character(),
stringsAsFactors = FALSE)
for (g in groups) {
x <- df$BillLength[df$KnownSex == g]
sw <- shapiro.test(x)
kes <- ifelse(sw$p.value > 0.05,
"Normal (MET)", "Un Normal (NOT MET)")
sw_results <- rbind(sw_results,
data.frame(Group = g,
W = round(sw$statistic, 5),
p_value = round(sw$p.value, 5),
Conclusion = kes))
}
print(sw_results)
## Group W p_value Conclusion
## W M 0.91363 0.00030 Un Normal (NOT MET)
## W1 F 0.97791 0.34651 Normal (MET)
cat("\nCriteria: p > 0.05 → normal distribution per group.\n")
##
## Criteria: p > 0.05 → normal distribution per group.
par(mfrow = c(1, 2))
for (g in groups) {
x <- df$BillLength[df$KnownSex == g]
qqnorm(x, main = paste("QQ-Plot BillLength –", g),
col = ifelse(g == "F", "#4E9AF1", "#F17A4E"), pch = 16)
qqline(x, col = "black", lwd = 2)
}
par(mfrow = c(1, 1))
cat("STEP 2b: Uji Homogenity Varians – Levene's Test\n\n")
## STEP 2b: Uji Homogenity Varians – Levene's Test
levene_result <- leveneTest(BillLength ~ KnownSex, data = df, center = mean)
print(levene_result)
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 1 0.6141 0.4348
## 121
p_levene <- levene_result$`Pr(>F)`[1]
cat(sprintf("\np-value Levene = %.5f\n", p_levene))
##
## p-value Levene = 0.43478
if (p_levene > 0.05) {
cat("Conclusion: p > 0.05 → Homogeneous variance between groups (MET)\n")
} else {
cat("Conclusion: p < 0.05 → Variance is NOT homogeneous between groups (NOT MET)\n")
}
## Conclusion: p > 0.05 → Homogeneous variance between groups (MET)
aov(), ANOVA Table, Calculate SS/MS/F
Manuallycat("STEP 3: Fit ANOVA Model & Manually Calculate SS/MS/F\n\n")
## STEP 3: Fit ANOVA Model & Manually Calculate SS/MS/F
model_anova <- aov(BillLength ~ KnownSex, data = df)
cat("ANOVA table from aov()\n")
## ANOVA table from aov()
print(summary(model_anova))
## Df Sum Sq Mean Sq F value Pr(>F)
## KnownSex 1 50.22 50.22 51.33 6.66e-11 ***
## Residuals 121 118.39 0.98
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\nManual Calculation of SS/MS/F\n\n")
##
## Manual Calculation of SS/MS/F
grand_mean_bl <- mean(df$BillLength)
k <- nlevels(df$KnownSex)
N <- nrow(df)
SS_between <- sum(
tapply(df$BillLength, df$KnownSex, function(x)
length(x) * (mean(x) - grand_mean_bl)^2)
)
SS_within <- sum(
tapply(df$BillLength, df$KnownSex, function(x)
sum((x - mean(x))^2))
)
SS_total <- sum((df$BillLength - grand_mean_bl)^2)
# Degrees of freedom
df_between <- k - 1
df_within <- N - k
df_total <- N - 1
# MS
MS_between <- SS_between / df_between
MS_within <- SS_within / df_within
# F hitung
F_hitung_anova <- MS_between / MS_within
# p-value
p_anova <- pf(F_hitung_anova, df_between, df_within, lower.tail = FALSE)
cat(sprintf("Grand Mean = %.4f\n", grand_mean_bl))
## Grand Mean = 24.8388
cat(sprintf("k (amount of group) = %d\n", k))
## k (amount of group) = 2
cat(sprintf("N (total observation) = %d\n\n", N))
## N (total observation) = 123
cat(sprintf("SS_between (between group ) = %.4f (df = %d)\n", SS_between, df_between))
## SS_between (between group ) = 50.2235 (df = 1)
cat(sprintf("SS_within (in group) = %.4f (df = %d)\n", SS_within, df_within))
## SS_within (in group) = 118.3926 (df = 121)
cat(sprintf("SS_total = %.4f (df = %d)\n\n", SS_total, df_total))
## SS_total = 168.6161 (df = 122)
cat(sprintf("MS_between = SS_between / df_between = %.4f / %d = %.4f\n",
SS_between, df_between, MS_between))
## MS_between = SS_between / df_between = 50.2235 / 1 = 50.2235
cat(sprintf("MS_within = SS_within / df_within = %.4f / %d = %.4f\n\n",
SS_within, df_within, MS_within))
## MS_within = SS_within / df_within = 118.3926 / 121 = 0.9785
cat(sprintf("F_hitung = MS_between / MS_within = %.4f / %.4f = %.4f\n",
MS_between, MS_within, F_hitung_anova))
## F_hitung = MS_between / MS_within = 50.2235 / 0.9785 = 51.3296
cat(sprintf("p-value = P(F(%.0f, %.0f) > %.4f) = %.6f\n\n",
df_between, df_within, F_hitung_anova, p_anova))
## p-value = P(F(1, 121) > 51.3296) = 0.000000
tbl_aov <- summary(model_anova)[[1]]
cat("Verifikasi: SS manual vs aov()\n")
## Verifikasi: SS manual vs aov()
cat(sprintf("SS_between manual = %.4f | aov() = %.4f | selisih = %.8f\n",
SS_between,
tbl_aov["KnownSex", "Sum Sq"],
abs(SS_between - tbl_aov["KnownSex", "Sum Sq"])))
## SS_between manual = 50.2235 | aov() = 50.2235 | selisih = 0.00000000
cat(sprintf("SS_within manual = %.4f | aov() = %.4f | selisih = %.8f\n",
SS_within,
tbl_aov["Residuals", "Sum Sq"],
abs(SS_within - tbl_aov["Residuals", "Sum Sq"])))
## SS_within manual = 118.3926 | aov() = 118.3926 | selisih = 0.00000000
cat(sprintf("F_hitung manual = %.4f | aov() = %.4f | selisih = %.8f\n",
F_hitung_anova,
tbl_aov["KnownSex", "F value"],
abs(F_hitung_anova - tbl_aov["KnownSex", "F value"])))
## F_hitung manual = 51.3296 | aov() = 51.3296 | selisih = 0.00000000
effectsize)cat("STEP 4: Effect Size η² (Eta-Squared)\n\n")
## STEP 4: Effect Size η² (Eta-Squared)
eta2_manual <- SS_between / SS_total
cat("Formula: η² = SS_between / SS_total\n")
## Formula: η² = SS_between / SS_total
cat(sprintf("η² (manual) = %.4f / %.4f = %.4f\n\n",
SS_between, SS_total, eta2_manual))
## η² (manual) = 50.2235 / 168.6161 = 0.2979
eta2_pkg <- eta_squared(model_anova, partial = FALSE)
cat("η² dari package effectsize\n")
## η² dari package effectsize
print(eta2_pkg)
## # Effect Size for ANOVA (Type I)
##
## Parameter | Eta2 | 95% CI
## -------------------------------
## KnownSex | 0.30 | [0.19, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
cat("\n Interpretasi η²\n")
##
## Interpretasi η²
cat(" η² < 0.01 = small\n")
## η² < 0.01 = small
cat(" η² < 0.06 = medium\n")
## η² < 0.06 = medium
cat(" η² >= 0.14 = big\n")
## η² >= 0.14 = big
eta2_val <- eta2_pkg$Eta2[1]
interp <- ifelse(eta2_val >= 0.14, "large",
ifelse(eta2_val >= 0.06, "medium",
ifelse(eta2_val >= 0.01, "small",
"very small (negligible)")))
cat(sprintf("\nη² = %.4f = Ukuran efek: %s\n", eta2_val, interp))
##
## η² = 0.2979 = Ukuran efek: large
cat("STEP 5: Conclusion + EMM Visualization\n\n")
## STEP 5: Conclusion + EMM Visualization
cat(sprintf("F_count (%d, %d) = %.4f\n", df_between, df_within, F_hitung_anova))
## F_count (1, 121) = 51.3296
cat(sprintf("p-value = %.6f\n", p_anova))
## p-value = 0.000000
cat(sprintf("η² = %.4f (%s)\n\n", eta2_val, interp))
## η² = 0.2979 (large)
if (p_anova < 0.05) {
cat("Decision : REJECT H0 (p < 0.05)\n")
} else {
cat("Decision : FAIL TO REJECT H0 (p ≥ 0.05)\n")
}
## Decision : REJECT H0 (p < 0.05)
emm_anova <- emmeans(model_anova, ~ KnownSex)
cat("\n Estimated Marginal Means ANOVA \n")
##
## Estimated Marginal Means ANOVA
print(emm_anova)
## KnownSex emmean SE df lower.CL upper.CL
## F 24.2 0.128 121 23.9 24.4
## M 25.5 0.125 121 25.2 25.7
##
## Confidence level used: 0.95
emm_df <- as.data.frame(emm_anova)
# Visualization 1: EMM Plot
ggplot(emm_df, aes(x = KnownSex, y = emmean, color = KnownSex)) +
geom_point(size = 5) +
geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
width = 0.15, linewidth = 1.2) +
scale_color_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
labs(
title = "Figure 2. Estimated Marginal Means BillLength per KnownSex",
subtitle = "Error bar = 95% Confidence Interval",
x = "KnownSex (Jenis Kelamin)",
y = "EMM BillLength (mm)",
color = "Gender"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
# Visualization 2: Boxplot + EMM overlay
ggplot(df, aes(x = KnownSex, y = BillLength, fill = KnownSex)) +
geom_boxplot(alpha = 0.4, outlier.shape = NA) +
geom_jitter(aes(color = KnownSex), width = 0.15,
alpha = 0.5, size = 1.8) +
geom_point(data = emm_df,
aes(x = KnownSex, y = emmean),
size = 5, shape = 23,
fill = "white", color = "black", stroke = 1.5) +
geom_errorbar(data = emm_df,
aes(x = KnownSex, y = emmean,
ymin = lower.CL, ymax = upper.CL),
width = 0.12, linewidth = 1.2, color = "black",
inherit.aes = FALSE) +
scale_fill_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
scale_color_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
labs(
title = "Figure 3. Distribusi BillLength + EMM per KnownSex",
subtitle = "Berlian hitam = EMM; error bar = 95% CI",
x = "KnownSex (Jenis Kelamin)",
y = "BillLength (mm)"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
Question:
Are the mean vectors of bill measurements (BillWidth,
BillDepth, BillLength)
simultaneously different between male (M) and female
(F) Blue Jays, without controlling for any covariate?
Variables:
KnownSex (F vs. M)BillWidth, BillDepth,
BillLength (3 continuous variables)cat("STEP 1: Descriptive Statistics per Group (3 DVs)\n\n")
## STEP 1: Descriptive Statistics per Group (3 DVs)
dv_names_manova <- c("BillWidth", "BillDepth", "BillLength")
for (dv in dv_names_manova) {
desc <- df %>%
group_by(KnownSex) %>%
summarise(
n = n(),
Mean = round(mean(.data[[dv]]), 4),
SD = round(sd(.data[[dv]]), 4),
SE = round(sd(.data[[dv]]) / sqrt(n()), 4),
Min = round(min(.data[[dv]]), 4),
Max = round(max(.data[[dv]]), 4),
Median = round(median(.data[[dv]]), 4),
.groups = "drop"
)
cat(sprintf("--- %s ---\n", dv))
print(desc)
grand_m <- mean(df[[dv]])
cat(sprintf("Grand Mean = %.4f mm\n\n", grand_m))
}
## --- BillWidth ---
## # A tibble: 2 × 8
## KnownSex n Mean SD SE Min Max Median
## <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 F 60 9.12 0.562 0.0726 8 10.8 9.18
## 2 M 63 9.31 0.490 0.0617 8.1 10.3 9.28
## Grand Mean = 9.2177 mm
##
## --- BillDepth ---
## # A tibble: 2 × 8
## KnownSex n Mean SD SE Min Max Median
## <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 F 60 8.01 0.372 0.048 7.21 8.8 8.01
## 2 M 63 8.44 0.274 0.0346 7.89 9 8.43
## Grand Mean = 8.2337 mm
##
## --- BillLength ---
## # A tibble: 2 × 8
## KnownSex n Mean SD SE Min Max Median
## <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 F 60 24.2 0.965 0.125 22.2 26.3 24.0
## 2 M 63 25.5 1.01 0.127 23.6 30 25.5
## Grand Mean = 24.8388 mm
# Side-by-side boxplots for all 3 DVs
plot_data_manova <- data.frame(
Sex = rep(df$KnownSex, 3),
Value = c(df$BillWidth, df$BillDepth, df$BillLength),
DV = factor(rep(dv_names_manova, each = nrow(df)),
levels = dv_names_manova)
)
ggplot(plot_data_manova, aes(x = Sex, y = Value, fill = Sex)) +
geom_boxplot(alpha = 0.7, outlier.shape = 21, outlier.size = 2) +
geom_jitter(width = 0.15, alpha = 0.35, size = 1.2, color = "gray30") +
stat_summary(fun = mean, geom = "point", shape = 23,
size = 3.5, fill = "white", color = "black") +
facet_wrap(~ DV, scales = "free_y") +
scale_fill_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
labs(
title = "Figure A. Distribution of 3 DVs per KnownSex (MANOVA)",
subtitle = "White diamond = group mean",
x = "KnownSex",
y = "Value (mm)",
fill = "Gender"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
cat("STEP 2a: Multivariate Normality – Henze-Zirkler Test per Group\n\n")
## STEP 2a: Multivariate Normality – Henze-Zirkler Test per Group
dvs_manova <- df[, dv_names_manova]
dvs_F <- dvs_manova[df$KnownSex == "F", ]
dvs_M <- dvs_manova[df$KnownSex == "M", ]
cat(sprintf("Female group (n = %d):\n", nrow(dvs_F)))
## Female group (n = 60):
result_mvn_F <- hz(dvs_F)
print(result_mvn_F)
## Test Statistic p.value Method
## 1 Henze-Zirkler 0.6593133 0.511166 asymptotic
cat(sprintf("\nMale group (n = %d):\n", nrow(dvs_M)))
##
## Male group (n = 63):
result_mvn_M <- hz(dvs_M)
print(result_mvn_M)
## Test Statistic p.value Method
## 1 Henze-Zirkler 0.5396273 0.8418019 asymptotic
cat("\nCriteria: p > 0.05 in BOTH groups → multivariate normality is met (MET)\n")
##
## Criteria: p > 0.05 in BOTH groups → multivariate normality is met (MET)
cat("STEP 2b: Homogeneity of Covariance Matrices – Box's M Test\n\n")
## STEP 2b: Homogeneity of Covariance Matrices – Box's M Test
boxm_manova <- boxM(dvs_manova, df$KnownSex)
print(boxm_manova)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: dvs_manova by df$KnownSex
## Chi-Sq (approx.) = 8.5266, df = 6, p-value = 0.202
if (boxm_manova$p.value > 0.05) {
cat("Conclusion: p > 0.05 → Covariance matrices are homogeneous across groups (MET)\n")
} else {
cat("Conclusion: p < 0.05 → Covariance matrices are NOT homogeneous (NOT MET)\n")
cat("Note: Box's M is sensitive to normality violations; proceed with caution.\n")
}
## Conclusion: p > 0.05 → Covariance matrices are homogeneous across groups (MET)
cat("STEP 3: Fit MANOVA Model\n")
## STEP 3: Fit MANOVA Model
cat("Model: cbind(BillWidth, BillDepth, BillLength) ~ KnownSex\n\n")
## Model: cbind(BillWidth, BillDepth, BillLength) ~ KnownSex
dv_matrix_manova <- as.matrix(df[, dv_names_manova])
model_manova <- manova(dv_matrix_manova ~ KnownSex, data = df)
cat("--- MANOVA Summary (Wilks' Lambda) ---\n")
## --- MANOVA Summary (Wilks' Lambda) ---
print(summary(model_manova, test = "Wilks"))
## Df Wilks approx F num Df den Df Pr(>F)
## KnownSex 1 0.6085 25.521 3 119 8.083e-13 ***
## Residuals 121
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- MANOVA Summary (Pillai) ---\n")
##
## --- MANOVA Summary (Pillai) ---
print(summary(model_manova, test = "Pillai"))
## Df Pillai approx F num Df den Df Pr(>F)
## KnownSex 1 0.3915 25.521 3 119 8.083e-13 ***
## Residuals 121
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- MANOVA Summary (Hotelling-Lawley) ---\n")
##
## --- MANOVA Summary (Hotelling-Lawley) ---
print(summary(model_manova, test = "Hotelling-Lawley"))
## Df Hotelling-Lawley approx F num Df den Df Pr(>F)
## KnownSex 1 0.6434 25.521 3 119 8.083e-13 ***
## Residuals 121
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- MANOVA Summary (Roy) ---\n")
##
## --- MANOVA Summary (Roy) ---
print(summary(model_manova, test = "Roy"))
## Df Roy approx F num Df den Df Pr(>F)
## KnownSex 1 0.6434 25.521 3 119 8.083e-13 ***
## Residuals 121
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("STEP 4: Wilks' Lambda – Manual Calculation\n")
## STEP 4: Wilks' Lambda – Manual Calculation
cat("Formula: Λ = |E| / |E + H|\n\n")
## Formula: Λ = |E| / |E + H|
# --- Grand mean vector ---
grand_mean_vec <- colMeans(dv_matrix_manova)
N_m <- nrow(df)
k_m <- nlevels(df$KnownSex) # = 2
# --- Total SSCP (T) ---
T_mat <- t(dv_matrix_manova - matrix(grand_mean_vec, nrow = N_m, ncol = 3, byrow = TRUE)) %*%
(dv_matrix_manova - matrix(grand_mean_vec, nrow = N_m, ncol = 3, byrow = TRUE))
# --- Within-group (Error) SSCP (E) ---
E_mat <- matrix(0, 3, 3)
for (g in levels(df$KnownSex)) {
X_g <- dv_matrix_manova[df$KnownSex == g, ]
mu_g <- colMeans(X_g)
diff_g <- X_g - matrix(mu_g, nrow = nrow(X_g), ncol = 3, byrow = TRUE)
E_mat <- E_mat + t(diff_g) %*% diff_g
}
colnames(E_mat) <- rownames(E_mat) <- dv_names_manova
# --- Between-group (Hypothesis) SSCP (H) ---
H_mat <- T_mat - E_mat
colnames(H_mat) <- rownames(H_mat) <- dv_names_manova
df_H <- k_m - 1 # between-group df
df_E <- N_m - k_m # within-group df
cat("--- Within-group SSCP Matrix (E) | df_E =", df_E, "---\n")
## --- Within-group SSCP Matrix (E) | df_E = 121 ---
print(round(E_mat, 4))
## BillWidth BillDepth BillLength
## BillWidth 33.5584 4.2735 15.0361
## BillDepth 4.2735 12.8318 13.9195
## BillLength 15.0361 13.9195 118.3926
cat("\n--- Between-group SSCP Matrix (H) | df_H =", df_H, "---\n")
##
## --- Between-group SSCP Matrix (H) | df_H = 1 ---
print(round(H_mat, 4))
## BillWidth BillDepth BillLength
## BillWidth 1.1650 2.5864 7.6491
## BillDepth 2.5864 5.7424 16.9825
## BillLength 7.6491 16.9825 50.2235
# --- Wilks' Lambda ---
det_E <- det(E_mat)
det_EplusH <- det(E_mat + H_mat)
lambda_man <- det_E / det_EplusH
cat(sprintf("\n|E| = %.6f\n", det_E))
##
## |E| = 41205.269858
cat(sprintf("|E + H| = %.6f\n", det_EplusH))
## |E + H| = 67716.639330
cat(sprintf("Λ (Wilks') = %.6f\n\n", lambda_man))
## Λ (Wilks') = 0.608495
# --- F approximation (p = 3 DVs, v_H = k-1 = 1, exact formula) ---
p_dv <- 3
v_H <- df_H
v_E <- df_E
F_man <- ((1 - lambda_man) / lambda_man) * ((v_E - p_dv + v_H) / p_dv)
df1_man <- p_dv
df2_man <- v_E - p_dv + v_H
p_man <- pf(F_man, df1_man, df2_man, lower.tail = FALSE)
cat("--- F-Approximation of Wilks' Λ ---\n")
## --- F-Approximation of Wilks' Λ ---
cat(sprintf("p (# DVs) = %d\n", p_dv))
## p (# DVs) = 3
cat(sprintf("v_H (df_H) = %d\n", v_H))
## v_H (df_H) = 1
cat(sprintf("v_E (df_E) = %d\n", v_E))
## v_E (df_E) = 121
cat(sprintf("F_hitung = %.4f (df1 = %d, df2 = %d)\n", F_man, df1_man, df2_man))
## F_hitung = 25.5214 (df1 = 3, df2 = 119)
cat(sprintf("p-value = %.6f\n\n", p_man))
## p-value = 0.000000
cat("STEP 5: Effect Size – Eta-Squared (η²) from SSCP Matrices\n\n")
## STEP 5: Effect Size – Eta-Squared (η²) from SSCP Matrices
# Pillai's trace as effect size
pillai_res <- summary(model_manova, test = "Pillai")$stats
pillai_val <- pillai_res["KnownSex", "Pillai"]
cat(sprintf("Pillai's Trace (V) = %.4f\n", pillai_val))
## Pillai's Trace (V) = 0.3915
# Multivariate η² from Wilks' Λ
eta2_mv <- 1 - lambda_man
cat(sprintf("η²_mv = 1 − Λ = 1 − %.6f = %.4f\n\n", lambda_man, eta2_mv))
## η²_mv = 1 − Λ = 1 − 0.608495 = 0.3915
interp_mv <- ifelse(eta2_mv >= 0.14, "large",
ifelse(eta2_mv >= 0.06, "medium",
ifelse(eta2_mv >= 0.01, "small",
"very small (negligible)")))
cat(sprintf("Interpretation: η²_mv = %.4f → effect size is %s\n", eta2_mv, interp_mv))
## Interpretation: η²_mv = 0.3915 → effect size is large
cat("\nReference:\n η² < 0.01 = small\n η² < 0.06 = medium\n η² ≥ 0.14 = large\n")
##
## Reference:
## η² < 0.01 = small
## η² < 0.06 = medium
## η² ≥ 0.14 = large
cat("STEP 6: HE Plot – Hypothesis vs Error ellipsoids\n\n")
## STEP 6: HE Plot – Hypothesis vs Error ellipsoids
heplot(model_manova,
variables = c(1, 3), # BillWidth vs BillLength
main = "Figure B. HE Plot MANOVA: BillWidth vs BillLength by KnownSex",
col = c("blue", "red"),
lwd = 2)
heplot(model_manova,
variables = c(1, 2), # BillWidth vs BillDepth
main = "Figure C. HE Plot MANOVA: BillWidth vs BillDepth by KnownSex",
col = c("blue", "red"),
lwd = 2)
cat("STEP 7: Follow-Up Univariate ANOVAs per DV\n")
## STEP 7: Follow-Up Univariate ANOVAs per DV
cat("(Performed because MANOVA is significant)\n")
## (Performed because MANOVA is significant)
cat(sprintf("Bonferroni Correction: α* = 0.05 / 3 = %.4f\n\n", 0.05 / 3))
## Bonferroni Correction: α* = 0.05 / 3 = 0.0167
alpha_bonf_mv <- 0.05 / 3
cat("--- Univariate ANOVA Summary per DV ---\n")
## --- Univariate ANOVA Summary per DV ---
print(summary.aov(model_manova))
## Response BillWidth :
## Df Sum Sq Mean Sq F value Pr(>F)
## KnownSex 1 1.165 1.16496 4.2004 0.04257 *
## Residuals 121 33.558 0.27734
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response BillDepth :
## Df Sum Sq Mean Sq F value Pr(>F)
## KnownSex 1 5.7424 5.7424 54.149 2.451e-11 ***
## Residuals 121 12.8318 0.1060
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response BillLength :
## Df Sum Sq Mean Sq F value Pr(>F)
## KnownSex 1 50.224 50.224 51.33 6.659e-11 ***
## Residuals 121 118.393 0.978
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
results_fu_mv <- data.frame(
DV = character(),
F_hitung = numeric(),
p_value = numeric(),
Signifikan = character(),
eta2 = numeric(),
stringsAsFactors = FALSE
)
for (dv in dv_names_manova) {
m_uni <- aov(as.formula(paste(dv, "~ KnownSex")), data = df)
tbl <- summary(m_uni)[[1]]
f_u <- tbl["KnownSex", "F value"]
p_u <- tbl["KnownSex", "Pr(>F)"]
ss_b <- tbl["KnownSex", "Sum Sq"]
ss_w <- tbl["Residuals", "Sum Sq"]
eta2_u <- ss_b / (ss_b + ss_w)
sig_u <- ifelse(p_u < alpha_bonf_mv, "SIGNIFICANT *", "not significant")
results_fu_mv <- rbind(results_fu_mv,
data.frame(DV = dv,
F_hitung = round(f_u, 4),
p_value = round(p_u, 6),
Signifikan = sig_u,
eta2 = round(eta2_u, 4)))
}
print(results_fu_mv)
## DV F_hitung p_value Signifikan eta2
## 1 BillWidth 4.2004 0.042575 not significant 0.0335
## 2 BillDepth 54.1493 0.000000 SIGNIFICANT * 0.3092
## 3 BillLength 51.3296 0.000000 SIGNIFICANT * 0.2979
cat("\nSignificant DVs (p < α*) drive the multivariate group difference.\n")
##
## Significant DVs (p < α*) drive the multivariate group difference.
cat("STEP 8: Estimated Marginal Means per DV\n\n")
## STEP 8: Estimated Marginal Means per DV
for (dv in dv_names_manova) {
m_emm_mv <- lm(as.formula(paste(dv, "~ KnownSex")), data = df)
emm_mv <- emmeans(m_emm_mv, ~ KnownSex)
cat(sprintf("--- %s ---\n", dv))
print(emm_mv)
cat("\n")
}
## --- BillWidth ---
## KnownSex emmean SE df lower.CL upper.CL
## F 9.12 0.0680 121 8.98 9.25
## M 9.31 0.0663 121 9.18 9.44
##
## Confidence level used: 0.95
##
## --- BillDepth ---
## KnownSex emmean SE df lower.CL upper.CL
## F 8.01 0.042 121 7.93 8.10
## M 8.44 0.041 121 8.36 8.53
##
## Confidence level used: 0.95
##
## --- BillLength ---
## KnownSex emmean SE df lower.CL upper.CL
## F 24.2 0.128 121 23.9 24.4
## M 25.5 0.125 121 25.2 25.7
##
## Confidence level used: 0.95
cat("ONE-WAY MANOVA Conclusion\n\n")
## ONE-WAY MANOVA Conclusion
cat(sprintf("Wilks' Λ = %.5f\n", lambda_man))
## Wilks' Λ = 0.60850
cat(sprintf("F_calculated = %.4f (df1 = %d, df2 = %d)\n", F_man, df1_man, df2_man))
## F_calculated = 25.5214 (df1 = 3, df2 = 119)
cat(sprintf("p-value = %.6f\n\n", p_man))
## p-value = 0.000000
cat(sprintf("η²_mv = %.4f (%s effect)\n\n", eta2_mv, interp_mv))
## η²_mv = 0.3915 (large effect)
if (p_man < 0.05) {
cat("Decision: REJECT H0 (p < 0.05)\n\n")
cat("Conclusion: There is a statistically significant multivariate difference\n")
cat("in the combined mean vector of (BillWidth, BillDepth, BillLength)\n")
cat("between male (M) and female (F) Blue Jays, WITHOUT controlling for\n")
cat("any covariate (Wilks' Λ = ", round(lambda_man, 5), ", p < .05).\n")
cat("Follow-up univariate ANOVAs (Bonferroni-corrected α* = 0.0167) reveal\n")
cat("which specific DVs contribute to the group separation.\n")
} else {
cat("Decision: FAIL TO REJECT H0 (p ≥ 0.05)\n\n")
cat("Conclusion: There is NO statistically significant multivariate difference\n")
cat("in the combined mean vector of (BillWidth, BillDepth, BillLength)\n")
cat("between male and female Blue Jays (Wilks' Λ = ", round(lambda_man, 5), ", p ≥ .05).\n")
}
## Decision: REJECT H0 (p < 0.05)
##
## Conclusion: There is a statistically significant multivariate difference
## in the combined mean vector of (BillWidth, BillDepth, BillLength)
## between male (M) and female (F) Blue Jays, WITHOUT controlling for
## any covariate (Wilks' Λ = 0.6085 , p < .05).
## Follow-up univariate ANOVAs (Bonferroni-corrected α* = 0.0167) reveal
## which specific DVs contribute to the group separation.
Question: After controlling for Mass (body weight) as a covariate, does the average BillLength still differ between males and females?
Variables: - Factor (IV): Known Sex (F vs. M) - Covariate (CV): Mass - Response (DV): Bill Length (one continuous variable)
cat("=== ANCOVA Assumptions : Homogenity of Regression Slopes\n")
## === ANCOVA Assumptions : Homogenity of Regression Slopes
cat("Test: Is the slope of the BillLength ~ Mass regression the same in both groups?\n")
## Test: Is the slope of the BillLength ~ Mass regression the same in both groups?
cat("Interaction Models : BillLength ~ Mass * KnownSex\n\n")
## Interaction Models : BillLength ~ Mass * KnownSex
model_interaction <- aov(BillLength ~ Mass * KnownSex, data = df)
print(summary(model_interaction))
## Df Sum Sq Mean Sq F value Pr(>F)
## Mass 1 28.32 28.321 30.782 1.77e-07 ***
## KnownSex 1 30.72 30.722 33.393 6.16e-08 ***
## Mass:KnownSex 1 0.09 0.089 0.097 0.756
## Residuals 119 109.48 0.920
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p_interaction <- summary(model_interaction)[[1]]["Mass:KnownSex", "Pr(>F)"]
cat(sprintf("\np-value interaction (Mass:KnownSex) = %.4f\n", p_interaction))
##
## p-value interaction (Mass:KnownSex) = 0.7564
if (p_interaction > 0.05) {
cat("Conclusion: p > 0.05 --> Slope is homogeneous. ANCOVA can be continued.\n\n")
} else {
cat("Conclusion: p < 0.05 --> Slope is NOT homogeneous.\n\n")
}
## Conclusion: p > 0.05 --> Slope is homogeneous. ANCOVA can be continued.
ggplot(df, aes(x = Mass, y = BillLength, color = KnownSex)) +
geom_point(alpha = 0.6, size = 2) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1.2) +
scale_color_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
labs(
title = "Figure 4. Homogeneity of Slopes Test : BillLength vs Mass per KnownSex",
subtitle = "Paralel Line = slopes homogen",
x = "Mass — Covariate (gram)",
y = "BillLength — DV (mm)",
color = "Gender"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
cat("Fit Model ANCOVA\n")
## Fit Model ANCOVA
cat("Model: BillLength ~ Mass + KnownSex\n")
## Model: BillLength ~ Mass + KnownSex
model_ancova <- aov(BillLength ~ Mass + KnownSex, data = df)
print(summary(model_ancova))
## Df Sum Sq Mean Sq F value Pr(>F)
## Mass 1 28.32 28.321 31.02 1.59e-07 ***
## KnownSex 1 30.72 30.722 33.65 5.48e-08 ***
## Residuals 120 109.57 0.913
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("Table ANCOVA\n")
## Table ANCOVA
model_ancova_lm <- lm(BillLength ~ Mass + KnownSex, data = df,
contrasts = list(KnownSex = contr.sum))
tabel_ancova <- Anova(model_ancova_lm, type = "III")
print(tabel_ancova)
## Anova Table (Type III tests)
##
## Response: BillLength
## Sum Sq Df F value Pr(>F)
## (Intercept) 197.489 1 216.2822 < 2.2e-16 ***
## Mass 8.819 1 9.6587 0.002353 **
## KnownSex 30.722 1 33.6459 5.482e-08 ***
## Residuals 109.573 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
SS_cov <- tabel_ancova["Mass", "Sum Sq"]
SS_factor <- tabel_ancova["KnownSex", "Sum Sq"]
SS_error <- tabel_ancova["Residuals", "Sum Sq"]
df_cov <- tabel_ancova["Mass", "Df"]
df_factor <- tabel_ancova["KnownSex", "Df"]
df_error <- tabel_ancova["Residuals", "Df"]
MS_factor <- SS_factor / df_factor
MS_error <- SS_error / df_error
F_ancova <- MS_factor / MS_error
p_ancova <- tabel_ancova["KnownSex", "Pr(>F)"]
cat("\n--- Summary of ANCOVA Components ---\n")
##
## --- Summary of ANCOVA Components ---
cat(sprintf("SS_covariate (Mass) = %.4f (df = %d)\n", SS_cov, df_cov))
## SS_covariate (Mass) = 8.8194 (df = 1)
cat(sprintf("SS_factor (KnownSex) = %.4f (df = %d)\n", SS_factor, df_factor))
## SS_factor (KnownSex) = 30.7224 (df = 1)
cat(sprintf("SS_error = %.4f (df = %d)\n", SS_error, df_error))
## SS_error = 109.5731 (df = 120)
cat(sprintf("MS_faktor = %.4f\n", MS_factor))
## MS_faktor = 30.7224
cat(sprintf("MS_error = %.4f\n", MS_error))
## MS_error = 0.9131
cat(sprintf("F_hitung = %.4f\n", F_ancova))
## F_hitung = 33.6459
cat(sprintf("p-value = %.6f\n", p_ancova))
## p-value = 0.000000
cat("Adjustment Means (Estimated Marginal Means)\n")
## Adjustment Means (Estimated Marginal Means)
cat("Average of BillLength per Group after Mass Controlled\n\n")
## Average of BillLength per Group after Mass Controlled
emm_ancova <- emmeans(model_ancova, ~ KnownSex)
print(emm_ancova)
## KnownSex emmean SE df lower.CL upper.CL
## F 24.3 0.128 120 24.0 24.5
## M 25.4 0.125 120 25.1 25.6
##
## Confidence level used: 0.95
cat("\nUnadjusted vs Adjusted Means\n")
##
## Unadjusted vs Adjusted Means
unadj <- aggregate(BillLength ~ KnownSex, data = df, FUN = mean)
adj <- as.data.frame(emm_ancova)[, c("KnownSex", "emmean")]
compare_means <- merge(unadj, adj, by = "KnownSex")
colnames(compare_means) <- c("KnownSex", "Unadjusted_Mean", "Adjusted_Mean")
compare_means$Selisih <- compare_means$Adjusted_Mean - compare_means$Unadjusted_Mean
compare_means[, 2:4] <- round(compare_means[, 2:4], 4)
print(compare_means)
## KnownSex Unadjusted_Mean Adjusted_Mean Selisih
## 1 F 24.1840 24.2899 0.1059
## 2 M 25.4624 25.3616 -0.1008
cat("\nDifference = shift due to control covariate Mass.\n")
##
## Difference = shift due to control covariate Mass.
cat("Effect Size : Partial ETA Squared (η²_p)\n\n")
## Effect Size : Partial ETA Squared (η²_p)
eta2_ancova <- eta_squared(model_ancova_lm, partial = TRUE)
print(eta2_ancova)
## # Effect Size for ANOVA (Type I)
##
## Parameter | Eta2 (partial) | 95% CI
## -----------------------------------------
## Mass | 0.21 | [0.11, 1.00]
## KnownSex | 0.22 | [0.12, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
eta2_p_manual <- SS_factor / (SS_factor + SS_error)
cat(sprintf("\nPartial η² manual: η²_p = SS_factor / (SS_factor + SS_error)\n"))
##
## Partial η² manual: η²_p = SS_factor / (SS_factor + SS_error)
cat(sprintf(" = %.4f / (%.4f + %.4f) = %.4f\n",
SS_factor, SS_factor, SS_error, eta2_p_manual))
## = 30.7224 / (30.7224 + 109.5731) = 0.2190
cat("\nInterpretation:\n")
##
## Interpretation:
cat(" η²_p ≈ 0.01 = small effect\n")
## η²_p ≈ 0.01 = small effect
cat(" η²_p ≈ 0.06 = medium effect\n")
## η²_p ≈ 0.06 = medium effect
cat(" η²_p ≈ 0.14 = big effect\n")
## η²_p ≈ 0.14 = big effect
cat(sprintf(" η²_p = %.4f --> ", eta2_p_manual))
## η²_p = 0.2190 -->
if (eta2_p_manual >= 0.14) cat("Big Effect\n") else
if (eta2_p_manual >= 0.06) cat("Medium Effect\n") else
cat("Small Effect\n")
## Big Effect
cat("Comparison ANOVA & ANCOVA\n\n")
## Comparison ANOVA & ANCOVA
model_anova_ulang <- aov(BillLength ~ KnownSex, data = df)
ss_anova <- summary(model_anova_ulang)[[1]]
F_anova_ulang <- ss_anova["KnownSex", "F value"]
p_anova_ulang <- ss_anova["KnownSex", "Pr(>F)"]
cat("Model ANOVA (without covariate):\n")
## Model ANOVA (without covariate):
cat(sprintf(" F = %.4f | p = %.6f\n\n", F_anova_ulang, p_anova_ulang))
## F = 51.3296 | p = 0.000000
cat("Model ANCOVA (with covariate Mass):\n")
## Model ANCOVA (with covariate Mass):
cat(sprintf(" F = %.4f | p = %.6f\n\n", F_ancova, p_ancova))
## F = 33.6459 | p = 0.000000
cat("Interpretation:\n")
## Interpretation:
cat("The F difference indicates how much Mass (covariate)\n")
## The F difference indicates how much Mass (covariate)
cat(" affects the KnownSex → BillLength relationship.\n")
## affects the KnownSex → BillLength relationship.
cat("Including covariates clarifies the true effect of the factor.\n\n")
## Including covariates clarifies the true effect of the factor.
ggplot(df, aes(x = Mass, y = BillLength, color = KnownSex)) +
geom_point(alpha = 0.6, size = 2) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1.2) +
geom_hline(data = as.data.frame(emm_ancova),
aes(yintercept = emmean, color = KnownSex),
linetype = "dashed", linewidth = 0.8) +
scale_color_manual(values = c("F" = "#4E9AF1", "M" = "#F17A4E")) +
labs(
title = "Figure 5. ANCOVA: BillLength vs Mass per KnownSex",
subtitle = "Dashed line = Adjusted Mean; vertical distance between lines = factor effect",
x = "Mass — Covariate (grams)",
y = "BillLength — DV (mm)",
color = "Gender"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
cat("Conclusion One Way ANCOVA\n\n")
## Conclusion One Way ANCOVA
cat(sprintf("F_hitung (KnownSex | dikontrol Mass) = %.4f\n", F_ancova))
## F_hitung (KnownSex | dikontrol Mass) = 33.6459
cat(sprintf("p-value = %.6f\n\n", p_ancova))
## p-value = 0.000000
if (p_ancova < 0.05) {
cat("Keputusan: TOLAK H0 (p < 0.05)\n")
cat("Kesimpulan: Setelah mengontrol berat badan (Mass), terdapat perbedaan\n")
cat(" rata-rata panjang paruh (BillLength) yang signifikan antara\n")
cat(" burung Blue Jay jantan (M) dan betina (F).\n")
} else {
cat("Keputusan: GAGAL TOLAK H0 (p > 0.05)\n")
cat("Kesimpulan: Setelah mengontrol berat badan (Mass), TIDAK terdapat\n")
cat(" perbedaan rata-rata panjang paruh yang signifikan antara\n")
cat(" burung Blue Jay jantan dan betina.\n")
}
## Keputusan: TOLAK H0 (p < 0.05)
## Kesimpulan: Setelah mengontrol berat badan (Mass), terdapat perbedaan
## rata-rata panjang paruh (BillLength) yang signifikan antara
## burung Blue Jay jantan (M) dan betina (F).
Question: After controlling for Mass as a covariate, are the mean vectors (BillWidth, BillDepth, BillLength) still SIMULTANEOUSLY different between males and females?
Variables: - Factor (IV): KnownSex (F vs. M) - Covariate (CV): Mass - Response (DV): BillWidth + BillDepth + BillLength (3 DVs)
cat("Fit Model MANCOVA\n")
## Fit Model MANCOVA
cat("Model: cbind(BillWidth, BillDepth, BillLength) ~ Mass + KnownSex\n")
## Model: cbind(BillWidth, BillDepth, BillLength) ~ Mass + KnownSex
cat("PENTING: Covariate (Mass) masuk SEBELUM faktor\n\n")
## PENTING: Covariate (Mass) masuk SEBELUM faktor
dv_matrix <- as.matrix(df[, c("BillWidth", "BillDepth", "BillLength")])
model_mancova <- lm(dv_matrix ~ Mass + KnownSex, data = df)
cat("--- Koefisien Model (per DV) ---\n")
## --- Koefisien Model (per DV) ---
print(summary(model_mancova))
## Response BillWidth :
##
## Call:
## lm(formula = BillWidth ~ Mass + KnownSex, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.11125 -0.32492 -0.04305 0.33407 1.74679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.10908 0.73180 9.714 < 2e-16 ***
## Mass 0.02878 0.01044 2.756 0.00676 **
## KnownSexM 0.09631 0.09916 0.971 0.33334
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5128 on 120 degrees of freedom
## Multiple R-squared: 0.0911, Adjusted R-squared: 0.07595
## F-statistic: 6.014 on 2 and 120 DF, p-value: 0.003243
##
##
## Response BillDepth :
##
## Call:
## lm(formula = BillDepth ~ Mass + KnownSex, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72523 -0.22165 0.00835 0.20465 0.77759
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.222098 0.436822 14.244 < 2e-16 ***
## Mass 0.025646 0.006232 4.115 7.13e-05 ***
## KnownSexM 0.344594 0.059187 5.822 4.96e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3061 on 120 degrees of freedom
## Multiple R-squared: 0.3946, Adjusted R-squared: 0.3845
## F-statistic: 39.11 on 2 and 120 DF, p-value: 8.369e-14
##
##
## Response BillLength :
##
## Call:
## lm(formula = BillLength ~ Mass + KnownSex, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6483 -0.6716 -0.0936 0.6105 4.2429
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.96361 1.36357 14.641 < 2e-16 ***
## Mass 0.06046 0.01945 3.108 0.00235 **
## KnownSexM 1.07169 0.18476 5.801 5.48e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9556 on 120 degrees of freedom
## Multiple R-squared: 0.3502, Adjusted R-squared: 0.3393
## F-statistic: 32.33 on 2 and 120 DF, p-value: 5.865e-12
cat("Statistics Test Multivariate (Wilks' Lambda)\n\n")
## Statistics Test Multivariate (Wilks' Lambda)
tabel_mancova <- Anova(model_mancova, type = "III")
print(tabel_mancova)
##
## Type III MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## (Intercept) 1 0.75155 118.982 3 118 < 2.2e-16 ***
## Mass 1 0.17086 8.105 3 118 5.925e-05 ***
## KnownSex 1 0.30465 17.233 3 118 2.402e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Summary of Wilks' Lambda per Group ---\n")
##
## --- Summary of Wilks' Lambda per Group ---
cat("Matrix SSCP Covariate\n\n")
## Matrix SSCP Covariate
residuals_mancova <- residuals(model_mancova)
E_YX <- t(residuals_mancova) %*% residuals_mancova
colnames(E_YX) <- rownames(E_YX) <- c("BillWidth", "BillDepth", "BillLength")
df_error_m <- nrow(df) - nlevels(df$KnownSex) - 1 # N - k - c
cat(sprintf("--- Matriks E_Y.X (Error SSCP terkoreksi covariate) | df = %d ---\n",
df_error_m))
## --- Matriks E_Y.X (Error SSCP terkoreksi covariate) | df = 120 ---
print(round(E_YX, 4))
## BillWidth BillDepth BillLength
## BillWidth 31.5601 2.4927 10.8380
## BillDepth 2.4927 11.2449 10.1785
## BillLength 10.8380 10.1785 109.5731
heplot_res <- heplot(model_mancova,
variables = c(1, 3), # BillWidth vs BillLength
main = "Gambar 6. HE Plot MANCOVA: BillWidth vs BillLength",
col = c("blue", "red"),
lwd = 2)
cat("\nCatatan: heplot_res$E dan $H hanya 2x2 (sesuai variabel yang diplot).\n")
##
## Catatan: heplot_res$E dan $H hanya 2x2 (sesuai variabel yang diplot).
cat("Matriks 3x3 lengkap dihitung dari residuals model di atas.\n")
## Matriks 3x3 lengkap dihitung dari residuals model di atas.
cat("Wilks' Lambda Manual for MANCOVA\n\n")
## Wilks' Lambda Manual for MANCOVA
cat("Formula: Λ_A = |E_Y.X| / |E_Y.X + H_A,Y.X|\n\n")
## Formula: Λ_A = |E_Y.X| / |E_Y.X + H_A,Y.X|
model_full <- lm(dv_matrix ~ Mass + KnownSex, data = df)
model_reduced <- lm(dv_matrix ~ Mass, data = df)
RSS_full <- t(residuals(model_full)) %*% residuals(model_full)
RSS_reduced <- t(residuals(model_reduced)) %*% residuals(model_reduced)
H_YX_sex <- RSS_reduced - RSS_full
colnames(H_YX_sex) <- rownames(H_YX_sex) <- c("BillWidth", "BillDepth", "BillLength")
cat("--- Matriks H_A,Y.X (Treatment SSCP terkoreksi covariate) ---\n")
## --- Matriks H_A,Y.X (Treatment SSCP terkoreksi covariate) ---
cat(sprintf("df = %d\n", nlevels(df$KnownSex) - 1))
## df = 1
print(round(H_YX_sex, 4))
## BillWidth BillDepth BillLength
## BillWidth 0.2481 0.8878 2.7610
## BillDepth 0.8878 3.1764 9.8785
## BillLength 2.7610 9.8785 30.7224
cat("\n--- Verifikasi E_Y.X == RSS_full ---\n")
##
## --- Verifikasi E_Y.X == RSS_full ---
cat("Perbedaan maksimum (harus mendekati 0):",
round(max(abs(E_YX - RSS_full)), 8), "\n\n")
## Perbedaan maksimum (harus mendekati 0): 0
# Wilks' Lambda
det_EYX <- det(E_YX)
det_EYX_HYXS <- det(E_YX + H_YX_sex)
lambda_mancova <- det_EYX / det_EYX_HYXS
cat(sprintf("|E_Y.X| = %.6f\n", det_EYX))
## |E_Y.X| = 34165.089641
cat(sprintf("|E_Y.X + H_A,Y.X| = %.6f\n", det_EYX_HYXS))
## |E_Y.X + H_A,Y.X| = 49133.688310
cat(sprintf("Λ_A (Wilks') = %.6f\n\n", lambda_mancova))
## Λ_A (Wilks') = 0.695350
p_m <- 3
v_H_m <- nlevels(df$KnownSex) - 1 # = 1
v_E_m <- nrow(df) - nlevels(df$KnownSex) - 1 # N - k - c (1 covariate)
F_mancova <- ((1 - lambda_mancova) / lambda_mancova) *
((v_E_m + v_H_m - p_m) / p_m)
df1_m <- p_m
df2_m <- v_E_m + v_H_m - p_m
p_mancova_manual <- pf(F_mancova, df1_m, df2_m, lower.tail = FALSE)
cat("--- Transformastion Λ → F (baris: p≥1, v_H=1 dari tabel hal 33) ---\n")
## --- Transformastion Λ → F (baris: p≥1, v_H=1 dari tabel hal 33) ---
cat(sprintf("p = %d (jumlah DV)\n", p_m))
## p = 3 (jumlah DV)
cat(sprintf("v_H = %d (df perlakuan = k-1)\n", v_H_m))
## v_H = 1 (df perlakuan = k-1)
cat(sprintf("v_E = %d (df error = N-k-c)\n", v_E_m))
## v_E = 120 (df error = N-k-c)
cat(sprintf("F_hitung = %.4f (df1=%d, df2=%d)\n", F_mancova, df1_m, df2_m))
## F_hitung = 17.2329 (df1=3, df2=118)
cat(sprintf("p-value = %.6f\n\n", p_mancova_manual))
## p-value = 0.000000
# Verification with car::Anova
cat("--- Verification with car::Anova() ---\n")
## --- Verification with car::Anova() ---
tabel_car <- Anova(model_full, type = "III")
print(tabel_car)
##
## Type III MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## (Intercept) 1 0.75155 118.982 3 118 < 2.2e-16 ***
## Mass 1 0.17086 8.105 3 118 5.925e-05 ***
## KnownSex 1 0.30465 17.233 3 118 2.402e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("Follow-Up: ANCOVA Univariate per DV\n")
## Follow-Up: ANCOVA Univariate per DV
cat("(Dilakukan jika MANCOVA signifikan)\n")
## (Dilakukan jika MANCOVA signifikan)
cat(sprintf("Bonferroni Correction: α* = 0.05 / 3 = %.4f\n\n", 0.05 / 3))
## Bonferroni Correction: α* = 0.05 / 3 = 0.0167
alpha_bonf_m <- 0.05 / 3
dv_names <- c("BillWidth", "BillDepth", "BillLength")
results_fu <- data.frame(DV = character(), F_hitung = numeric(),
p_value = numeric(), Signifikan = character(),
eta2_p = numeric(), stringsAsFactors = FALSE)
for (dv in dv_names) {
formula_fu <- as.formula(paste(dv, "~ Mass + KnownSex"))
m_fu <- aov(formula_fu, data = df)
tbl_fu <- summary(m_fu)[[1]]
f_fu <- tbl_fu["KnownSex", "F value"]
p_fu <- tbl_fu["KnownSex", "Pr(>F)"]
ss_f <- tbl_fu["KnownSex", "Sum Sq"]
ss_e <- tbl_fu["Residuals", "Sum Sq"]
eta2_fu <- ss_f / (ss_f + ss_e)
sig_fu <- ifelse(p_fu < alpha_bonf_m, "SIGNIFICANT *", "no significant")
results_fu <- rbind(results_fu,
data.frame(DV = dv, F_hitung = round(f_fu, 4),
p_value = round(p_fu, 6),
Signifikan = sig_fu,
eta2_p = round(eta2_fu, 4)))
}
print(results_fu)
## DV F_hitung p_value Signifikan eta2_p
## 1 BillWidth 0.9435 0.333344 no significant 0.0078
## 2 BillDepth 33.8967 0.000000 SIGNIFICANT * 0.2203
## 3 BillLength 33.6459 0.000000 SIGNIFICANT * 0.2190
cat("\nSignificant DVs contribute to multivariate differences.\n")
##
## Significant DVs contribute to multivariate differences.
cat("Adjusted Means per DV (after control Mass) ===\n\n")
## Adjusted Means per DV (after control Mass) ===
for (dv in dv_names) {
formula_emm <- as.formula(paste(dv, "~ Mass + KnownSex"))
m_emm <- lm(formula_emm, data = df)
emm_out <- emmeans(m_emm, ~ KnownSex)
cat(sprintf("--- %s ---\n", dv))
print(emm_out)
cat("\n")
}
## --- BillWidth ---
## KnownSex emmean SE df lower.CL upper.CL
## F 9.17 0.0687 120 9.03 9.3
## M 9.26 0.0669 120 9.13 9.4
##
## Confidence level used: 0.95
##
## --- BillDepth ---
## KnownSex emmean SE df lower.CL upper.CL
## F 8.06 0.0410 120 7.98 8.14
## M 8.40 0.0399 120 8.32 8.48
##
## Confidence level used: 0.95
##
## --- BillLength ---
## KnownSex emmean SE df lower.CL upper.CL
## F 24.3 0.128 120 24.0 24.5
## M 25.4 0.125 120 25.1 25.6
##
## Confidence level used: 0.95
cat("ONE-WAY MANCOVA Conclusion\n\n")
## ONE-WAY MANCOVA Conclusion
cat(sprintf("Wilks' Λ = %.5f\n", lambda_mancova))
## Wilks' Λ = 0.69535
cat(sprintf("F_calculated = %.4f (df1=%d, df2=%d)\n", F_mancova, df1_m, df2_m))
## F_calculated = 17.2329 (df1=3, df2=118)
cat(sprintf("p-value = %.6f\n\n", p_mancova_manual))
## p-value = 0.000000
if (p_mancova_manual < 0.05) {
cat("Decision: REJECT H0 (p < 0.05)\n\n")
cat("Conclusion: After controlling for body weight (Mass) as a covariate,\n")
cat("there is a difference in the mean vectors (BillWidth, BillDepth, BillLength)\n")
cat("multivariately significant difference between male Blue Jays (M)\n")
cat("and females (F).\n")
} else {
cat("Decision: FAIL TO REJECT H0 (p > 0.05)\n\n")
cat("Conclusion: After controlling for body weight (Mass) as a covariate,\n")
cat("There is NO multivariately significant difference in mean vectors\n")
cat("between male and female Blue Jays.\n")
}
## Decision: REJECT H0 (p < 0.05)
##
## Conclusion: After controlling for body weight (Mass) as a covariate,
## there is a difference in the mean vectors (BillWidth, BillDepth, BillLength)
## multivariately significant difference between male Blue Jays (M)
## and females (F).
cat("\nOverall Summary\n")
##
## Overall Summary
cat("ANOVA: Effect of KnownSex on 1 DV (BillLength), without covariates\n")
## ANOVA: Effect of KnownSex on 1 DV (BillLength), without covariates
cat("MANOVA: Effect of KnownSex on 3 DVs simultaneously, without covariates\n")
## MANOVA: Effect of KnownSex on 3 DVs simultaneously, without covariates
cat("ANCOVA: Effect of KnownSex on 1 DV (BillLength), controlled Mass\n")
## ANCOVA: Effect of KnownSex on 1 DV (BillLength), controlled Mass
cat(" MANCOVA : Effect of KnownSex on 3 DVs simultaneously, controlled for Mass\n")
## MANCOVA : Effect of KnownSex on 3 DVs simultaneously, controlled for Mass