library(car)
library(lmtest)
library(ggplot2)
library(gridExtra)
library(heplots)
library(MVN)
raw <- read.csv("Penguindata.csv", check.names = FALSE)
df <- raw[, c("Species", "Culmen Length (mm)", "Culmen Depth (mm)",
"Flipper Length (mm)", "Body Mass (g)")]
names(df) <- c("species", "bill_length_mm", "bill_depth_mm",
"flipper_length_mm", "body_mass_g")
df <- df[complete.cases(df), ]
df$species <- gsub("Adelie Penguin.*", "Adelie", df$species)
df$species <- gsub("Chinstrap penguin.*", "Chinstrap", df$species)
df$species <- gsub("Gentoo penguin.*", "Gentoo", df$species)
df$species <- factor(df$species)
set.seed(42)
idx <- c(sample(which(df$species == "Adelie"), 50),
sample(which(df$species == "Chinstrap"), 50),
sample(which(df$species == "Gentoo"), 50))
df <- df[idx, ]
rownames(df) <- NULL
df$bill_length_mm <- sqrt(df$bill_length_mm)
df$bill_depth_mm <- sqrt(df$bill_depth_mm)
df$body_mass_g <- sqrt(df$body_mass_g)
cat("Total observasi:", nrow(df), "\n")## Total observasi: 150
##
## Adelie Chinstrap Gentoo
## 50 50 50
Catatan pra-pemrosesan:
flipper_length_mmdikeluarkan dari DV karena melanggar Asumsi 3 (Durbin-Watson, p = 0.019) dan Asumsi 5 (Homogeneity of Slopes, p = 0.020).Subsample seimbang 50 observasi per species (total n = 150) diterapkan karena Box’s M sangat sensitif pada n besar dengan n = 342 perbedaan kecil antar matriks kovarians sudah terdeteksi signifikan meskipun secara praktis tidak bermakna.
Transformasi Square Root diterapkan pada semua variabel numerik untuk menstabilkan variansi dan mendekatkan distribusi ke normal.
DV final: bill_length_mm (sqrt) dan bill_depth_mm (sqrt)
| Peran | Variabel |
|---|---|
| Dependent Variable 1 | sqrt(bill_length_mm) |
| Dependent Variable 2 | sqrt(bill_depth_mm) |
| Independent Variable | species (Adelie, Chinstrap, Gentoo) |
| Covariate | sqrt(body_mass_g) |
Karena asumsi MANCOVA mencakup semua asumsi ANCOVA dan MANOVA, pengujian dilakukan satu kali dan berlaku untuk ketiga analisis.
p1 <- ggplot(df, aes(x = body_mass_g, y = bill_length_mm, color = species)) +
geom_point(alpha = 0.5, size = 1.8) +
geom_smooth(method = "lm", se = TRUE) +
labs(title = "Asumsi 1a: sqrt(bill_length) vs sqrt(body_mass)",
x = "sqrt(Body Mass)", y = "sqrt(Bill Length)") +
theme_minimal()
p2 <- ggplot(df, aes(x = body_mass_g, y = bill_depth_mm, color = species)) +
geom_point(alpha = 0.5, size = 1.8) +
geom_smooth(method = "lm", se = TRUE) +
labs(title = "Asumsi 1b: sqrt(bill_depth) vs sqrt(body_mass)",
x = "sqrt(Body Mass)", y = "sqrt(Bill Depth)") +
theme_minimal()
grid.arrange(p1, p2, ncol = 2)Interpretasi: Garis regresi per species mengikuti pola linear → Asumsi 1 TERPENUHI ✓
heplotsY <- cbind(df$bill_length_mm, df$bill_depth_mm)
lev <- leveneTest(bill_length_mm ~ species, data = df)
print(lev)## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.3215 0.7255
## 147
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: Y by df$species
## Chi-Sq (approx.) = 5.7291, df = 6, p-value = 0.4542
Interpretasi:
# ANCOVA
dw_anc <- dwtest(lm(bill_length_mm ~ species + body_mass_g, data = df))
# MANOVA
dw_mv1 <- dwtest(lm(bill_length_mm ~ species, data = df))
dw_mv2 <- dwtest(lm(bill_depth_mm ~ species, data = df))
# MANCOVA
dw_mc1 <- dwtest(lm(bill_length_mm ~ species + body_mass_g, data = df))
dw_mc2 <- dwtest(lm(bill_depth_mm ~ species + body_mass_g, data = df))
cat("ANCOVA")## ANCOVA
## bill_length: DW = 1.8108, p = 0.0913
## MANOVA
## bill_length: DW = 1.9035, p = 0.2237
## bill_depth: DW = 1.9907, p = 0.4123
## MANCOVA
## bill_length: DW = 1.8108, p = 0.0913
## bill_depth: DW = 2.2482, p = 0.9138
Interpretasi: Semua DW ≈ 2 dan p > 0.05 → Asumsi 3 TERPENUHI ✓ (ANCOVA, MANOVA, MANCOVA)
MVNmodel_ancova <- aov(bill_length_mm ~ species + body_mass_g, data = df)
model_manova <- manova(cbind(bill_length_mm, bill_depth_mm) ~ species, data = df)
model_mancova <- lm(cbind(bill_length_mm, bill_depth_mm) ~
species + body_mass_g, data = df)
res_ancova <- residuals(model_ancova)
res_manova <- as.data.frame(residuals(model_manova))
res_mancova <- as.data.frame(residuals(model_mancova))
# ANCOVA: Shapiro-Wilk
cat("ANCOVA: Shapiro-Wilk ")## ANCOVA: Shapiro-Wilk
##
## Shapiro-Wilk normality test
##
## data: res_ancova
## W = 0.98773, p-value = 0.2098
# Q-Q Plot ANCOVA
qqnorm(res_ancova, main = "Q-Q Plot: Residual ANCOVA (bill_length)")
qqline(res_ancova, col = "red", lwd = 2)## MANOVA: Mardia's Test (MVN)
## Test Statistic p.value Method MVN
## 1 Mardia Skewness 4.659 0.324 asymptotic ✓ Normal
## 2 Mardia Kurtosis 0.316 0.752 asymptotic ✓ Normal
## MANCOVA: Mardia's Test (MVN)
## Test Statistic p.value Method MVN
## 1 Mardia Skewness 4.267 0.371 asymptotic ✓ Normal
## 2 Mardia Kurtosis 0.554 0.579 asymptotic ✓ Normal
# Q-Q Plot multivariat MANCOVA
par(mfrow = c(1, 2))
qqnorm(res_mancova[, 1], main = "Q-Q: bill_length residuals (MANCOVA)")
qqline(res_mancova[, 1], col = "red", lwd = 2)
qqnorm(res_mancova[, 2], main = "Q-Q: bill_depth residuals (MANCOVA)")
qqline(res_mancova[, 2], col = "red", lwd = 2)Interpretasi:
Berlaku untuk ANCOVA dan MANCOVA.
Uji interaksi species × body_mass_g — ingin p > 0.05
(slopes paralel).
a_anc <- summary(aov(bill_length_mm ~ species * body_mass_g, data = df))[[1]]
a_mc1 <- Anova(lm(bill_length_mm ~ species * body_mass_g, data = df), type = "III")
a_mc2 <- Anova(lm(bill_depth_mm ~ species * body_mass_g, data = df), type = "III")
cat("ANCOVA: Uji Interaksi")## ANCOVA: Uji Interaksi
cat(sprintf("species:body_mass_g F = %.4f, p = %.4f\n",
a_anc["species:body_mass_g", "F value"],
a_anc["species:body_mass_g", "Pr(>F)"]))## species:body_mass_g F = 0.0354, p = 0.9652
## MANCOVA: Uji Interaksi
cat(sprintf("bill_length: F = %.4f, p = %.4f\n",
a_mc1["species:body_mass_g", "F value"],
a_mc1["species:body_mass_g", "Pr(>F)"]))## bill_length: F = 0.0354, p = 0.9652
cat(sprintf("bill_depth: F = %.4f, p = %.4f\n",
a_mc2["species:body_mass_g", "F value"],
a_mc2["species:body_mass_g", "Pr(>F)"]))## bill_depth: F = 1.3485, p = 0.2629
# Visual slopes
p3 <- ggplot(df, aes(x = body_mass_g, y = bill_length_mm, color = species)) +
geom_point(alpha = 0.4, size = 1.5) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1.2) +
labs(title = "Asumsi 5a: Slopes bill_length",
subtitle = "Slopes harus paralel antar species",
x = "sqrt(Body Mass)", y = "sqrt(Bill Length)") +
theme_minimal()
p4 <- ggplot(df, aes(x = body_mass_g, y = bill_depth_mm, color = species)) +
geom_point(alpha = 0.4, size = 1.5) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1.2) +
labs(title = "Asumsi 5b: Slopes bill_depth",
subtitle = "Slopes harus paralel antar species",
x = "sqrt(Body Mass)", y = "sqrt(Bill Depth)") +
theme_minimal()
grid.arrange(p3, p4, ncol = 2)Interpretasi:
| No | Asumsi | Uji ANCOVA | Uji MANOVA | Uji MANCOVA | Hasil |
|---|---|---|---|---|---|
| 1 | Linearity | Visual Plot | Visual Plot | Visual Plot | ✓ |
| 2 | Homogeneity of Error Variances | Levene’s Test | Box’s M Test (heplots) | Box’s M Test (heplots) | ✓ |
| 3 | Independence of Error Terms | Durbin-Watson | Durbin-Watson per DV | Durbin-Watson per DV | ✓ |
| 4 | Normality of Error Terms | Shapiro-Wilk (univariat) | Mardia’s Test (MVN) | Mardia’s Test (MVN) | ✓ |
| 5 | Homogeneity of Regression Slopes | Uji Interaksi (Type I) | N/A | Uji Interaksi (Type III) per DV | ✓ |
Kesimpulan: Seluruh asumsi MANCOVA terpenuhi setelah: (1) mengeluarkan
flipper_length_mm, (2) subsample seimbang 50/species, (3) transformasi square root. Karena MANCOVA adalah model paling kompleks yang mencakup MANOVA dan ANCOVA, maka ketiga analisis valid untuk dilakukan.
## Df Sum Sq Mean Sq F value Pr(>F)
## species 2 17.010 8.505 224.07 < 2e-16 ***
## body_mass_g 1 2.643 2.643 69.64 4.95e-14 ***
## Residuals 146 5.542 0.038
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Y <- cbind(df$bill_length_mm, df$bill_depth_mm)
colnames(Y) <- c("bill_length", "bill_depth")
model_manova <- manova(Y ~ species, data = df)
print(summary(model_manova, test = "Pillai"))## Df Pillai approx F num Df den Df Pr(>F)
## species 2 1.3358 147.8 4 294 < 2.2e-16 ***
## Residuals 147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Df Wilks approx F num Df den Df Pr(>F)
## species 2 0.071182 200.61 4 292 < 2.2e-16 ***
## Residuals 147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Df Hotelling-Lawley approx F num Df den Df Pr(>F)
## species 2 7.3317 265.77 4 290 < 2.2e-16 ***
## Residuals 147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Df Roy approx F num Df den Df Pr(>F)
## species 2 6.4446 473.68 2 147 < 2.2e-16 ***
## Residuals 147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Response bill_length :
## Df Sum Sq Mean Sq F value Pr(>F)
## species 2 17.0096 8.5048 152.75 < 2.2e-16 ***
## Residuals 147 8.1846 0.0557
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response bill_depth :
## Df Sum Sq Mean Sq F value Pr(>F)
## species 2 5.8431 2.92157 164.08 < 2.2e-16 ***
## Residuals 147 2.6174 0.01781
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mv <- manova(lm(cbind(bill_length_mm, bill_depth_mm) ~
species + body_mass_g, data = df))
print(summary(mv, test = "Pillai"))## Df Pillai approx F num Df den Df Pr(>F)
## species 2 1.48764 211.959 4 292 < 2.2e-16 ***
## body_mass_g 1 0.46366 62.675 2 145 < 2.2e-16 ***
## Residuals 146
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Df Wilks approx F num Df den Df Pr(>F)
## species 2 0.05049 250.166 4 290 < 2.2e-16 ***
## body_mass_g 1 0.53634 62.675 2 145 < 2.2e-16 ***
## Residuals 146
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1