1. Persiapan Data

library(car)
library(lmtest)
library(ggplot2)
library(gridExtra)
library(heplots)   
library(MVN)       

raw <- read.csv("Penguindata.csv", check.names = FALSE)

df <- raw[, c("Species", "Culmen Length (mm)", "Culmen Depth (mm)",
              "Flipper Length (mm)", "Body Mass (g)")]
names(df) <- c("species", "bill_length_mm", "bill_depth_mm",
               "flipper_length_mm", "body_mass_g")

df <- df[complete.cases(df), ]
df$species <- gsub("Adelie Penguin.*",    "Adelie",    df$species)
df$species <- gsub("Chinstrap penguin.*", "Chinstrap", df$species)
df$species <- gsub("Gentoo penguin.*",    "Gentoo",    df$species)
df$species <- factor(df$species)

set.seed(42)
idx <- c(sample(which(df$species == "Adelie"),    50),
         sample(which(df$species == "Chinstrap"), 50),
         sample(which(df$species == "Gentoo"),    50))
df <- df[idx, ]
rownames(df) <- NULL


df$bill_length_mm <- sqrt(df$bill_length_mm)
df$bill_depth_mm  <- sqrt(df$bill_depth_mm)
df$body_mass_g    <- sqrt(df$body_mass_g)

cat("Total observasi:", nrow(df), "\n")
## Total observasi: 150
print(table(df$species))
## 
##    Adelie Chinstrap    Gentoo 
##        50        50        50

Catatan pra-pemrosesan:

  1. flipper_length_mm dikeluarkan dari DV karena melanggar Asumsi 3 (Durbin-Watson, p = 0.019) dan Asumsi 5 (Homogeneity of Slopes, p = 0.020).

  2. Subsample seimbang 50 observasi per species (total n = 150) diterapkan karena Box’s M sangat sensitif pada n besar dengan n = 342 perbedaan kecil antar matriks kovarians sudah terdeteksi signifikan meskipun secara praktis tidak bermakna.

  3. Transformasi Square Root diterapkan pada semua variabel numerik untuk menstabilkan variansi dan mendekatkan distribusi ke normal.

DV final: bill_length_mm (sqrt) dan bill_depth_mm (sqrt)


2. Struktur Variabel

Peran Variabel
Dependent Variable 1 sqrt(bill_length_mm)
Dependent Variable 2 sqrt(bill_depth_mm)
Independent Variable species (Adelie, Chinstrap, Gentoo)
Covariate sqrt(body_mass_g)

3. Uji Asumsi MANCOVA

Karena asumsi MANCOVA mencakup semua asumsi ANCOVA dan MANOVA, pengujian dilakukan satu kali dan berlaku untuk ketiga analisis.

Asumsi 1 — Linearity

p1 <- ggplot(df, aes(x = body_mass_g, y = bill_length_mm, color = species)) +
  geom_point(alpha = 0.5, size = 1.8) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(title = "Asumsi 1a: sqrt(bill_length) vs sqrt(body_mass)",
       x = "sqrt(Body Mass)", y = "sqrt(Bill Length)") +
  theme_minimal()

p2 <- ggplot(df, aes(x = body_mass_g, y = bill_depth_mm, color = species)) +
  geom_point(alpha = 0.5, size = 1.8) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(title = "Asumsi 1b: sqrt(bill_depth) vs sqrt(body_mass)",
       x = "sqrt(Body Mass)", y = "sqrt(Bill Depth)") +
  theme_minimal()

grid.arrange(p1, p2, ncol = 2)

Interpretasi: Garis regresi per species mengikuti pola linear → Asumsi 1 TERPENUHI ✓


Asumsi 2 — Homogeneity of Error Variances

  • ANCOVA → Levene’s Test (1 DV)
  • MANOVA & MANCOVA → Box’s M Test via package heplots
Y <- cbind(df$bill_length_mm, df$bill_depth_mm)

lev <- leveneTest(bill_length_mm ~ species, data = df)
print(lev)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   2  0.3215 0.7255
##       147
boxm <- boxM(Y, df$species)
print(boxm)
## 
##  Box's M-test for Homogeneity of Covariance Matrices 
## 
## data:  Y by df$species 
## Chi-Sq (approx.) = 5.7291, df = 6, p-value = 0.4542

Interpretasi:

  • ANCOVA Levene’s: p = 0.7255 > 0.05 → TERPENUHI ✓
  • MANOVA/MANCOVA Box’s M: p = 0.4542 > 0.05 → TERPENUHI ✓

Asumsi 3 — Independence of Error Terms

# ANCOVA
dw_anc <- dwtest(lm(bill_length_mm ~ species + body_mass_g, data = df))

# MANOVA
dw_mv1 <- dwtest(lm(bill_length_mm ~ species, data = df))
dw_mv2 <- dwtest(lm(bill_depth_mm  ~ species, data = df))

# MANCOVA
dw_mc1 <- dwtest(lm(bill_length_mm ~ species + body_mass_g, data = df))
dw_mc2 <- dwtest(lm(bill_depth_mm  ~ species + body_mass_g, data = df))

cat("ANCOVA")
## ANCOVA
cat(sprintf("bill_length: DW = %.4f, p = %.4f\n",
            dw_anc$statistic, dw_anc$p.value))
## bill_length: DW = 1.8108, p = 0.0913
cat("MANOVA")
## MANOVA
cat(sprintf("bill_length: DW = %.4f, p = %.4f\n",
            dw_mv1$statistic, dw_mv1$p.value))
## bill_length: DW = 1.9035, p = 0.2237
cat(sprintf("bill_depth:  DW = %.4f, p = %.4f\n",
            dw_mv2$statistic, dw_mv2$p.value))
## bill_depth:  DW = 1.9907, p = 0.4123
cat("MANCOVA")
## MANCOVA
cat(sprintf("bill_length: DW = %.4f, p = %.4f\n",
            dw_mc1$statistic, dw_mc1$p.value))
## bill_length: DW = 1.8108, p = 0.0913
cat(sprintf("bill_depth:  DW = %.4f, p = %.4f\n",
            dw_mc2$statistic, dw_mc2$p.value))
## bill_depth:  DW = 2.2482, p = 0.9138

Interpretasi: Semua DW ≈ 2 dan p > 0.05 → Asumsi 3 TERPENUHI ✓ (ANCOVA, MANOVA, MANCOVA)


Asumsi 4 — Normality of Error Terms

  • ANCOVA → Shapiro-Wilk pada residual univariat
  • MANOVA & MANCOVA → Mardia’s Test via package MVN
model_ancova  <- aov(bill_length_mm ~ species + body_mass_g, data = df)
model_manova  <- manova(cbind(bill_length_mm, bill_depth_mm) ~ species, data = df)
model_mancova <- lm(cbind(bill_length_mm, bill_depth_mm) ~
                      species + body_mass_g, data = df)

res_ancova  <- residuals(model_ancova)
res_manova  <- as.data.frame(residuals(model_manova))
res_mancova <- as.data.frame(residuals(model_mancova))

# ANCOVA: Shapiro-Wilk
cat("ANCOVA: Shapiro-Wilk ")
## ANCOVA: Shapiro-Wilk
sw_anc <- shapiro.test(res_ancova)
print(sw_anc)
## 
##  Shapiro-Wilk normality test
## 
## data:  res_ancova
## W = 0.98773, p-value = 0.2098
# Q-Q Plot ANCOVA
qqnorm(res_ancova, main = "Q-Q Plot: Residual ANCOVA (bill_length)")
qqline(res_ancova, col = "red", lwd = 2)

cat("MANOVA: Mardia's Test (MVN)")
## MANOVA: Mardia's Test (MVN)
mv_mv <- mvn(res_manova, mvn_test = "mardia")
print(mv_mv$multivariate_normality)
##              Test Statistic p.value     Method      MVN
## 1 Mardia Skewness     4.659   0.324 asymptotic ✓ Normal
## 2 Mardia Kurtosis     0.316   0.752 asymptotic ✓ Normal
cat("MANCOVA: Mardia's Test (MVN)")
## MANCOVA: Mardia's Test (MVN)
mv_mc <- mvn(res_mancova, mvn_test = "mardia")
print(mv_mc$multivariate_normality)
##              Test Statistic p.value     Method      MVN
## 1 Mardia Skewness     4.267   0.371 asymptotic ✓ Normal
## 2 Mardia Kurtosis     0.554   0.579 asymptotic ✓ Normal
# Q-Q Plot multivariat MANCOVA
par(mfrow = c(1, 2))
qqnorm(res_mancova[, 1], main = "Q-Q: bill_length residuals (MANCOVA)")
qqline(res_mancova[, 1], col = "red", lwd = 2)
qqnorm(res_mancova[, 2], main = "Q-Q: bill_depth residuals (MANCOVA)")
qqline(res_mancova[, 2], col = "red", lwd = 2)

par(mfrow = c(1, 1))

Interpretasi:

  • ANCOVA SW: p = 0.2098 > 0.05 → TERPENUHI ✓
  • MANOVA Mardia: p_skew = 0.324 p_kurt = 0.752 → TERPENUHI ✓
  • MANCOVA p_skew = 0.324 p_kurt = 0.752 → TERPENUHI ✓

Asumsi 5 — Homogeneity of Regression Slopes

Berlaku untuk ANCOVA dan MANCOVA. Uji interaksi species × body_mass_g — ingin p > 0.05 (slopes paralel).

a_anc <- summary(aov(bill_length_mm ~ species * body_mass_g, data = df))[[1]]

a_mc1 <- Anova(lm(bill_length_mm ~ species * body_mass_g, data = df), type = "III")
a_mc2 <- Anova(lm(bill_depth_mm  ~ species * body_mass_g, data = df), type = "III")

cat("ANCOVA: Uji Interaksi")
## ANCOVA: Uji Interaksi
cat(sprintf("species:body_mass_g  F = %.4f, p = %.4f\n",
    a_anc["species:body_mass_g", "F value"],
    a_anc["species:body_mass_g", "Pr(>F)"]))
## species:body_mass_g  F = 0.0354, p = 0.9652
cat(" MANCOVA: Uji Interaksi ")
##  MANCOVA: Uji Interaksi
cat(sprintf("bill_length: F = %.4f, p = %.4f\n",
    a_mc1["species:body_mass_g", "F value"],
    a_mc1["species:body_mass_g", "Pr(>F)"]))
## bill_length: F = 0.0354, p = 0.9652
cat(sprintf("bill_depth:  F = %.4f, p = %.4f\n",
    a_mc2["species:body_mass_g", "F value"],
    a_mc2["species:body_mass_g", "Pr(>F)"]))
## bill_depth:  F = 1.3485, p = 0.2629
# Visual slopes
p3 <- ggplot(df, aes(x = body_mass_g, y = bill_length_mm, color = species)) +
  geom_point(alpha = 0.4, size = 1.5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1.2) +
  labs(title = "Asumsi 5a: Slopes bill_length",
       subtitle = "Slopes harus paralel antar species",
       x = "sqrt(Body Mass)", y = "sqrt(Bill Length)") +
  theme_minimal()

p4 <- ggplot(df, aes(x = body_mass_g, y = bill_depth_mm, color = species)) +
  geom_point(alpha = 0.4, size = 1.5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1.2) +
  labs(title = "Asumsi 5b: Slopes bill_depth",
       subtitle = "Slopes harus paralel antar species",
       x = "sqrt(Body Mass)", y = "sqrt(Bill Depth)") +
  theme_minimal()

grid.arrange(p3, p4, ncol = 2)

Interpretasi:

  • ANCOVA bill_length: p = 0.9652 > 0.05 → TERPENUHI ✓
  • MANCOVA bill_length: p = 0.9652 > 0.05 → TERPENUHI ✓
  • MANCOVA bill_depth: p = 0.2629 > 0.05 → TERPENUHI ✓

4. Ringkasan Uji Asumsi

Ringkasan Hasil Uji Asumsi — Semua Terpenuhi ✓
No Asumsi Uji ANCOVA Uji MANOVA Uji MANCOVA Hasil
1 Linearity Visual Plot Visual Plot Visual Plot
2 Homogeneity of Error Variances Levene’s Test Box’s M Test (heplots) Box’s M Test (heplots)
3 Independence of Error Terms Durbin-Watson Durbin-Watson per DV Durbin-Watson per DV
4 Normality of Error Terms Shapiro-Wilk (univariat) Mardia’s Test (MVN) Mardia’s Test (MVN)
5 Homogeneity of Regression Slopes Uji Interaksi (Type I) N/A Uji Interaksi (Type III) per DV

Kesimpulan: Seluruh asumsi MANCOVA terpenuhi setelah: (1) mengeluarkan flipper_length_mm, (2) subsample seimbang 50/species, (3) transformasi square root. Karena MANCOVA adalah model paling kompleks yang mencakup MANOVA dan ANCOVA, maka ketiga analisis valid untuk dilakukan.


5. Hasil Analisis

5.1 ANCOVA

model_ancova <- aov(bill_length_mm ~ species + body_mass_g, data = df)
summary(model_ancova)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## species       2 17.010   8.505  224.07  < 2e-16 ***
## body_mass_g   1  2.643   2.643   69.64 4.95e-14 ***
## Residuals   146  5.542   0.038                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5.2 MANOVA

Y <- cbind(df$bill_length_mm, df$bill_depth_mm)
colnames(Y) <- c("bill_length", "bill_depth")
model_manova <- manova(Y ~ species, data = df)

print(summary(model_manova, test = "Pillai"))
##            Df Pillai approx F num Df den Df    Pr(>F)    
## species     2 1.3358    147.8      4    294 < 2.2e-16 ***
## Residuals 147                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(summary(model_manova, test = "Wilks"))
##            Df    Wilks approx F num Df den Df    Pr(>F)    
## species     2 0.071182   200.61      4    292 < 2.2e-16 ***
## Residuals 147                                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(summary(model_manova, test = "Hotelling"))
##            Df Hotelling-Lawley approx F num Df den Df    Pr(>F)    
## species     2           7.3317   265.77      4    290 < 2.2e-16 ***
## Residuals 147                                                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(summary(model_manova, test = "Roy"))
##            Df    Roy approx F num Df den Df    Pr(>F)    
## species     2 6.4446   473.68      2    147 < 2.2e-16 ***
## Residuals 147                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(summary.aov(model_manova))
##  Response bill_length :
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## species       2 17.0096  8.5048  152.75 < 2.2e-16 ***
## Residuals   147  8.1846  0.0557                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response bill_depth :
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## species       2 5.8431 2.92157  164.08 < 2.2e-16 ***
## Residuals   147 2.6174 0.01781                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5.3 MANCOVA

mv <- manova(lm(cbind(bill_length_mm, bill_depth_mm) ~
                  species + body_mass_g, data = df))

print(summary(mv, test = "Pillai"))
##              Df  Pillai approx F num Df den Df    Pr(>F)    
## species       2 1.48764  211.959      4    292 < 2.2e-16 ***
## body_mass_g   1 0.46366   62.675      2    145 < 2.2e-16 ***
## Residuals   146                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(summary(mv, test = "Wilks"))
##              Df   Wilks approx F num Df den Df    Pr(>F)    
## species       2 0.05049  250.166      4    290 < 2.2e-16 ***
## body_mass_g   1 0.53634   62.675      2    145 < 2.2e-16 ***
## Residuals   146                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1