Modul 2

import library dan import data

library(tidyverse)
library(car)
library(rstatix)
library(biotools)
library(psych)

df <- read.csv("hsb2.csv")

glimpse(df)

## Rows: 200
## Columns: 11
## $ id      <int> 70, 121, 86, 141, 172, 113, 50, 11, 84, 48, 75, 60, 95, 104, 3…
## $ female  <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ race    <int> 4, 4, 4, 4, 4, 4, 3, 1, 4, 3, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4,…
## $ ses     <int> 1, 2, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 1, 1, 3, 2, 3, 2, 2,…
## $ schtyp  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,…
## $ prog    <int> 1, 3, 1, 3, 2, 2, 1, 2, 1, 2, 3, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1,…
## $ read    <int> 57, 68, 44, 63, 47, 44, 50, 34, 63, 57, 60, 57, 73, 54, 45, 42…
## $ write   <int> 52, 59, 33, 44, 52, 52, 59, 46, 57, 55, 46, 65, 60, 63, 57, 49…
## $ math    <int> 41, 53, 54, 47, 57, 51, 42, 45, 54, 52, 51, 51, 71, 57, 50, 43…
## $ science <int> 47, 63, 58, 53, 53, 63, 53, 39, 58, 50, 53, 63, 61, 55, 31, 50…
## $ socst   <int> 57, 61, 31, 56, 61, 61, 61, 36, 51, 51, 61, 61, 71, 46, 56, 56…

names(df)

##  [1] "id"      "female"  "race"    "ses"     "schtyp"  "prog"    "read"   
##  [8] "write"   "math"    "science" "socst"

memastikan apakah dalam data tersebut terdapat missing value

colSums(is.na(df))

##      id  female    race     ses  schtyp    prog    read   write    math science 
##       0       0       0       0       0       0       0       0       0       0 
##   socst 
##       0

Asumsi

dv <- c("read", "write", "math")
iv <- "prog"
covar <- "science"

X <- na.omit(df[, dv])

1. Mutlivariat normal

mardia_result <- psych::mardia(X)

mardia_result

## Call: psych::mardia(x = X)
## 
## Mardia tests of multivariate skew and kurtosis
## Use describe(x) the to get univariate tests
## n.obs = 200   num.vars =  3 
## b1p =  0.52   skew =  17.25  with probability  <=  0.069
##  small sample skew =  17.64  with probability <=  0.061
## b2p =  13.22   kurtosis =  -2.3  with probability <=  0.021

pvalue skewness= 0.061 dengan p walue kurtosis = 0.021 (salah satunya memenuhi)

Homogenitas

boxM(df[, dv], df$prog)

## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  df[, dv]
## Chi-Sq (approx.) = 16.574, df = 12, p-value = 0.1663

p-value = 0.1663 > 0.05 (memenuhi asusmsi)

DV Independensi Bartlett

R <- cor(X)

bartlett_result <- cortest.bartlett(
  R,
  n = nrow(X)
)

bartlett_result

## $chisq
## [1] 229.6047
## 
## $p.value
## [1] 1.683784e-49
## 
## $df
## [1] 3

p-value (1.683784e-49) < 0.05 artinya memenuhi asumsi

4. Cov DV linier

par(mfrow=c(1,3))

for (y in dv) {
  plot(
    df[[covar]],
    df[[y]],
    xlab = "science",
    ylab = y,
    main = paste("science vs", y)
  )

  abline(
    lm(df[[y]] ~ df[[covar]]),
    col = "red"
  )
}

MANOVA

manova_model <- manova(cbind(read, write, math) ~ prog, data = df)

summary(manova_model, test = "Pillai")

##            Df   Pillai approx F num Df den Df  Pr(>F)  
## prog        1 0.035319    2.392      3    196 0.06984 .
## Residuals 198                                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

P-value = 2.304e-10 < 0.05 yang artinya terdapat perbedaan signifikan pada nilai read, write, dan math berdasarkan prog.

summary.aov(manova_model)

##  Response read :
##              Df  Sum Sq Mean Sq F value  Pr(>F)  
## prog          1   381.1  381.10   3.674 0.05671 .
## Residuals   198 20538.3  103.73                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response write :
##              Df  Sum Sq Mean Sq F value  Pr(>F)  
## prog          1   586.4  586.42  6.7146 0.01027 *
## Residuals   198 17292.5   87.34                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response math :
##              Df  Sum Sq Mean Sq F value  Pr(>F)  
## prog          1   393.5  393.53   4.564 0.03388 *
## Residuals   198 17072.3   86.22                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Read p = 4.283e-09 < 0.05 (signifikan)

Write p = 4.31e-09 < 0.05, (signifikan)

Math p = 7.364e-12 < 0.05, (signifikan)

Hasil uji univariat menunjukkan bahwa seluruh variabel dependen, yaitu read, write, dan math, memiliki perbedaan yang signifikan berdasarkan kelompok prog.

Mancova

slope_model <- manova(cbind(read, write, math) ~ prog * science, data = df)

summary(slope_model, test = "Pillai")

##               Df  Pillai approx F num Df den Df    Pr(>F)    
## prog           1 0.06205    4.278      3    194  0.005967 ** 
## science        1 0.48618   61.188      3    194 < 2.2e-16 ***
## prog:science   1 0.00739    0.482      3    194  0.695366    
## Residuals    196                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

prog : science = p = 0.3466 > 0.05 (tidak signifikan) Tidak terdapat interaksi yang signifikan antara prog dan science terhadap variabel dependen.

mancova_model <- manova(cbind(read, write, math) ~ prog + science, data = df)

summary(mancova_model, test = "Pillai")

##            Df  Pillai approx F num Df den Df    Pr(>F)    
## prog        1 0.06198    4.295      3    195  0.005828 ** 
## science     1 0.48602   61.463      3    195 < 2.2e-16 ***
## Residuals 197                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(mancova_model, test = “Pillai”)

summary.aov(mancova_model)

##  Response read :
##              Df  Sum Sq Mean Sq  F value  Pr(>F)    
## prog          1   381.1   381.1   5.9551 0.01556 *  
## science       1  7931.1  7931.1 123.9309 < 2e-16 ***
## Residuals   197 12607.2    64.0                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response write :
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## prog          1   586.4   586.4  9.6572  0.002165 ** 
## science       1  5329.8  5329.8 87.7703 < 2.2e-16 ***
## Residuals   197 11962.7    60.7                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response math :
##              Df  Sum Sq Mean Sq  F value    Pr(>F)    
## prog          1   393.5   393.5   7.3828  0.007172 ** 
## science       1  6571.5  6571.5 123.2850 < 2.2e-16 ***
## Residuals   197 10500.8    53.3                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

read Variabel read dipengaruhi secara signifikan oleh prog (F = 33.195; p = 3.84e-13) dan science (F = 111.267; p < 2.2e-16).

write Variabel write dipengaruhi secara signifikan oleh prog (F = 29.327; p = 7.209e-12) dan science (F = 75.564; p = 1.410e-15).

math Variabel math dipengaruhi secara signifikan oleh prog (F = 46.009; p < 2.2e-16) dan science (F = 113.565; p < 2.2e-16).

Perbandingan ANOVA dan ANCOVA

# ANOVA (tanpa covariate)
anova_read  <- aov(read  ~ prog, data = df)
anova_write <- aov(write ~ prog, data = df)
anova_math  <- aov(math  ~ prog, data = df)

# ANCOVA (dengan covariate science)
ancova_read  <- aov(read  ~ science + prog, data = df)
ancova_write <- aov(write ~ science + prog, data = df)
ancova_math  <- aov(math  ~ science + prog, data = df)

hasil_aa <- data.frame(
  DV       = c("read", "write", "math"),
  F_ANOVA  = c(
    summary(anova_read)[[1]][1, "F value"],
    summary(anova_write)[[1]][1, "F value"],
    summary(anova_math)[[1]][1, "F value"]
  ),
  p_ANOVA  = c(
    summary(anova_read)[[1]][1, "Pr(>F)"],
    summary(anova_write)[[1]][1, "Pr(>F)"],
    summary(anova_math)[[1]][1, "Pr(>F)"]
  ),
  F_ANCOVA = c(
    summary(ancova_read)[[1]][2, "F value"],
    summary(ancova_write)[[1]][2, "F value"],
    summary(ancova_math)[[1]][2, "F value"]
  ),
  p_ANCOVA = c(
    summary(ancova_read)[[1]][2, "Pr(>F)"],
    summary(ancova_write)[[1]][2, "Pr(>F)"],
    summary(ancova_math)[[1]][2, "Pr(>F)"]
  )
)
hasil_aa$F_naik <- round(hasil_aa$F_ANCOVA - hasil_aa$F_ANOVA, 2)

print(hasil_aa)

##      DV  F_ANOVA    p_ANOVA   F_ANCOVA  p_ANCOVA F_naik
## 1  read 3.674017 0.05670663 0.07990165 0.7777275  -3.59
## 2 write 6.714608 0.01027459 1.61966902 0.2046379  -5.09
## 3  math 4.564033 0.03387766 0.31357142 0.5761330  -4.25

Hasil analisis menunjukkan bahwa pada uji ANOVA, variabel write dan math menunjukkan pengaruh signifikan (p < 0.05), sedangkan variabel read tidak signifikan.

Namun, setelah dilakukan analisis ANCOVA dengan memasukkan kovariat science, seluruh variabel dependen menunjukkan nilai p > 0.05, yang berarti tidak terdapat pengaruh signifikan setelah pengaruh kovariat dikontrol.

Nilai F pada ANCOVA juga lebih kecil dibandingkan ANOVA (F_naik negatif), yang menunjukkan bahwa penambahan kovariat science tidak meningkatkan kekuatan model.

library(ggplot2)

plot_aa <- data.frame(
  DV     = rep(c("read","write","math"), 2),
  Metode = rep(c("ANOVA","ANCOVA"), each = 3),
  F_val  = c(hasil_aa$F_ANOVA, hasil_aa$F_ANCOVA)
)
plot_aa$Metode <- factor(plot_aa$Metode, levels = c("ANOVA","ANCOVA"))

ggplot(plot_aa, aes(x = DV, y = F_val, fill = Metode)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.6, color = "white") +
  geom_text(aes(label = round(F_val, 1)),
            position = position_dodge(width = 0.6),
            vjust = -0.5, size = 4, fontface = "bold") +
  scale_fill_manual(values = c("#4472C4","#70AD47")) +
  labs(
    title    = "ANOVA vs ANCOVA: F-value efek 'prog'",
    subtitle = "ANCOVA mengontrol 'science' --> F-value prog meningkat",
    x = "Variabel Dependen", y = "F-value", fill = "Metode"
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.title    = element_text(face = "bold", hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, color = "gray40"),
        legend.position = "top")

Perbandingan MANOVA dan MANCOVA

manova_aov  <- summary.aov(manova_model)
mancova_aov <- summary.aov(mancova_model)

# Ekstraksi manual dengan indeks kolom untuk keamanan
# Kolom 4: F value, Kolom 5: Pr(>F)
hasil_mv <- data.frame(
  DV        = c("read", "write", "math"),
  F_MANOVA  = c(
    manova_aov[[1]][1, 4],
    manova_aov[[2]][1, 4],
    manova_aov[[3]][1, 4]
  ),
  p_MANOVA  = c(
    manova_aov[[1]][1, 5],
    manova_aov[[2]][1, 5],
    manova_aov[[3]][1, 5]
  ),
  F_MANCOVA = c(
    mancova_aov[[1]][2, 4],
    mancova_aov[[2]][2, 4],
    mancova_aov[[3]][2, 4]
  ),
  p_MANCOVA = c(
    mancova_aov[[1]][2, 5],
    mancova_aov[[2]][2, 5],
    mancova_aov[[3]][2, 5]
  )
)

hasil_mv$F_naik <- round(hasil_mv$F_MANCOVA - hasil_mv$F_MANOVA, 2)

print(hasil_mv)

##      DV F_MANOVA   p_MANOVA F_MANCOVA    p_MANCOVA F_naik
## 1  read 3.674017 0.05670663 123.93090 1.204359e-22 120.26
## 2 write 6.714608 0.01027459  87.77026 1.746477e-17  81.06
## 3  math 4.564033 0.03387766 123.28500 1.471013e-22 118.72

Hasil analisis menunjukkan bahwa pada uji MANOVA, variabel write dan math menunjukkan pengaruh signifikan (p < 0.05), sedangkan variabel read tidak signifikan (p > 0.05).

Namun setelah dilakukan analisis MANCOVA dengan memasukkan kovariat science, seluruh variabel dependen menunjukkan hasil yang sangat signifikan (p < 0.05).

Nilai F pada MANCOVA meningkat secara signifikan dibandingkan MANOVA, yang menunjukkan bahwa kovariat science memberikan kontribusi besar dalam meningkatkan kekuatan model.

plot_mv <- data.frame(
  DV     = rep(c("read","write","math"), 2),
  Metode = rep(c("MANOVA","MANCOVA"), each = 3),
  F_val  = c(hasil_mv$F_MANOVA, hasil_mv$F_MANCOVA)
)
plot_mv$Metode <- factor(plot_mv$Metode, levels = c("MANOVA","MANCOVA"))

ggplot(plot_mv, aes(x = DV, y = F_val, fill = Metode)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.6, color = "white") +
  geom_text(aes(label = round(F_val, 1)),
            position = position_dodge(width = 0.6),
            vjust = -0.5, size = 4, fontface = "bold") +
  scale_fill_manual(values = c("#ED7D31","#C00000")) +
  labs(
    title    = "MANOVA vs MANCOVA: F-value follow-up univariat efek 'prog'",
    subtitle = "MANCOVA mengontrol 'science' → F-value prog meningkat",
    x = "Variabel Dependen", y = "F-value", fill = "Metode"
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.title    = element_text(face = "bold", hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, color = "gray40"),
        legend.position = "top")