library(tidyverse)
library(car)
library(rstatix)
library(biotools)
library(psych)
library(ggplot2)
df <- read.csv("hsb2.csv")
glimpse(df)
## Rows: 200
## Columns: 11
## $ id <int> 70, 121, 86, 141, 172, 113, 50, 11, 84, 48, 75, 60, 95, 104, 3…
## $ female <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ race <int> 4, 4, 4, 4, 4, 4, 3, 1, 4, 3, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4,…
## $ ses <int> 1, 2, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 1, 1, 3, 2, 3, 2, 2,…
## $ schtyp <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,…
## $ prog <int> 1, 3, 1, 3, 2, 2, 1, 2, 1, 2, 3, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1,…
## $ read <int> 57, 68, 44, 63, 47, 44, 50, 34, 63, 57, 60, 57, 73, 54, 45, 42…
## $ write <int> 52, 59, 33, 44, 52, 52, 59, 46, 57, 55, 46, 65, 60, 63, 57, 49…
## $ math <int> 41, 53, 54, 47, 57, 51, 42, 45, 54, 52, 51, 51, 71, 57, 50, 43…
## $ science <int> 47, 63, 58, 53, 53, 63, 53, 39, 58, 50, 53, 63, 61, 55, 31, 50…
## $ socst <int> 57, 61, 31, 56, 61, 61, 61, 36, 51, 51, 61, 61, 71, 46, 56, 56…
describe(df[, c("read", "write", "math", "science", "socst")])
## vars n mean sd median trimmed mad min max range skew kurtosis
## read 1 200 52.23 10.25 50 52.03 10.38 28 76 48 0.19 -0.66
## write 2 200 52.77 9.48 54 53.36 11.86 31 67 36 -0.47 -0.78
## math 3 200 52.65 9.37 52 52.23 10.38 33 75 42 0.28 -0.69
## science 4 200 51.85 9.90 53 52.02 11.86 26 74 48 -0.19 -0.60
## socst 5 200 52.41 10.74 52 52.99 13.34 26 71 45 -0.38 -0.57
## se
## read 0.72
## write 0.67
## math 0.66
## science 0.70
## socst 0.76
names(df)
## [1] "id" "female" "race" "ses" "schtyp" "prog" "read"
## [8] "write" "math" "science" "socst"
memastikan apakah dalam data tersebut terdapat missing value
colSums(is.na(df))
## id female race ses schtyp prog read write math science
## 0 0 0 0 0 0 0 0 0 0
## socst
## 0
dv <- c("read", "write", "math")
iv <- "prog"
covar <- "science"
X <- na.omit(df[, dv])
mardia_result <- psych::mardia(X)
mardia_result
## Call: psych::mardia(x = X)
##
## Mardia tests of multivariate skew and kurtosis
## Use describe(x) the to get univariate tests
## n.obs = 200 num.vars = 3
## b1p = 0.52 skew = 17.25 with probability <= 0.069
## small sample skew = 17.64 with probability <= 0.061
## b2p = 13.22 kurtosis = -2.3 with probability <= 0.021
pvalue skewness= 0.061 dengan pvalue kurtosis = 0.021 (salah satunya memenuhi)
boxM(df[, dv], df$prog)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: df[, dv]
## Chi-Sq (approx.) = 16.574, df = 12, p-value = 0.1663
p-value = 0.1663 > 0.05 (memenuhi asusmsi)
R <- cor(X)
bartlett_result <- cortest.bartlett(
R,
n = nrow(X)
)
bartlett_result
## $chisq
## [1] 229.6047
##
## $p.value
## [1] 1.683784e-49
##
## $df
## [1] 3
p-value (1.683784e-49) < 0.05 artinya memenuhi asumsi
par(mfrow=c(1,3))
for (y in dv) {
plot(
df[[covar]],
df[[y]],
xlab = "science",
ylab = y,
main = paste("science vs", y)
)
abline(
lm(df[[y]] ~ df[[covar]]),
col = "red"
)
}
df$prog <- as.factor(df$prog)
anova_read <- aov(read ~ prog, data = df)
anova_write <- aov(write ~ prog, data = df)
anova_math <- aov(math ~ prog, data = df)
summary(anova_read)
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 3717 1858.4 21.28 4.28e-09 ***
## Residuals 197 17203 87.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(anova_write)
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 3176 1587.8 21.27 4.31e-09 ***
## Residuals 197 14703 74.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(anova_math)
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 4002 2001.1 29.28 7.36e-12 ***
## Residuals 197 13464 68.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
variabel prog terhadap skor read, p-value < 0.05 (signifikan) variabel prog terhadap skor write, p-value < 0.05 (signifikan) variabel prog terhadap skor math p-value < 0.05 (signifikan)
ancova_read <- aov(read ~ prog + science, data = df)
ancova_write <- aov(write ~ prog + science, data = df)
ancova_math <- aov(math ~ prog + science, data = df)
summary(ancova_read)
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 3717 1858 33.2 3.84e-13 ***
## science 1 6229 6229 111.3 < 2e-16 ***
## Residuals 196 10973 56
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(ancova_write)
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 3176 1588 29.33 7.21e-12 ***
## science 1 4091 4091 75.56 1.41e-15 ***
## Residuals 196 10612 54
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(ancova_math)
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 4002 2001 46.01 <2e-16 ***
## science 1 4939 4939 113.56 <2e-16 ***
## Residuals 196 8524 43
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
slope_model <- aov(read ~ prog * science, data = df)
summary(slope_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 3717 1858 33.337 3.59e-13 ***
## science 1 6229 6229 111.745 < 2e-16 ***
## prog:science 2 158 79 1.421 0.244
## Residuals 194 10815 56
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
manova_model <- manova(cbind(read, write, math) ~ prog, data = df)
summary(manova_model, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## prog 2 0.26721 10.075 6 392 2.304e-10 ***
## Residuals 197
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
P-value = 0.06984 < 0.05 yang artinya tidak terdapat perbedaan yang signifikan pada nilai read, write, dan math berdasarkan prog.
summary.aov(manova_model)
## Response read :
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 3716.9 1858.43 21.282 4.283e-09 ***
## Residuals 197 17202.6 87.32
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response write :
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 3175.7 1587.85 21.275 4.31e-09 ***
## Residuals 197 14703.2 74.64
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response math :
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 4002.1 2001.05 29.279 7.364e-12 ***
## Residuals 197 13463.7 68.34
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
slope_model <- manova(cbind(read, write, math) ~ prog * science, data = df)
summary(slope_model, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## prog 2 0.41510 16.849 6 386 <2e-16 ***
## science 1 0.49104 61.746 3 192 <2e-16 ***
## prog:science 2 0.03439 1.126 6 386 0.3466
## Residuals 194
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mancova_model <- manova(cbind(read, write, math) ~ prog + science, data = df)
summary(mancova_model, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## prog 2 0.41233 16.881 6 390 < 2.2e-16 ***
## science 1 0.48803 61.642 3 194 < 2.2e-16 ***
## Residuals 196
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Hasil analisis MANCOVA menggunakan statistik Pillai’s Trace menunjukkan bahwa variabel prog berpengaruh signifikan terhadap variabel dependen read, write, dan math setelah mengontrol kovariat science, dengan nilai p (< 0.05).
Kovariat science juga menunjukkan pengaruh yang sangat signifikan terhadap variabel dependen, dengan nilai p < 0.001.
summary.aov(mancova_model)
## Response read :
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 3716.9 1858.4 33.195 3.84e-13 ***
## science 1 6229.3 6229.3 111.267 < 2.2e-16 ***
## Residuals 196 10973.2 56.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response write :
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 3175.7 1587.8 29.327 7.209e-12 ***
## science 1 4091.2 4091.2 75.564 1.410e-15 ***
## Residuals 196 10612.0 54.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response math :
## Df Sum Sq Mean Sq F value Pr(>F)
## prog 2 4002.1 2001.1 46.009 < 2.2e-16 ***
## science 1 4939.2 4939.2 113.565 < 2.2e-16 ***
## Residuals 196 8524.5 43.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Berdasarkan uji univariat MANCOVA, diperoleh bahwa variabel prog berpengaruh signifikan terhadap skor read dengan F(1,197) = 5.955, p = 0.016 (< 0.05), terhadap skor write dengan F(1,197) = 9.657, p = 0.002 (< 0.05), serta terhadap skor math dengan F(1,197) = 7.383, p = 0.007 (< 0.05).
anova_p <- c(
summary(anova_read)[[1]]$`Pr(>F)`[1],
summary(anova_write)[[1]]$`Pr(>F)`[1],
summary(anova_math)[[1]]$`Pr(>F)`[1]
)
ancova_p <- c(
summary(ancova_read)[[1]]$`Pr(>F)`[1],
summary(ancova_write)[[1]]$`Pr(>F)`[1],
summary(ancova_math)[[1]]$`Pr(>F)`[1]
)
compare_p_anova <- data.frame(
DV = rep(c("Read", "Write", "Math"), 2),
P_value = c(anova_p, ancova_p),
Model = rep(c("ANOVA", "ANCOVA"), each = 3)
)
compare_p_anova
## DV P_value Model
## 1 Read 4.283274e-09 ANOVA
## 2 Write 4.310163e-09 ANOVA
## 3 Math 7.364011e-12 ANOVA
## 4 Read 3.840255e-13 ANCOVA
## 5 Write 7.209367e-12 ANCOVA
## 6 Math 4.148418e-17 ANCOVA
compare_p_anova$logP <- -log10(compare_p_anova$P_value)
ggplot(compare_p_anova,
aes(x = DV,
y = logP,
fill = Model)) +
geom_bar(stat = "identity",
position = "dodge") +
labs(
title = "Perbandingan ANOVA vs ANCOVA (-log10 p-value)",
x = "Dependent Variable",
y = "-log10(p-value)"
) +
theme_minimal()
manova_p <-
summary(manova_model,
test = "Pillai")$stats[1, "Pr(>F)"]
mancova_p <-
summary(mancova_model,
test = "Pillai")$stats[1, "Pr(>F)"]
compare_p_manova <- data.frame(
Model = c("MANOVA", "MANCOVA"),
P_value = c(manova_p,
mancova_p)
)
compare_p_manova
## Model P_value
## 1 MANOVA 2.304130e-10
## 2 MANCOVA 2.391229e-17
compare_p_manova$logP <-
-log10(compare_p_manova$P_value)
ggplot(compare_p_manova,
aes(x = Model,
y = logP,
fill = Model)) +
geom_bar(stat = "identity") +
labs(
title = "Perbandingan MANOVA vs MANCOVA (-log10 p-value)",
y = "-log10(p-value)"
) +
theme_minimal()