Modul 2 Analisis Multivariat

1. Persiapan Data

# Load Library
library(MVN)
library(biotools)
library(car)
library(psych)
library(readxl)
library(dplyr)
library(tidyr)
library(lmtest)

# Load Data
data <- read.csv("D:/KULIYEAH/Semester 4/Analisis Multivariat/Performance_of_Students.csv")

# Cek nama variabel
names(data)

## [1] "gender"                   "race.group"              
## [3] "parental.education.level" "lunch"                   
## [5] "test.prep.course"         "math.score"              
## [7] "science.score"            "english.score"

# Variabel Dependen (Y)
Y <- cbind(data$math.score, data$science.score)

# Variabel Independen (X)
data$gender                   <- as.factor(data$gender)
data$parental.education.level <- as.factor(data$parental.education.level)
data$lunch                    <- as.factor(data$lunch)
data$test.prep.course         <- as.factor(data$test.prep.course)

# Kovariate (C)
covariate <- data$english.score

2. Uji Asumsi MANOVA

2.1 Normalitas Multivariat

hasil_mvn_raw <- MVN::mvn(data[, c("math.score", "science.score")])

hasil_mvn_raw$multivariate_normality

##            Test Statistic p.value     Method      MVN
## 1 Henze-Zirkler     0.823   0.345 asymptotic ✓ Normal

hasil_mvn_raw$univariate_normality

##               Test      Variable Statistic p.value    Normality
## 1 Anderson-Darling    math.score     0.669   0.081     ✓ Normal
## 2 Anderson-Darling science.score     1.042   0.010 ✗ Not normal

hasil_mvn_raw$descriptives

##        Variable    n   Mean Std.Dev Median Min Max 25th 75th   Skew Kurtosis
## 1    math.score 1000 66.082  15.150     66   0 100   57   77 -0.280    3.274
## 2 science.score 1000 69.128  14.608     70  17 100   59   79 -0.259    2.918

2.2 Homogenitas Matriks Kovarians (Box’s M)

boxM(Y, data$gender)

## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 7.3451, df = 3, p-value = 0.06168

boxM(Y, data$parental.education.level)

## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 8.5468, df = 15, p-value = 0.9

boxM(Y, data$lunch)

## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 5.2363, df = 3, p-value = 0.1553

boxM(Y, data$test.prep.course)

## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 1.7849, df = 3, p-value = 0.6182

2.3 Uji Dependensi (Bartlett)

hasil_bartlett <- cortest.bartlett(cor(data[, c("math.score","science.score")]), n = nrow(data))
print(hasil_bartlett)

## $chisq
## [1] 1086.077
## 
## $p.value
## [1] 3.507441e-238
## 
## $df
## [1] 1

3. Uji Asumsi MANCOVA

3.1 Uji Linearitas Regresi

# Math Score vs English Score per Gender
par(mar = c(5, 5, 4, 2))

plot(data$english.score[data$gender == "Female"],
     data$math.score[data$gender == "Female"],
     col = "#E05C8A", pch = 16, cex = 0.6,
     main = "Linearitas English Score vs Math Score per Gender",
     xlab = "English Score (C)", ylab = "Math Score (Y1)",
     xlim = range(data$english.score), ylim = range(data$math.score))

points(data$english.score[data$gender == "Male"],
       data$math.score[data$gender == "Male"],
       col = "#3A82C4", pch = 16, cex = 0.6)

abline(lm(math.score ~ english.score, data = subset(data, gender == "Female")),
       col = "#E05C8A", lwd = 2)
abline(lm(math.score ~ english.score, data = subset(data, gender == "Male")),
       col = "#3A82C4", lwd = 2)

legend("topleft", legend = c("Female", "Male"),
       col = c("#E05C8A", "#3A82C4"), pch = 16)

# Science Score vs English Score per Gender
par(mar = c(5, 5, 4, 2))

plot(data$english.score[data$gender == "Female"],
     data$science.score[data$gender == "Female"],
     col = "#E05C8A", pch = 16, cex = 0.6,
     main = "Linearitas English Score vs Science Score per Gender",
     xlab = "English Score (C)", ylab = "Science Score (Y2)",
     xlim = range(data$english.score), ylim = range(data$science.score))

points(data$english.score[data$gender == "Male"],
       data$science.score[data$gender == "Male"],
       col = "#3A82C4", pch = 16, cex = 0.6)

abline(lm(science.score ~ english.score, data = subset(data, gender == "Female")),
       col = "#E05C8A", lwd = 2)
abline(lm(science.score ~ english.score, data = subset(data, gender == "Male")),
       col = "#3A82C4", lwd = 2)

legend("topleft", legend = c("Female", "Male"),
       col = c("#E05C8A", "#3A82C4"), pch = 16)

3.2 Homogenitas Varians Error (Uji Levene)

# Math Score
leveneTest(math.score ~ gender,                   data = data)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.2758 0.5996
##       998

leveneTest(math.score ~ parental.education.level, data = data)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   5  0.9976 0.4179
##       994

leveneTest(math.score ~ lunch,                    data = data)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   1  3.3618 0.06702 .
##       998                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

leveneTest(math.score ~ test.prep.course,         data = data)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1   0.564 0.4528
##       998

# Science Score
leveneTest(science.score ~ gender,                   data = data)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.0054 0.9417
##       998

leveneTest(science.score ~ parental.education.level, data = data)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   5  0.3966 0.8513
##       994

leveneTest(science.score ~ lunch,                    data = data)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  1.8769  0.171
##       998

leveneTest(science.score ~ test.prep.course,         data = data)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  1.1094 0.2925
##       998

3.3 Independensi Error (Durbin-Watson)

model_math <- lm(math.score ~ gender + parental.education.level +
                   lunch + test.prep.course + covariate, data = data)

model_science <- lm(science.score ~ gender + parental.education.level +
                      lunch + test.prep.course + covariate, data = data)

print(dwtest(model_math))

## 
##  Durbin-Watson test
## 
## data:  model_math
## DW = 2.0023, p-value = 0.5152
## alternative hypothesis: true autocorrelation is greater than 0

print(dwtest(model_science))

## 
##  Durbin-Watson test
## 
## data:  model_science
## DW = 2.0096, p-value = 0.5612
## alternative hypothesis: true autocorrelation is greater than 0

3.4 Normalitas Residual (Shapiro-Wilk)

res_math    <- residuals(model_math)
res_science <- residuals(model_science)

shapiro.test(res_math)

## 
##  Shapiro-Wilk normality test
## 
## data:  res_math
## W = 0.99831, p-value = 0.4373

shapiro.test(res_science)

## 
##  Shapiro-Wilk normality test
## 
## data:  res_science
## W = 0.97918, p-value = 9.04e-11

# QQ-Plot Residual Y2
par(mfrow = c(1, 2), mar = c(5, 5, 4, 2))

hist(res_science,
     main = "Histogram Residual Y2 (Science Score)",
     xlab = "Residual", col = "#5DCAA5", border = "white", breaks = 30)

qqnorm(res_science,
       main = "QQ Plot Residual Y2 (Science Score)",
       pch = 16, col = "#3A82C4", cex = 0.6)
qqline(res_science, col = "red", lwd = 2)

3.5 Homogenitas Slope Regresi

# Math Score
anova(lm(math.score ~ covariate * gender,                   data = data))

## Analysis of Variance Table
## 
## Response: math.score
##                   Df Sum Sq Mean Sq   F value  Pr(>F)    
## covariate          1 147630  147630 3759.9716 < 2e-16 ***
## gender             1  42417   42417 1080.3138 < 2e-16 ***
## covariate:gender   1    138     138    3.5162 0.06106 .  
## Residuals        996  39106      39                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(lm(math.score ~ covariate * parental.education.level, data = data))

## Analysis of Variance Table
## 
## Response: math.score
##                                     Df Sum Sq Mean Sq   F value Pr(>F)    
## covariate                            1 147630  147630 1807.4076 <2e-16 ***
## parental.education.level             5    610     122    1.4928 0.1894    
## covariate:parental.education.level   5    352      70    0.8609 0.5069    
## Residuals                          988  80700      82                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(lm(math.score ~ covariate * lunch,                    data = data))

## Analysis of Variance Table
## 
## Response: math.score
##                  Df Sum Sq Mean Sq   F value Pr(>F)    
## covariate         1 147630  147630 1942.2727 <2e-16 ***
## lunch             1   5767    5767   75.8712 <2e-16 ***
## covariate:lunch   1    190     190    2.4978 0.1143    
## Residuals       996  75705      76                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(lm(math.score ~ covariate * test.prep.course,         data = data))

## Analysis of Variance Table
## 
## Response: math.score
##                             Df Sum Sq Mean Sq   F value    Pr(>F)    
## covariate                    1 147630  147630 1832.0438 < 2.2e-16 ***
## test.prep.course             1   1400    1400   17.3676 3.348e-05 ***
## covariate:test.prep.course   1      2       2    0.0282    0.8666    
## Residuals                  996  80260      81                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Science Score
anova(lm(science.score ~ covariate * gender,                   data = data))

## Analysis of Variance Table
## 
## Response: science.score
##                   Df Sum Sq Mean Sq   F value    Pr(>F)    
## covariate          1 193302  193302 9873.4278 < 2.2e-16 ***
## gender             1    386     386   19.6918 1.011e-05 ***
## covariate:gender   1      6       6    0.3243    0.5692    
## Residuals        996  19500      20                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(lm(science.score ~ covariate * parental.education.level, data = data))

## Analysis of Variance Table
## 
## Response: science.score
##                                     Df Sum Sq Mean Sq   F value    Pr(>F)    
## covariate                            1 193302  193302 9872.1205 < 2.2e-16 ***
## parental.education.level             5    441      88    4.5096 0.0004535 ***
## covariate:parental.education.level   5    104      21    1.0667 0.3772401    
## Residuals                          988  19346      20                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(lm(science.score ~ covariate * lunch,                    data = data))

## Analysis of Variance Table
## 
## Response: science.score
##                  Df Sum Sq Mean Sq   F value Pr(>F)    
## covariate         1 193302  193302 9685.7279 <2e-16 ***
## lunch             1     10      10    0.4889 0.4846    
## covariate:lunch   1      4       4    0.2120 0.6453    
## Residuals       996  19878      20                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(lm(science.score ~ covariate * test.prep.course,         data = data))

## Analysis of Variance Table
## 
## Response: science.score
##                             Df Sum Sq Mean Sq    F value    Pr(>F)    
## covariate                    1 193302  193302 10079.9509 < 2.2e-16 ***
## test.prep.course             1    697     697    36.3548 2.315e-09 ***
## covariate:test.prep.course   1     94      94     4.9133   0.02688 *  
## Residuals                  996  19100      19                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4. Analisis MANOVA

model_manova <- manova(cbind(math.score, science.score) ~
                         gender + parental.education.level + lunch + test.prep.course, data = data)
summary(model_manova, test = "Wilks")

##                           Df   Wilks approx F num Df den Df    Pr(>F)    
## gender                     1 0.51093   473.82      2    990 < 2.2e-16 ***
## parental.education.level   5 0.94397     5.79     10   1980 1.222e-08 ***
## lunch                      1 0.84103    93.57      2    990 < 2.2e-16 ***
## test.prep.course           1 0.92213    41.80      2    990 < 2.2e-16 ***
## Residuals                991                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5. Analisis MANCOVA

model_mancova <- manova(cbind(math.score, science.score) ~ 
                          gender + parental.education.level + lunch + test.prep.course + english.score, data = data)
summary(model_mancova, test = "Wilks")

##                           Df   Wilks approx F num Df den Df    Pr(>F)    
## gender                     1 0.46788    562.4      2    989 < 2.2e-16 ***
## parental.education.level   5 0.62623     52.2     10   1978 < 2.2e-16 ***
## lunch                      1 0.44345    620.6      2    989 < 2.2e-16 ***
## test.prep.course           1 0.55643    394.2      2    989 < 2.2e-16 ***
## english.score              1 0.08579   5269.8      2    989 < 2.2e-16 ***
## Residuals                990                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Modul 2 Analisis Multivariat - MANOVA MANCOVA

Cantika Latifatul N.E (24031554023), Fawwaz Azri (24031554074), Sofia Dwi K. (24031554079)

2026-04-07

1. Persiapan Data

2. Uji Asumsi MANOVA

2.1 Normalitas Multivariat

2.2 Homogenitas Matriks Kovarians (Box’s M)

2.3 Uji Dependensi (Bartlett)

3. Uji Asumsi MANCOVA

3.1 Uji Linearitas Regresi

3.2 Homogenitas Varians Error (Uji Levene)

3.3 Independensi Error (Durbin-Watson)

3.4 Normalitas Residual (Shapiro-Wilk)

3.5 Homogenitas Slope Regresi

4. Analisis MANOVA

5. Analisis MANCOVA