1. Persiapan Data

# Load Library
library(MVN)
library(biotools)
library(car)
library(psych)
library(readxl)
library(dplyr)
library(tidyr)
library(lmtest)

# Load Data
data <- read.csv("D:/KULIYEAH/Semester 4/Analisis Multivariat/Performance_of_Students.csv")

# Cek nama variabel
names(data)
## [1] "gender"                   "race.group"              
## [3] "parental.education.level" "lunch"                   
## [5] "test.prep.course"         "math.score"              
## [7] "science.score"            "english.score"
# Variabel Dependen (Y)
Y <- cbind(data$math.score, data$science.score)

# Variabel Independen (X)
data$gender                   <- as.factor(data$gender)
data$parental.education.level <- as.factor(data$parental.education.level)
data$lunch                    <- as.factor(data$lunch)
data$test.prep.course         <- as.factor(data$test.prep.course)

# Kovariate (C)
covariate <- data$english.score

2. Uji Asumsi MANOVA

2.1 Normalitas Multivariat

hasil_mvn_raw <- MVN::mvn(data[, c("math.score", "science.score")])
hasil_mvn_raw$multivariate_normality
##            Test Statistic p.value     Method      MVN
## 1 Henze-Zirkler     0.823   0.345 asymptotic ✓ Normal
hasil_mvn_raw$univariate_normality
##               Test      Variable Statistic p.value    Normality
## 1 Anderson-Darling    math.score     0.669   0.081     ✓ Normal
## 2 Anderson-Darling science.score     1.042   0.010 ✗ Not normal
hasil_mvn_raw$descriptives
##        Variable    n   Mean Std.Dev Median Min Max 25th 75th   Skew Kurtosis
## 1    math.score 1000 66.082  15.150     66   0 100   57   77 -0.280    3.274
## 2 science.score 1000 69.128  14.608     70  17 100   59   79 -0.259    2.918

2.2 Homogenitas Matriks Kovarians (Box’s M)

boxM(Y, data$gender)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 7.3451, df = 3, p-value = 0.06168
boxM(Y, data$parental.education.level)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 8.5468, df = 15, p-value = 0.9
boxM(Y, data$lunch)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 5.2363, df = 3, p-value = 0.1553
boxM(Y, data$test.prep.course)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 1.7849, df = 3, p-value = 0.6182

2.3 Uji Dependensi (Bartlett)

hasil_bartlett <- cortest.bartlett(cor(data[, c("math.score","science.score")]), n = nrow(data))
print(hasil_bartlett)
## $chisq
## [1] 1086.077
## 
## $p.value
## [1] 3.507441e-238
## 
## $df
## [1] 1

3. Uji Asumsi MANCOVA

3.1 Uji Linearitas Regresi

# Math Score vs English Score per Gender
par(mar = c(5, 5, 4, 2))

plot(data$english.score[data$gender == "Female"],
     data$math.score[data$gender == "Female"],
     col = "#E05C8A", pch = 16, cex = 0.6,
     main = "Linearitas English Score vs Math Score per Gender",
     xlab = "English Score (C)", ylab = "Math Score (Y1)",
     xlim = range(data$english.score), ylim = range(data$math.score))

points(data$english.score[data$gender == "Male"],
       data$math.score[data$gender == "Male"],
       col = "#3A82C4", pch = 16, cex = 0.6)

abline(lm(math.score ~ english.score, data = subset(data, gender == "Female")),
       col = "#E05C8A", lwd = 2)
abline(lm(math.score ~ english.score, data = subset(data, gender == "Male")),
       col = "#3A82C4", lwd = 2)

legend("topleft", legend = c("Female", "Male"),
       col = c("#E05C8A", "#3A82C4"), pch = 16)

# Science Score vs English Score per Gender
par(mar = c(5, 5, 4, 2))

plot(data$english.score[data$gender == "Female"],
     data$science.score[data$gender == "Female"],
     col = "#E05C8A", pch = 16, cex = 0.6,
     main = "Linearitas English Score vs Science Score per Gender",
     xlab = "English Score (C)", ylab = "Science Score (Y2)",
     xlim = range(data$english.score), ylim = range(data$science.score))

points(data$english.score[data$gender == "Male"],
       data$science.score[data$gender == "Male"],
       col = "#3A82C4", pch = 16, cex = 0.6)

abline(lm(science.score ~ english.score, data = subset(data, gender == "Female")),
       col = "#E05C8A", lwd = 2)
abline(lm(science.score ~ english.score, data = subset(data, gender == "Male")),
       col = "#3A82C4", lwd = 2)

legend("topleft", legend = c("Female", "Male"),
       col = c("#E05C8A", "#3A82C4"), pch = 16)

3.2 Homogenitas Varians Error (Uji Levene)

# Math Score
leveneTest(math.score ~ gender,                   data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.2758 0.5996
##       998
leveneTest(math.score ~ parental.education.level, data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   5  0.9976 0.4179
##       994
leveneTest(math.score ~ lunch,                    data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   1  3.3618 0.06702 .
##       998                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(math.score ~ test.prep.course,         data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1   0.564 0.4528
##       998
# Science Score
leveneTest(science.score ~ gender,                   data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.0054 0.9417
##       998
leveneTest(science.score ~ parental.education.level, data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   5  0.3966 0.8513
##       994
leveneTest(science.score ~ lunch,                    data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  1.8769  0.171
##       998
leveneTest(science.score ~ test.prep.course,         data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  1.1094 0.2925
##       998

3.3 Independensi Error (Durbin-Watson)

model_math <- lm(math.score ~ gender + parental.education.level +
                   lunch + test.prep.course + covariate, data = data)

model_science <- lm(science.score ~ gender + parental.education.level +
                      lunch + test.prep.course + covariate, data = data)

print(dwtest(model_math))
## 
##  Durbin-Watson test
## 
## data:  model_math
## DW = 2.0023, p-value = 0.5152
## alternative hypothesis: true autocorrelation is greater than 0
print(dwtest(model_science))
## 
##  Durbin-Watson test
## 
## data:  model_science
## DW = 2.0096, p-value = 0.5612
## alternative hypothesis: true autocorrelation is greater than 0

3.4 Normalitas Residual (Shapiro-Wilk)

res_math    <- residuals(model_math)
res_science <- residuals(model_science)

shapiro.test(res_math)
## 
##  Shapiro-Wilk normality test
## 
## data:  res_math
## W = 0.99831, p-value = 0.4373
shapiro.test(res_science)
## 
##  Shapiro-Wilk normality test
## 
## data:  res_science
## W = 0.97918, p-value = 9.04e-11
# QQ-Plot Residual Y2
par(mfrow = c(1, 2), mar = c(5, 5, 4, 2))

hist(res_science,
     main = "Histogram Residual Y2 (Science Score)",
     xlab = "Residual", col = "#5DCAA5", border = "white", breaks = 30)

qqnorm(res_science,
       main = "QQ Plot Residual Y2 (Science Score)",
       pch = 16, col = "#3A82C4", cex = 0.6)
qqline(res_science, col = "red", lwd = 2)

3.5 Homogenitas Slope Regresi

# Math Score
anova(lm(math.score ~ covariate * gender,                   data = data))
## Analysis of Variance Table
## 
## Response: math.score
##                   Df Sum Sq Mean Sq   F value  Pr(>F)    
## covariate          1 147630  147630 3759.9716 < 2e-16 ***
## gender             1  42417   42417 1080.3138 < 2e-16 ***
## covariate:gender   1    138     138    3.5162 0.06106 .  
## Residuals        996  39106      39                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(math.score ~ covariate * parental.education.level, data = data))
## Analysis of Variance Table
## 
## Response: math.score
##                                     Df Sum Sq Mean Sq   F value Pr(>F)    
## covariate                            1 147630  147630 1807.4076 <2e-16 ***
## parental.education.level             5    610     122    1.4928 0.1894    
## covariate:parental.education.level   5    352      70    0.8609 0.5069    
## Residuals                          988  80700      82                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(math.score ~ covariate * lunch,                    data = data))
## Analysis of Variance Table
## 
## Response: math.score
##                  Df Sum Sq Mean Sq   F value Pr(>F)    
## covariate         1 147630  147630 1942.2727 <2e-16 ***
## lunch             1   5767    5767   75.8712 <2e-16 ***
## covariate:lunch   1    190     190    2.4978 0.1143    
## Residuals       996  75705      76                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(math.score ~ covariate * test.prep.course,         data = data))
## Analysis of Variance Table
## 
## Response: math.score
##                             Df Sum Sq Mean Sq   F value    Pr(>F)    
## covariate                    1 147630  147630 1832.0438 < 2.2e-16 ***
## test.prep.course             1   1400    1400   17.3676 3.348e-05 ***
## covariate:test.prep.course   1      2       2    0.0282    0.8666    
## Residuals                  996  80260      81                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Science Score
anova(lm(science.score ~ covariate * gender,                   data = data))
## Analysis of Variance Table
## 
## Response: science.score
##                   Df Sum Sq Mean Sq   F value    Pr(>F)    
## covariate          1 193302  193302 9873.4278 < 2.2e-16 ***
## gender             1    386     386   19.6918 1.011e-05 ***
## covariate:gender   1      6       6    0.3243    0.5692    
## Residuals        996  19500      20                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(science.score ~ covariate * parental.education.level, data = data))
## Analysis of Variance Table
## 
## Response: science.score
##                                     Df Sum Sq Mean Sq   F value    Pr(>F)    
## covariate                            1 193302  193302 9872.1205 < 2.2e-16 ***
## parental.education.level             5    441      88    4.5096 0.0004535 ***
## covariate:parental.education.level   5    104      21    1.0667 0.3772401    
## Residuals                          988  19346      20                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(science.score ~ covariate * lunch,                    data = data))
## Analysis of Variance Table
## 
## Response: science.score
##                  Df Sum Sq Mean Sq   F value Pr(>F)    
## covariate         1 193302  193302 9685.7279 <2e-16 ***
## lunch             1     10      10    0.4889 0.4846    
## covariate:lunch   1      4       4    0.2120 0.6453    
## Residuals       996  19878      20                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(science.score ~ covariate * test.prep.course,         data = data))
## Analysis of Variance Table
## 
## Response: science.score
##                             Df Sum Sq Mean Sq    F value    Pr(>F)    
## covariate                    1 193302  193302 10079.9509 < 2.2e-16 ***
## test.prep.course             1    697     697    36.3548 2.315e-09 ***
## covariate:test.prep.course   1     94      94     4.9133   0.02688 *  
## Residuals                  996  19100      19                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4. Analisis MANOVA

model_manova <- manova(cbind(math.score, science.score) ~
                         gender + parental.education.level + lunch + test.prep.course, data = data)
summary(model_manova, test = "Wilks")
##                           Df   Wilks approx F num Df den Df    Pr(>F)    
## gender                     1 0.51093   473.82      2    990 < 2.2e-16 ***
## parental.education.level   5 0.94397     5.79     10   1980 1.222e-08 ***
## lunch                      1 0.84103    93.57      2    990 < 2.2e-16 ***
## test.prep.course           1 0.92213    41.80      2    990 < 2.2e-16 ***
## Residuals                991                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5. Analisis MANCOVA

model_mancova <- manova(cbind(math.score, science.score) ~ 
                          gender + parental.education.level + lunch + test.prep.course + english.score, data = data)
summary(model_mancova, test = "Wilks")
##                           Df   Wilks approx F num Df den Df    Pr(>F)    
## gender                     1 0.46788    562.4      2    989 < 2.2e-16 ***
## parental.education.level   5 0.62623     52.2     10   1978 < 2.2e-16 ***
## lunch                      1 0.44345    620.6      2    989 < 2.2e-16 ***
## test.prep.course           1 0.55643    394.2      2    989 < 2.2e-16 ***
## english.score              1 0.08579   5269.8      2    989 < 2.2e-16 ***
## Residuals                990                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1