#1. LOAD DATA
df <- read.csv2(file.choose())
# Cek data
names(df)
## [1] "school" "sex" "age" "address" "famsize"
## [6] "Pstatus" "Medu" "Fedu" "Mjob" "Fjob"
## [11] "reason" "guardian" "traveltime" "studytime" "failures"
## [16] "schoolsup" "famsup" "paid" "activities" "nursery"
## [21] "higher" "internet" "romantic" "famrel" "freetime"
## [26] "goout" "Dalc" "Walc" "health" "absences"
## [31] "G1" "G2" "G3"
str(df)
## 'data.frame': 395 obs. of 33 variables:
## $ school : chr "GP" "GP" "GP" "GP" ...
## $ sex : chr "F" "F" "F" "F" ...
## $ age : int 18 17 15 15 16 16 16 17 15 15 ...
## $ address : chr "U" "U" "U" "U" ...
## $ famsize : chr "GT3" "GT3" "LE3" "GT3" ...
## $ Pstatus : chr "A" "T" "T" "T" ...
## $ Medu : int 4 1 1 4 3 4 2 4 3 3 ...
## $ Fedu : int 4 1 1 2 3 3 2 4 2 4 ...
## $ Mjob : chr "at_home" "at_home" "at_home" "health" ...
## $ Fjob : chr "teacher" "other" "other" "services" ...
## $ reason : chr "course" "course" "other" "home" ...
## $ guardian : chr "mother" "father" "mother" "mother" ...
## $ traveltime: int 2 1 1 1 1 1 1 2 1 1 ...
## $ studytime : int 2 2 2 3 2 2 2 2 2 2 ...
## $ failures : int 0 0 3 0 0 0 0 0 0 0 ...
## $ schoolsup : chr "yes" "no" "yes" "no" ...
## $ famsup : chr "no" "yes" "no" "yes" ...
## $ paid : chr "no" "no" "yes" "yes" ...
## $ activities: chr "no" "no" "no" "yes" ...
## $ nursery : chr "yes" "no" "yes" "yes" ...
## $ higher : chr "yes" "yes" "yes" "yes" ...
## $ internet : chr "no" "yes" "yes" "yes" ...
## $ romantic : chr "no" "no" "no" "yes" ...
## $ famrel : int 4 5 4 3 4 5 4 4 4 5 ...
## $ freetime : int 3 3 3 2 3 4 4 1 2 5 ...
## $ goout : int 4 3 2 2 2 2 4 4 2 1 ...
## $ Dalc : int 1 1 2 1 1 1 1 1 1 1 ...
## $ Walc : int 1 1 3 1 2 2 1 1 1 1 ...
## $ health : int 3 3 3 5 5 5 3 1 1 5 ...
## $ absences : int 6 4 10 2 4 10 0 6 0 0 ...
## $ G1 : int 5 5 7 15 6 15 12 6 16 14 ...
## $ G2 : int 6 5 8 14 10 15 12 5 18 15 ...
## $ G3 : int 6 6 10 15 10 15 11 6 19 15 ...
#2. PREPROCESSING
# Factor
df$sex <- as.factor(df$sex)
df$schoolsup <- as.factor(df$schoolsup)
df$internet <- as.factor(df$internet)
df$famsup <- as.factor(df$famsup)
# Numeric
df$Medu <- as.numeric(df$Medu)
df$studytime <- as.numeric(df$studytime)
df$absences <- as.numeric(df$absences)
df$G3 <- as.numeric(df$G3)
#3. MANOVA
manova_model <- manova(cbind(G1, G2, G3) ~ sex + schoolsup, data = df)
summary(manova_model, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## sex 1 0.011296 1.4853 3 390 0.2181
## schoolsup 1 0.065415 9.0992 3 390 7.796e-06 ***
## Residuals 392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(ggplot2)
library(reshape2)
# Ubah ke long format
df_long <- melt(df,
id.vars = c("sex", "schoolsup"),
measure.vars = c("G1", "G2", "G3"),
variable.name = "Ujian",
value.name = "Nilai")
# Plot
ggplot(df_long, aes(x = Ujian, y = Nilai,
color = schoolsup, group = schoolsup)) +
stat_summary(fun = mean, geom = "line", size = 1.2) +
stat_summary(fun = mean, geom = "point", size = 3) +
labs(
title = "Profil Rata-rata Nilai Akademik Berdasarkan Dukungan Sekolah",
x = "Komponen Nilai",
y = "Rata-rata Nilai",
color = "School Support"
) +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#4. TWO-WAY MANOVA
manova_interaksi <- manova(cbind(G1, G2, G3) ~ sex * schoolsup, data = df)
summary(manova_interaksi, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## sex 1 0.011315 1.4839 3 389 0.2184
## schoolsup 1 0.065454 9.0817 3 389 7.991e-06 ***
## sex:schoolsup 1 0.005713 0.7450 3 389 0.5258
## Residuals 391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(ggplot2)
library(reshape2)
# Ubah ke long format
df_long <- melt(df,
id.vars = c("sex", "schoolsup"),
measure.vars = c("G1", "G2", "G3"),
variable.name = "Ujian",
value.name = "Nilai")
# Interaction plot
ggplot(df_long, aes(x = Ujian, y = Nilai,
color = sex, linetype = schoolsup,
group = interaction(sex, schoolsup))) +
stat_summary(fun = mean, geom = "line", size = 1.1) +
stat_summary(fun = mean, geom = "point", size = 3) +
labs(
title = "Interaksi Jenis Kelamin dan Dukungan Sekolah terhadap Nilai Akademik",
x = "Komponen Nilai",
y = "Rata-rata Nilai",
color = "Sex",
linetype = "School Support"
) +
theme_minimal()

#5. MANCOVA
mancova_model <- manova(
cbind(G1, G2, G3) ~ sex + schoolsup + internet + famsup + Medu + studytime + absences,
data = df
)
summary(mancova_model, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## sex 1 0.012155 1.5791 3 385 0.1939358
## schoolsup 1 0.069135 9.5313 3 385 4.369e-06 ***
## internet 1 0.017832 2.3300 3 385 0.0739564 .
## famsup 1 0.006776 0.8755 3 385 0.4538239
## Medu 1 0.047993 6.4697 3 385 0.0002787 ***
## studytime 1 0.044005 5.9073 3 385 0.0005992 ***
## absences 1 0.023534 3.0930 3 385 0.0269793 *
## Residuals 387
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(ggplot2)
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.5.3
# Plot 1: studytime
p1 <- ggplot(df, aes(x = studytime, y = G3, color = schoolsup)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE) +
labs(title = "(a) Study Time vs G3",
x = "Study Time", y = "G3") +
theme_minimal()
# Plot 2: absences
p2 <- ggplot(df, aes(x = absences, y = G3, color = schoolsup)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE) +
labs(title = "(b) Absences vs G3",
x = "Absences", y = "G3") +
theme_minimal()
# Plot 3: Medu
p3 <- ggplot(df, aes(x = factor(Medu), y = G3)) +
geom_boxplot(fill = "skyblue") +
labs(title = "(c) Medu vs G3",
x = "Pendidikan Ibu", y = "G3") +
theme_minimal()
# Gabungan
p1 + p2 + p3
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#6. ANCOVA
# ANCOVA
ancova_model <- aov(G3 ~ sex + schoolsup + internet + famsup + Medu + studytime + absences, data = df)
summary(ancova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 89 88.5 4.507 0.0344 *
## schoolsup 1 40 39.5 2.013 0.1567
## internet 1 73 72.7 3.702 0.0551 .
## famsup 1 7 6.8 0.345 0.5574
## Medu 1 337 337.2 17.169 4.2e-05 ***
## studytime 1 119 118.6 6.039 0.0144 *
## absences 1 6 6.1 0.311 0.5775
## Residuals 387 7600 19.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(emmeans)
## Warning: package 'emmeans' was built under R version 4.5.3
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(ggplot2)
library(gridExtra)
emm <- emmeans(ancova_model, ~ sex)
emm_df <- as.data.frame(emm)
# (a) Studytime vs G3 + sex
p1 <- ggplot(df, aes(x = studytime, y = G3, color = sex)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
labs(
title = "(a) Study Time vs G3",
x = "Study Time",
y = "Nilai Akhir (G3)"
) +
theme_minimal()
# (b) Medu vs G3
p2 <- ggplot(df, aes(x = as.factor(Medu), y = G3)) +
geom_boxplot(fill = "skyblue") +
labs(
title = "(b) Pendidikan Ibu (Medu) vs G3",
x = "Tingkat Pendidikan Ibu",
y = "Nilai Akhir (G3)"
) +
theme_minimal()
# (c) Adjusted Mean (sex)
p3 <- ggplot(emm_df, aes(x = sex, y = emmean)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
labs(
title = "(c) Adjusted Mean G3 (by sex)",
x = "Jenis Kelamin",
y = "Adjusted Mean G3"
) +
theme_minimal()
# Gabungan
grid.arrange(p1, p2, p3, ncol = 3)
## `geom_smooth()` using formula = 'y ~ x'

# 8. UJI ASUMSI
# Normalitas
shapiro.test(residuals(ancova_model))
##
## Shapiro-Wilk normality test
##
## data: residuals(ancova_model)
## W = 0.94438, p-value = 5.158e-11
# Homogenitas varians
library(car)
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
leveneTest(G3 ~ sex * schoolsup, data = df)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 3.4578 0.01655 *
## 391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Homogenitas kovarians (MANOVA)
library(biotools)
## Warning: package 'biotools' was built under R version 4.5.3
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:patchwork':
##
## area
## ---
## biotools version 4.3
boxM(df[, c("G1","G2","G3")], df$sex)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: df[, c("G1", "G2", "G3")]
## Chi-Sq (approx.) = 18.232, df = 6, p-value = 0.005677
#9. DESKRIPSI DATA
summary(df)
## school sex age address
## Length:395 F:208 Min. :15.0 Length:395
## Class :character M:187 1st Qu.:16.0 Class :character
## Mode :character Median :17.0 Mode :character
## Mean :16.7
## 3rd Qu.:18.0
## Max. :22.0
## famsize Pstatus Medu Fedu
## Length:395 Length:395 Min. :0.000 Min. :0.000
## Class :character Class :character 1st Qu.:2.000 1st Qu.:2.000
## Mode :character Mode :character Median :3.000 Median :2.000
## Mean :2.749 Mean :2.522
## 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :4.000 Max. :4.000
## Mjob Fjob reason guardian
## Length:395 Length:395 Length:395 Length:395
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## traveltime studytime failures schoolsup famsup
## Min. :1.000 Min. :1.000 Min. :0.0000 no :344 no :153
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.0000 yes: 51 yes:242
## Median :1.000 Median :2.000 Median :0.0000
## Mean :1.448 Mean :2.035 Mean :0.3342
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:0.0000
## Max. :4.000 Max. :4.000 Max. :3.0000
## paid activities nursery higher
## Length:395 Length:395 Length:395 Length:395
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## internet romantic famrel freetime goout
## no : 66 Length:395 Min. :1.000 Min. :1.000 Min. :1.000
## yes:329 Class :character 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:2.000
## Mode :character Median :4.000 Median :3.000 Median :3.000
## Mean :3.944 Mean :3.235 Mean :3.109
## 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000
## Dalc Walc health absences
## Min. :1.000 Min. :1.000 Min. :1.000 Min. : 0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:3.000 1st Qu.: 0.000
## Median :1.000 Median :2.000 Median :4.000 Median : 4.000
## Mean :1.481 Mean :2.291 Mean :3.554 Mean : 5.709
## 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:5.000 3rd Qu.: 8.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :75.000
## G1 G2 G3
## Min. : 3.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 8.00 1st Qu.: 9.00 1st Qu.: 8.00
## Median :11.00 Median :11.00 Median :11.00
## Mean :10.91 Mean :10.71 Mean :10.42
## 3rd Qu.:13.00 3rd Qu.:13.00 3rd Qu.:14.00
## Max. :19.00 Max. :19.00 Max. :20.00
# Karakteristik Data
library(ggplot2)
ggplot(df, aes(x = absences, y = G3)) +
geom_point(alpha = 0.5, color = "steelblue") + # transparansi biar ga numpuk
geom_smooth(method = "lm", color = "red", se = TRUE) + # garis tren
labs(
title = "Hubungan Absensi dengan Nilai Akhir (G3)",
x = "Jumlah Absensi",
y = "Nilai Akhir (G3)"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
