library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(MVN)
## Warning: package 'MVN' was built under R version 4.5.3
library(biotools)
## Warning: package 'biotools' was built under R version 4.5.3
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## ---
## biotools version 4.3
library(heplots)
## Warning: package 'heplots' was built under R version 4.5.3
## Loading required package: broom
##
## Attaching package: 'heplots'
##
## The following object is masked from 'package:biotools':
##
## boxM
library(car)
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(psych)
## Warning: package 'psych' was built under R version 4.5.3
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:car':
##
## logit
##
## The following object is masked from 'package:MVN':
##
## mardia
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
df <- read.csv("education_intervention_10000.csv")
head(df,10)
## student_id school_id grade_level intervention ses_index attendance_rate
## 1 93207890 110 9 blended 1.379 0.996
## 2 61581758 128 11 blended 2.037 0.930
## 3 75844861 158 9 tutoring 0.403 0.933
## 4 57564202 153 9 flipped -0.442 0.924
## 5 57864707 100 12 tutoring -0.572 0.971
## 6 78728520 100 11 blended -1.128 0.875
## 7 73540890 144 12 tutoring 0.280 0.982
## 8 83052348 125 9 tutoring -0.608 0.936
## 9 60636531 143 11 control -0.016 0.865
## 10 55920596 120 11 blended -1.269 0.810
## study_hours_wk teacher_experience_years class_size baseline_math
## 1 5.93 0.0 28 76.9
## 2 2.52 6.7 32 78.9
## 3 5.32 8.3 23 69.4
## 4 5.26 9.2 30 55.9
## 5 7.47 10.6 21 55.3
## 6 4.12 15.7 25 59.4
## 7 3.61 8.3 34 56.4
## 8 2.78 4.1 24 67.9
## 9 1.05 7.6 14 67.5
## 10 2.73 0.2 37 81.5
## baseline_reading baseline_motivation_1_10 post_math post_reading
## 1 65.3 7.7 87.8 75.1
## 2 82.3 7.1 88.9 89.0
## 3 83.9 5.8 84.8 95.2
## 4 61.8 6.7 72.7 68.3
## 5 66.4 4.6 75.7 83.4
## 6 67.2 7.4 69.1 78.9
## 7 71.1 6.0 66.5 87.1
## 8 47.9 9.7 82.1 58.6
## 9 79.6 7.1 76.6 87.1
## 10 69.9 5.9 85.3 76.0
## test_anxiety_1_10 passed
## 1 2.6 1
## 2 4.3 1
## 3 3.8 1
## 4 5.6 1
## 5 7.6 1
## 6 6.8 0
## 7 5.4 1
## 8 3.2 1
## 9 4.1 1
## 10 4.8 1
str(df)
## 'data.frame': 10000 obs. of 16 variables:
## $ student_id : int 93207890 61581758 75844861 57564202 57864707 78728520 73540890 83052348 60636531 55920596 ...
## $ school_id : int 110 128 158 153 100 100 144 125 143 120 ...
## $ grade_level : int 9 11 9 9 12 11 12 9 11 11 ...
## $ intervention : chr "blended" "blended" "tutoring" "flipped" ...
## $ ses_index : num 1.379 2.037 0.403 -0.442 -0.572 ...
## $ attendance_rate : num 0.996 0.93 0.933 0.924 0.971 0.875 0.982 0.936 0.865 0.81 ...
## $ study_hours_wk : num 5.93 2.52 5.32 5.26 7.47 4.12 3.61 2.78 1.05 2.73 ...
## $ teacher_experience_years: num 0 6.7 8.3 9.2 10.6 15.7 8.3 4.1 7.6 0.2 ...
## $ class_size : int 28 32 23 30 21 25 34 24 14 37 ...
## $ baseline_math : num 76.9 78.9 69.4 55.9 55.3 59.4 56.4 67.9 67.5 81.5 ...
## $ baseline_reading : num 65.3 82.3 83.9 61.8 66.4 67.2 71.1 47.9 79.6 69.9 ...
## $ baseline_motivation_1_10: num 7.7 7.1 5.8 6.7 4.6 7.4 6 9.7 7.1 5.9 ...
## $ post_math : num 87.8 88.9 84.8 72.7 75.7 69.1 66.5 82.1 76.6 85.3 ...
## $ post_reading : num 75.1 89 95.2 68.3 83.4 78.9 87.1 58.6 87.1 76 ...
## $ test_anxiety_1_10 : num 2.6 4.3 3.8 5.6 7.6 6.8 5.4 3.2 4.1 4.8 ...
## $ passed : int 1 1 1 1 1 0 1 1 1 1 ...
summary(df)
## student_id school_id grade_level intervention
## Min. :10001836 Min. :100.0 Min. : 9.00 Length:10000
## 1st Qu.:31792792 1st Qu.:115.0 1st Qu.: 9.00 Class :character
## Median :54680352 Median :130.0 Median :10.00 Mode :character
## Mean :54791854 Mean :129.6 Mean :10.42
## 3rd Qu.:77195169 3rd Qu.:145.0 3rd Qu.:11.00
## Max. :99992163 Max. :159.0 Max. :12.00
## ses_index attendance_rate study_hours_wk teacher_experience_years
## Min. :-2.500000 Min. :0.6670 Min. : 0.020 Min. : 0.000
## 1st Qu.:-0.678250 1st Qu.:0.8730 1st Qu.: 2.250 1st Qu.: 5.600
## Median : 0.001000 Median :0.9160 Median : 3.910 Median : 9.000
## Mean :-0.002791 Mean :0.9129 Mean : 4.576 Mean : 9.057
## 3rd Qu.: 0.671000 3rd Qu.:0.9570 3rd Qu.: 6.140 3rd Qu.:12.400
## Max. : 2.500000 Max. :1.0000 Max. :22.850 Max. :28.700
## class_size baseline_math baseline_reading baseline_motivation_1_10
## Min. :10.00 Min. : 20.00 Min. : 23.30 Min. : 1.000
## 1st Qu.:22.00 1st Qu.: 61.00 1st Qu.: 63.60 1st Qu.: 5.300
## Median :26.00 Median : 70.00 Median : 71.80 Median : 6.500
## Mean :26.06 Mean : 70.03 Mean : 71.77 Mean : 6.456
## 3rd Qu.:30.00 3rd Qu.: 79.20 3rd Qu.: 79.92 3rd Qu.: 7.600
## Max. :40.00 Max. :100.00 Max. :100.00 Max. :10.000
## post_math post_reading test_anxiety_1_10 passed
## Min. : 29.7 Min. : 35.90 Min. : 1.000 Min. :0.0000
## 1st Qu.: 71.9 1st Qu.: 72.50 1st Qu.: 3.700 1st Qu.:1.0000
## Median : 81.4 Median : 81.40 Median : 4.800 Median :1.0000
## Mean : 80.9 Mean : 80.87 Mean : 4.764 Mean :0.9042
## 3rd Qu.: 91.5 3rd Qu.: 90.20 3rd Qu.: 5.900 3rd Qu.:1.0000
## Max. :100.0 Max. :100.00 Max. :10.000 Max. :1.0000
colSums(is.na(df))
## student_id school_id grade_level
## 0 0 0
## intervention ses_index attendance_rate
## 0 0 0
## study_hours_wk teacher_experience_years class_size
## 0 0 0
## baseline_math baseline_reading baseline_motivation_1_10
## 0 0 0
## post_math post_reading test_anxiety_1_10
## 0 0 0
## passed
## 0
##EDA
# histogram numerik
df %>%
dplyr::select_if(is.numeric) %>%
tidyr::pivot_longer(cols = everything()) %>%
ggplot(aes(x = value)) +
geom_histogram(bins = 20, fill = "skyblue", color = "black") +
facet_wrap(~name, scales = "free") +
theme_minimal() +
labs(title = "Distribusi Variabel Numerik")
# barplot kategorik
df$intervention <- as.factor(df$intervention)
ggplot(df, aes(x = intervention)) +
geom_bar(fill = "orange") +
theme_minimal() +
labs(title = "Distribusi Intervention")
df_mancova <- dplyr::select(df,
intervention,
post_math,
post_reading,
baseline_math,
baseline_reading)
head(df_mancova)
## intervention post_math post_reading baseline_math baseline_reading
## 1 blended 87.8 75.1 76.9 65.3
## 2 blended 88.9 89.0 78.9 82.3
## 3 tutoring 84.8 95.2 69.4 83.9
## 4 flipped 72.7 68.3 55.9 61.8
## 5 tutoring 75.7 83.4 55.3 66.4
## 6 blended 69.1 78.9 59.4 67.2
index_terpilih <- c(
8747, 3695, 8717, 8634, 8955, 2419, 9575, 3038, 9573, 6551,
1050, 8139, 6222, 7402, 7763, 9160, 3635, 9036, 7985, 4812,
9112, 4410, 5178, 3125, 6227, 2701, 6765, 1049, 5174, 3188,
5027, 6060, 3738, 8708, 8904, 4130, 2220, 7498, 3525, 2953,
6573, 3576, 7062, 9698, 2214, 1407, 4584, 8532, 3082, 8982,
7425, 5315, 3645, 3622, 3532, 7177, 7865, 4934, 7860, 5125,
6243, 5960, 6838, 6017, 2539, 2623, 1530, 1863, 7956, 4028,
8818, 4055, 6209, 5666, 3445, 8624, 4668, 7284, 3043, 2113,
5572, 2328, 8170, 8278, 2869, 3538, 6042, 6918, 4358, 8195,
7281, 4960, 7854, 8472, 4099, 6655, 4010, 7258, 3493, 9247
)
# biar index aman
df <- df %>% mutate(row_id = row_number())
df_mancova <- df %>%
filter(row_id %in% index_terpilih) %>%
dplyr::select(
intervention,
post_math,
post_reading,
baseline_math,
baseline_reading
)
dim(df_mancova)
## [1] 100 5
df_mancova %>%
pivot_longer(cols = -intervention) %>%
ggplot(aes(x = value)) +
geom_histogram(bins = 20, fill = "skyblue", color = "black") +
facet_wrap(~name, scales = "free") +
theme_minimal()
boxplot(df_mancova[,2:3],
main = "Boxplot Dependent Variables",
col = "lightblue")
df_mancova$intervention <- as.factor(df_mancova$intervention)
mvn_result <- mvn(
data = df_mancova[,2:3],
mvn_test = "mardia"
)
mvn_result$multivariate_normality
## Test Statistic p.value Method MVN
## 1 Mardia Skewness 7.160 0.128 asymptotic ✓ Normal
## 2 Mardia Kurtosis -1.338 0.181 asymptotic ✓ Normal
biotools::boxM(df_mancova[,2:3], df_mancova$intervention)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: df_mancova[, 2:3]
## Chi-Sq (approx.) = 5.7976, df = 9, p-value = 0.76
cor_matrix <- cor(df_mancova[,2:3])
cor_matrix
## post_math post_reading
## post_math 1.0000000 0.2540452
## post_reading 0.2540452 1.0000000
cortest.bartlett(cor_matrix, n = nrow(df_mancova))
## $chisq
## [1] 6.504791
##
## $p.value
## [1] 0.01075842
##
## $df
## [1] 1
pairs(~ baseline_math +
baseline_reading +
post_math +
post_reading,
data = df_mancova,
col = df_mancova$intervention)
ggplot(df_mancova,
aes(x = baseline_math,
y = post_math,
color = intervention)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
# MANOVA
model_manova <- manova(
cbind(post_math, post_reading) ~ intervention,
data = df_mancova
)
summary(model_manova, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## intervention 3 0.051208 0.84086 6 192 0.5398
## Residuals 96
# follow-up (per variabel)
summary.aov(model_manova)
## Response post_math :
## Df Sum Sq Mean Sq F value Pr(>F)
## intervention 3 519.9 173.29 1.1284 0.3415
## Residuals 96 14742.6 153.57
##
## Response post_reading :
## Df Sum Sq Mean Sq F value Pr(>F)
## intervention 3 82.7 27.569 0.2097 0.8895
## Residuals 96 12623.8 131.498
# cek interaksi (opsional tapi bagus)
# sebenarnya di MANOVA jarang dipakai, tapi bisa untuk eksplor
# ANCOVA Math
ancova_math <- aov(
post_math ~ intervention + baseline_math,
data = df_mancova
)
summary(ancova_math)
## Df Sum Sq Mean Sq F value Pr(>F)
## intervention 3 520 173 12.62 5.17e-07 ***
## baseline_math 1 13438 13438 978.54 < 2e-16 ***
## Residuals 95 1305 14
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# cek interaksi (WAJIB)
ancova_math_interaksi <- aov(
post_math ~ intervention * baseline_math,
data = df_mancova
)
summary(ancova_math_interaksi)
## Df Sum Sq Mean Sq F value Pr(>F)
## intervention 3 520 173 13.259 2.88e-07 ***
## baseline_math 1 13438 13438 1028.171 < 2e-16 ***
## intervention:baseline_math 3 102 34 2.606 0.0564 .
## Residuals 92 1202 13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANCOVA Reading
ancova_reading <- aov(
post_reading ~ intervention + baseline_reading,
data = df_mancova
)
summary(ancova_reading)
## Df Sum Sq Mean Sq F value Pr(>F)
## intervention 3 83 28 1.677 0.177
## baseline_reading 1 11062 11062 672.675 <2e-16 ***
## Residuals 95 1562 16
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# cek interaksi (WAJIB)
ancova_reading_interaksi <- aov(
post_reading ~ intervention * baseline_reading,
data = df_mancova
)
summary(ancova_reading_interaksi)
## Df Sum Sq Mean Sq F value Pr(>F)
## intervention 3 83 28 1.637 0.186
## baseline_reading 1 11062 11062 656.885 <2e-16 ***
## intervention:baseline_reading 3 13 4 0.257 0.856
## Residuals 92 1549 17
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# MANCOVA (utama)
model <- manova(
cbind(post_math, post_reading) ~
intervention + baseline_math + baseline_reading,
data = df_mancova
)
summary(model, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## intervention 3 0.32591 6.10 6 188 7.383e-06 ***
## baseline_math 1 0.91384 493.17 2 93 < 2.2e-16 ***
## baseline_reading 1 0.86630 301.29 2 93 < 2.2e-16 ***
## Residuals 94
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# follow-up
summary.aov(model)
## Response post_math :
## Df Sum Sq Mean Sq F value Pr(>F)
## intervention 3 519.9 173.3 12.5033 5.976e-07 ***
## baseline_math 1 13438.0 13438.0 969.5848 < 2.2e-16 ***
## baseline_reading 1 1.8 1.8 0.1303 0.7189
## Residuals 94 1302.8 13.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response post_reading :
## Df Sum Sq Mean Sq F value Pr(>F)
## intervention 3 82.7 27.6 1.659 0.1812
## baseline_math 1 1045.7 1045.7 62.927 4.419e-12 ***
## baseline_reading 1 10016.1 10016.1 602.735 < 2.2e-16 ***
## Residuals 94 1562.1 16.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# cek interaksi (biar aman)
model_interaksi <- manova(
cbind(post_math, post_reading) ~
intervention * baseline_math +
intervention * baseline_reading,
data = df_mancova
)
summary(model_interaksi, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## intervention 3 0.35038 6.23 6 176 5.951e-06 ***
## baseline_math 1 0.92090 506.42 2 87 < 2.2e-16 ***
## baseline_reading 1 0.86958 290.04 2 87 < 2.2e-16 ***
## intervention:baseline_math 3 0.09676 1.49 6 176 0.1836
## intervention:baseline_reading 3 0.03112 0.46 6 176 0.8346
## Residuals 88
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1