MANOVA
Import Library
suppressMessages(suppressWarnings({
library(tidyverse)
library(magrittr)
library(kableExtra)
library(rstatix)
library(car)
library(heplots)
library(broom)
}))
Load Dataset Manova
# Read the raw student-performance records from the CSV file.
df <- read.csv("student_performance_data.csv")

# Keep the grouping column and the two exam scores, drop rows with missing
# values, and store the grouping column as a factor.
df_manova <- drop_na(
  select(df, parent_education, midterm_score, final_exam_score)
)
df_manova <- mutate(df_manova, parent_education = as.factor(parent_education))

# Preview the first ten rows of the analysis data.
kable(
  head(df_manova, 10),
  format = "simple",
  caption = "Tabel Data MANOVA (10 Baris Pertama)"
)
Tabel Data MANOVA (10 Baris Pertama)
| Master |
70.70 |
53.10 |
| Bachelor |
27.92 |
87.17 |
| PhD |
70.92 |
99.61 |
| Bachelor |
31.73 |
88.85 |
| Bachelor |
78.28 |
54.23 |
| Bachelor |
54.26 |
79.79 |
| High School |
87.83 |
67.37 |
| Master |
95.69 |
85.06 |
| Bachelor |
56.28 |
40.72 |
| Bachelor |
59.54 |
74.67 |
Check Sample Size
# Count the rows in each parent-education group. The columns receive their
# display labels directly so the table can be printed without further renaming.
tabel_sampel <- df_manova %>%
  group_by(parent_education) %>%
  summarise(`Jumlah Sampel (N)` = n()) %>%
  rename(`Pendidikan Orang Tua` = parent_education)

kable(
  tabel_sampel,
  format = "simple",
  caption = "Ringkasan Jumlah Sampel per Grup"
)
Ringkasan Jumlah Sampel per Grup
| Bachelor |
2527 |
| High School |
2528 |
| Master |
2487 |
| PhD |
2458 |
Check Outliers
# Univariate outliers for each dependent variable, via rstatix's
# identify_outliers(); the result keeps only the flagged rows.
outliers_midterm <- df_manova %>% identify_outliers(midterm_score)
outliers_final <- df_manova %>% identify_outliers(final_exam_score)

# Multivariate outliers based on the Mahalanobis distance of both scores.
df_mahalanobis <- df_manova %>%
  doo(~mahalanobis_distance(., cols = c("midterm_score", "final_exam_score")))

# Only the is.outlier flag is read from the Mahalanobis result, so this single
# count is reported in both summary columns for that row.
n_mahalanobis <- sum(df_mahalanobis$is.outlier, na.rm = TRUE)

# One row per check: total flagged rows and how many of them are extreme.
ringkasan_outlier <- data.frame(
  Jenis_Pemeriksaan = c(
    "Univariat: Midterm Score",
    "Univariat: Final Exam Score",
    "Multivariat: Mahalanobis Distance"
  ),
  Total_Outlier = c(
    nrow(outliers_midterm),
    nrow(outliers_final),
    n_mahalanobis
  ),
  Outlier_Ekstrem = c(
    sum(outliers_midterm$is.extreme, na.rm = TRUE),
    sum(outliers_final$is.extreme, na.rm = TRUE),
    n_mahalanobis
  )
)

# Rows with at least one extreme outlier are printed in bold red.
has_extreme <- ringkasan_outlier$Outlier_Ekstrem > 0

ringkasan_outlier %>%
  kbl(
    caption = "Ringkasan Deteksi Outlier (Perbaikan Kolom Mahalanobis)",
    align = "lcc"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) %>%
  row_spec(0, bold = TRUE, background = "#f2f2f2") %>%
  column_spec(
    3,
    color = if_else(has_extreme, "red", "black"),
    bold = has_extreme
  )
Ringkasan Deteksi Outlier (Perbaikan Kolom Mahalanobis)
|
Jenis_Pemeriksaan
|
Total_Outlier
|
Outlier_Ekstrem
|
|
Univariat: Midterm Score
|
0
|
0
|
|
Univariat: Final Exam Score
|
0
|
0
|
|
Multivariat: Mahalanobis Distance
|
0
|
0
|
Uji Normalitas Shapiro Wilk
# Draw a reproducible random subsample of 5000 rows (seed fixed at 42).
set.seed(42)
df_sample <- sample_n(df_manova, 5000)

# Shapiro-Wilk test of both scores within each parent-education group.
hasil_shapiro <- df_sample %>%
  group_by(parent_education) %>%
  shapiro_test(midterm_score, final_exam_score)

# Sort by tested variable and relabel the columns for display.
tabel_normalitas <- hasil_shapiro %>%
  arrange(variable) %>%
  select(
    `Pendidikan Orang Tua` = parent_education,
    `Variabel` = variable,
    `Statistik (W)` = statistic,
    `P-Value` = p
  )

kable(
  tabel_normalitas,
  format = "simple",
  caption = "Hasil Uji Normalitas Shapiro-Wilk (n=5000)",
  digits = 4
)
Hasil Uji Normalitas Shapiro-Wilk (n=5000)
| Bachelor |
final_exam_score |
0.9485 |
0 |
| High School |
final_exam_score |
0.9570 |
0 |
| Master |
final_exam_score |
0.9553 |
0 |
| PhD |
final_exam_score |
0.9579 |
0 |
| Bachelor |
midterm_score |
0.9505 |
0 |
| High School |
midterm_score |
0.9570 |
0 |
| Master |
midterm_score |
0.9448 |
0 |
| PhD |
midterm_score |
0.9508 |
0 |
# plot(density(df_manova$midterm_score), main="Density Plot Midterm")
# plot(density(df_manova$final_exam_score), main="Density Plot Final")
Uji Asumsi Multikolinearitas Korelasi Pearson
# Pearson correlation between the two dependent variables.
korelasi <- cor.test(
  df_manova$midterm_score,
  df_manova$final_exam_score,
  method = "pearson"
)

# The assumption is reported as met when |r| is below 0.8.
status_multiko <- if (abs(korelasi$estimate) < 0.8) {
  "AMAN (Memenuhi)"
} else {
  "BAHAYA (Dilanggar)"
}

# Two-column summary; combining numbers and text in c() stores every value as
# character.
tabel_korelasi <- data.frame(
  Parameter = c(
    "Koefisien Korelasi (r)",
    "P-Value",
    "Metode",
    "Status Multikolinearitas"
  ),
  Nilai = c(
    round(korelasi$estimate, 4),
    format.pval(korelasi$p.value, digits = 4),
    korelasi$method,
    status_multiko
  )
)

kable(
  tabel_korelasi,
  format = "simple",
  caption = "Hasil Uji Korelasi Pearson (Asumsi Multikolinearitas)",
  col.names = c("Indikator", "Hasil Analisis")
)
Hasil Uji Korelasi Pearson (Asumsi Multikolinearitas)
| Koefisien Korelasi (r) |
-0.0037 |
| P-Value |
0.7121 |
| Metode |
Pearson’s product-moment correlation |
| Status Multikolinearitas |
AMAN (Memenuhi) |
Uji Asumsi Homogenitas Varians (Levene’s Test)
# Run Levene's test of equal variances across parent-education groups,
# separately for each dependent variable, and label each result.
levene_midterm <- df_manova %>%
  levene_test(midterm_score ~ parent_education) %>%
  mutate(Variable = "Midterm Score")
levene_final <- df_manova %>%
  levene_test(final_exam_score ~ parent_education) %>%
  mutate(Variable = "Final Exam Score")

# Stack both results and tidy the column labels. The data frames are passed
# unnamed: bind_rows() only uses argument names when `.id` is supplied.
tabel_levene <- bind_rows(levene_midterm, levene_final) %>%
  select(Variable, df1, df2, statistic, p) %>%
  rename(`Statistik (F)` = statistic, `P-Value` = p)

# Show the table.
tabel_levene %>%
  kable(
    format = "simple",
    caption = "Hasil Uji Homogenitas Varians (Levene's Test)",
    digits = 4
  )
Hasil Uji Homogenitas Varians (Levene’s Test)
| Midterm Score |
3 |
9996 |
1.2180 |
0.3014 |
| Final Exam Score |
3 |
9996 |
0.4389 |
0.7252 |
Perhitungan Manova
# One-way MANOVA: both exam scores as joint responses, parent education as the
# grouping factor.
hasil_manova <- manova(
  cbind(midterm_score, final_exam_score) ~ parent_education,
  data = df_manova
)

# Tidy the multivariate test once per statistic.
tabel_pillai <- tidy(hasil_manova, test = "Pillai")
tabel_wilks <- tidy(hasil_manova, test = "Wilks")

kable(
  tabel_pillai,
  format = "simple",
  digits = 4,
  caption = "Hasil MANOVA: Pillai's Trace"
)
Hasil MANOVA: Pillai’s Trace
| parent_education |
3 |
8e-04 |
1.3477 |
6 |
19992 |
0.2319 |
| Residuals |
9996 |
NA |
NA |
NA |
NA |
NA |
# Print the Wilks' Lambda version of the MANOVA result.
kable(
  tabel_wilks,
  format = "simple",
  digits = 4,
  caption = "Hasil MANOVA: Wilks' Lambda"
)
Hasil MANOVA: Wilks’ Lambda
| parent_education |
3 |
0.9992 |
1.3477 |
6 |
19990 |
0.2319 |
| Residuals |
9996 |
NA |
NA |
NA |
NA |
NA |
MANCOVA
Load Dataset
# Read the raw student-performance records from the CSV file.
df <- read.csv("student_performance_data.csv")

# Keep the grouping column, the two exam scores and overall_score, drop rows
# with missing values, and store the grouping column as a factor.
df_mancova <- drop_na(
  select(df, parent_education, midterm_score, final_exam_score, overall_score)
)
df_mancova <- mutate(
  df_mancova,
  parent_education = as.factor(parent_education)
)

# Show the first ten rows of the data frame.
kable(
  head(df_mancova, 10),
  format = "simple",
  caption = "Preview Data MANCOVA (10 Baris Pertama)"
)
Preview Data MANCOVA (10 Baris Pertama)
| Master |
70.70 |
53.10 |
52.3480 |
| Bachelor |
27.92 |
87.17 |
53.9485 |
| PhD |
70.92 |
99.61 |
82.0375 |
| Bachelor |
31.73 |
88.85 |
66.4110 |
| Bachelor |
78.28 |
54.23 |
65.6005 |
| Bachelor |
54.26 |
79.79 |
59.3525 |
| High School |
87.83 |
67.37 |
65.4890 |
| Master |
95.69 |
85.06 |
78.4685 |
| Bachelor |
56.28 |
40.72 |
53.7460 |
| Bachelor |
59.54 |
74.67 |
60.1865 |
Pemilihan Variabel Dependent, Independent dan Kovariat
# Describe every column of df_mancova: its name, its class, and its first two
# values as a sample. vapply() guarantees exactly one string per column; a
# column with several classes is shown as "class1/class2" rather than turning
# the result into a list.
struktur_data <- data.frame(
  Kolom = names(df_mancova),
  Tipe = vapply(
    df_mancova,
    function(x) paste(class(x), collapse = "/"),
    character(1)
  ),
  Contoh_Data = vapply(
    df_mancova,
    function(x) paste0(head(x, 2), collapse = ", "),
    character(1)
  )
)

# The vapply() results are named by column, which become row names of the data
# frame; they are suppressed because the Kolom column already carries them.
struktur_data %>%
  kable(
    format = "simple",
    caption = "Struktur Variabel df_mancova",
    row.names = FALSE
  )
Struktur Variabel df_mancova
| parent_education |
factor |
Master, Bachelor |
| midterm_score |
numeric |
70.7, 27.92 |
| final_exam_score |
numeric |
53.1, 87.17 |
| overall_score |
numeric |
52.348, 53.9485 |
Uji Linearitas (Kovariat vs Variabel Dependen)
# 1. Correlate the covariate (overall_score) with each dependent variable.
cor_midterm <- cor.test(df_mancova$overall_score, df_mancova$midterm_score)
cor_final <- cor.test(df_mancova$overall_score, df_mancova$final_exam_score)

# 2. Collect both results in one data frame. The assumption is reported as met
#    when the correlation is significant at the 0.05 level.
tabel_linearitas <- data.frame(
  Pasangan_Variabel = c(
    "Overall Score vs Midterm",
    "Overall Score vs Final Exam"
  ),
  Korelasi_r = c(cor_midterm$estimate, cor_final$estimate),
  P_Value = c(cor_midterm$p.value, cor_final$p.value)
) %>%
  mutate(
    Status_Asumsi = if_else(
      P_Value < 0.05,
      "TERPENUHI (Linear)",
      "TIDAK TERPENUHI"
    )
  )

# 3. Render as HTML with kableExtra; the status column is green when the
#    assumption is met and red otherwise.
linear_terpenuhi <- tabel_linearitas$P_Value < 0.05

tabel_linearitas %>%
  kbl(
    caption = "Hasil Uji Linearitas (Kovariat vs Variabel Dependen)",
    digits = 4,
    align = "lccc" # alignment: left, center, center, center
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  ) %>%
  column_spec(
    4,
    bold = TRUE,
    color = "white",
    background = if_else(linear_terpenuhi, "#28a745", "#dc3545")
  )
Hasil Uji Linearitas (Kovariat vs Variabel Dependen)
|
Pasangan_Variabel
|
Korelasi_r
|
P_Value
|
Status_Asumsi
|
|
Overall Score vs Midterm
|
0.5289
|
0
|
TERPENUHI (Linear)
|
|
Overall Score vs Final Exam
|
0.6893
|
0
|
TERPENUHI (Linear)
|
Uji Homogenitas Kemiringan Regresi
# 1. Type III ANOVA for the midterm score, including the factor-by-covariate
#    interaction.
model_midterm <- aov(
  midterm_score ~ parent_education * overall_score,
  data = df_mancova
)
tipe3_midterm <- Anova(model_midterm, type = "III")
anova_midterm <- mutate(tidy(tipe3_midterm), Variable = "Midterm Score")

# 2. The same model for the final exam score.
model_final <- aov(
  final_exam_score ~ parent_education * overall_score,
  data = df_mancova
)
tipe3_final <- Anova(model_final, type = "III")
anova_final <- mutate(tidy(tipe3_final), Variable = "Final Exam Score")

# 3. Stack both tables and keep only the interaction row. The assumption is
#    reported as met when the interaction has p > 0.05.
tabel_homogenitas_slopes <- bind_rows(anova_midterm, anova_final) %>%
  filter(term == "parent_education:overall_score") %>%
  mutate(
    Status = if_else(
      p.value > 0.05,
      "Aman (Homogen)",
      "Langgar (Tidak Homogen)"
    )
  ) %>%
  select(Variable, term, df, statistic, p.value, Status)

# 4. Print with kable.
kable(
  tabel_homogenitas_slopes,
  format = "simple",
  digits = 4,
  caption = "Uji Homogenitas Kemiringan Regresi (Interaksi Faktor * Kovariat)",
  col.names = c(
    "Variabel Dependen",
    "Efek Interaksi",
    "df",
    "F-Statistik",
    "P-Value",
    "Status Asumsi"
  )
)
Uji Homogenitas Kemiringan Regresi (Interaksi Faktor *
Kovariat)
| Midterm Score |
parent_education:overall_score |
3 |
0.5920 |
0.6202 |
Aman (Homogen) |
| Final Exam Score |
parent_education:overall_score |
3 |
0.5399 |
0.6550 |
Aman (Homogen) |
Uji Multikolinearitas
# 1. Correlate the two dependent variables with each other.
korelasi_dvs <- cor.test(df_mancova$midterm_score, df_mancova$final_exam_score)

# 2. Build the result data frame. The assumption is reported as met when |r|
#    is below 0.8.
tabel_multiko <- data.frame(
  Variabel_1 = "Midterm Score",
  Variabel_2 = "Final Exam Score",
  Korelasi_r = as.numeric(korelasi_dvs$estimate),
  P_Value = as.numeric(korelasi_dvs$p.value)
) %>%
  mutate(
    Status = if_else(
      abs(Korelasi_r) < 0.8,
      "AMAN (Terpenuhi)",
      "BAHAYA (Multikolinearitas)"
    )
  )

# 3. Render as styled HTML; the status column is green when the assumption is
#    met and red otherwise.
multiko_aman <- abs(tabel_multiko$Korelasi_r) < 0.8

tabel_multiko %>%
  kbl(
    caption = "Uji Multikolinearitas antar Variabel Dependen",
    digits = 4,
    align = "llccc"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  ) %>%
  column_spec(
    5,
    bold = TRUE,
    color = "white",
    background = if_else(multiko_aman, "#28a745", "#dc3545")
  )
Uji Multikolinearitas antar Variabel Dependen
|
Variabel_1
|
Variabel_2
|
Korelasi_r
|
P_Value
|
Status
|
|
Midterm Score
|
Final Exam Score
|
-0.0037
|
0.7121
|
AMAN (Terpenuhi)
|
Uji Homogenitas Varians
# Levene's test of equal variances across parent-education groups, run
# separately for each dependent variable.
levene_mid <- df_mancova %>%
  levene_test(midterm_score ~ parent_education) %>%
  mutate(Variable = "Midterm Score")
levene_fin <- df_mancova %>%
  levene_test(final_exam_score ~ parent_education) %>%
  mutate(Variable = "Final Exam Score")

# Combine both results and flag the assumption as met when p > 0.05.
tabel_levene_final <- bind_rows(levene_mid, levene_fin) %>%
  select(Variable, df1, df2, statistic, p) %>%
  mutate(
    Status = if_else(p > 0.05, "Homogen (Terpenuhi)", "Heterogen (Dilanggar)")
  )

# Locate the Status column by name. It is the sixth column of the table, so a
# hard-coded index of 5 would colour the p column instead.
kolom_status <- which(names(tabel_levene_final) == "Status")

# Render the table with the Status column green (met) or red (violated).
tabel_levene_final %>%
  kbl(
    caption = "Hasil Uji Homogenitas Varians (Levene's Test)",
    digits = 4
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ) %>%
  column_spec(
    kolom_status,
    bold = TRUE,
    color = "white",
    background = if_else(tabel_levene_final$p > 0.05, "#28a745", "#dc3545")
  )
Hasil Uji Homogenitas Varians (Levene’s Test)
|
Variable
|
df1
|
df2
|
statistic
|
p
|
Status
|
|
Midterm Score
|
3
|
9996
|
1.2180
|
0.3014
|
Homogen (Terpenuhi)
|
|
Final Exam Score
|
3
|
9996
|
0.4389
|
0.7252
|
Homogen (Terpenuhi)
|
Perhitungan Mancova
# 1. Fit the model: both exam scores as joint responses, with the covariate
#    (overall_score) listed before the factor (parent_education).
Y <- cbind(df_mancova$midterm_score, df_mancova$final_exam_score)
mancova_model <- manova(Y ~ overall_score + parent_education, data = df_mancova)

# 2. Extract both multivariate tests and stack them, dropping the residual row.
#    tidy() names the statistic column after the test ("pillai" / "wilks"), so
#    the stacked table holds two such columns, each NA for the other test.
res_pillai <- tidy(mancova_model, test = "Pillai") %>% mutate(Test = "Pillai")
res_wilks <- tidy(mancova_model, test = "Wilks") %>% mutate(Test = "Wilks")
tabel_mancova_multivariate <- bind_rows(res_pillai, res_wilks) %>%
  filter(term != "Residuals")

# Row ranges for the two display groups, derived from the data rather than
# hard-coded so they stay correct if the number of model terms changes.
n_baris_pillai <- sum(tabel_mancova_multivariate$Test == "Pillai")
n_baris_total <- nrow(tabel_mancova_multivariate)

# 3. Display. The two test-specific statistic columns are merged into a single
#    `value` column so the printed table carries no NA-only cells.
tabel_mancova_multivariate %>%
  mutate(value = coalesce(pillai, wilks)) %>%
  select(Test, term, df, value, statistic, num.df, den.df, p.value) %>%
  kbl(caption = "Hasil Uji MANCOVA (Multivariate Test)", digits = 7) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) %>%
  # Group the rows by test type.
  pack_rows("Pillai's Trace", 1, n_baris_pillai) %>%
  pack_rows("Wilks' Lambda", n_baris_pillai + 1, n_baris_total)
Hasil Uji MANCOVA (Multivariate Test)
|
Test
|
term
|
df
|
statistic
|
pillai
|
num.df
|
den.df
|
p.value
|
wilks
|
|
Pillai’s Trace
|
|
Pillai
|
overall_score
|
1
|
15622.743961
|
0.7576595
|
2
|
9994
|
0.0000000
|
NA
|
|
Pillai
|
parent_education
|
3
|
1.153909
|
0.0006925
|
6
|
19990
|
0.3280318
|
NA
|
|
Wilks’ Lambda
|
|
Wilks
|
overall_score
|
1
|
15622.743961
|
NA
|
2
|
9994
|
0.0000000
|
0.2423405
|
|
Wilks
|
parent_education
|
3
|
1.153938
|
NA
|
6
|
19988
|
0.3280153
|
0.9993076
|