MANOVA

Import Library

suppressMessages(suppressWarnings({
  library(tidyverse)
  library(magrittr)
  library(kableExtra)
  library(rstatix)
  library(car)
  library(heplots)
  library(broom)
}))

Load Dataset Manova

# Read the raw student-performance data from the working directory.
df <- read.csv("student_performance_data.csv")

# MANOVA data set: the grouping variable plus the two dependent variables.
# Rows with a missing value in any of the three columns are dropped, and the
# grouping variable is converted to a factor.
df_manova <- df %>%
  select(parent_education, midterm_score, final_exam_score) %>%
  drop_na() %>%
  mutate(parent_education = as.factor(parent_education))

# Preview the first 10 rows.
kable(
  head(df_manova, 10),
  format = "simple",
  caption = "Tabel Data MANOVA (10 Baris Pertama)"
)
Tabel Data MANOVA (10 Baris Pertama)
parent_education midterm_score final_exam_score
Master 70.70 53.10
Bachelor 27.92 87.17
PhD 70.92 99.61
Bachelor 31.73 88.85
Bachelor 78.28 54.23
Bachelor 54.26 79.79
High School 87.83 67.37
Master 95.69 85.06
Bachelor 56.28 40.72
Bachelor 59.54 74.67

Check Sample Size

# Count the observations in each parent_education group, then give the two
# columns their display names for the report.
tabel_sampel <- df_manova %>%
  group_by(parent_education) %>%
  summarise(N = n()) %>%
  select(
    `Pendidikan Orang Tua` = parent_education,
    `Jumlah Sampel (N)` = N
  )

kable(
  tabel_sampel,
  format = "simple",
  caption = "Ringkasan Jumlah Sampel per Grup"
)
Ringkasan Jumlah Sampel per Grup
Pendidikan Orang Tua Jumlah Sampel (N)
Bachelor 2527
High School 2528
Master 2487
PhD 2458

Check Outliers

# Univariate outliers: identify_outliers() returns only the flagged rows,
# with logical `is.outlier` / `is.extreme` columns.
outliers_midterm <- identify_outliers(df_manova, midterm_score)
outliers_final <- identify_outliers(df_manova, final_exam_score)

# Multivariate outliers on the two dependent variables. The data are not
# grouped, so mahalanobis_distance() is called directly and the columns are
# passed through its `...` selection instead of an undocumented `cols =`.
df_mahalanobis <- df_manova %>%
  mahalanobis_distance(midterm_score, final_exam_score)

# The Mahalanobis result only carries `is.outlier` (no `is.extreme`), so the
# same count is reported in both summary columns.
n_mahalanobis <- sum(df_mahalanobis$is.outlier, na.rm = TRUE)

ringkasan_outlier <- data.frame(
  Jenis_Pemeriksaan = c(
    "Univariat: Midterm Score",
    "Univariat: Final Exam Score",
    "Multivariat: Mahalanobis Distance"
  ),
  Total_Outlier = c(
    nrow(outliers_midterm),
    nrow(outliers_final),
    n_mahalanobis
  ),
  Outlier_Ekstrem = c(
    sum(outliers_midterm$is.extreme, na.rm = TRUE),
    sum(outliers_final$is.extreme, na.rm = TRUE),
    n_mahalanobis
  )
)

# Rows that report at least one extreme outlier are shown in bold red.
ada_ekstrem <- ringkasan_outlier$Outlier_Ekstrem > 0

ringkasan_outlier %>%
  kbl(
    caption = "Ringkasan Deteksi Outlier (Perbaikan Kolom Mahalanobis)",
    align = "lcc"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) %>%
  row_spec(0, bold = TRUE, background = "#f2f2f2") %>%
  column_spec(
    3,
    color = if_else(ada_ekstrem, "red", "black"),
    bold = ada_ekstrem
  )
Ringkasan Deteksi Outlier (Perbaikan Kolom Mahalanobis)
Jenis_Pemeriksaan Total_Outlier Outlier_Ekstrem
Univariat: Midterm Score 0 0
Univariat: Final Exam Score 0 0
Multivariat: Mahalanobis Distance 0 0

Uji Normalitas Shapiro Wilk

# Draw a reproducible random subset of 5000 rows before testing.
# NOTE(review): presumably chosen to keep the Shapiro-Wilk inputs within the
# test's sample-size limit -- confirm.
set.seed(42)
df_sample <- sample_n(df_manova, 5000)

# Shapiro-Wilk test for each dependent variable within each
# parent_education group.
uji_shapiro <- shapiro_test(
  group_by(df_sample, parent_education),
  midterm_score,
  final_exam_score
)

# Sort by variable and rename the columns for display.
tabel_normalitas <- select(
  arrange(uji_shapiro, variable),
  `Pendidikan Orang Tua` = parent_education,
  `Variabel` = variable,
  `Statistik (W)` = statistic,
  `P-Value` = p
)

kable(
  tabel_normalitas,
  format = "simple",
  caption = "Hasil Uji Normalitas Shapiro-Wilk (n=5000)",
  digits = 4
)
Hasil Uji Normalitas Shapiro-Wilk (n=5000)
Pendidikan Orang Tua Variabel Statistik (W) P-Value
Bachelor final_exam_score 0.9485 0
High School final_exam_score 0.9570 0
Master final_exam_score 0.9553 0
PhD final_exam_score 0.9579 0
Bachelor midterm_score 0.9505 0
High School midterm_score 0.9570 0
Master midterm_score 0.9448 0
PhD midterm_score 0.9508 0
# plot(density(df_manova$midterm_score), main="Density Plot Midterm")
# plot(density(df_manova$final_exam_score), main="Density Plot Final")

Uji Asumsi Multikolinearitas Korelasi Pearson

# Pearson correlation between the two dependent variables.
korelasi <- cor.test(
  df_manova$midterm_score,
  df_manova$final_exam_score,
  method = "pearson"
)

# The assumption is treated as met while |r| stays below 0.8.
status_multikolinearitas <- if (abs(korelasi$estimate) < 0.8) {
  "AMAN (Memenuhi)"
} else {
  "BAHAYA (Dilanggar)"
}

# All values are combined into one character column for display.
tabel_korelasi <- data.frame(
  Parameter = c(
    "Koefisien Korelasi (r)",
    "P-Value",
    "Metode",
    "Status Multikolinearitas"
  ),
  Nilai = c(
    round(korelasi$estimate, 4),
    format.pval(korelasi$p.value, digits = 4),
    korelasi$method,
    status_multikolinearitas
  )
)

kable(
  tabel_korelasi,
  format = "simple",
  caption = "Hasil Uji Korelasi Pearson (Asumsi Multikolinearitas)",
  col.names = c("Indikator", "Hasil Analisis")
)
Hasil Uji Korelasi Pearson (Asumsi Multikolinearitas)
Indikator Hasil Analisis
Koefisien Korelasi (r) -0.0037
P-Value 0.7121
Metode Pearson’s product-moment correlation
Status Multikolinearitas AMAN (Memenuhi)

Uji Asumsi Homogenitas Varians (Levene’s Test)

# Levene's test of equal variances across parent_education groups, run
# separately for each dependent variable. `Variable` labels which dependent
# variable each result row belongs to.
levene_midterm <- df_manova %>%
  levene_test(midterm_score ~ parent_education) %>%
  mutate(Variable = "Midterm Score")
levene_final <- df_manova %>%
  levene_test(final_exam_score ~ parent_education) %>%
  mutate(Variable = "Final Exam Score")

# Stack both results and tidy the column names. The data frames are passed
# unnamed: the earlier `status_levene =` label was a stray argument name that
# had no effect on the result.
tabel_levene <- bind_rows(levene_midterm, levene_final) %>%
  select(Variable, df1, df2, statistic, p) %>%
  rename(`Statistik (F)` = statistic, `P-Value` = p)

# Display the table.
tabel_levene %>%
  kable(
    format = "simple",
    caption = "Hasil Uji Homogenitas Varians (Levene's Test)",
    digits = 4
  )
Hasil Uji Homogenitas Varians (Levene’s Test)
Variable df1 df2 Statistik (F) P-Value
Midterm Score 3 9996 1.2180 0.3014
Final Exam Score 3 9996 0.4389 0.7252

Perhitungan Manova

# One-way MANOVA: both scores as joint responses, parent_education as the
# grouping factor.
hasil_manova <- manova(
  cbind(midterm_score, final_exam_score) ~ parent_education,
  data = df_manova
)

# Tidy summaries of the same fit under two multivariate test statistics.
tabel_pillai <- tidy(hasil_manova, test = "Pillai")
tabel_wilks <- tidy(hasil_manova, test = "Wilks")

kable(
  tabel_pillai,
  format = "simple",
  digits = 4,
  caption = "Hasil MANOVA: Pillai's Trace"
)
Hasil MANOVA: Pillai’s Trace
term df pillai statistic num.df den.df p.value
parent_education 3 8e-04 1.3477 6 19992 0.2319
Residuals 9996 NA NA NA NA NA
# Show the Wilks' Lambda summary of the MANOVA fit.
kable(
  tabel_wilks,
  format = "simple",
  digits = 4,
  caption = "Hasil MANOVA: Wilks' Lambda"
)
Hasil MANOVA: Wilks’ Lambda
term df wilks statistic num.df den.df p.value
parent_education 3 0.9992 1.3477 6 19990 0.2319
Residuals 9996 NA NA NA NA NA

MANCOVA

Load Dataset

# Read the raw student-performance data from the working directory.
df <- read.csv("student_performance_data.csv")

# MANCOVA data set: grouping variable, the two dependent variables and
# overall_score. Rows with a missing value in any of these columns are
# dropped, and the grouping variable is converted to a factor.
df_mancova <- df %>%
  select(parent_education, midterm_score, final_exam_score, overall_score) %>%
  drop_na() %>%
  mutate(parent_education = as.factor(parent_education))

# Preview the first 10 rows of the data frame.
kable(
  head(df_mancova, 10),
  format = "simple",
  caption = "Preview Data MANCOVA (10 Baris Pertama)"
)
Preview Data MANCOVA (10 Baris Pertama)
parent_education midterm_score final_exam_score overall_score
Master 70.70 53.10 52.3480
Bachelor 27.92 87.17 53.9485
PhD 70.92 99.61 82.0375
Bachelor 31.73 88.85 66.4110
Bachelor 78.28 54.23 65.6005
Bachelor 54.26 79.79 59.3525
High School 87.83 67.37 65.4890
Master 95.69 85.06 78.4685
Bachelor 56.28 40.72 53.7460
Bachelor 59.54 74.67 60.1865

Pemilihan Variabel Dependent, Independent dan Kovariat

# One row per column of df_mancova: its name, its class and the first two
# values as an example. vapply() guarantees a character vector with one entry
# per column; multi-class columns (e.g. ordered factors) are collapsed with
# "/" so they no longer break data.frame().
struktur_data <- data.frame(
  Kolom = names(df_mancova),
  Tipe = vapply(
    df_mancova,
    function(x) paste(class(x), collapse = "/"),
    character(1)
  ),
  Contoh_Data = vapply(
    df_mancova,
    function(x) paste0(head(x, 2), collapse = ", "),
    character(1)
  )
)

# The row names would repeat the column names, so they are hidden.
struktur_data %>%
  kable(
    format = "simple",
    caption = "Struktur Variabel df_mancova",
    row.names = FALSE
  )
Struktur Variabel df_mancova
Kolom Tipe Contoh_Data
parent_education factor Master, Bachelor
midterm_score numeric 70.7, 27.92
final_exam_score numeric 53.1, 87.17
overall_score numeric 52.348, 53.9485

Uji Linearitas (Kovariat vs Variabel Dependen)

# 1. Correlate the covariate (overall_score) with each dependent variable.
cor_midterm <- cor.test(df_mancova$overall_score, df_mancova$midterm_score)
cor_final <- cor.test(df_mancova$overall_score, df_mancova$final_exam_score)

# 2. Combine both results into one data frame.
# NOTE(review): the assumption is marked as met when the correlation is
# significant at 0.05. This shows a linear association exists, but it is not a
# formal test of linearity -- confirm this criterion is intended.
tabel_linearitas <- data.frame(
  Pasangan_Variabel = c(
    "Overall Score vs Midterm",
    "Overall Score vs Final Exam"
  ),
  Korelasi_r = c(cor_midterm$estimate, cor_final$estimate),
  P_Value = c(cor_midterm$p.value, cor_final$p.value)
) %>%
  mutate(
    Status_Asumsi = if_else(
      P_Value < 0.05,
      "TERPENUHI (Linear)",
      "TIDAK TERPENUHI"
    )
  )

# 3. Render as HTML with kableExtra. The status column (4) is green when the
# assumption is met and red otherwise.
tabel_linearitas %>%
  kbl(
    caption = "Hasil Uji Linearitas (Kovariat vs Variabel Dependen)",
    digits = 4,
    align = "lccc" # Alignment: left, center, center, center
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  ) %>%
  column_spec(
    4,
    bold = TRUE,
    color = "white",
    background = if_else(tabel_linearitas$P_Value < 0.05, "#28a745", "#dc3545")
  )
Hasil Uji Linearitas (Kovariat vs Variabel Dependen)
Pasangan_Variabel Korelasi_r P_Value Status_Asumsi
Overall Score vs Midterm 0.5289 0 TERPENUHI (Linear)
Overall Score vs Final Exam 0.6893 0 TERPENUHI (Linear)

Uji Homogenitas Kemiringan Regresi

# 1. Type III ANOVA for the midterm score, including the
#    factor-by-covariate interaction.
model_midterm <- aov(
  midterm_score ~ parent_education * overall_score,
  data = df_mancova
)
anova_midterm <- mutate(
  tidy(Anova(model_midterm, type = "III")),
  Variable = "Midterm Score"
)

# 2. Same model for the final exam score.
model_final <- aov(
  final_exam_score ~ parent_education * overall_score,
  data = df_mancova
)
anova_final <- mutate(
  tidy(Anova(model_final, type = "III")),
  Variable = "Final Exam Score"
)

# 3. Stack both tables and keep only the interaction row. The slopes are
#    treated as homogeneous when the interaction p-value is above 0.05.
tabel_homogenitas_slopes <- bind_rows(anova_midterm, anova_final) %>%
  filter(term == "parent_education:overall_score") %>%
  select(Variable, term, df, statistic, p.value) %>%
  mutate(
    Status = if_else(
      p.value > 0.05,
      "Aman (Homogen)",
      "Langgar (Tidak Homogen)"
    )
  )

# 4. Display with kable.
kable(
  tabel_homogenitas_slopes,
  format = "simple",
  digits = 4,
  caption = "Uji Homogenitas Kemiringan Regresi (Interaksi Faktor * Kovariat)",
  col.names = c(
    "Variabel Dependen",
    "Efek Interaksi",
    "df",
    "F-Statistik",
    "P-Value",
    "Status Asumsi"
  )
)
Uji Homogenitas Kemiringan Regresi (Interaksi Faktor * Kovariat)
Variabel Dependen Efek Interaksi df F-Statistik P-Value Status Asumsi
Midterm Score parent_education:overall_score 3 0.5920 0.6202 Aman (Homogen)
Final Exam Score parent_education:overall_score 3 0.5399 0.6550 Aman (Homogen)

Uji Multikolinearitas

# 1. Correlation between the two dependent variables.
korelasi_dvs <- cor.test(df_mancova$midterm_score, df_mancova$final_exam_score)

# 2. Build the one-row result table. The assumption is treated as met while
#    |r| stays below 0.8.
tabel_multiko <- data.frame(
  Variabel_1 = "Midterm Score",
  Variabel_2 = "Final Exam Score",
  Korelasi_r = as.numeric(korelasi_dvs$estimate),
  P_Value = as.numeric(korelasi_dvs$p.value)
) %>%
  mutate(
    Status = if_else(
      abs(Korelasi_r) < 0.8,
      "AMAN (Terpenuhi)",
      "BAHAYA (Multikolinearitas)"
    )
  )

# The same threshold drives the colour of the status column.
multiko_aman <- abs(tabel_multiko$Korelasi_r) < 0.8

# 3. Render as a styled HTML table; the status column (5) is green when the
#    assumption holds and red otherwise.
tabel_multiko %>%
  kbl(
    caption = "Uji Multikolinearitas antar Variabel Dependen",
    digits = 4,
    align = "llccc"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  ) %>%
  column_spec(
    5,
    bold = TRUE,
    color = "white",
    background = if_else(multiko_aman, "#28a745", "#dc3545")
  )
Uji Multikolinearitas antar Variabel Dependen
Variabel_1 Variabel_2 Korelasi_r P_Value Status
Midterm Score Final Exam Score -0.0037 0.7121 AMAN (Terpenuhi)

Uji Homogenitas Varians

# Levene's test of equal variances across parent_education groups, run
# separately for each dependent variable.
levene_mid <- df_mancova %>%
  levene_test(midterm_score ~ parent_education) %>%
  mutate(Variable = "Midterm Score")
levene_fin <- df_mancova %>%
  levene_test(final_exam_score ~ parent_education) %>%
  mutate(Variable = "Final Exam Score")

# Combine and tidy. Variances are treated as homogeneous when p > 0.05.
tabel_levene_final <- bind_rows(levene_mid, levene_fin) %>%
  select(Variable, df1, df2, statistic, p) %>%
  mutate(
    Status = if_else(p > 0.05, "Homogen (Terpenuhi)", "Heterogen (Dilanggar)")
  )

# Look up the position of the Status column instead of hard-coding it: the
# table has six columns, so the previous index 5 styled the `p` column.
kolom_status <- which(names(tabel_levene_final) == "Status")

# Render the table; the status column is green when the assumption is met and
# red otherwise.
tabel_levene_final %>%
  kbl(caption = "Hasil Uji Homogenitas Varians (Levene's Test)", digits = 4) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ) %>%
  column_spec(
    kolom_status,
    bold = TRUE,
    color = "white",
    background = if_else(tabel_levene_final$p > 0.05, "#28a745", "#dc3545")
  )
Hasil Uji Homogenitas Varians (Levene’s Test)
Variable df1 df2 statistic p Status
Midterm Score 3 9996 1.2180 0.3014 Homogen (Terpenuhi)
Final Exam Score 3 9996 0.4389 0.7252 Homogen (Terpenuhi)

Perhitungan Mancova

# 1. Fit the model. The response matrix gets explicit column names so the
#    responses stay labelled in the fitted object. The covariate
#    (overall_score) is entered before the factor in the formula.
Y <- cbind(
  midterm_score = df_mancova$midterm_score,
  final_exam_score = df_mancova$final_exam_score
)
mancova_model <- manova(Y ~ overall_score + parent_education, data = df_mancova)

# 2. Extract the multivariate tests and stack them. Each test contributes its
#    own statistic column (`pillai` / `wilks`), which bind_rows() fills with
#    NA for the rows of the other test.
res_pillai <- tidy(mancova_model, test = "Pillai") %>%
  mutate(Test = "Pillai")
res_wilks <- tidy(mancova_model, test = "Wilks") %>%
  mutate(Test = "Wilks")

tabel_mancova_multivariate <- bind_rows(res_pillai, res_wilks) %>%
  filter(term != "Residuals")

# 3. Display with kable: leading columns first, then whatever else is present.
#    The row groups assume two model terms per test (rows 1-2 and 3-4).
tabel_mancova_multivariate %>%
  select(Test, term, df, statistic, everything()) %>%
  kbl(caption = "Hasil Uji MANCOVA (Multivariate Test)", digits = 7) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) %>%
  # Group the rows by test type.
  pack_rows("Pillai's Trace", 1, 2) %>%
  pack_rows("Wilks' Lambda", 3, 4)
Hasil Uji MANCOVA (Multivariate Test)
Test term df statistic pillai num.df den.df p.value wilks
Pillai’s Trace
Pillai overall_score 1 15622.743961 0.7576595 2 9994 0.0000000 NA
Pillai parent_education 3 1.153909 0.0006925 6 19990 0.3280318 NA
Wilks’ Lambda
Wilks overall_score 1 15622.743961 NA 2 9994 0.0000000 0.2423405
Wilks parent_education 3 1.153938 NA 6 19988 0.3280153 0.9993076