Tahap 1 — Load Data & Inspeksi Awal

Install & load library

library(tidyverse)     
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'ggplot2' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.3
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ─────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)       
library(knitr)        
## Warning: package 'knitr' was built under R version 4.5.3
library(kableExtra)    
## Warning: package 'kableExtra' was built under R version 4.5.3
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(gridExtra)    
## Warning: package 'gridExtra' was built under R version 4.5.3
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(skimr)         
## Warning: package 'skimr' was built under R version 4.5.3
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 4.5.3
if (!require(MVN))       install.packages("MVN")
## Loading required package: MVN
## Warning: package 'MVN' was built under R version 4.5.3
## Registered S3 method overwritten by 'lme4':
##   method           from
##   na.action.merMod car
if (!require(ggpubr))    install.packages("ggpubr")
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.5.3
if (!require(car))     install.packages("car")
## Loading required package: car
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.3
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
if (!require(heplots)) install.packages("heplots")
## Loading required package: heplots
## Warning: package 'heplots' was built under R version 4.5.3
## Loading required package: broom
## Warning: package 'broom' was built under R version 4.5.3
library(car)
library(heplots)  

library(MVN)
library(ggpubr)   

Baca data

df_raw <- read.csv("student-mat.csv", sep = ";", stringsAsFactors = FALSE)

cat("Jumlah baris  :", nrow(df_raw), "\n")
## Jumlah baris  : 395
cat("Jumlah kolom  :", ncol(df_raw), "\n")
## Jumlah kolom  : 33
cat("Nama kolom    :\n")
## Nama kolom    :
print(names(df_raw))
##  [1] "school"     "sex"        "age"        "address"    "famsize"   
##  [6] "Pstatus"    "Medu"       "Fedu"       "Mjob"       "Fjob"      
## [11] "reason"     "guardian"   "traveltime" "studytime"  "failures"  
## [16] "schoolsup"  "famsup"     "paid"       "activities" "nursery"   
## [21] "higher"     "internet"   "romantic"   "famrel"     "freetime"  
## [26] "goout"      "Dalc"       "Walc"       "health"     "absences"  
## [31] "G1"         "G2"         "G3"

Cek struktur data

str(df_raw)
## 'data.frame':    395 obs. of  33 variables:
##  $ school    : chr  "GP" "GP" "GP" "GP" ...
##  $ sex       : chr  "F" "F" "F" "F" ...
##  $ age       : int  18 17 15 15 16 16 16 17 15 15 ...
##  $ address   : chr  "U" "U" "U" "U" ...
##  $ famsize   : chr  "GT3" "GT3" "LE3" "GT3" ...
##  $ Pstatus   : chr  "A" "T" "T" "T" ...
##  $ Medu      : int  4 1 1 4 3 4 2 4 3 3 ...
##  $ Fedu      : int  4 1 1 2 3 3 2 4 2 4 ...
##  $ Mjob      : chr  "at_home" "at_home" "at_home" "health" ...
##  $ Fjob      : chr  "teacher" "other" "other" "services" ...
##  $ reason    : chr  "course" "course" "other" "home" ...
##  $ guardian  : chr  "mother" "father" "mother" "mother" ...
##  $ traveltime: int  2 1 1 1 1 1 1 2 1 1 ...
##  $ studytime : int  2 2 2 3 2 2 2 2 2 2 ...
##  $ failures  : int  0 0 3 0 0 0 0 0 0 0 ...
##  $ schoolsup : chr  "yes" "no" "yes" "no" ...
##  $ famsup    : chr  "no" "yes" "no" "yes" ...
##  $ paid      : chr  "no" "no" "yes" "yes" ...
##  $ activities: chr  "no" "no" "no" "yes" ...
##  $ nursery   : chr  "yes" "no" "yes" "yes" ...
##  $ higher    : chr  "yes" "yes" "yes" "yes" ...
##  $ internet  : chr  "no" "yes" "yes" "yes" ...
##  $ romantic  : chr  "no" "no" "no" "yes" ...
##  $ famrel    : int  4 5 4 3 4 5 4 4 4 5 ...
##  $ freetime  : int  3 3 3 2 3 4 4 1 2 5 ...
##  $ goout     : int  4 3 2 2 2 2 4 4 2 1 ...
##  $ Dalc      : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ Walc      : int  1 1 3 1 2 2 1 1 1 1 ...
##  $ health    : int  3 3 3 5 5 5 3 1 1 5 ...
##  $ absences  : int  6 4 10 2 4 10 0 6 0 0 ...
##  $ G1        : int  5 5 7 15 6 15 12 6 16 14 ...
##  $ G2        : int  6 5 8 14 10 15 12 5 18 15 ...
##  $ G3        : int  6 6 10 15 10 15 11 6 19 15 ...

Preview 10 baris pertama

kable(head(df_raw, 10),
      caption = "Preview 10 Baris Pertama Dataset") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width  = FALSE,
    font_size   = 12
  ) %>%
  scroll_box(width = "100%", height = "320px")
Preview 10 Baris Pertama Dataset
school sex age address famsize Pstatus Medu Fedu Mjob Fjob reason guardian traveltime studytime failures schoolsup famsup paid activities nursery higher internet romantic famrel freetime goout Dalc Walc health absences G1 G2 G3
GP F 18 U GT3 A 4 4 at_home teacher course mother 2 2 0 yes no no no yes yes no no 4 3 4 1 1 3 6 5 6 6
GP F 17 U GT3 T 1 1 at_home other course father 1 2 0 no yes no no no yes yes no 5 3 3 1 1 3 4 5 5 6
GP F 15 U LE3 T 1 1 at_home other other mother 1 2 3 yes no yes no yes yes yes no 4 3 2 2 3 3 10 7 8 10
GP F 15 U GT3 T 4 2 health services home mother 1 3 0 no yes yes yes yes yes yes yes 3 2 2 1 1 5 2 15 14 15
GP F 16 U GT3 T 3 3 other other home father 1 2 0 no yes yes no yes yes no no 4 3 2 1 2 5 4 6 10 10
GP M 16 U LE3 T 4 3 services other reputation mother 1 2 0 no yes yes yes yes yes yes no 5 4 2 1 2 5 10 15 15 15
GP M 16 U LE3 T 2 2 other other home mother 1 2 0 no no no no yes yes yes no 4 4 4 1 1 3 0 12 12 11
GP F 17 U GT3 A 4 4 other teacher home mother 2 2 0 yes yes no no yes yes no no 4 1 4 1 1 1 6 6 5 6
GP M 15 U LE3 A 3 2 services other home mother 1 2 0 no yes yes no yes yes yes no 4 2 2 1 1 1 0 16 18 19
GP M 15 U GT3 T 3 4 other other home mother 1 2 0 no yes yes yes yes yes yes no 5 5 1 1 1 5 0 14 15 15

Cek tipe data setiap kolom

tipe_df <- data.frame(
  Kolom      = names(df_raw),
  Tipe       = sapply(df_raw, class),
  Contoh_Isi = sapply(df_raw, function(x) paste(head(unique(x), 3), collapse = " | ")),
  row.names  = NULL
)

kable(tipe_df, caption = "Tipe Data Setiap Kolom") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE, font_size = 12) %>%
  scroll_box(width = "100%", height = "400px")
Tipe Data Setiap Kolom
Kolom Tipe Contoh_Isi
school character GP &#124; MS
sex character F &#124; M
age integer 18 &#124; 17 &#124; 15
address character U &#124; R
famsize character GT3 &#124; LE3
Pstatus character A &#124; T
Medu integer 4 &#124; 1 &#124; 3
Fedu integer 4 &#124; 1 &#124; 2
Mjob character at_home &#124; health &#124; other
Fjob character teacher &#124; other &#124; services
reason character course &#124; other &#124; home
guardian character mother &#124; father &#124; other
traveltime integer 2 &#124; 1 &#124; 3
studytime integer 2 &#124; 3 &#124; 1
failures integer 0 &#124; 3 &#124; 2
schoolsup character yes &#124; no
famsup character no &#124; yes
paid character no &#124; yes
activities character no &#124; yes
nursery character yes &#124; no
higher character yes &#124; no
internet character no &#124; yes
romantic character no &#124; yes
famrel integer 4 &#124; 5 &#124; 3
freetime integer 3 &#124; 2 &#124; 4
goout integer 4 &#124; 3 &#124; 2
Dalc integer 1 &#124; 2 &#124; 5
Walc integer 1 &#124; 3 &#124; 2
health integer 3 &#124; 5 &#124; 1
absences integer 6 &#124; 4 &#124; 10
G1 integer 5 &#124; 7 &#124; 15
G2 integer 6 &#124; 5 &#124; 8
G3 integer 6 &#124; 10 &#124; 15

Nilai unik per kolom kategorikal

# Kolom yang bertipe karakter (kandidat kategorikal)
kolom_char <- names(df_raw)[sapply(df_raw, is.character)]

cat("Kolom kategorikal ditemukan:", paste(kolom_char, collapse = ", "), "\n\n")
## Kolom kategorikal ditemukan: school, sex, address, famsize, Pstatus, Mjob, Fjob, reason, guardian, schoolsup, famsup, paid, activities, nursery, higher, internet, romantic
for (kol in kolom_char) {
  cat(">> Kolom:", kol, "\n")
  print(table(df_raw[[kol]]))
  cat("\n")
}
## >> Kolom: school 
## 
##  GP  MS 
## 349  46 
## 
## >> Kolom: sex 
## 
##   F   M 
## 208 187 
## 
## >> Kolom: address 
## 
##   R   U 
##  88 307 
## 
## >> Kolom: famsize 
## 
## GT3 LE3 
## 281 114 
## 
## >> Kolom: Pstatus 
## 
##   A   T 
##  41 354 
## 
## >> Kolom: Mjob 
## 
##  at_home   health    other services  teacher 
##       59       34      141      103       58 
## 
## >> Kolom: Fjob 
## 
##  at_home   health    other services  teacher 
##       20       18      217      111       29 
## 
## >> Kolom: reason 
## 
##     course       home      other reputation 
##        145        109         36        105 
## 
## >> Kolom: guardian 
## 
## father mother  other 
##     90    273     32 
## 
## >> Kolom: schoolsup 
## 
##  no yes 
## 344  51 
## 
## >> Kolom: famsup 
## 
##  no yes 
## 153 242 
## 
## >> Kolom: paid 
## 
##  no yes 
## 214 181 
## 
## >> Kolom: activities 
## 
##  no yes 
## 194 201 
## 
## >> Kolom: nursery 
## 
##  no yes 
##  81 314 
## 
## >> Kolom: higher 
## 
##  no yes 
##  20 375 
## 
## >> Kolom: internet 
## 
##  no yes 
##  66 329 
## 
## >> Kolom: romantic 
## 
##  no yes 
## 263 132

Ringkasan distribusi semua variabel (skimr)

# Ringkasan lengkap: mean, sd, min, max, histogram mini, % missing
skim(df_raw)
Data summary
Name df_raw
Number of rows 395
Number of columns 33
_______________________
Column type frequency:
character 17
numeric 16
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
school 0 1 2 2 0 2 0
sex 0 1 1 1 0 2 0
address 0 1 1 1 0 2 0
famsize 0 1 3 3 0 2 0
Pstatus 0 1 1 1 0 2 0
Mjob 0 1 5 8 0 5 0
Fjob 0 1 5 8 0 5 0
reason 0 1 4 10 0 4 0
guardian 0 1 5 6 0 3 0
schoolsup 0 1 2 3 0 2 0
famsup 0 1 2 3 0 2 0
paid 0 1 2 3 0 2 0
activities 0 1 2 3 0 2 0
nursery 0 1 2 3 0 2 0
higher 0 1 2 3 0 2 0
internet 0 1 2 3 0 2 0
romantic 0 1 2 3 0 2 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age 0 1 16.70 1.28 15 16 17 18 22 ▇▅▅▁▁
Medu 0 1 2.75 1.09 0 2 3 4 4 ▁▃▆▆▇
Fedu 0 1 2.52 1.09 0 2 2 3 4 ▁▆▇▇▇
traveltime 0 1 1.45 0.70 1 1 1 2 4 ▇▃▁▁▁
studytime 0 1 2.04 0.84 1 1 2 2 4 ▅▇▁▂▁
failures 0 1 0.33 0.74 0 0 0 0 3 ▇▁▁▁▁
famrel 0 1 3.94 0.90 1 4 4 5 5 ▁▁▃▇▅
freetime 0 1 3.24 1.00 1 3 3 4 5 ▁▃▇▆▂
goout 0 1 3.11 1.11 1 2 3 4 5 ▂▆▇▅▃
Dalc 0 1 1.48 0.89 1 1 1 2 5 ▇▂▁▁▁
Walc 0 1 2.29 1.29 1 1 2 3 5 ▇▅▅▃▂
health 0 1 3.55 1.39 1 3 4 5 5 ▂▂▅▃▇
absences 0 1 5.71 8.00 0 0 4 8 75 ▇▁▁▁▁
G1 0 1 10.91 3.32 3 8 11 13 19 ▂▇▇▆▂
G2 0 1 10.71 3.76 0 9 11 13 19 ▁▂▇▆▂
G3 0 1 10.42 4.58 0 8 11 14 20 ▂▃▇▅▁

Tahap 2 — Seleksi & Definisi Variabel

Peta variabel yang digunakan

Peta Variabel untuk MANOVA / ANCOVA / MANCOVA
Kode Kolom Tipe Deskripsi Dipakai_di
Y1 G1 Kontinu Nilai periode 1 (0–20) MANOVA, MANCOVA
Y2 G2 Kontinu Nilai periode 2 (0–20) MANOVA, MANCOVA
X1 sex Kategorikal Jenis kelamin: F = Perempuan, M = Laki-laki MANOVA, ANCOVA, MANCOVA
X2 address Kategorikal Alamat rumah: U = Urban, R = Rural MANOVA, MANCOVA
X3 absences Numerik (covariate) Jumlah hari tidak masuk sekolah ANCOVA, MANCOVA

Subset & konversi tipe data

df <- df_raw %>%
  select(
    sex,      # X1 — faktor kategorikal
    address,  # X2 — faktor kategorikal
    absences, # X3 — covariate numerik
    G1,       # Y1 — dependent variable 1
    G2        # Y2 — dependent variable 2
  ) %>%
  mutate(
    sex     = factor(sex,     levels = c("F", "M"),
                     labels  = c("Perempuan", "Laki-laki")),
    address = factor(address, levels = c("R", "U"),
                     labels  = c("Rural", "Urban")),
    G1      = as.numeric(G1),
    G2      = as.numeric(G2)
  )

cat("===== DATASET SETELAH SELEKSI =====\n")
## ===== DATASET SETELAH SELEKSI =====
cat("Dimensi:", nrow(df), "baris x", ncol(df), "kolom\n\n")
## Dimensi: 395 baris x 5 kolom
str(df)
## 'data.frame':    395 obs. of  5 variables:
##  $ sex     : Factor w/ 2 levels "Perempuan","Laki-laki": 1 1 1 1 1 2 2 1 2 2 ...
##  $ address : Factor w/ 2 levels "Rural","Urban": 2 2 2 2 2 2 2 2 2 2 ...
##  $ absences: int  6 4 10 2 4 10 0 6 0 0 ...
##  $ G1      : num  5 5 7 15 6 15 12 6 16 14 ...
##  $ G2      : num  6 5 8 14 10 15 12 5 18 15 ...

Cek hasil konversi

cat("Level X1 (sex)    :", levels(df$sex),     "\n")
## Level X1 (sex)    : Perempuan Laki-laki
cat("Level X2 (address):", levels(df$address), "\n")
## Level X2 (address): Rural Urban
cat("\nJumlah per grup sex:\n")
## 
## Jumlah per grup sex:
print(table(df$sex))
## 
## Perempuan Laki-laki 
##       208       187
cat("\nJumlah per grup address:\n")
## 
## Jumlah per grup address:
print(table(df$address))
## 
## Rural Urban 
##    88   307
cat("\nKombinasi X1 x X2:\n")
## 
## Kombinasi X1 x X2:
print(table(df$sex, df$address))
##            
##             Rural Urban
##   Perempuan    44   164
##   Laki-laki    44   143

Preview data bersih

kable(head(df, 10),
      caption = "Preview Dataset Setelah Seleksi Variabel") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Preview Dataset Setelah Seleksi Variabel
sex address absences G1 G2
Perempuan Urban 6 5 6
Perempuan Urban 4 5 5
Perempuan Urban 10 7 8
Perempuan Urban 2 15 14
Perempuan Urban 4 6 10
Laki-laki Urban 10 15 15
Laki-laki Urban 0 12 12
Perempuan Urban 6 6 5
Laki-laki Urban 0 16 18
Laki-laki Urban 0 14 15

Tahap 3 — Cek & Tangani Missing Values

Deteksi missing values

mv_count <- colSums(is.na(df))
mv_pct   <- round(colMeans(is.na(df)) * 100, 2)

mv_tabel <- data.frame(
  Variabel     = names(mv_count),
  Jumlah_NA    = as.integer(mv_count),
  Persen_NA    = paste0(mv_pct, "%"),
  Status       = ifelse(mv_count == 0, "Lengkap ✓", "Ada missing ✗")
)

kable(mv_tabel, caption = "Ringkasan Missing Values per Variabel") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  column_spec(4,
    color     = ifelse(mv_tabel$Jumlah_NA == 0, "darkgreen", "red"),
    bold      = TRUE
  )
Ringkasan Missing Values per Variabel
Variabel Jumlah_NA Persen_NA Status
sex sex 0 0% Lengkap ✓
address address 0 0% Lengkap ✓
absences absences 0 0% Lengkap ✓
G1 G1 0 0% Lengkap ✓
G2 G2 0 0% Lengkap ✓
total_mv <- sum(is.na(df))
cat("Total keseluruhan missing values:", total_mv, "\n")
## Total keseluruhan missing values: 0
if (total_mv == 0) {
  cat(">> Dataset BERSIH — tidak ada missing values.\n")
  cat(">> Analisis dapat langsung dilanjutkan ke tahap EDA.\n")
} else {
  cat(">> Ditemukan missing values! Lakukan penanganan di bawah.\n")
}
## >> Dataset BERSIH — tidak ada missing values.
## >> Analisis dapat langsung dilanjutkan ke tahap EDA.

Visualisasi pola missing values

plot_missing(df,
             title    = "Persentase Missing Values per Variabel",
             ggtheme  = theme_minimal(base_size = 13),
             theme_config = list(legend.position = "bottom"))

Panduan penanganan (jika ada missing values)

# --- Opsi A: Hapus baris yang mengandung NA (listwise deletion) ---
# Aman jika jumlah NA < 5% dari total data
df_clean <- df %>% drop_na()
cat("Baris setelah drop_na:", nrow(df_clean), "\n")

# --- Opsi B: Imputasi median untuk variabel NUMERIK ---
# Digunakan untuk: absences, G1, G2
df_imputasi <- df %>%
  mutate(
    absences = ifelse(is.na(absences), median(absences, na.rm = TRUE), absences),
    G1       = ifelse(is.na(G1),       median(G1,       na.rm = TRUE), G1),
    G2       = ifelse(is.na(G2),       median(G2,       na.rm = TRUE), G2)
  )

# --- Opsi C: Imputasi modus untuk variabel KATEGORIKAL ---
# Digunakan untuk: sex, address
modus <- function(x) {
  ux <- unique(x[!is.na(x)])
  ux[which.max(tabulate(match(x, ux)))]
}

df_imputasi <- df_imputasi %>%
  mutate(
    sex     = ifelse(is.na(sex),     modus(sex),     sex),
    address = ifelse(is.na(address), modus(address), address)
  )

# --- Verifikasi setelah imputasi ---
cat("Missing values setelah imputasi:", sum(is.na(df_imputasi)), "\n")

Konfirmasi data siap dianalisis

cat("===== KONFIRMASI DATASET FINAL =====\n")
## ===== KONFIRMASI DATASET FINAL =====
cat("Dimensi akhir   :", nrow(df), "baris x", ncol(df), "kolom\n")
## Dimensi akhir   : 395 baris x 5 kolom
cat("Missing values  :", sum(is.na(df)), "\n")
## Missing values  : 0
cat("Variabel        :", paste(names(df), collapse = ", "), "\n")
## Variabel        : sex, address, absences, G1, G2
cat("\nDataset siap untuk EDA dan uji asumsi.\n")
## 
## Dataset siap untuk EDA dan uji asumsi.

Tahap 4 — Eksplorasi Data (EDA)

Statistik deskriptif keseluruhan

deskriptif <- df %>%
  summarise(
    across(
      c(G1, G2, absences),
      list(
        N      = ~n(),
        Mean   = ~round(mean(.),   2),
        SD     = ~round(sd(.),     2),
        Median = ~round(median(.), 2),
        Min    = ~min(.),
        Max    = ~max(.)
      ),
      .names = "{.col}__{.fn}"
    )
  ) %>%
  pivot_longer(everything(),
               names_to  = c("Variabel", "Statistik"),
               names_sep = "__") %>%
  pivot_wider(names_from = Statistik, values_from = value)

kable(deskriptif,
      caption = "Statistik Deskriptif Variabel Numerik (Seluruh Data)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Statistik Deskriptif Variabel Numerik (Seluruh Data)
Variabel N Mean SD Median Min Max
G1 395 10.91 3.32 11 3 19
G2 395 10.71 3.76 11 0 19
absences 395 5.71 8.00 4 0 75

Statistik deskriptif per grup X1 (sex)

df %>%
  group_by(sex) %>%
  summarise(
    N        = n(),
    Mean_G1  = round(mean(G1),       2),
    SD_G1    = round(sd(G1),         2),
    Mean_G2  = round(mean(G2),       2),
    SD_G2    = round(sd(G2),         2),
    Mean_Abs = round(mean(absences), 2),
    SD_Abs   = round(sd(absences),   2)
  ) %>%
  kable(caption = "Statistik Deskriptif per Kelompok Jenis Kelamin (X1)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Statistik Deskriptif per Kelompok Jenis Kelamin (X1)
sex N Mean_G1 SD_G1 Mean_G2 SD_G2 Mean_Abs SD_Abs
Perempuan 208 10.62 3.23 10.39 3.64 6.22 9.45
Laki-laki 187 11.23 3.39 11.07 3.87 5.14 5.98

Statistik deskriptif per grup X2 (address)

df %>%
  group_by(address) %>%
  summarise(
    N        = n(),
    Mean_G1  = round(mean(G1),       2),
    SD_G1    = round(sd(G1),         2),
    Mean_G2  = round(mean(G2),       2),
    SD_G2    = round(sd(G2),         2),
    Mean_Abs = round(mean(absences), 2),
    SD_Abs   = round(sd(absences),   2)
  ) %>%
  kable(caption = "Statistik Deskriptif per Kelompok Alamat (X2)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Statistik Deskriptif per Kelompok Alamat (X2)
address N Mean_G1 SD_G1 Mean_G2 SD_G2 Mean_Abs SD_Abs
Rural 88 10.48 3.42 9.83 3.89 6.12 9.57
Urban 307 11.03 3.29 10.97 3.69 5.59 7.51

Statistik deskriptif kombinasi X1 × X2

df %>%
  group_by(sex, address) %>%
  summarise(
    N        = n(),
    Mean_G1  = round(mean(G1), 2),
    SD_G1    = round(sd(G1),   2),
    Mean_G2  = round(mean(G2), 2),
    SD_G2    = round(sd(G2),   2),
    .groups  = "drop"
  ) %>%
  kable(caption = "Statistik Deskriptif Kombinasi X1 × X2") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Statistik Deskriptif Kombinasi X1 × X2
sex address N Mean_G1 SD_G1 Mean_G2 SD_G2
Perempuan Rural 44 10.30 3.14 9.64 3.72
Perempuan Urban 164 10.71 3.26 10.59 3.61
Laki-laki Rural 44 10.66 3.70 10.02 4.09
Laki-laki Urban 143 11.41 3.29 11.40 3.75

Visualisasi distribusi

Histogram Y1 (G1) dan Y2 (G2)

df_long_hist <- df %>%
  pivot_longer(cols = c(G1, G2),
               names_to  = "Periode",
               values_to = "Nilai")

mean_lines <- df_long_hist %>%
  group_by(Periode) %>%
  summarise(mean_val = mean(Nilai, na.rm = TRUE))

df_long_hist %>%
  ggplot(aes(x = Nilai, fill = Periode)) +
  geom_histogram(binwidth = 1, color = "white", alpha = 0.85) +
  geom_vline(data = mean_lines,
             aes(xintercept = mean_val),
             linetype = "dashed", color = "gray40", linewidth = 0.5) +
  facet_wrap(~Periode,
             labeller = labeller(Periode = c(G1 = "Y1: G1 — Nilai Periode 1",
                                              G2 = "Y2: G2 — Nilai Periode 2"))) +
  scale_fill_manual(values = c("G1" = "#7B1FA2", "G2" = "#0288D1")) +
  labs(title = "Distribusi Nilai G1 (Y1) dan G2 (Y2)",
       x = "Nilai (0–20)", y = "Frekuensi", fill = "DV") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

Boxplot Y1 & Y2 per X1 (sex)

df %>%
  pivot_longer(cols = c(G1, G2),
               names_to  = "Periode",
               values_to = "Nilai") %>%
  ggplot(aes(x = sex, y = Nilai, fill = sex)) +
  geom_boxplot(alpha = 0.75, outlier.shape = 21,
               outlier.fill = "white", outlier.color = "gray50") +
  geom_jitter(width = 0.15, alpha = 0.2, size = 1.2, color = "gray40") +
  stat_summary(fun = mean, geom = "point",
               shape = 18, size = 3, color = "black") +
  facet_wrap(~Periode,
             labeller = labeller(Periode = c(G1 = "Y1: G1 — Periode 1",
                                              G2 = "Y2: G2 — Periode 2"))) +
  scale_fill_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(title    = "Distribusi Nilai G1 & G2 berdasarkan Jenis Kelamin (X1)",
       subtitle = "Berlian hitam = rata-rata kelompok",
       x = "Jenis Kelamin", y = "Nilai", fill = "Sex") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

Boxplot Y1 & Y2 per X2 (address)

df %>%
  pivot_longer(cols = c(G1, G2),
               names_to  = "Periode",
               values_to = "Nilai") %>%
  ggplot(aes(x = address, y = Nilai, fill = address)) +
  geom_boxplot(alpha = 0.75, outlier.shape = 21,
               outlier.fill = "white", outlier.color = "gray50") +
  geom_jitter(width = 0.15, alpha = 0.2, size = 1.2, color = "gray40") +
  stat_summary(fun = mean, geom = "point",
               shape = 18, size = 3, color = "black") +
  facet_wrap(~Periode,
             labeller = labeller(Periode = c(G1 = "Y1: G1 — Periode 1",
                                              G2 = "Y2: G2 — Periode 2"))) +
  scale_fill_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  labs(title    = "Distribusi Nilai G1 & G2 berdasarkan Alamat (X2)",
       subtitle = "Berlian hitam = rata-rata kelompok",
       x = "Alamat", y = "Nilai", fill = "Address") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

Boxplot kombinasi X1 × X2

df %>%
  pivot_longer(cols = c(G1, G2),
               names_to  = "Periode",
               values_to = "Nilai") %>%
  ggplot(aes(x = sex, y = Nilai, fill = address)) +
  geom_boxplot(alpha = 0.75, position = position_dodge(width = 0.8)) +
  stat_summary(fun = mean, geom = "point",
               shape = 18, size = 2.5, color = "black",
               position = position_dodge(width = 0.8)) +
  facet_wrap(~Periode,
             labeller = labeller(Periode = c(G1 = "Y1: G1", G2 = "Y2: G2"))) +
  scale_fill_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  labs(title    = "Nilai G1 & G2 berdasarkan Kombinasi Sex × Address",
       subtitle = "Melihat pola interaksi X1 × X2",
       x = "Jenis Kelamin", y = "Nilai", fill = "Address") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

Scatter plot X3 (absences) vs Y1 & Y2

p_abs_G1 <- ggplot(df, aes(x = absences, y = G1, color = sex, shape = address)) +
  geom_point(alpha = 0.55, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 0.9, aes(group = sex)) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(title = "X3 (Absences) vs Y1 (G1)",
       x = "Jumlah Absen", y = "Nilai G1",
       color = "Sex", shape = "Address") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

p_abs_G2 <- ggplot(df, aes(x = absences, y = G2, color = sex, shape = address)) +
  geom_point(alpha = 0.55, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 0.9, aes(group = sex)) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(title = "X3 (Absences) vs Y2 (G2)",
       x = "Jumlah Absen", y = "Nilai G2",
       color = "Sex", shape = "Address") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

grid.arrange(p_abs_G1, p_abs_G2, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?

Distribusi X3 (absences) per grup

df %>%
  pivot_longer(cols = c(sex, address),
               names_to  = "Faktor",
               values_to = "Grup") %>%
  ggplot(aes(x = Grup, y = absences, fill = Grup)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21,
               outlier.fill = "white") +
  geom_jitter(width = 0.15, alpha = 0.2, size = 1, color = "gray40") +
  facet_wrap(~Faktor, scales = "free_x",
             labeller = labeller(Faktor = c(sex     = "X1: Sex",
                                             address = "X2: Address"))) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Distribusi Covariate X3 (Absences) per Grup",
       x = "Grup", y = "Jumlah Absen") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")


Korelasi antar variabel numerik

# Matriks korelasi antara Y1, Y2, dan X3
mat_cor <- cor(df[, c("G1", "G2", "absences")], method = "pearson")

kable(round(mat_cor, 4),
      caption = "Matriks Korelasi Pearson — Y1, Y2, X3") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Matriks Korelasi Pearson — Y1, Y2, X3
G1 G2 absences
G1 1.0000 0.8521 -0.0310
G2 0.8521 1.0000 -0.0318
absences -0.0310 -0.0318 1.0000
# Visualisasi korelasi dengan ggplot
cor_data <- as.data.frame(mat_cor) %>%
  rownames_to_column("Var1") %>%
  pivot_longer(-Var1, names_to = "Var2", values_to = "r")

ggplot(cor_data, aes(x = Var1, y = Var2, fill = r)) +
  geom_tile(color = "white", linewidth = 0.8) +
  geom_text(aes(label = round(r, 3)), size = 4.5, color = "white", fontface = "bold") +
  scale_fill_gradient2(low = "#1976D2", mid = "white", high = "#E91E8C",
                       midpoint = 0, limits = c(-1, 1)) +
  labs(title = "Heatmap Korelasi Pearson",
       subtitle = "Y1 (G1), Y2 (G2), X3 (absences)",
       x = NULL, y = NULL, fill = "r") +
  coord_fixed() +
  theme_minimal(base_size = 13) +
  theme(axis.text = element_text(size = 12, face = "bold"))

r_G1_G2  <- round(mat_cor["G1", "G2"],       3)
r_G1_abs <- round(mat_cor["G1", "absences"],  3)
r_G2_abs <- round(mat_cor["G2", "absences"],  3)

cat("===== INTERPRETASI KORELASI =====\n\n")
## ===== INTERPRETASI KORELASI =====
cat("r (G1, G2)       =", r_G1_G2,
    "—", ifelse(abs(r_G1_G2) < 0.90,
                "OK: tidak ada multikolinearitas ekstrem ✓",
                "PERINGATAN: korelasi terlalu tinggi ✗"), "\n")
## r (G1, G2)       = 0.852 — OK: tidak ada multikolinearitas ekstrem ✓
cat("r (G1, absences) =", r_G1_abs, "— hubungan covariate vs Y1\n")
## r (G1, absences) = -0.031 — hubungan covariate vs Y1
cat("r (G2, absences) =", r_G2_abs, "— hubungan covariate vs Y2\n")
## r (G2, absences) = -0.032 — hubungan covariate vs Y2
cat("\n>> Syarat MANOVA/MANCOVA: r antar DV < 0.90\n")
## 
## >> Syarat MANOVA/MANCOVA: r antar DV < 0.90
cat(">> Hasil:", ifelse(abs(r_G1_G2) < 0.90,
                        "Asumsi terpenuhi — lanjut ke uji asumsi berikutnya.",
                        "Pertimbangkan ulang pilihan variabel DV."), "\n")
## >> Hasil: Asumsi terpenuhi — lanjut ke uji asumsi berikutnya.

Profil data otomatis (DataExplorer)

# Menghasilkan laporan ringkas: distribusi, missing, korelasi
introduce(df) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("Info") %>%
  rename(Nilai = V1) %>%
  kable(caption = "Profil Dataset (DataExplorer)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Profil Dataset (DataExplorer)
Info Nilai
rows 395
columns 5
discrete_columns 2
continuous_columns 3
all_missing_columns 0
total_missing_values 0
complete_rows 395
total_observations 1975
memory_usage 14040

Uji Normalitas

##    UJI NORMALITAS (Univariat & Multivariat)
## Variabel DV : Y1 = G1,  Y2 = G2
## Faktor      : X1 = sex, X2 = address
## Syarat lulus: p > 0.05 → data berdistribusi normal

Uji Shapiro-Wilk Univariat per Variabel Keseluruhan

sw_G1 <- shapiro.test(df$G1)
sw_G2 <- shapiro.test(df$G2)

tabel_sw_global <- data.frame(
  Variabel  = c("Y1 (G1)", "Y2 (G2)"),
  Statistik = c(round(sw_G1$statistic, 4), round(sw_G2$statistic, 4)),
  p_value   = c(round(sw_G1$p.value,   4), round(sw_G2$p.value,   4)),
  Kesimpulan = c(
    ifelse(sw_G1$p.value > 0.05, "Normal (p > 0.05)", "Tidak Normal (p ≤ 0.05)"),
    ifelse(sw_G2$p.value > 0.05, "Normal (p > 0.05)", "Tidak Normal (p ≤ 0.05)")
  )
)

kable(tabel_sw_global,
      caption = "Shapiro-Wilk — Keseluruhan Data (Y1 & Y2)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(4,
    color = ifelse(tabel_sw_global$p_value > 0.05, "darkgreen", "red"),
    bold  = TRUE)
Shapiro-Wilk — Keseluruhan Data (Y1 & Y2)
Variabel Statistik p_value Kesimpulan
Y1 (G1) 0.9749 0 Tidak Normal (p ≤ 0.05)
Y2 (G2) 0.9691 0 Tidak Normal (p ≤ 0.05)

Uji Shapiro-Wilk per Grup X1 (sex)

hasil_sw_sex <- df %>%
  group_by(sex) %>%
  summarise(
    W_G1  = round(shapiro.test(G1)$statistic, 4),
    p_G1  = round(shapiro.test(G1)$p.value,   4),
    W_G2  = round(shapiro.test(G2)$statistic, 4),
    p_G2  = round(shapiro.test(G2)$p.value,   4),
    .groups = "drop"
  ) %>%
  mutate(
    Ket_G1 = ifelse(p_G1 > 0.05, "Normal", "Tidak Normal"),
    Ket_G2 = ifelse(p_G2 > 0.05, "Normal", "Tidak Normal")
  )

kable(hasil_sw_sex,
      caption = "Shapiro-Wilk per Grup X1 (sex) — Y1 (G1) & Y2 (G2)",
      col.names = c("Grup (sex)", "W (G1)", "p (G1)", "W (G2)", "p (G2)",
                    "Ket. G1", "Ket. G2")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(6, color = ifelse(hasil_sw_sex$p_G1 > 0.05, "darkgreen", "red"), bold = TRUE) %>%
  column_spec(7, color = ifelse(hasil_sw_sex$p_G2 > 0.05, "darkgreen", "red"), bold = TRUE)
Shapiro-Wilk per Grup X1 (sex) — Y1 (G1) & Y2 (G2)
Grup (sex) W (G1) p (G1) W (G2) p (G2) Ket. G1 Ket. G2
Perempuan 0.9655 0.0001 0.9665 1e-04 Tidak Normal Tidak Normal
Laki-laki 0.9814 0.0136 0.9680 3e-04 Tidak Normal Tidak Normal

Uji Shapiro-Wilk per Grup X2 (address)

hasil_sw_address <- df %>%
  group_by(address) %>%
  summarise(
    W_G1  = round(shapiro.test(G1)$statistic, 4),
    p_G1  = round(shapiro.test(G1)$p.value,   4),
    W_G2  = round(shapiro.test(G2)$statistic, 4),
    p_G2  = round(shapiro.test(G2)$p.value,   4),
    .groups = "drop"
  ) %>%
  mutate(
    Ket_G1 = ifelse(p_G1 > 0.05, "Normal", "Tidak Normal"),
    Ket_G2 = ifelse(p_G2 > 0.05, "Normal", "Tidak Normal")
  )

kable(hasil_sw_address,
      caption = "Shapiro-Wilk per Grup X2 (address) — Y1 (G1) & Y2 (G2)",
      col.names = c("Grup (address)", "W (G1)", "p (G1)", "W (G2)", "p (G2)",
                    "Ket. G1", "Ket. G2")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(6, color = ifelse(hasil_sw_address$p_G1 > 0.05, "darkgreen", "red"), bold = TRUE) %>%
  column_spec(7, color = ifelse(hasil_sw_address$p_G2 > 0.05, "darkgreen", "red"), bold = TRUE)
Shapiro-Wilk per Grup X2 (address) — Y1 (G1) & Y2 (G2)
Grup (address) W (G1) p (G1) W (G2) p (G2) Ket. G1 Ket. G2
Rural 0.9687 0.0316 0.9698 0.0375 Tidak Normal Tidak Normal
Urban 0.9740 0.0000 0.9668 0.0000 Tidak Normal Tidak Normal

QQ-Plot Univariat (Y1 & Y2 per Grup)

p_qq_G1_sex <- ggqqplot(df, x = "G1", color = "sex",
                         palette = c("#E91E8C", "#1976D2"),
                         title   = "QQ-Plot Y1 (G1) per Sex") +
  theme_minimal(base_size = 11)

p_qq_G2_sex <- ggqqplot(df, x = "G2", color = "sex",
                         palette = c("#E91E8C", "#1976D2"),
                         title   = "QQ-Plot Y2 (G2) per Sex") +
  theme_minimal(base_size = 11)

p_qq_G1_addr <- ggqqplot(df, x = "G1", color = "address",
                          palette = c("#FF8F00", "#388E3C"),
                          title   = "QQ-Plot Y1 (G1) per Address") +
  theme_minimal(base_size = 11)

p_qq_G2_addr <- ggqqplot(df, x = "G2", color = "address",
                          palette = c("#FF8F00", "#388E3C"),
                          title   = "QQ-Plot Y2 (G2) per Address") +
  theme_minimal(base_size = 11)

grid.arrange(p_qq_G1_sex, p_qq_G2_sex,
             p_qq_G1_addr, p_qq_G2_addr,
             ncol = 2,
             top  = "QQ-Plot Univariat — Y1 & Y2 per Grup (X1 & X2)")

Uji Normalitas Multivariat (Mardia) dan cbind(G1, G2) keseluruhan

mvn_global <- MVN::mvn(data         = df[, c("G1", "G2")],
                       mvn_test     = "mardia",
                       univariate_test = "SW")

cat("MARDIA'S TEST — Keseluruhan Data\n")
## MARDIA'S TEST — Keseluruhan Data
cat(" Multivariat:\n")
##  Multivariat:
summary(mvn_global, select = "mvn")
## 
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────
##              Test Statistic p.value     Method          MVN
## 1 Mardia Skewness   291.175  <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis    19.052  <0.001 asymptotic ✗ Not normal
cat("\n Univariat (Shapiro-Wilk via MVN):\n")
## 
##  Univariat (Shapiro-Wilk via MVN):
summary(mvn_global, select = "univariate")
## 
## ── Univariate Normality Test Results ──────────────────────────────────────────────────────────────────────────────────────────────────────────
##           Test Variable Statistic p.value    Normality
## 1 Shapiro-Wilk       G1     0.975  <0.001 ✗ Not normal
## 2 Shapiro-Wilk       G2     0.969  <0.001 ✗ Not normal

Uji Normalitas Multivariat (Royston)

mvn_royston <- mvn(data    = df[, c("G1", "G2")],
                   mvn_test = "royston")

cat("ROYSTON'S TEST — Keseluruhan Data\n")
## ROYSTON'S TEST — Keseluruhan Data
summary(mvn_royston, select = "mvn")
## 
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────
##      Test Statistic p.value     Method          MVN
## 1 Royston    39.332  <0.001 asymptotic ✗ Not normal

Uji Normalitas Multivariat per Grup X1 (sex)

cat("MARDIA'S TEST per Grup X1 (sex)\n")
## MARDIA'S TEST per Grup X1 (sex)
for (grp in levels(df$sex)) {
  cat(" Grup:", grp, "\n")
  sub_df <- df %>% filter(sex == grp) %>% select(G1, G2)
  
  hasil <- mvn(data    = sub_df,
               mvn_test = "mardia",
               desc    = FALSE)
  
  # print(hasil$multivariateNormality)
  summary(hasil, select = "mvn")
  cat("\n")
}
##  Grup: Perempuan
## 
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────
##              Test Statistic p.value     Method          MVN
## 1 Mardia Skewness   216.849  <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis    17.812  <0.001 asymptotic ✗ Not normal
## 
##  Grup: Laki-laki
## 
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────
##              Test Statistic p.value     Method          MVN
## 1 Mardia Skewness    90.266  <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis     8.687  <0.001 asymptotic ✗ Not normal

Uji Normalitas Multivariat per Grup X2 (address)

# ── Mardia per grup X2 (address) — syarat MANOVA & MANCOVA ──
cat("MARDIA'S TEST per Grup X2 (address)\n")
## MARDIA'S TEST per Grup X2 (address)
for (grp in levels(df$address)) {
  cat("─── Grup:", grp, "───\n")
  sub_df <- df %>% filter(address == grp) %>% select(G1, G2)
  
  hasil <- mvn(data    = sub_df,
               mvn_test = "mardia",
               desc    = FALSE)
  
  # print(hasil$multivariateNormality)
  summary(hasil, select = "mvn")
  cat("\n")
}
## ─── Grup: Rural ───
## 
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────
##              Test Statistic p.value     Method          MVN
## 1 Mardia Skewness    63.074  <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis     6.598  <0.001 asymptotic ✗ Not normal
## 
## ─── Grup: Urban ───
## 
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────
##              Test Statistic p.value     Method          MVN
## 1 Mardia Skewness   193.528  <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis    15.134  <0.001 asymptotic ✗ Not normal

QQ-Plot Multivariat (Chi-Square Plot)

# Keseluruhan
res_global <- MVN::mvn(data     = df[, c("G1", "G2")],
                       mvn_test = "mardia")
p1 <- plot(res_global, diagnostic = "multivariate", type = "qq")
p1 <- p1 + ggplot2::ggtitle("Chi-Square QQ-Plot\n(Keseluruhan Data)")

# Per grup: Perempuan
sub_p <- df %>% filter(sex == "Perempuan") %>% select(G1, G2)
res_p <- MVN::mvn(data     = sub_p,
                  mvn_test = "mardia")
p2 <- plot(res_p, diagnostic = "multivariate", type = "qq")
p2 <- p2 + ggplot2::ggtitle("Chi-Square QQ-Plot\n(Grup: Perempuan)")

gridExtra::grid.arrange(p1, p2, ncol = 2)

Uji Homogenitas Varians & Kovarians

Levene’s Test: Y1 (G1) per X1 (sex)

levene_G1_sex <- leveneTest(G1 ~ sex, data = df, center = mean)

cat("LEVENE'S TEST: Y1 (G1) ~ X1 (sex)\n")
## LEVENE'S TEST: Y1 (G1) ~ X1 (sex)
print(levene_G1_sex)
## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.3946 0.5303
##       393
p_val <- levene_G1_sex$`Pr(>F)`[1]
cat("\nKesimpulan: p =", round(p_val, 4), "→",
    ifelse(p_val > 0.05,
           "Varians HOMOGEN (p > 0.05)",
           "Varians TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
## 
## Kesimpulan: p = 0.5303 → Varians HOMOGEN (p > 0.05)

Levene’s Test: Y2 (G2) per X1 (sex)

levene_G2_sex <- leveneTest(G2 ~ sex, data = df, center = mean)

cat("LEVENE'S TEST: Y2 (G2) ~ X1 (sex)\n")
## LEVENE'S TEST: Y2 (G2) ~ X1 (sex)
print(levene_G2_sex)
## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.6075 0.4362
##       393
p_val <- levene_G2_sex$`Pr(>F)`[1]
cat("\nKesimpulan: p =", round(p_val, 4), "→",
    ifelse(p_val > 0.05,
           "Varians HOMOGEN (p > 0.05)",
           "Varians TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
## 
## Kesimpulan: p = 0.4362 → Varians HOMOGEN (p > 0.05)

Levene’s Test: Y1 (G1) per X2 (address)

levene_G1_addr <- leveneTest(G1 ~ address, data = df, center = mean)

cat("LEVENE'S TEST: Y1 (G1) ~ X2 (address)\n")
## LEVENE'S TEST: Y1 (G1) ~ X2 (address)
print(levene_G1_addr)
## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.0033 0.9543
##       393
p_val <- levene_G1_addr$`Pr(>F)`[1]
cat("\nKesimpulan: p =", round(p_val, 4), "→",
    ifelse(p_val > 0.05,
           "Varians HOMOGEN (p > 0.05)",
           "Varians TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
## 
## Kesimpulan: p = 0.9543 → Varians HOMOGEN (p > 0.05)

Levene’s Test: Y2 (G2) per X2 (address)

levene_G2_addr <- leveneTest(G2 ~ address, data = df, center = mean)

cat("LEVENE'S TEST: Y2 (G2) ~ X2 (address)\n")
## LEVENE'S TEST: Y2 (G2) ~ X2 (address)
print(levene_G2_addr)
## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.0509 0.8215
##       393
p_val <- levene_G2_addr$`Pr(>F)`[1]
cat("\nKesimpulan: p =", round(p_val, 4), "→",
    ifelse(p_val > 0.05,
           "Varians HOMOGEN (p > 0.05)",
           "Varians TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
## 
## Kesimpulan: p = 0.8215 → Varians HOMOGEN (p > 0.05)

Ringkasan Levene’s Test (Tabel Gabungan)

tabel_levene <- data.frame(
  DV      = c("Y1 (G1)", "Y2 (G2)", "Y1 (G1)", "Y2 (G2)"),
  Faktor  = c("X1 (sex)", "X1 (sex)", "X2 (address)", "X2 (address)"),
  F_stat  = c(
    round(levene_G1_sex$`F value`[1],   4),
    round(levene_G2_sex$`F value`[1],   4),
    round(levene_G1_addr$`F value`[1],  4),
    round(levene_G2_addr$`F value`[1],  4)
  ),
  p_value = c(
    round(levene_G1_sex$`Pr(>F)`[1],    4),
    round(levene_G2_sex$`Pr(>F)`[1],    4),
    round(levene_G1_addr$`Pr(>F)`[1],   4),
    round(levene_G2_addr$`Pr(>F)`[1],   4)
  )
)

tabel_levene$Kesimpulan <- ifelse(
  tabel_levene$p_value > 0.05,
  "Homogen",
  "Tidak Homogen"
)

kable(tabel_levene,
      caption = "Ringkasan Levene's Test — Homogenitas Varians Y1 & Y2") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(5,
    color = ifelse(tabel_levene$p_value > 0.05, "darkgreen", "red"),
    bold  = TRUE)
Ringkasan Levene’s Test — Homogenitas Varians Y1 & Y2
DV Faktor F_stat p_value Kesimpulan
Y1 (G1) X1 (sex) 0.3946 0.5303 Homogen
Y2 (G2) X1 (sex) 0.6075 0.4362 Homogen
Y1 (G1) X2 (address) 0.0033 0.9543 Homogen
Y2 (G2) X2 (address) 0.0509 0.8215 Homogen

Visualisasi Homogenitas Varians (Boxplot Levene)

p_var_sex <- df %>%
  pivot_longer(cols = c(G1, G2), names_to = "Periode", values_to = "Nilai") %>%
  ggplot(aes(x = sex, y = Nilai, fill = sex)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21,
               outlier.fill = "white", outlier.color = "gray50") +
  stat_summary(fun = mean, geom = "point",
               shape = 18, size = 3, color = "black") +
  facet_wrap(~Periode,
             labeller = labeller(Periode = c(G1 = "Y1: G1", G2 = "Y2: G2"))) +
  scale_fill_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(title    = "Distribusi Y1 & Y2 per X1 (sex)",
       subtitle = "Berlian = rata-rata | Levene's Test: cek kesamaan penyebaran",
       x = "Jenis Kelamin", y = "Nilai", fill = "Sex") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

p_var_addr <- df %>%
  pivot_longer(cols = c(G1, G2), names_to = "Periode", values_to = "Nilai") %>%
  ggplot(aes(x = address, y = Nilai, fill = address)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21,
               outlier.fill = "white", outlier.color = "gray50") +
  stat_summary(fun = mean, geom = "point",
               shape = 18, size = 3, color = "black") +
  facet_wrap(~Periode,
             labeller = labeller(Periode = c(G1 = "Y1: G1", G2 = "Y2: G2"))) +
  scale_fill_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  labs(title    = "Distribusi Y1 & Y2 per X2 (address)",
       subtitle = "Berlian = rata-rata | Levene's Test: cek kesamaan penyebaran",
       x = "Alamat", y = "Nilai", fill = "Address") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

grid.arrange(p_var_sex, p_var_addr, ncol = 2,
             top = "Visualisasi Homogenitas Varians — Pendukung Levene's Test")

Box’s M Test per X1 (sex) — untuk MANOVA & MANCOVA

boxm_sex <- boxM(cbind(G1, G2) ~ sex, data = df)

cat("BOX'S M TEST: cbind(G1,G2) ~ X1 (sex)\n")
## BOX'S M TEST: cbind(G1,G2) ~ X1 (sex)
print(boxm_sex)
## 
##  Box's M-test for Homogeneity of Covariance Matrices 
## 
## data:  df 
## Chi-Sq (approx.) = 0.8588, df = 3, p-value = 0.8354
p_val_boxm_sex <- boxm_sex$p.value
cat("\nKesimpulan: p =", round(p_val_boxm_sex, 4), "→",
    ifelse(p_val_boxm_sex > 0.05,
           "Matriks kovarian HOMOGEN (p > 0.05)",
           "Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
## 
## Kesimpulan: p = 0.8354 → Matriks kovarian HOMOGEN (p > 0.05)
cat("\n[Catatan] Box's M sangat sensitif pada sampel besar.\n")
## 
## [Catatan] Box's M sangat sensitif pada sampel besar.
cat("Jika n besar & p sedikit di bawah 0.05, MANOVA masih bisa\n")
## Jika n besar & p sedikit di bawah 0.05, MANOVA masih bisa
cat("dilanjutkan dengan kehati-hatian (lihat Pillai's trace).\n")
## dilanjutkan dengan kehati-hatian (lihat Pillai's trace).

Box’s M Test per X2 (address) — untuk MANOVA & MANCOVA

boxm_addr <- boxM(cbind(G1, G2) ~ address, data = df)

cat("BOX'S M TEST: cbind(G1,G2) ~ X2 (address)\n")
## BOX'S M TEST: cbind(G1,G2) ~ X2 (address)
print(boxm_addr)
## 
##  Box's M-test for Homogeneity of Covariance Matrices 
## 
## data:  df 
## Chi-Sq (approx.) = 20.035, df = 3, p-value = 0.0001669
p_val_boxm_addr <- boxm_addr$p.value
cat("\nKesimpulan: p =", round(p_val_boxm_addr, 4), "→",
    ifelse(p_val_boxm_addr > 0.05,
           "Matriks kovarian HOMOGEN (p > 0.05)",
           "Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
## 
## Kesimpulan: p = 2e-04 → Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)
cat("\n[Catatan] Box's M sangat sensitif pada sampel besar.\n")
## 
## [Catatan] Box's M sangat sensitif pada sampel besar.
cat("Jika n besar & p sedikit di bawah 0.05, MANOVA masih bisa\n")
## Jika n besar & p sedikit di bawah 0.05, MANOVA masih bisa
cat("dilanjutkan dengan kehati-hatian (lihat Pillai's trace).\n")
## dilanjutkan dengan kehati-hatian (lihat Pillai's trace).

Box’s M Test gabungan X1 × X2 (kombinasi faktor)

df_gabungan <- df %>%
  mutate(grup = interaction(sex, address, sep = " × "))

boxm_gabungan <- boxM(cbind(G1, G2) ~ grup, data = df_gabungan)

cat("BOX'S M TEST: cbind(G1,G2) ~ sex × address\n")
## BOX'S M TEST: cbind(G1,G2) ~ sex × address
print(boxm_gabungan)
## 
##  Box's M-test for Homogeneity of Covariance Matrices 
## 
## data:  df_gabungan 
## Chi-Sq (approx.) = 27.3438, df = 9, p-value = 0.001227
p_val_boxm_gab <- boxm_gabungan$p.value
cat("\nKesimpulan: p =", round(p_val_boxm_gab, 4), "→",
    ifelse(p_val_boxm_gab > 0.05,
           "Matriks kovarian HOMOGEN (p > 0.05)",
           "Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
## 
## Kesimpulan: p = 0.0012 → Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)

Visualisasi Box’s M: Heatmap Matriks Korelasi per Grup

# ── Visualisasi: matriks korelasi per grup sebagai proxy kovarian ──

plot_cor_grup <- function(data_sub, label_judul) {
  mat <- cor(data_sub[, c("G1", "G2")], method = "pearson")
  df_cor <- as.data.frame(mat) %>%
    rownames_to_column("Var1") %>%
    pivot_longer(-Var1, names_to = "Var2", values_to = "r")
  
  ggplot(df_cor, aes(x = Var1, y = Var2, fill = r)) +
    geom_tile(color = "white", linewidth = 0.8) +
    geom_text(aes(label = round(r, 3)), size = 5,
              color = "white", fontface = "bold") +
    scale_fill_gradient2(low = "#1976D2", mid = "white", high = "#E91E8C",
                         midpoint = 0, limits = c(-1, 1)) +
    labs(title = label_judul, x = NULL, y = NULL, fill = "r") +
    coord_fixed() +
    theme_minimal(base_size = 11) +
    theme(axis.text = element_text(size = 11, face = "bold"))
}

p_cor_perempuan <- plot_cor_grup(df %>% filter(sex == "Perempuan"),
                                  "Korelasi G1-G2\n(Perempuan)")
p_cor_lakilaki  <- plot_cor_grup(df %>% filter(sex == "Laki-laki"),
                                  "Korelasi G1-G2\n(Laki-laki)")
p_cor_rural     <- plot_cor_grup(df %>% filter(address == "Rural"),
                                  "Korelasi G1-G2\n(Rural)")
p_cor_urban     <- plot_cor_grup(df %>% filter(address == "Urban"),
                                  "Korelasi G1-G2\n(Urban)")

grid.arrange(p_cor_perempuan, p_cor_lakilaki,
             p_cor_rural,     p_cor_urban,
             ncol = 4,
             top  = "Matriks Korelasi G1-G2 per Grup — Proxy Homogenitas Kovarian")

Ringkasan & Kesimpulan Tahap 6

p_lev_G1_sex  <- levene_G1_sex$`Pr(>F)`[1]
p_lev_G2_sex  <- levene_G2_sex$`Pr(>F)`[1]
p_lev_G1_addr <- levene_G1_addr$`Pr(>F)`[1]
p_lev_G2_addr <- levene_G2_addr$`Pr(>F)`[1]

tabel_ringkasan <- data.frame(
  Uji = c(
    "Levene (G1 ~ sex)",
    "Levene (G2 ~ sex)",
    "Levene (G1 ~ address)",
    "Levene (G2 ~ address)",
    "Box's M (G1,G2 ~ sex)",
    "Box's M (G1,G2 ~ address)",
    "Box's M (G1,G2 ~ sex×address)"
  ),
  p_value = c(
    round(p_lev_G1_sex,        4),
    round(p_lev_G2_sex,        4),
    round(p_lev_G1_addr,       4),
    round(p_lev_G2_addr,       4),
    round(p_val_boxm_sex,      4),
    round(p_val_boxm_addr,     4),
    round(p_val_boxm_gab,      4)
  ),
  Berlaku_untuk = c(
    "Semua metode",
    "Semua metode",
    "Semua metode",
    "Semua metode",
    "MANOVA & MANCOVA",
    "MANOVA & MANCOVA",
    "MANOVA & MANCOVA"
  )
)

tabel_ringkasan$Kesimpulan <- ifelse(
  tabel_ringkasan$p_value > 0.05,
  "Homogen",
  "Tidak Homogen"
)

kable(tabel_ringkasan,
      caption = "Ringkasan Tahap 6 — Semua Uji Homogenitas Varians & Kovarians") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(4,
    color = ifelse(tabel_ringkasan$p_value > 0.05, "darkgreen", "red"),
    bold  = TRUE) %>%
  row_spec(5:7, background = "#FFF9C4")
Ringkasan Tahap 6 — Semua Uji Homogenitas Varians & Kovarians
Uji p_value Berlaku_untuk Kesimpulan
Levene (G1 ~ sex) 0.5303 Semua metode Homogen
Levene (G2 ~ sex) 0.4362 Semua metode Homogen
Levene (G1 ~ address) 0.9543 Semua metode Homogen
Levene (G2 ~ address) 0.8215 Semua metode Homogen
Box’s M (G1,G2 ~ sex) 0.8354 MANOVA & MANCOVA Homogen
Box’s M (G1,G2 ~ address) 0.0002 MANOVA & MANCOVA Tidak Homogen
Box’s M (G1,G2 ~ sex×address) 0.0012 MANOVA & MANCOVA Tidak Homogen

Cek Multikolinearitas antar DV

Hitung Korelasi Pearson Y1 vs Y2

# ── Korelasi Pearson antara Y1 (G1) dan Y2 (G2) ──
r_G1_G2 <- cor(df$G1, df$G2, method = "pearson")

cat("===== KORELASI PEARSON: Y1 (G1) vs Y2 (G2) =====\n\n")
## ===== KORELASI PEARSON: Y1 (G1) vs Y2 (G2) =====
cat("r (G1, G2) =", round(r_G1_G2, 4), "\n\n")
## r (G1, G2) = 0.8521
cat("Kesimpulan:\n")
## Kesimpulan:
if (abs(r_G1_G2) < 0.90) {
  cat("  r =", round(r_G1_G2, 4), "< 0.90\n")
  cat("  → TIDAK ada multikolinearitas ekstrem\n")
  cat("  → G1 dan G2 cukup berbeda sebagai DV\n")
  cat("  → Analisis MANOVA & MANCOVA dapat dilanjutkan\n")
} else {
  cat("  r =", round(r_G1_G2, 4), "≥ 0.90\n")
  cat("  → MULTIKOLINEARITAS EKSTREM\n")
  cat("  → G1 dan G2 hampir identik, pertimbangkan ulang pilihan DV\n")
}
##   r = 0.8521 < 0.90
##   → TIDAK ada multikolinearitas ekstrem
##   → G1 dan G2 cukup berbeda sebagai DV
##   → Analisis MANOVA & MANCOVA dapat dilanjutkan

Uji Signifikansi Korelasi (cor.test)

cor_test_hasil <- cor.test(df$G1, df$G2, method = "pearson")

cat("UJI SIGNIFIKANSI KORELASI: G1 vs G2\n")
## UJI SIGNIFIKANSI KORELASI: G1 vs G2
cat("Statistik t :", round(cor_test_hasil$statistic, 4), "\n")
## Statistik t : 32.2778
cat("df          :", cor_test_hasil$parameter, "\n")
## df          : 393
cat("p-value     :", format(cor_test_hasil$p.value, scientific = TRUE), "\n")
## p-value     : 1.441347e-112
cat("95% CI      : [",
    round(cor_test_hasil$conf.int[1], 4), ",",
    round(cor_test_hasil$conf.int[2], 4), "]\n\n")
## 95% CI      : [ 0.8226 , 0.877 ]
cat("Interpretasi:\n")
## Interpretasi:
cat("  Korelasi r =", round(r_G1_G2, 4), "\n")
##   Korelasi r = 0.8521
if (cor_test_hasil$p.value < 0.05) {
  cat("  Signifikan secara statistik (p < 0.05)\n")
  cat("  → G1 dan G2 memang berkorelasi (bukan kebetulan)\n")
} else {
  cat("  Tidak signifikan (p ≥ 0.05)\n")
}
##   Signifikan secara statistik (p < 0.05)
##   → G1 dan G2 memang berkorelasi (bukan kebetulan)

Matriks Korelasi Lengkap (Y1, Y2, X3)

mat_cor_full <- cor(df[, c("G1", "G2", "absences")], method = "pearson")

cat("MATRIKS KORELASI PEARSON (G1, G2, absences)\n")
## MATRIKS KORELASI PEARSON (G1, G2, absences)
print(round(mat_cor_full, 4))
##               G1      G2 absences
## G1        1.0000  0.8521  -0.0310
## G2        0.8521  1.0000  -0.0318
## absences -0.0310 -0.0318   1.0000
kable(round(mat_cor_full, 4),
      caption = "Matriks Korelasi Pearson — Y1 (G1), Y2 (G2), X3 (absences)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE)
Matriks Korelasi Pearson — Y1 (G1), Y2 (G2), X3 (absences)
G1 G2 absences
G1 1.0000 0.8521 -0.0310
G2 0.8521 1.0000 -0.0318
absences -0.0310 -0.0318 1.0000

Visualisasi: Heatmap Korelasi

cor_data <- as.data.frame(mat_cor_full) %>%
  rownames_to_column("Var1") %>%
  pivot_longer(-Var1, names_to = "Var2", values_to = "r")

ggplot(cor_data, aes(x = Var1, y = Var2, fill = r)) +
  geom_tile(color = "white", linewidth = 1) +
  geom_text(aes(label = round(r, 3)),
            size = 5, color = "white", fontface = "bold") +
  scale_fill_gradient2(
    low = "#1976D2", mid = "white", high = "#E91E8C",
    midpoint = 0, limits = c(-1, 1)
  ) +
  labs(
    title    = "Heatmap Korelasi Pearson",
    subtitle = "Y1 (G1), Y2 (G2), X3 (absences)",
    x = NULL, y = NULL, fill = "r"
  ) +
  coord_fixed() +
  theme_minimal(base_size = 13) +
  theme(axis.text = element_text(size = 12, face = "bold"))

Visualisasi: Scatter Plot Y1 vs Y2 dengan Regresi

ggplot(df, aes(x = G1, y = G2, color = sex, shape = address)) +
  geom_point(alpha = 0.5, size = 2.5) +
  geom_smooth(method = "lm", se = TRUE,
              aes(group = 1),
              color = "gray30", linewidth = 1.2) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C",
                                "Laki-laki" = "#1976D2")) +
  annotate("text",
           x = max(df$G1, na.rm = TRUE) * 0.7,
           y = min(df$G2, na.rm = TRUE) * 1.1,
           label = paste0("r = ", round(r_G1_G2, 3)),
           size = 5, color = "gray20", fontface = "bold") +
  labs(
    title    = "Scatter Plot Y1 (G1) vs Y2 (G2)",
    subtitle = paste0("r = ", round(r_G1_G2, 3),
                      " | Korelasi kuat positif — DV berkorelasi tapi tidak identik"),
    x     = "Y1 (G1 — Nilai Periode 1)",
    y     = "Y2 (G2 — Nilai Periode 2)",
    color = "Sex", shape = "Address"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?

Visualisasi: Scatter Plot per Grup

# ── Scatter plot G1 vs G2 per grup (sex dan address) ──
p_scatter_sex <- ggplot(df, aes(x = G1, y = G2, color = sex)) +
  geom_point(alpha = 0.5, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C",
                                "Laki-laki" = "#1976D2")) +
  labs(
    title    = "G1 vs G2 per Grup Sex (X1)",
    x = "Y1 (G1)", y = "Y2 (G2)", color = "Sex"
  ) +
  theme_minimal(base_size = 11) +
  theme(legend.position = "bottom")

p_scatter_addr <- ggplot(df, aes(x = G1, y = G2, color = address)) +
  geom_point(alpha = 0.5, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
  scale_color_manual(values = c("Rural" = "#FF8F00",
                                "Urban" = "#388E3C")) +
  labs(
    title    = "G1 vs G2 per Grup Address (X2)",
    x = "Y1 (G1)", y = "Y2 (G2)", color = "Address"
  ) +
  theme_minimal(base_size = 11) +
  theme(legend.position = "bottom")

grid.arrange(p_scatter_sex, p_scatter_addr, ncol = 2,
             top = "Scatter Plot Y1 vs Y2 per Grup — Cek Multikolinearitas")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Korelasi per Grup (sex × address)

# ── Korelasi G1 vs G2 per setiap grup ──
cat("===== KORELASI G1 vs G2 PER GRUP =====\n\n")
## ===== KORELASI G1 vs G2 PER GRUP =====
# Per sex
cat("--- Per Grup X1 (sex) ---\n")
## --- Per Grup X1 (sex) ---
for (grp in levels(df$sex)) {
  sub <- df %>% filter(sex == grp)
  r   <- round(cor(sub$G1, sub$G2, method = "pearson"), 4)
  cat("  Grup:", grp, "→ r =", r,
      ifelse(abs(r) < 0.90, "✓ OK (< 0.90)", "✗ Terlalu Tinggi (≥ 0.90)"), "\n")
}
##   Grup: Perempuan → r = 0.8395 ✓ OK (< 0.90) 
##   Grup: Laki-laki → r = 0.8623 ✓ OK (< 0.90)
cat("\n--- Per Grup X2 (address) ---\n")
## 
## --- Per Grup X2 (address) ---
for (grp in levels(df$address)) {
  sub <- df %>% filter(address == grp)
  r   <- round(cor(sub$G1, sub$G2, method = "pearson"), 4)
  cat("  Grup:", grp, "→ r =", r,
      ifelse(abs(r) < 0.90, "✓ OK (< 0.90)", "✗ Terlalu Tinggi (≥ 0.90)"), "\n")
}
##   Grup: Rural → r = 0.77 ✓ OK (< 0.90) 
##   Grup: Urban → r = 0.8778 ✓ OK (< 0.90)
cat("\n--- Per Grup Kombinasi sex × address ---\n")
## 
## --- Per Grup Kombinasi sex × address ---
df_cek <- df %>%
  group_by(sex, address) %>%
  summarise(
    n   = n(),
    r   = round(cor(G1, G2, method = "pearson"), 4),
    .groups = "drop"
  ) %>%
  mutate(Status = ifelse(abs(r) < 0.90, "OK ✓", "Terlalu Tinggi ✗"))

print(df_cek)
## # A tibble: 4 × 5
##   sex       address     n     r Status          
##   <fct>     <fct>   <int> <dbl> <chr>           
## 1 Perempuan Rural      44 0.785 OK ✓            
## 2 Perempuan Urban     164 0.855 OK ✓            
## 3 Laki-laki Rural      44 0.758 OK ✓            
## 4 Laki-laki Urban     143 0.900 Terlalu Tinggi ✗

#8A Uji Linearitas: Scatter Plot dengan Garis Regresi

Scatter Plot X3 (absences) vs Y1 (G1) & Y2 (G2) - Keseluruhan

p_lin_G1 <- ggplot(df, aes(x = absences, y = G1)) +
  geom_point(alpha = 0.4, size = 2, color = "#7B1FA2") +
  geom_smooth(method = "lm",  se = TRUE, color = "#7B1FA2",
              linewidth = 1.2, linetype = "solid") +
  geom_smooth(method = "loess", se = FALSE, color = "gray40",
              linewidth = 0.9, linetype = "dashed") +
  labs(
    title    = "Linearitas X3 (absences) → Y1 (G1)",
    subtitle = "Ungu = garis regresi linear | Abu = LOESS (non-linear smoother)",
    x = "X3: Jumlah Absen (absences)",
    y = "Y1: Nilai Periode 1 (G1)"
  ) +
  theme_minimal(base_size = 13)

p_lin_G2 <- ggplot(df, aes(x = absences, y = G2)) +
  geom_point(alpha = 0.4, size = 2, color = "#0288D1") +
  geom_smooth(method = "lm",  se = TRUE, color = "#0288D1",
              linewidth = 1.2, linetype = "solid") +
  geom_smooth(method = "loess", se = FALSE, color = "gray40",
              linewidth = 0.9, linetype = "dashed") +
  labs(
    title    = "Linearitas X3 (absences) → Y2 (G2)",
    subtitle = "Biru = garis regresi linear | Abu = LOESS (non-linear smoother)",
    x = "X3: Jumlah Absen (absences)",
    y = "Y2: Nilai Periode 2 (G2)"
  ) +
  theme_minimal(base_size = 13)

grid.arrange(p_lin_G1, p_lin_G2, ncol = 2,
             top = "Uji Linearitas: X3 (absences) vs Y1 (G1) & Y2 (G2)")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Scatter Plot per Grup X1 (sex)

p_lins_G1 <- ggplot(df, aes(x = absences, y = G1, color = sex)) +
  geom_point(alpha = 0.4, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.1, aes(fill = sex)) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  scale_fill_manual(values  = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(
    title    = "Linearitas per X1 (sex) → Y1 (G1)",
    subtitle = "Garis regresi terpisah per grup",
    x = "X3: Absences", y = "Y1: G1", color = "Sex", fill = "Sex"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

p_lins_G2 <- ggplot(df, aes(x = absences, y = G2, color = sex)) +
  geom_point(alpha = 0.4, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.1, aes(fill = sex)) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  scale_fill_manual(values  = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(
    title    = "Linearitas per X1 (sex) → Y2 (G2)",
    subtitle = "Garis regresi terpisah per grup",
    x = "X3: Absences", y = "Y2: G2", color = "Sex", fill = "Sex"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

grid.arrange(p_lins_G1, p_lins_G2, ncol = 2,
             top = "Linearitas X3 → Y1 & Y2 per Grup X1 (sex)")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Scatter Plot per Grup X2 (address)

p_lina_G1 <- ggplot(df, aes(x = absences, y = G1, color = address)) +
  geom_point(alpha = 0.4, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.1, aes(fill = address)) +
  scale_color_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  scale_fill_manual(values  = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  labs(
    title    = "Linearitas per X2 (address) → Y1 (G1)",
    subtitle = "Garis regresi terpisah per grup",
    x = "X3: Absences", y = "Y1: G1", color = "Address", fill = "Address"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

p_lina_G2 <- ggplot(df, aes(x = absences, y = G2, color = address)) +
  geom_point(alpha = 0.4, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.1, aes(fill = address)) +
  scale_color_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  scale_fill_manual(values  = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  labs(
    title    = "Linearitas per X2 (address) → Y2 (G2)",
    subtitle = "Garis regresi terpisah per grup",
    x = "X3: Absences", y = "Y2: G2", color = "Address", fill = "Address"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

grid.arrange(p_lina_G1, p_lina_G2, ncol = 2,
             top = "Linearitas X3 → Y1 & Y2 per Grup X2 (address)")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

8B Uji Linearitas: Korelasi Pearson X3 vs Y1, Y2

# ── Korelasi Pearson sebagai konfirmasi numerik linearitas ──
kor_global <- data.frame(
  Perbandingan = c("X3 (absences) vs Y1 (G1)", "X3 (absences) vs Y2 (G2)"),
  r            = c(
    round(cor(df$absences, df$G1, method = "pearson"), 4),
    round(cor(df$absences, df$G2, method = "pearson"), 4)
  ),
  p_value      = c(
    round(cor.test(df$absences, df$G1)$p.value, 4),
    round(cor.test(df$absences, df$G2)$p.value, 4)
  )
)

kor_global$Signifikan <- ifelse(kor_global$p_value < 0.05,
                                 "Signifikan (p < 0.05)",
                                 "Tidak Signifikan (p ≥ 0.05)")
kor_global$Keterangan <- ifelse(kor_global$p_value < 0.05,
                                 "Ada hubungan linear — covariate relevan ✓",
                                 "Hubungan linear lemah — covariate kurang kuat")

kable(kor_global,
      caption = "Korelasi Pearson: X3 (absences) vs Y1 (G1) & Y2 (G2)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(5, color = ifelse(kor_global$p_value < 0.05, "darkgreen", "orange"),
              bold = TRUE)
Korelasi Pearson: X3 (absences) vs Y1 (G1) & Y2 (G2)
Perbandingan r p_value Signifikan Keterangan
X3 (absences) vs Y1 (G1) -0.0310 0.5390 Tidak Signifikan (p ≥ 0.05) Hubungan linear lemah — covariate kurang kuat
X3 (absences) vs Y2 (G2) -0.0318 0.5289 Tidak Signifikan (p ≥ 0.05) Hubungan linear lemah — covariate kurang kuat
# ── Korelasi per grup ──
grp_kor <- bind_rows(
  df %>%
    group_by(Faktor = "sex", Grup = as.character(sex)) %>%
    summarise(
      r_G1    = round(cor(absences, G1), 4),
      p_G1    = round(cor.test(absences, G1)$p.value, 4),
      r_G2    = round(cor(absences, G2), 4),
      p_G2    = round(cor.test(absences, G2)$p.value, 4),
      .groups = "drop"
    ),
  df %>%
    group_by(Faktor = "address", Grup = as.character(address)) %>%
    summarise(
      r_G1    = round(cor(absences, G1), 4),
      p_G1    = round(cor.test(absences, G1)$p.value, 4),
      r_G2    = round(cor(absences, G2), 4),
      p_G2    = round(cor.test(absences, G2)$p.value, 4),
      .groups = "drop"
    )
)

kable(grp_kor,
      caption = "Korelasi Pearson X3 vs Y1 & Y2 per Grup",
      col.names = c("Faktor", "Grup", "r (G1)", "p (G1)", "r (G2)", "p (G2)")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE)
Korelasi Pearson X3 vs Y1 & Y2 per Grup
Faktor Grup r (G1) p (G1) r (G2) p (G2)
sex Laki-laki -0.1097 0.1349 -0.1188 0.1055
sex Perempuan 0.0244 0.7267 0.0290 0.6776
address Rural 0.0287 0.7905 0.0428 0.6920
address Urban -0.0511 0.3719 -0.0559 0.3287

8C Uji Homogenitas Slope (Interaksi Faktor × Covariate)

Model Interaksi: sex × absences → G1

model_int_G1_sex <- lm(G1 ~ sex * absences, data = df)

cat("MODEL: G1 ~ sex * absences\n")
## MODEL: G1 ~ sex * absences
cat(rep("─", 50), "\n", sep = "")
## ──────────────────────────────────────────────────
print(Anova(model_int_G1_sex, type = 2))
## Anova Table (Type II tests)
## 
## Response: G1
##              Sum Sq  Df F value  Pr(>F)  
## sex            35.1   1  3.2118 0.07388 .
## absences        2.7   1  0.2462 0.62003  
## sex:absences   24.4   1  2.2288 0.13627  
## Residuals    4277.0 391                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p_int_G1_sex <- Anova(model_int_G1_sex, type = 2)["sex:absences", "Pr(>F)"]
cat("\nInteraksi sex × absences → G1: p =", round(p_int_G1_sex, 4), "→",
    ifelse(p_int_G1_sex > 0.05,
           "Slope HOMOGEN — asumsi terpenuhi ✓",
           "Slope TIDAK HOMOGEN — asumsi dilanggar ✗"), "\n")
## 
## Interaksi sex × absences → G1: p = 0.1363 → Slope HOMOGEN — asumsi terpenuhi ✓

Model Interaksi: sex × absences → G2

model_int_G2_sex <- lm(G2 ~ sex * absences, data = df)

cat("MODEL: G2 ~ sex * absences\n")
## MODEL: G2 ~ sex * absences
cat(rep("─", 50), "\n", sep = "")
## ──────────────────────────────────────────────────
print(Anova(model_int_G2_sex, type = 2))
## Anova Table (Type II tests)
## 
## Response: G2
##              Sum Sq  Df F value  Pr(>F)  
## sex            44.3   1  3.1588 0.07629 .
## absences        3.7   1  0.2631 0.60830  
## sex:absences   37.9   1  2.6985 0.10125  
## Residuals    5486.8 391                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p_int_G2_sex <- Anova(model_int_G2_sex, type = 2)["sex:absences", "Pr(>F)"]
cat("\nInteraksi sex × absences → G2: p =", round(p_int_G2_sex, 4), "→",
    ifelse(p_int_G2_sex > 0.05,
           "Slope HOMOGEN — asumsi terpenuhi ✓",
           "Slope TIDAK HOMOGEN — asumsi dilanggar ✗"), "\n")
## 
## Interaksi sex × absences → G2: p = 0.1012 → Slope HOMOGEN — asumsi terpenuhi ✓

Model Interaksi: address × absences → G1

model_int_G1_addr <- lm(G1 ~ address * absences, data = df)

cat("MODEL: G1 ~ address * absences\n")
## MODEL: G1 ~ address * absences
cat(rep("─", 50), "\n", sep = "")
## ──────────────────────────────────────────────────
print(Anova(model_int_G1_addr, type = 2))
## Anova Table (Type II tests)
## 
## Response: G1
##                  Sum Sq  Df F value Pr(>F)
## address            20.6   1  1.8675 0.1725
## absences            3.7   1  0.3328 0.5644
## address:absences    5.8   1  0.5268 0.4684
## Residuals        4310.2 391
p_int_G1_addr <- Anova(model_int_G1_addr, type = 2)["address:absences", "Pr(>F)"]
cat("\nInteraksi address × absences → G1: p =", round(p_int_G1_addr, 4), "→",
    ifelse(p_int_G1_addr > 0.05,
           "Slope HOMOGEN — asumsi terpenuhi ✓",
           "Slope TIDAK HOMOGEN — asumsi dilanggar ✗"), "\n")
## 
## Interaksi address × absences → G1: p = 0.4684 → Slope HOMOGEN — asumsi terpenuhi ✓

Model Interaksi: address × absences → G2

model_int_G2_addr <- lm(G2 ~ address * absences, data = df)

cat("MODEL: G2 ~ address * absences\n")
## MODEL: G2 ~ address * absences
cat(rep("─", 50), "\n", sep = "")
## ──────────────────────────────────────────────────
print(Anova(model_int_G2_addr, type = 2))
## Anova Table (Type II tests)
## 
## Response: G2
##                  Sum Sq  Df F value  Pr(>F)  
## address            87.4   1  6.2455 0.01286 *
## absences            4.5   1  0.3185 0.57282  
## address:absences   11.0   1  0.7861 0.37581  
## Residuals        5470.7 391                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p_int_G2_addr <- Anova(model_int_G2_addr, type = 2)["address:absences", "Pr(>F)"]
cat("\nInteraksi address × absences → G2: p =", round(p_int_G2_addr, 4), "→",
    ifelse(p_int_G2_addr > 0.05,
           "Slope HOMOGEN — asumsi terpenuhi ✓",
           "Slope TIDAK HOMOGEN — asumsi dilanggar ✗"), "\n")
## 
## Interaksi address × absences → G2: p = 0.3758 → Slope HOMOGEN — asumsi terpenuhi ✓

8D Visualisasi Homogenitas Slope

# ── Visualisasi slope per grup X1 (sex) ──
slope_sex <- df %>%
  group_by(sex) %>%
  summarise(
    slope_G1 = round(coef(lm(G1 ~ absences))[2], 4),
    slope_G2 = round(coef(lm(G2 ~ absences))[2], 4),
    .groups  = "drop"
  )

cat("Slope X3 → Y1 (G1) per Grup sex:\n")
## Slope X3 → Y1 (G1) per Grup sex:
print(slope_sex)
## # A tibble: 2 × 3
##   sex       slope_G1 slope_G2
##   <fct>        <dbl>    <dbl>
## 1 Perempuan   0.0083   0.0112
## 2 Laki-laki  -0.0623  -0.0768
p_slope_G1_sex <- ggplot(df, aes(x = absences, y = G1, color = sex)) +
  geom_point(alpha = 0.3, size = 1.8) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.4) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  annotate("text", x = 50, y = 18,
           label = paste0("Interaksi: p = ", round(p_int_G1_sex, 3)),
           size = 3.5, color = "gray30") +
  labs(title    = "Slope X3 → Y1 (G1) per Sex",
       subtitle = paste0("p-interaksi = ", round(p_int_G1_sex, 4),
                         ifelse(p_int_G1_sex > 0.05, " → Slope Homogen ✓", " → Tidak Homogen ✗")),
       x = "X3: Absences", y = "Y1: G1", color = "Sex") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

p_slope_G2_sex <- ggplot(df, aes(x = absences, y = G2, color = sex)) +
  geom_point(alpha = 0.3, size = 1.8) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.4) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  annotate("text", x = 50, y = 18,
           label = paste0("Interaksi: p = ", round(p_int_G2_sex, 3)),
           size = 3.5, color = "gray30") +
  labs(title    = "Slope X3 → Y2 (G2) per Sex",
       subtitle = paste0("p-interaksi = ", round(p_int_G2_sex, 4),
                         ifelse(p_int_G2_sex > 0.05, " → Slope Homogen ✓", " → Tidak Homogen ✗")),
       x = "X3: Absences", y = "Y2: G2", color = "Sex") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

grid.arrange(p_slope_G1_sex, p_slope_G2_sex, ncol = 2,
             top = "Homogenitas Slope: X3 (absences) per Grup X1 (sex)")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# ── Visualisasi slope per grup X2 (address) ──
slope_addr <- df %>%
  group_by(address) %>%
  summarise(
    slope_G1 = round(coef(lm(G1 ~ absences))[2], 4),
    slope_G2 = round(coef(lm(G2 ~ absences))[2], 4),
    .groups  = "drop"
  )

cat("Slope X3 → Y1 (G1) per Grup address:\n")
## Slope X3 → Y1 (G1) per Grup address:
print(slope_addr)
## # A tibble: 2 × 3
##   address slope_G1 slope_G2
##   <fct>      <dbl>    <dbl>
## 1 Rural     0.0102   0.0174
## 2 Urban    -0.0224  -0.0275
p_slope_G1_addr <- ggplot(df, aes(x = absences, y = G1, color = address)) +
  geom_point(alpha = 0.3, size = 1.8) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.4) +
  scale_color_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  annotate("text", x = 50, y = 18,
           label = paste0("Interaksi: p = ", round(p_int_G1_addr, 3)),
           size = 3.5, color = "gray30") +
  labs(title    = "Slope X3 → Y1 (G1) per Address",
       subtitle = paste0("p-interaksi = ", round(p_int_G1_addr, 4),
                         ifelse(p_int_G1_addr > 0.05, " → Slope Homogen ✓", " → Tidak Homogen ✗")),
       x = "X3: Absences", y = "Y1: G1", color = "Address") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

p_slope_G2_addr <- ggplot(df, aes(x = absences, y = G2, color = address)) +
  geom_point(alpha = 0.3, size = 1.8) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.4) +
  scale_color_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  annotate("text", x = 50, y = 18,
           label = paste0("Interaksi: p = ", round(p_int_G2_addr, 3)),
           size = 3.5, color = "gray30") +
  labs(title    = "Slope X3 → Y2 (G2) per Address",
       subtitle = paste0("p-interaksi = ", round(p_int_G2_addr, 4),
                         ifelse(p_int_G2_addr > 0.05, " → Slope Homogen ✓", " → Tidak Homogen ✗")),
       x = "X3: Absences", y = "Y2: G2", color = "Address") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

grid.arrange(p_slope_G1_addr, p_slope_G2_addr, ncol = 2,
             top = "Homogenitas Slope: X3 (absences) per Grup X2 (address)")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

8E Ringkasan Akhir Tahap 8

# ── Tabel ringkasan lengkap ──
tabel_step8 <- data.frame(
  Uji = c(
    "Linearitas (Pearson): X3 vs Y1 — Global",
    "Linearitas (Pearson): X3 vs Y2 — Global",
    "Homogenitas Slope: sex × absences → Y1 (G1)",
    "Homogenitas Slope: sex × absences → Y2 (G2)",
    "Homogenitas Slope: address × absences → Y1 (G1)",
    "Homogenitas Slope: address × absences → Y2 (G2)"
  ),
  p_value = c(
    round(cor.test(df$absences, df$G1)$p.value, 4),
    round(cor.test(df$absences, df$G2)$p.value, 4),
    round(p_int_G1_sex,  4),
    round(p_int_G2_sex,  4),
    round(p_int_G1_addr, 4),
    round(p_int_G2_addr, 4)
  ),
  Berlaku_untuk = c(
    "ANCOVA & MANCOVA",
    "ANCOVA & MANCOVA",
    "ANCOVA & MANCOVA",
    "ANCOVA & MANCOVA",
    "ANCOVA & MANCOVA",
    "ANCOVA & MANCOVA"
  )
)

tabel_step8$Kesimpulan <- c(
  ifelse(tabel_step8$p_value[1] < 0.05, "Hubungan linear signifikan ✓", "Hubungan linear lemah ⚠"),
  ifelse(tabel_step8$p_value[2] < 0.05, "Hubungan linear signifikan ✓", "Hubungan linear lemah ⚠"),
  ifelse(tabel_step8$p_value[3] > 0.05, "Slope Homogen ✓", "Slope TIDAK Homogen ✗"),
  ifelse(tabel_step8$p_value[4] > 0.05, "Slope Homogen ✓", "Slope TIDAK Homogen ✗"),
  ifelse(tabel_step8$p_value[5] > 0.05, "Slope Homogen ✓", "Slope TIDAK Homogen ✗"),
  ifelse(tabel_step8$p_value[6] > 0.05, "Slope Homogen ✓", "Slope TIDAK Homogen ✗")
)

# Warna baris
warna_kes <- c(
  ifelse(tabel_step8$p_value[1] < 0.05, "darkgreen", "orange"),
  ifelse(tabel_step8$p_value[2] < 0.05, "darkgreen", "orange"),
  ifelse(tabel_step8$p_value[3] > 0.05, "darkgreen", "red"),
  ifelse(tabel_step8$p_value[4] > 0.05, "darkgreen", "red"),
  ifelse(tabel_step8$p_value[5] > 0.05, "darkgreen", "red"),
  ifelse(tabel_step8$p_value[6] > 0.05, "darkgreen", "red")
)

kable(tabel_step8,
      caption = "Ringkasan Tahap 8 — Uji Linearitas & Homogenitas Slope") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(4, color = warna_kes, bold = TRUE) %>%
  row_spec(1:2, background = "#F3E5F5") %>%   # Linearitas — ungu muda
  row_spec(3:6, background = "#E3F2FD")         # Slope homogen — biru muda
Ringkasan Tahap 8 — Uji Linearitas & Homogenitas Slope
Uji p_value Berlaku_untuk Kesimpulan
Linearitas (Pearson): X3 vs Y1 — Global 0.5390 ANCOVA & MANCOVA Hubungan linear lemah ⚠
Linearitas (Pearson): X3 vs Y2 — Global 0.5289 ANCOVA & MANCOVA Hubungan linear lemah ⚠
Homogenitas Slope: sex × absences → Y1 (G1) 0.1363 ANCOVA & MANCOVA Slope Homogen ✓
Homogenitas Slope: sex × absences → Y2 (G2) 0.1012 ANCOVA & MANCOVA Slope Homogen ✓
Homogenitas Slope: address × absences → Y1 (G1) 0.4684 ANCOVA & MANCOVA Slope Homogen ✓
Homogenitas Slope: address × absences → Y2 (G2) 0.3758 ANCOVA & MANCOVA Slope Homogen ✓

Tahap 9 — Uji Utama: MANOVA, ANCOVA, dan MANCOVA


9A — MANOVA (Multivariate Analysis of Variance)

Model: Y1 + Y2 ~ X1 + X2 (tanpa covariate)
Statistik uji: Wilks’ Lambda
Pertanyaan: Apakah sex dan/atau address berpengaruh signifikan terhadap kombinasi G1 dan G2?

Fit model MANOVA

if (!require(heplots)) install.packages("heplots")
library(heplots)

# Fit MANOVA
manova_model <- manova(cbind(G1, G2) ~ sex + address, data = df)

cat("===== MANOVA: cbind(G1, G2) ~ sex + address =====\n")
## ===== MANOVA: cbind(G1, G2) ~ sex + address =====
summary(manova_model, test = "Wilks")
##            Df   Wilks approx F num Df den Df  Pr(>F)  
## sex         1 0.99087   1.8022      2    391 0.16630  
## address     1 0.97826   4.3442      2    391 0.01361 *
## Residuals 392                                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Tabel MANOVA — semua statistik multivariat

cat("\n--- Pillai's Trace ---\n")
## 
## --- Pillai's Trace ---
print(summary(manova_model, test = "Pillai"))
##            Df    Pillai approx F num Df den Df  Pr(>F)  
## sex         1 0.0091344   1.8022      2    391 0.16630  
## address     1 0.0217377   4.3442      2    391 0.01361 *
## Residuals 392                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Hotelling-Lawley Trace ---\n")
## 
## --- Hotelling-Lawley Trace ---
print(summary(manova_model, test = "Hotelling-Lawley"))
##            Df Hotelling-Lawley approx F num Df den Df  Pr(>F)  
## sex         1        0.0092186   1.8022      2    391 0.16630  
## address     1        0.0222208   4.3442      2    391 0.01361 *
## Residuals 392                                                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Roy's Largest Root ---\n")
## 
## --- Roy's Largest Root ---
print(summary(manova_model, test = "Roy"))
##            Df       Roy approx F num Df den Df  Pr(>F)  
## sex         1 0.0092186   1.8022      2    391 0.16630  
## address     1 0.0222208   4.3442      2    391 0.01361 *
## Residuals 392                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Univariate follow-up MANOVA

cat("===== ANOVA Univariat (follow-up MANOVA) =====\n")
## ===== ANOVA Univariat (follow-up MANOVA) =====
summary.aov(manova_model)
##  Response G1 :
##              Df Sum Sq Mean Sq F value  Pr(>F)  
## sex           1   36.6  36.611  3.3521 0.06788 .
## address       1   22.7  22.723  2.0805 0.14999  
## Residuals   392 4281.4  10.922                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response G2 :
##              Df Sum Sq Mean Sq F value  Pr(>F)  
## sex           1   46.3  46.265  3.3362 0.06853 .
## address       1   92.3  92.318  6.6571 0.01024 *
## Residuals   392 5436.1  13.868                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Ringkasan hasil MANOVA

wil_sex  <- summary(manova_model, test = "Wilks")$stats["sex",     ]
wil_addr <- summary(manova_model, test = "Wilks")$stats["address", ]

tabel_manova <- data.frame(
  Efek    = c("sex (X1)", "address (X2)"),
  Lambda  = round(c(wil_sex["Wilks"], wil_addr["Wilks"]), 4),
  F_value = round(c(wil_sex["approx F"], wil_addr["approx F"]), 4),
  p_value = round(c(wil_sex["Pr(>F)"], wil_addr["Pr(>F)"]), 4)
)
tabel_manova$Kesimpulan <- ifelse(
  tabel_manova$p_value < 0.05,
  "Signifikan — ada perbedaan multivariat ✓",
  "Tidak signifikan ✗"
)

kable(tabel_manova,
      caption = "Ringkasan MANOVA — Wilks' Lambda",
      col.names = c("Efek", "Wilks λ", "F", "p-value", "Kesimpulan")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(5,
    color = ifelse(tabel_manova$p_value < 0.05, "darkgreen", "red"),
    bold  = TRUE)
Ringkasan MANOVA — Wilks’ Lambda
Efek Wilks λ F p-value Kesimpulan
sex (X1) 0.9909 1.8022 0.1663 Tidak signifikan ✗
address (X2) 0.9783 4.3442 0.0136 Signifikan — ada perbedaan multivariat ✓

Visualisasi HE plot MANOVA

par(mfrow = c(1, 2))

heplot(manova_model,
       main  = "HE Plot: sex (X1)\n[G1 vs G2]",
       xlab  = "Y1: G1 (Nilai Periode 1)",
       ylab  = "Y2: G2 (Nilai Periode 2)",
       col   = c("blue", "red"),
       lwd   = 2)

heplot(manova_model,
       terms = "address",
       main  = "HE Plot: address (X2)\n[G1 vs G2]",
       xlab  = "Y1: G1 (Nilai Periode 1)",
       ylab  = "Y2: G2 (Nilai Periode 2)",
       col   = c("darkgreen", "orange"),
       lwd   = 2)

par(mfrow = c(1, 1))

9B — ANCOVA (Analysis of Covariance)

Model: Y1 ~ X1 + X3 (covariate: absences)
Tujuan: Mengendalikan pengaruh absensi (X3) sebelum menguji efek sex (X1) terhadap G1
Output utama: Adjusted means (rata-rata terkoreksi covariate)

Install package tambahan

if (!require(emmeans))  install.packages("emmeans")
## Loading required package: emmeans
## Warning: package 'emmeans' was built under R version 4.5.3
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
if (!require(effectsize)) install.packages("effectsize")
## Loading required package: effectsize
## Warning: package 'effectsize' was built under R version 4.5.3
library(emmeans)
library(effectsize)

Fit model ANCOVA

# ANCOVA: G1 ~ sex + absences
ancova_model <- lm(G1 ~ sex + absences, data = df)

cat("===== ANCOVA: G1 ~ sex + absences =====\n")
## ===== ANCOVA: G1 ~ sex + absences =====
print(Anova(ancova_model, type = 2))
## Anova Table (Type II tests)
## 
## Response: G1
##           Sum Sq  Df F value  Pr(>F)  
## sex         35.1   1  3.2017 0.07433 .
## absences     2.7   1  0.2454 0.62058  
## Residuals 4301.4 392                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Adjusted means (Estimated Marginal Means)

emm_ancova <- emmeans(ancova_model, ~ sex)

cat("===== Adjusted Means (Controlling for absences) =====\n")
## ===== Adjusted Means (Controlling for absences) =====
print(summary(emm_ancova))
##  sex       emmean    SE  df lower.CL upper.CL
##  Perempuan   10.6 0.230 392     10.2     11.1
##  Laki-laki   11.2 0.243 392     10.7     11.7
## 
## Confidence level used: 0.95
# Tabel rapi
emm_df <- as.data.frame(summary(emm_ancova))
kable(emm_df,
      caption = "Adjusted Means G1 per Sex (Setelah Kontrol Absences)",
      digits  = 3,
      col.names = c("Sex", "Adj. Mean", "SE", "df", "Lower CI", "Upper CI")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Adjusted Means G1 per Sex (Setelah Kontrol Absences)
Sex Adj. Mean SE df Lower CI Upper CI
Perempuan 10.625 0.230 392 10.173 11.077
Laki-laki 11.224 0.243 392 10.747 11.701

Perbandingan adjusted means (contrast)

cat("===== Pairwise Contrast (Adjusted Means) =====\n")
## ===== Pairwise Contrast (Adjusted Means) =====
pairs(emm_ancova)
##  contrast                estimate    SE  df t.ratio p.value
##  Perempuan - (Laki-laki)   -0.599 0.335 392  -1.789  0.0743

Effect size ANCOVA (Partial η²)

eta_ancova <- eta_squared(Anova(ancova_model, type = 2), partial = TRUE)

kable(eta_ancova,
      caption = "Effect Size ANCOVA — Partial η²",
      digits  = 4) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)
Effect Size ANCOVA — Partial η²
Parameter Eta2_partial CI CI_low CI_high
sex 0.0081 0.95 0 1
absences 0.0006 0.95 0 1

Visualisasi ANCOVA — adjusted means dengan CI

emm_plot_df <- as.data.frame(summary(emm_ancova))

ggplot(emm_plot_df, aes(x = sex, y = emmean, fill = sex)) +
  geom_col(alpha = 0.8, width = 0.5, color = "white") +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
                width = 0.15, linewidth = 0.9, color = "gray30") +
  geom_text(aes(label = round(emmean, 2)),
            vjust = -0.6, size = 4.5, fontface = "bold") +
  scale_fill_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(title    = "ANCOVA — Adjusted Means G1 per Sex",
       subtitle = "Setelah dikontrol covariate: Absences (X3)",
       x = "Jenis Kelamin (X1)", y = "Adjusted Mean G1", fill = "Sex") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

Visualisasi ANCOVA — scatter dengan garis regresi

ggplot(df, aes(x = absences, y = G1, color = sex)) +
  geom_point(alpha = 0.35, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.3) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(title    = "ANCOVA — Hubungan Absences & G1 per Sex",
       subtitle = "Garis regresi paralel = asumsi homogenitas slope terpenuhi",
       x = "X3: Absences", y = "Y1: G1", color = "Sex") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")
## `geom_smooth()` using formula = 'y ~ x'

Ringkasan ANCOVA

anv_ancova <- Anova(ancova_model, type = 2)
anv_df <- as.data.frame(anv_ancova)
anv_df$Variabel <- rownames(anv_df)

# Buang baris Residuals yang tidak punya p-value (NA)
anv_df <- anv_df[!is.na(anv_df$`Pr(>F)`), ]

anv_df$Kesimpulan <- ifelse(anv_df$`Pr(>F)` < 0.05,
                             "Signifikan ✓", "Tidak Signifikan ✗")
warna_kes <- ifelse(anv_df$`Pr(>F)` < 0.05, "darkgreen", "red")

kable(anv_df[, c("Variabel", "Sum Sq", "Df", "F value", "Pr(>F)", "Kesimpulan")],
      caption   = "Tabel ANCOVA: G1 ~ sex + absences",
      digits    = 4,
      row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(6, color = warna_kes, bold = TRUE)
Tabel ANCOVA: G1 ~ sex + absences
Variabel Sum Sq Df F value Pr(>F) Kesimpulan
sex 35.1325 1 3.2017 0.0743 Tidak Signifikan ✗
absences 2.6933 1 0.2454 0.6206 Tidak Signifikan ✗

9C — MANCOVA (Multivariate Analysis of Covariance)

Fit model MANCOVA

mancova_model <- manova(cbind(G1, G2) ~ sex + address + absences, data = df)

cat("===== MANCOVA: cbind(G1, G2) ~ sex + address + absences =====\n")
## ===== MANCOVA: cbind(G1, G2) ~ sex + address + absences =====
cat("\n--- Pillai's Trace ---\n")
## 
## --- Pillai's Trace ---
print(summary(mancova_model, test = "Pillai"))
##            Df    Pillai approx F num Df den Df  Pr(>F)  
## sex         1 0.0091394   1.7986      2    390 0.16690  
## address     1 0.0217429   4.3341      2    390 0.01375 *
## absences    1 0.0005512   0.1075      2    390 0.89807  
## Residuals 391                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Semua statistik multivariat MANCOVA

cat("\n--- Wilks' Lambda ---\n")
## 
## --- Wilks' Lambda ---
print(summary(mancova_model, test = "Wilks"))
##            Df   Wilks approx F num Df den Df  Pr(>F)  
## sex         1 0.99086   1.7986      2    390 0.16690  
## address     1 0.97826   4.3341      2    390 0.01375 *
## absences    1 0.99945   0.1075      2    390 0.89807  
## Residuals 391                                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Hotelling-Lawley Trace ---\n")
## 
## --- Hotelling-Lawley Trace ---
print(summary(mancova_model, test = "Hotelling-Lawley"))
##            Df Hotelling-Lawley approx F num Df den Df  Pr(>F)  
## sex         1        0.0092237   1.7986      2    390 0.16690  
## address     1        0.0222262   4.3341      2    390 0.01375 *
## absences    1        0.0005515   0.1075      2    390 0.89807  
## Residuals 391                                                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Roy's Largest Root ---\n")
## 
## --- Roy's Largest Root ---
print(summary(mancova_model, test = "Roy"))
##            Df       Roy approx F num Df den Df  Pr(>F)  
## sex         1 0.0092237   1.7986      2    390 0.16690  
## address     1 0.0222262   4.3341      2    390 0.01375 *
## absences    1 0.0005515   0.1075      2    390 0.89807  
## Residuals 391                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Univariate tests per DV setelah kontrol X3

cat("===== ANOVA Univariat per DV (setelah kontrol absences) =====\n")
## ===== ANOVA Univariat per DV (setelah kontrol absences) =====
summary.aov(mancova_model)
##  Response G1 :
##              Df Sum Sq Mean Sq F value  Pr(>F)  
## sex           1   36.6  36.611  3.3453 0.06816 .
## address       1   22.7  22.723  2.0762 0.15041  
## absences      1    2.2   2.248  0.2054 0.65062  
## Residuals   391 4279.1  10.944                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response G2 :
##              Df Sum Sq Mean Sq F value  Pr(>F)  
## sex           1   46.3  46.265  3.3293 0.06882 .
## address       1   92.3  92.318  6.6434 0.01032 *
## absences      1    2.7   2.674  0.1924 0.66115  
## Residuals   391 5433.4  13.896                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size MANCOVA

# Pillai ringkasan per efek
pil <- summary(mancova_model, test = "Pillai")$stats

tabel_mancova <- data.frame(
  Efek    = rownames(pil)[1:(nrow(pil)-1)],
  Pillai  = round(pil[1:(nrow(pil)-1), "Pillai"],      4),
  F_value = round(pil[1:(nrow(pil)-1), "approx F"],    4),
  p_value = round(pil[1:(nrow(pil)-1), "Pr(>F)"],      4)
)
tabel_mancova$Kesimpulan <- ifelse(
  tabel_mancova$p_value < 0.05,
  "Signifikan ✓",
  "Tidak Signifikan ✗"
)

kable(tabel_mancova,
      caption = "Ringkasan MANCOVA — Pillai's Trace",
      col.names = c("Efek", "Pillai", "F", "p-value", "Kesimpulan")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(5,
    color = ifelse(tabel_mancova$p_value < 0.05, "darkgreen", "red"),
    bold  = TRUE)
Ringkasan MANCOVA — Pillai’s Trace
Efek Pillai F p-value Kesimpulan
sex sex 0.0091 1.7986 0.1669 Tidak Signifikan ✗
address address 0.0217 4.3341 0.0138 Signifikan ✓
absences absences 0.0006 0.1075 0.8981 Tidak Signifikan ✗

Adjusted means per DV dari MANCOVA (emmeans)

# Model univariat per DV (kontrol absences)
lm_G1 <- lm(G1 ~ sex + address + absences, data = df)
lm_G2 <- lm(G2 ~ sex + address + absences, data = df)

emm_G1_sex  <- emmeans(lm_G1, ~ sex)
emm_G2_sex  <- emmeans(lm_G2, ~ sex)
emm_G1_addr <- emmeans(lm_G1, ~ address)
emm_G2_addr <- emmeans(lm_G2, ~ address)

cat("=== Adjusted Means G1 per sex ===\n")
## === Adjusted Means G1 per sex ===
print(summary(emm_G1_sex))
##  sex       emmean    SE  df lower.CL upper.CL
##  Perempuan   10.5 0.257 391     9.95     11.0
##  Laki-laki   11.1 0.264 391    10.55     11.6
## 
## Results are averaged over the levels of: address 
## Confidence level used: 0.95
cat("\n=== Adjusted Means G2 per sex ===\n")
## 
## === Adjusted Means G2 per sex ===
print(summary(emm_G2_sex))
##  sex       emmean    SE  df lower.CL upper.CL
##  Perempuan   10.1 0.290 391     9.49     10.6
##  Laki-laki   10.8 0.298 391    10.18     11.3
## 
## Results are averaged over the levels of: address 
## Confidence level used: 0.95
cat("\n=== Adjusted Means G1 per address ===\n")
## 
## === Adjusted Means G1 per address ===
print(summary(emm_G1_addr))
##  address emmean    SE  df lower.CL upper.CL
##  Rural     10.5 0.353 391     9.79     11.2
##  Urban     11.1 0.189 391    10.68     11.4
## 
## Results are averaged over the levels of: sex 
## Confidence level used: 0.95
cat("\n=== Adjusted Means G2 per address ===\n")
## 
## === Adjusted Means G2 per address ===
print(summary(emm_G2_addr))
##  address emmean    SE  df lower.CL upper.CL
##  Rural     9.83 0.398 391     9.05     10.6
##  Urban    10.99 0.213 391    10.57     11.4
## 
## Results are averaged over the levels of: sex 
## Confidence level used: 0.95

Visualisasi adjusted means MANCOVA

make_emm_plot <- function(emm_obj, fill_col, title_str, y_lab) {
  df_plot <- as.data.frame(summary(emm_obj))
  x_var   <- names(df_plot)[1]   # kolom pertama = faktor

  ggplot(df_plot, aes_string(x = x_var, y = "emmean", fill = x_var)) +
    geom_col(alpha = 0.82, width = 0.5, color = "white") +
    geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
                  width = 0.15, linewidth = 0.8, color = "gray30") +
    geom_text(aes(label = round(emmean, 2)),
              vjust = -0.55, size = 4, fontface = "bold") +
    scale_fill_manual(values = fill_col) +
    labs(title = title_str,
         x = x_var, y = y_lab) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "none")
}

p1 <- make_emm_plot(emm_G1_sex,
  c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2"),
  "Adj. Mean G1 per Sex\n(dikontrol absences)", "Adj. Mean G1")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
p2 <- make_emm_plot(emm_G2_sex,
  c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2"),
  "Adj. Mean G2 per Sex\n(dikontrol absences)", "Adj. Mean G2")

p3 <- make_emm_plot(emm_G1_addr,
  c("Rural" = "#FF8F00", "Urban" = "#388E3C"),
  "Adj. Mean G1 per Address\n(dikontrol absences)", "Adj. Mean G1")

p4 <- make_emm_plot(emm_G2_addr,
  c("Rural" = "#FF8F00", "Urban" = "#388E3C"),
  "Adj. Mean G2 per Address\n(dikontrol absences)", "Adj. Mean G2")

grid.arrange(p1, p2, p3, p4, ncol = 2,
             top = "MANCOVA — Adjusted Means (G1 & G2) setelah Kontrol Absences")

Perbandingan MANOVA vs MANCOVA

wil_manova <- summary(manova_model,  test = "Wilks")$stats
pil_mancova <- summary(mancova_model, test = "Pillai")$stats

efek_names <- c("sex", "address")
rows_manova  <- efek_names[efek_names %in% rownames(wil_manova)]
rows_mancova <- efek_names[efek_names %in% rownames(pil_mancova)]

compare_df <- data.frame(
  Efek          = efek_names,
  MANOVA_p      = round(wil_manova[rows_manova,  "Pr(>F)"], 4),
  MANCOVA_p     = round(pil_mancova[rows_mancova, "Pr(>F)"], 4)
)
compare_df$Perubahan <- ifelse(
  (compare_df$MANOVA_p < 0.05) == (compare_df$MANCOVA_p < 0.05),
  "Konsisten ✓", "Berubah setelah kontrol covariate ⚠"
)

kable(compare_df,
      caption = "Perbandingan Signifikansi MANOVA vs MANCOVA",
      col.names = c("Efek", "p-value MANOVA\n(Wilks)", "p-value MANCOVA\n(Pillai)", "Perubahan")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(4, bold = TRUE)
Perbandingan Signifikansi MANOVA vs MANCOVA
Efek p-value MANOVA (Wilks
p-value MANCOVA
(Pilla
)|Perubahan
sex sex 0.1663 0.1669 Konsisten ✓
address address 0.0136 0.0138 Konsisten ✓

Tahap 10 — Post-hoc & Interpretasi Hasil


10A — Post-hoc MANOVA: Tukey pairwise per DV

Post-hoc dilakukan pada variabel yang signifikan di MANOVA.
Karena X1 (sex) hanya 2 level, pairwise langsung informatif.

if (!require(emmeans))   install.packages("emmeans")
if (!require(multcomp))  install.packages("multcomp")
## Loading required package: multcomp
## Warning: package 'multcomp' was built under R version 4.5.3
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 4.5.3
## 
## Attaching package: 'mvtnorm'
## The following object is masked from 'package:effectsize':
## 
##     standardize
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:MVN':
## 
##     royston
## Loading required package: TH.data
## Warning: package 'TH.data' was built under R version 4.5.3
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
library(emmeans)
library(multcomp)

Post-hoc sex × G1

lm_G1_sex <- lm(G1 ~ sex, data = df)
emm_ph_G1_sex <- emmeans(lm_G1_sex, ~ sex)

cat("===== Post-hoc: sex → G1 (Tukey) =====\n")
## ===== Post-hoc: sex → G1 (Tukey) =====
print(pairs(emm_ph_G1_sex, adjust = "tukey"))
##  contrast                estimate    SE  df t.ratio p.value
##  Perempuan - (Laki-laki)    -0.61 0.333 393  -1.828  0.0683

Post-hoc sex × G2

lm_G2_sex <- lm(G2 ~ sex, data = df)
emm_ph_G2_sex <- emmeans(lm_G2_sex, ~ sex)

cat("===== Post-hoc: sex → G2 (Tukey) =====\n")
## ===== Post-hoc: sex → G2 (Tukey) =====
print(pairs(emm_ph_G2_sex, adjust = "tukey"))
##  contrast                estimate    SE  df t.ratio p.value
##  Perempuan - (Laki-laki)   -0.685 0.378 393  -1.814  0.0705

Post-hoc address × G1

lm_G1_addr <- lm(G1 ~ address, data = df)
emm_ph_G1_addr <- emmeans(lm_G1_addr, ~ address)

cat("===== Post-hoc: address → G1 (Tukey) =====\n")
## ===== Post-hoc: address → G1 (Tukey) =====
print(pairs(emm_ph_G1_addr, adjust = "tukey"))
##  contrast      estimate    SE  df t.ratio p.value
##  Rural - Urban   -0.555 0.401 393  -1.385  0.1668

Post-hoc address × G2

lm_G2_addr <- lm(G2 ~ address, data = df)
emm_ph_G2_addr <- emmeans(lm_G2_addr, ~ address)

cat("===== Post-hoc: address → G2 (Tukey) =====\n")
## ===== Post-hoc: address → G2 (Tukey) =====
print(pairs(emm_ph_G2_addr, adjust = "tukey"))
##  contrast      estimate    SE  df t.ratio p.value
##  Rural - Urban    -1.14 0.452 393  -2.519  0.0122

Visualisasi post-hoc: means + CI per grup

plot_posthoc <- function(emm_obj, fill_vals, title_str, y_lbl) {
  df_p <- as.data.frame(summary(emm_obj))
  xv   <- names(df_p)[1]
  ggplot(df_p, aes_string(x = xv, y = "emmean", fill = xv)) +
    geom_col(alpha = 0.8, width = 0.5, color = "white") +
    geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
                  width = 0.15, linewidth = 0.8, color = "gray40") +
    geom_text(aes(label = round(emmean, 2)),
              vjust = -0.5, size = 4.2, fontface = "bold") +
    scale_fill_manual(values = fill_vals) +
    labs(title = title_str, x = xv, y = y_lbl) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "none")
}

p_ph1 <- plot_posthoc(emm_ph_G1_sex,
  c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2"),
  "Post-hoc: sex → G1", "Mean G1")

p_ph2 <- plot_posthoc(emm_ph_G2_sex,
  c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2"),
  "Post-hoc: sex → G2", "Mean G2")

p_ph3 <- plot_posthoc(emm_ph_G1_addr,
  c("Rural" = "#FF8F00", "Urban" = "#388E3C"),
  "Post-hoc: address → G1", "Mean G1")

p_ph4 <- plot_posthoc(emm_ph_G2_addr,
  c("Rural" = "#FF8F00", "Urban" = "#388E3C"),
  "Post-hoc: address → G2", "Mean G2")

grid.arrange(p_ph1, p_ph2, p_ph3, p_ph4, ncol = 2,
             top = "Post-hoc Pairwise Comparisons (Tukey) — MANOVA")


10B — Effect Size (Partial η²) Semua Model

library(effectsize)

# MANOVA — univariate follow-up
aov_manova_G1 <- aov(G1 ~ sex + address, data = df)
aov_manova_G2 <- aov(G2 ~ sex + address, data = df)

eta_G1 <- eta_squared(aov_manova_G1, partial = TRUE)
eta_G2 <- eta_squared(aov_manova_G2, partial = TRUE)

cat("===== Effect Size MANOVA — Y1 (G1) =====\n")
## ===== Effect Size MANOVA — Y1 (G1) =====
print(eta_G1)
## # Effect Size for ANOVA (Type I)
## 
## Parameter | Eta2 (partial) |       95% CI
## -----------------------------------------
## sex       |       8.48e-03 | [0.00, 1.00]
## address   |       5.28e-03 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].
cat("\n===== Effect Size MANOVA — Y2 (G2) =====\n")
## 
## ===== Effect Size MANOVA — Y2 (G2) =====
print(eta_G2)
## # Effect Size for ANOVA (Type I)
## 
## Parameter | Eta2 (partial) |       95% CI
## -----------------------------------------
## sex       |       8.44e-03 | [0.00, 1.00]
## address   |           0.02 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].
# ANCOVA
cat("\n===== Effect Size ANCOVA — G1 ~ sex + absences =====\n")
## 
## ===== Effect Size ANCOVA — G1 ~ sex + absences =====
print(eta_squared(Anova(ancova_model, type = 2), partial = TRUE))
## # Effect Size for ANOVA (Type II)
## 
## Parameter | Eta2 (partial) |       95% CI
## -----------------------------------------
## sex       |       8.10e-03 | [0.00, 1.00]
## absences  |       6.26e-04 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].
# MANCOVA univariate
aov_mc_G1 <- aov(G1 ~ sex + address + absences, data = df)
aov_mc_G2 <- aov(G2 ~ sex + address + absences, data = df)

cat("\n===== Effect Size MANCOVA — Y1 (G1) =====\n")
## 
## ===== Effect Size MANCOVA — Y1 (G1) =====
print(eta_squared(aov_mc_G1, partial = TRUE))
## # Effect Size for ANOVA (Type I)
## 
## Parameter | Eta2 (partial) |       95% CI
## -----------------------------------------
## sex       |       8.48e-03 | [0.00, 1.00]
## address   |       5.28e-03 | [0.00, 1.00]
## absences  |       5.25e-04 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].
cat("\n===== Effect Size MANCOVA — Y2 (G2) =====\n")
## 
## ===== Effect Size MANCOVA — Y2 (G2) =====
print(eta_squared(aov_mc_G2, partial = TRUE))
## # Effect Size for ANOVA (Type I)
## 
## Parameter | Eta2 (partial) |       95% CI
## -----------------------------------------
## sex       |       8.44e-03 | [0.00, 1.00]
## address   |           0.02 | [0.00, 1.00]
## absences  |       4.92e-04 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

Tabel gabungan effect size

# Ambil nilai partial eta squared
get_eta <- function(model, dv_name, model_name) {
  eta_obj <- tryCatch(
    eta_squared(model, partial = TRUE),
    error = function(e) NULL
  )
  if (is.null(eta_obj)) return(NULL)
  df_eta <- as.data.frame(eta_obj)
  df_eta$DV    <- dv_name
  df_eta$Model <- model_name
  df_eta
}

eta_rows <- rbind(
  get_eta(aov_manova_G1, "G1", "MANOVA"),
  get_eta(aov_manova_G2, "G2", "MANOVA"),
  get_eta(aov_mc_G1,     "G1", "MANCOVA"),
  get_eta(aov_mc_G2,     "G2", "MANCOVA")
)

if (!is.null(eta_rows)) {
  eta_rows$Interpretasi <- ifelse(
    eta_rows$Eta2_partial >= 0.14, "Besar (≥.14)",
    ifelse(eta_rows$Eta2_partial >= 0.06, "Sedang (≥.06)",
    ifelse(eta_rows$Eta2_partial >= 0.01, "Kecil (≥.01)", "Trivial (<.01)"))
  )

  kable(eta_rows[, c("Model", "DV", "Parameter", "Eta2_partial", "CI_low", "CI_high", "Interpretasi")],
        caption = "Tabel Effect Size (Partial η²) — MANOVA & MANCOVA",
        digits  = 4,
        row.names = FALSE) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                  full_width = TRUE) %>%
    column_spec(7, bold = TRUE,
      color = ifelse(eta_rows$Eta2_partial >= 0.06, "darkgreen",
              ifelse(eta_rows$Eta2_partial >= 0.01, "orange", "red")))
}
Tabel Effect Size (Partial η²) — MANOVA & MANCOVA
Model DV Parameter Eta2_partial CI_low CI_high Interpretasi
MANOVA G1 sex 0.0085 0.0000 1 Trivial (<.01)
MANOVA G1 address 0.0053 0.0000 1 Trivial (<.01)
MANOVA G2 sex 0.0084 0.0000 1 Trivial (<.01)
MANOVA G2 address 0.0167 0.0022 1 Kecil (≥.01)
MANCOVA G1 sex 0.0085 0.0000 1 Trivial (<.01)
MANCOVA G1 address 0.0053 0.0000 1 Trivial (<.01)
MANCOVA G1 absences 0.0005 0.0000 1 Trivial (<.01)
MANCOVA G2 sex 0.0084 0.0000 1 Trivial (<.01)
MANCOVA G2 address 0.0167 0.0022 1 Kecil (≥.01)
MANCOVA G2 absences 0.0005 0.0000 1 Trivial (<.01)