Tahap 1 — Load Data & Inspeksi Awal

Install & load library

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.5.3

## Warning: package 'ggplot2' was built under R version 4.5.3

## Warning: package 'readr' was built under R version 4.5.3

## Warning: package 'forcats' was built under R version 4.5.3

## Warning: package 'lubridate' was built under R version 4.5.3

## ── Attaching core tidyverse packages ─────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggplot2)       
library(knitr)

## Warning: package 'knitr' was built under R version 4.5.3

library(kableExtra)

## Warning: package 'kableExtra' was built under R version 4.5.3

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(gridExtra)

## Warning: package 'gridExtra' was built under R version 4.5.3

## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

library(skimr)

## Warning: package 'skimr' was built under R version 4.5.3

library(DataExplorer)

## Warning: package 'DataExplorer' was built under R version 4.5.3

if (!require(MVN))       install.packages("MVN")

## Loading required package: MVN

## Warning: package 'MVN' was built under R version 4.5.3

## Registered S3 method overwritten by 'lme4':
##   method           from
##   na.action.merMod car

if (!require(ggpubr))    install.packages("ggpubr")

## Loading required package: ggpubr

## Warning: package 'ggpubr' was built under R version 4.5.3

if (!require(car))     install.packages("car")

## Loading required package: car

## Warning: package 'car' was built under R version 4.5.3

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.5.3

## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

if (!require(heplots)) install.packages("heplots")

## Loading required package: heplots

## Warning: package 'heplots' was built under R version 4.5.3

## Loading required package: broom

## Warning: package 'broom' was built under R version 4.5.3

library(car)
library(heplots)  

library(MVN)
library(ggpubr)

Baca data

df_raw <- read.csv("student-mat.csv", sep = ";", stringsAsFactors = FALSE)

cat("Jumlah baris  :", nrow(df_raw), "\n")

## Jumlah baris  : 395

cat("Jumlah kolom  :", ncol(df_raw), "\n")

## Jumlah kolom  : 33

cat("Nama kolom    :\n")

## Nama kolom    :

print(names(df_raw))

##  [1] "school"     "sex"        "age"        "address"    "famsize"   
##  [6] "Pstatus"    "Medu"       "Fedu"       "Mjob"       "Fjob"      
## [11] "reason"     "guardian"   "traveltime" "studytime"  "failures"  
## [16] "schoolsup"  "famsup"     "paid"       "activities" "nursery"   
## [21] "higher"     "internet"   "romantic"   "famrel"     "freetime"  
## [26] "goout"      "Dalc"       "Walc"       "health"     "absences"  
## [31] "G1"         "G2"         "G3"

Cek struktur data

str(df_raw)

## 'data.frame':    395 obs. of  33 variables:
##  $ school    : chr  "GP" "GP" "GP" "GP" ...
##  $ sex       : chr  "F" "F" "F" "F" ...
##  $ age       : int  18 17 15 15 16 16 16 17 15 15 ...
##  $ address   : chr  "U" "U" "U" "U" ...
##  $ famsize   : chr  "GT3" "GT3" "LE3" "GT3" ...
##  $ Pstatus   : chr  "A" "T" "T" "T" ...
##  $ Medu      : int  4 1 1 4 3 4 2 4 3 3 ...
##  $ Fedu      : int  4 1 1 2 3 3 2 4 2 4 ...
##  $ Mjob      : chr  "at_home" "at_home" "at_home" "health" ...
##  $ Fjob      : chr  "teacher" "other" "other" "services" ...
##  $ reason    : chr  "course" "course" "other" "home" ...
##  $ guardian  : chr  "mother" "father" "mother" "mother" ...
##  $ traveltime: int  2 1 1 1 1 1 1 2 1 1 ...
##  $ studytime : int  2 2 2 3 2 2 2 2 2 2 ...
##  $ failures  : int  0 0 3 0 0 0 0 0 0 0 ...
##  $ schoolsup : chr  "yes" "no" "yes" "no" ...
##  $ famsup    : chr  "no" "yes" "no" "yes" ...
##  $ paid      : chr  "no" "no" "yes" "yes" ...
##  $ activities: chr  "no" "no" "no" "yes" ...
##  $ nursery   : chr  "yes" "no" "yes" "yes" ...
##  $ higher    : chr  "yes" "yes" "yes" "yes" ...
##  $ internet  : chr  "no" "yes" "yes" "yes" ...
##  $ romantic  : chr  "no" "no" "no" "yes" ...
##  $ famrel    : int  4 5 4 3 4 5 4 4 4 5 ...
##  $ freetime  : int  3 3 3 2 3 4 4 1 2 5 ...
##  $ goout     : int  4 3 2 2 2 2 4 4 2 1 ...
##  $ Dalc      : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ Walc      : int  1 1 3 1 2 2 1 1 1 1 ...
##  $ health    : int  3 3 3 5 5 5 3 1 1 5 ...
##  $ absences  : int  6 4 10 2 4 10 0 6 0 0 ...
##  $ G1        : int  5 5 7 15 6 15 12 6 16 14 ...
##  $ G2        : int  6 5 8 14 10 15 12 5 18 15 ...
##  $ G3        : int  6 6 10 15 10 15 11 6 19 15 ...

Preview 10 baris pertama

kable(head(df_raw, 10),
      caption = "Preview 10 Baris Pertama Dataset") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width  = FALSE,
    font_size   = 12
  ) %>%
  scroll_box(width = "100%", height = "320px")

Preview 10 Baris Pertama Dataset
school	sex	age	address	famsize	Pstatus	Medu	Fedu	Mjob	Fjob	reason	guardian	traveltime	studytime	failures	schoolsup	famsup	paid	activities	nursery	higher	internet	romantic	famrel	freetime	goout	Dalc	Walc	health	absences	G1	G2	G3
GP	F	18	U	GT3	A	4	4	at_home	teacher	course	mother	2	2	0	yes	no	no	no	yes	yes	no	no	4	3	4	1	1	3	6	5	6	6
GP	F	17	U	GT3	T	1	1	at_home	other	course	father	1	2	0	no	yes	no	no	no	yes	yes	no	5	3	3	1	1	3	4	5	5	6
GP	F	15	U	LE3	T	1	1	at_home	other	other	mother	1	2	3	yes	no	yes	no	yes	yes	yes	no	4	3	2	2	3	3	10	7	8	10
GP	F	15	U	GT3	T	4	2	health	services	home	mother	1	3	0	no	yes	yes	yes	yes	yes	yes	yes	3	2	2	1	1	5	2	15	14	15
GP	F	16	U	GT3	T	3	3	other	other	home	father	1	2	0	no	yes	yes	no	yes	yes	no	no	4	3	2	1	2	5	4	6	10	10
GP	M	16	U	LE3	T	4	3	services	other	reputation	mother	1	2	0	no	yes	yes	yes	yes	yes	yes	no	5	4	2	1	2	5	10	15	15	15
GP	M	16	U	LE3	T	2	2	other	other	home	mother	1	2	0	no	no	no	no	yes	yes	yes	no	4	4	4	1	1	3	0	12	12	11
GP	F	17	U	GT3	A	4	4	other	teacher	home	mother	2	2	0	yes	yes	no	no	yes	yes	no	no	4	1	4	1	1	1	6	6	5	6
GP	M	15	U	LE3	A	3	2	services	other	home	mother	1	2	0	no	yes	yes	no	yes	yes	yes	no	4	2	2	1	1	1	0	16	18	19
GP	M	15	U	GT3	T	3	4	other	other	home	mother	1	2	0	no	yes	yes	yes	yes	yes	yes	no	5	5	1	1	1	5	0	14	15	15

Cek tipe data setiap kolom

tipe_df <- data.frame(
  Kolom      = names(df_raw),
  Tipe       = sapply(df_raw, class),
  Contoh_Isi = sapply(df_raw, function(x) paste(head(unique(x), 3), collapse = " | ")),
  row.names  = NULL
)

kable(tipe_df, caption = "Tipe Data Setiap Kolom") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE, font_size = 12) %>%
  scroll_box(width = "100%", height = "400px")

Tipe Data Setiap Kolom
Kolom	Tipe	Contoh_Isi
school	character	GP \| MS
sex	character	F \| M
age	integer	18 \| 17 \| 15
address	character	U \| R
famsize	character	GT3 \| LE3
Pstatus	character	A \| T
Medu	integer	4 \| 1 \| 3
Fedu	integer	4 \| 1 \| 2
Mjob	character	at_home \| health \| other
Fjob	character	teacher \| other \| services
reason	character	course \| other \| home
guardian	character	mother \| father \| other
traveltime	integer	2 \| 1 \| 3
studytime	integer	2 \| 3 \| 1
failures	integer	0 \| 3 \| 2
schoolsup	character	yes \| no
famsup	character	no \| yes
paid	character	no \| yes
activities	character	no \| yes
nursery	character	yes \| no
higher	character	yes \| no
internet	character	no \| yes
romantic	character	no \| yes
famrel	integer	4 \| 5 \| 3
freetime	integer	3 \| 2 \| 4
goout	integer	4 \| 3 \| 2
Dalc	integer	1 \| 2 \| 5
Walc	integer	1 \| 3 \| 2
health	integer	3 \| 5 \| 1
absences	integer	6 \| 4 \| 10
G1	integer	5 \| 7 \| 15
G2	integer	6 \| 5 \| 8
G3	integer	6 \| 10 \| 15

Nilai unik per kolom kategorikal

# Kolom yang bertipe karakter (kandidat kategorikal)
kolom_char <- names(df_raw)[sapply(df_raw, is.character)]

cat("Kolom kategorikal ditemukan:", paste(kolom_char, collapse = ", "), "\n\n")

## Kolom kategorikal ditemukan: school, sex, address, famsize, Pstatus, Mjob, Fjob, reason, guardian, schoolsup, famsup, paid, activities, nursery, higher, internet, romantic

for (kol in kolom_char) {
  cat(">> Kolom:", kol, "\n")
  print(table(df_raw[[kol]]))
  cat("\n")
}

## >> Kolom: school 
## 
##  GP  MS 
## 349  46 
## 
## >> Kolom: sex 
## 
##   F   M 
## 208 187 
## 
## >> Kolom: address 
## 
##   R   U 
##  88 307 
## 
## >> Kolom: famsize 
## 
## GT3 LE3 
## 281 114 
## 
## >> Kolom: Pstatus 
## 
##   A   T 
##  41 354 
## 
## >> Kolom: Mjob 
## 
##  at_home   health    other services  teacher 
##       59       34      141      103       58 
## 
## >> Kolom: Fjob 
## 
##  at_home   health    other services  teacher 
##       20       18      217      111       29 
## 
## >> Kolom: reason 
## 
##     course       home      other reputation 
##        145        109         36        105 
## 
## >> Kolom: guardian 
## 
## father mother  other 
##     90    273     32 
## 
## >> Kolom: schoolsup 
## 
##  no yes 
## 344  51 
## 
## >> Kolom: famsup 
## 
##  no yes 
## 153 242 
## 
## >> Kolom: paid 
## 
##  no yes 
## 214 181 
## 
## >> Kolom: activities 
## 
##  no yes 
## 194 201 
## 
## >> Kolom: nursery 
## 
##  no yes 
##  81 314 
## 
## >> Kolom: higher 
## 
##  no yes 
##  20 375 
## 
## >> Kolom: internet 
## 
##  no yes 
##  66 329 
## 
## >> Kolom: romantic 
## 
##  no yes 
## 263 132

Ringkasan distribusi semua variabel (skimr)

# Ringkasan lengkap: mean, sd, min, max, histogram mini, % missing
skim(df_raw)

Data summary
Name	df_raw
Number of rows	395
Number of columns	33
_______________________
Column type frequency:
character	17
numeric	16
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
school	1	2	2	2
sex	1	1	1	2
address	1	1	1	2
famsize	1	3	3	2
Pstatus	1	1	1	2
Mjob	1	5	8	5
Fjob	1	5	8	5
reason	1	4	10	4
guardian	1	5	6	3
schoolsup	1	2	3	2
famsup	1	2	3	2
paid	1	2	3	2
activities	1	2	3	2
nursery	1	2	3	2
higher	1	2	3	2
internet	1	2	3	2
romantic	1	2	3	2

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
age	1	16.70	1.28	15	16	17	18	22	▇▅▅▁▁
Medu	1	2.75	1.09	0	2	3	4	4	▁▃▆▆▇
Fedu	1	2.52	1.09	0	2	2	3	4	▁▆▇▇▇
traveltime	1	1.45	0.70	1	1	1	2	4	▇▃▁▁▁
studytime	1	2.04	0.84	1	1	2	2	4	▅▇▁▂▁
failures	1	0.33	0.74	0	0	0	0	3	▇▁▁▁▁
famrel	1	3.94	0.90	1	4	4	5	5	▁▁▃▇▅
freetime	1	3.24	1.00	1	3	3	4	5	▁▃▇▆▂
goout	1	3.11	1.11	1	2	3	4	5	▂▆▇▅▃
Dalc	1	1.48	0.89	1	1	1	2	5	▇▂▁▁▁
Walc	1	2.29	1.29	1	1	2	3	5	▇▅▅▃▂
health	1	3.55	1.39	1	3	4	5	5	▂▂▅▃▇
absences	1	5.71	8.00	0	0	4	8	75	▇▁▁▁▁
G1	1	10.91	3.32	3	8	11	13	19	▂▇▇▆▂
G2	1	10.71	3.76	0	9	11	13	19	▁▂▇▆▂
G3	1	10.42	4.58	0	8	11	14	20	▂▃▇▅▁

Tahap 2 — Seleksi & Definisi Variabel

Peta variabel yang digunakan

Peta Variabel untuk MANOVA / ANCOVA / MANCOVA
Kode	Kolom	Tipe	Deskripsi	Dipakai_di
Y1	G1	Kontinu	Nilai periode 1 (0–20)	MANOVA, MANCOVA
Y2	G2	Kontinu	Nilai periode 2 (0–20)	MANOVA, MANCOVA
X1	sex	Kategorikal	Jenis kelamin: F = Perempuan, M = Laki-laki	MANOVA, ANCOVA, MANCOVA
X2	address	Kategorikal	Alamat rumah: U = Urban, R = Rural	MANOVA, MANCOVA
X3	absences	Numerik (covariate)	Jumlah hari tidak masuk sekolah	ANCOVA, MANCOVA

Subset & konversi tipe data

df <- df_raw %>%
  select(
    sex,      # X1 — faktor kategorikal
    address,  # X2 — faktor kategorikal
    absences, # X3 — covariate numerik
    G1,       # Y1 — dependent variable 1
    G2        # Y2 — dependent variable 2
  ) %>%
  mutate(
    sex     = factor(sex,     levels = c("F", "M"),
                     labels  = c("Perempuan", "Laki-laki")),
    address = factor(address, levels = c("R", "U"),
                     labels  = c("Rural", "Urban")),
    G1      = as.numeric(G1),
    G2      = as.numeric(G2)
  )

cat("===== DATASET SETELAH SELEKSI =====\n")

## ===== DATASET SETELAH SELEKSI =====

cat("Dimensi:", nrow(df), "baris x", ncol(df), "kolom\n\n")

## Dimensi: 395 baris x 5 kolom

str(df)

## 'data.frame':    395 obs. of  5 variables:
##  $ sex     : Factor w/ 2 levels "Perempuan","Laki-laki": 1 1 1 1 1 2 2 1 2 2 ...
##  $ address : Factor w/ 2 levels "Rural","Urban": 2 2 2 2 2 2 2 2 2 2 ...
##  $ absences: int  6 4 10 2 4 10 0 6 0 0 ...
##  $ G1      : num  5 5 7 15 6 15 12 6 16 14 ...
##  $ G2      : num  6 5 8 14 10 15 12 5 18 15 ...

Cek hasil konversi

cat("Level X1 (sex)    :", levels(df$sex),     "\n")

## Level X1 (sex)    : Perempuan Laki-laki

cat("Level X2 (address):", levels(df$address), "\n")

## Level X2 (address): Rural Urban

cat("\nJumlah per grup sex:\n")

## 
## Jumlah per grup sex:

print(table(df$sex))

## 
## Perempuan Laki-laki 
##       208       187

cat("\nJumlah per grup address:\n")

## 
## Jumlah per grup address:

print(table(df$address))

## 
## Rural Urban 
##    88   307

cat("\nKombinasi X1 x X2:\n")

## 
## Kombinasi X1 x X2:

print(table(df$sex, df$address))

##            
##             Rural Urban
##   Perempuan    44   164
##   Laki-laki    44   143

Preview data bersih

kable(head(df, 10),
      caption = "Preview Dataset Setelah Seleksi Variabel") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Preview Dataset Setelah Seleksi Variabel
sex	address	absences	G1	G2
Perempuan	Urban	6	5	6
Perempuan	Urban	4	5	5
Perempuan	Urban	10	7	8
Perempuan	Urban	2	15	14
Perempuan	Urban	4	6	10
Laki-laki	Urban	10	15	15
Laki-laki	Urban	0	12	12
Perempuan	Urban	6	6	5
Laki-laki	Urban	0	16	18
Laki-laki	Urban	0	14	15

Tahap 3 — Cek & Tangani Missing Values

Deteksi missing values

mv_count <- colSums(is.na(df))
mv_pct   <- round(colMeans(is.na(df)) * 100, 2)

mv_tabel <- data.frame(
  Variabel     = names(mv_count),
  Jumlah_NA    = as.integer(mv_count),
  Persen_NA    = paste0(mv_pct, "%"),
  Status       = ifelse(mv_count == 0, "Lengkap ✓", "Ada missing ✗")
)

kable(mv_tabel, caption = "Ringkasan Missing Values per Variabel") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  column_spec(4,
    color     = ifelse(mv_tabel$Jumlah_NA == 0, "darkgreen", "red"),
    bold      = TRUE
  )

Ringkasan Missing Values per Variabel
	Variabel	Persen_NA	Status
sex	sex	0%	Lengkap ✓
address	address	0%	Lengkap ✓
absences	absences	0%	Lengkap ✓
G1	G1	0%	Lengkap ✓
G2	G2	0%	Lengkap ✓

total_mv <- sum(is.na(df))
cat("Total keseluruhan missing values:", total_mv, "\n")

## Total keseluruhan missing values: 0

if (total_mv == 0) {
  cat(">> Dataset BERSIH — tidak ada missing values.\n")
  cat(">> Analisis dapat langsung dilanjutkan ke tahap EDA.\n")
} else {
  cat(">> Ditemukan missing values! Lakukan penanganan di bawah.\n")
}

## >> Dataset BERSIH — tidak ada missing values.
## >> Analisis dapat langsung dilanjutkan ke tahap EDA.

Visualisasi pola missing values

plot_missing(df,
             title    = "Persentase Missing Values per Variabel",
             ggtheme  = theme_minimal(base_size = 13),
             theme_config = list(legend.position = "bottom"))

Panduan penanganan (jika ada missing values)

# --- Opsi A: Hapus baris yang mengandung NA (listwise deletion) ---
# Aman jika jumlah NA < 5% dari total data
df_clean <- df %>% drop_na()
cat("Baris setelah drop_na:", nrow(df_clean), "\n")

# --- Opsi B: Imputasi median untuk variabel NUMERIK ---
# Digunakan untuk: absences, G1, G2
df_imputasi <- df %>%
  mutate(
    absences = ifelse(is.na(absences), median(absences, na.rm = TRUE), absences),
    G1       = ifelse(is.na(G1),       median(G1,       na.rm = TRUE), G1),
    G2       = ifelse(is.na(G2),       median(G2,       na.rm = TRUE), G2)
  )

# --- Opsi C: Imputasi modus untuk variabel KATEGORIKAL ---
# Digunakan untuk: sex, address
modus <- function(x) {
  ux <- unique(x[!is.na(x)])
  ux[which.max(tabulate(match(x, ux)))]
}

df_imputasi <- df_imputasi %>%
  mutate(
    sex     = ifelse(is.na(sex),     modus(sex),     sex),
    address = ifelse(is.na(address), modus(address), address)
  )

# --- Verifikasi setelah imputasi ---
cat("Missing values setelah imputasi:", sum(is.na(df_imputasi)), "\n")

Konfirmasi data siap dianalisis

cat("===== KONFIRMASI DATASET FINAL =====\n")

## ===== KONFIRMASI DATASET FINAL =====

cat("Dimensi akhir   :", nrow(df), "baris x", ncol(df), "kolom\n")

## Dimensi akhir   : 395 baris x 5 kolom

cat("Missing values  :", sum(is.na(df)), "\n")

## Missing values  : 0

cat("Variabel        :", paste(names(df), collapse = ", "), "\n")

## Variabel        : sex, address, absences, G1, G2

cat("\nDataset siap untuk EDA dan uji asumsi.\n")

## 
## Dataset siap untuk EDA dan uji asumsi.

Tahap 4 — Eksplorasi Data (EDA)

Statistik deskriptif keseluruhan

deskriptif <- df %>%
  summarise(
    across(
      c(G1, G2, absences),
      list(
        N      = ~n(),
        Mean   = ~round(mean(.),   2),
        SD     = ~round(sd(.),     2),
        Median = ~round(median(.), 2),
        Min    = ~min(.),
        Max    = ~max(.)
      ),
      .names = "{.col}__{.fn}"
    )
  ) %>%
  pivot_longer(everything(),
               names_to  = c("Variabel", "Statistik"),
               names_sep = "__") %>%
  pivot_wider(names_from = Statistik, values_from = value)

kable(deskriptif,
      caption = "Statistik Deskriptif Variabel Numerik (Seluruh Data)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Statistik Deskriptif Variabel Numerik (Seluruh Data)
Variabel	N	Mean	SD	Median	Min	Max
G1	395	10.91	3.32	11	3	19
G2	395	10.71	3.76	11	0	19
absences	395	5.71	8.00	4	0	75

Statistik deskriptif per grup X1 (sex)

df %>%
  group_by(sex) %>%
  summarise(
    N        = n(),
    Mean_G1  = round(mean(G1),       2),
    SD_G1    = round(sd(G1),         2),
    Mean_G2  = round(mean(G2),       2),
    SD_G2    = round(sd(G2),         2),
    Mean_Abs = round(mean(absences), 2),
    SD_Abs   = round(sd(absences),   2)
  ) %>%
  kable(caption = "Statistik Deskriptif per Kelompok Jenis Kelamin (X1)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Statistik Deskriptif per Kelompok Jenis Kelamin (X1)
sex	N	Mean_G1	SD_G1	Mean_G2	SD_G2	Mean_Abs	SD_Abs
Perempuan	208	10.62	3.23	10.39	3.64	6.22	9.45
Laki-laki	187	11.23	3.39	11.07	3.87	5.14	5.98

Statistik deskriptif per grup X2 (address)

df %>%
  group_by(address) %>%
  summarise(
    N        = n(),
    Mean_G1  = round(mean(G1),       2),
    SD_G1    = round(sd(G1),         2),
    Mean_G2  = round(mean(G2),       2),
    SD_G2    = round(sd(G2),         2),
    Mean_Abs = round(mean(absences), 2),
    SD_Abs   = round(sd(absences),   2)
  ) %>%
  kable(caption = "Statistik Deskriptif per Kelompok Alamat (X2)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Statistik Deskriptif per Kelompok Alamat (X2)
address	N	Mean_G1	SD_G1	Mean_G2	SD_G2	Mean_Abs	SD_Abs
Rural	88	10.48	3.42	9.83	3.89	6.12	9.57
Urban	307	11.03	3.29	10.97	3.69	5.59	7.51

Statistik deskriptif kombinasi X1 × X2

df %>%
  group_by(sex, address) %>%
  summarise(
    N        = n(),
    Mean_G1  = round(mean(G1), 2),
    SD_G1    = round(sd(G1),   2),
    Mean_G2  = round(mean(G2), 2),
    SD_G2    = round(sd(G2),   2),
    .groups  = "drop"
  ) %>%
  kable(caption = "Statistik Deskriptif Kombinasi X1 × X2") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Statistik Deskriptif Kombinasi X1 × X2
sex	address	N	Mean_G1	SD_G1	Mean_G2	SD_G2
Perempuan	Rural	44	10.30	3.14	9.64	3.72
Perempuan	Urban	164	10.71	3.26	10.59	3.61
Laki-laki	Rural	44	10.66	3.70	10.02	4.09
Laki-laki	Urban	143	11.41	3.29	11.40	3.75

Visualisasi distribusi

Histogram Y1 (G1) dan Y2 (G2)

df_long_hist <- df %>%
  pivot_longer(cols = c(G1, G2),
               names_to  = "Periode",
               values_to = "Nilai")

mean_lines <- df_long_hist %>%
  group_by(Periode) %>%
  summarise(mean_val = mean(Nilai, na.rm = TRUE))

df_long_hist %>%
  ggplot(aes(x = Nilai, fill = Periode)) +
  geom_histogram(binwidth = 1, color = "white", alpha = 0.85) +
  geom_vline(data = mean_lines,
             aes(xintercept = mean_val),
             linetype = "dashed", color = "gray40", linewidth = 0.5) +
  facet_wrap(~Periode,
             labeller = labeller(Periode = c(G1 = "Y1: G1 — Nilai Periode 1",
                                              G2 = "Y2: G2 — Nilai Periode 2"))) +
  scale_fill_manual(values = c("G1" = "#7B1FA2", "G2" = "#0288D1")) +
  labs(title = "Distribusi Nilai G1 (Y1) dan G2 (Y2)",
       x = "Nilai (0–20)", y = "Frekuensi", fill = "DV") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

Boxplot Y1 & Y2 per X1 (sex)

df %>%
  pivot_longer(cols = c(G1, G2),
               names_to  = "Periode",
               values_to = "Nilai") %>%
  ggplot(aes(x = sex, y = Nilai, fill = sex)) +
  geom_boxplot(alpha = 0.75, outlier.shape = 21,
               outlier.fill = "white", outlier.color = "gray50") +
  geom_jitter(width = 0.15, alpha = 0.2, size = 1.2, color = "gray40") +
  stat_summary(fun = mean, geom = "point",
               shape = 18, size = 3, color = "black") +
  facet_wrap(~Periode,
             labeller = labeller(Periode = c(G1 = "Y1: G1 — Periode 1",
                                              G2 = "Y2: G2 — Periode 2"))) +
  scale_fill_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(title    = "Distribusi Nilai G1 & G2 berdasarkan Jenis Kelamin (X1)",
       subtitle = "Berlian hitam = rata-rata kelompok",
       x = "Jenis Kelamin", y = "Nilai", fill = "Sex") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

Boxplot Y1 & Y2 per X2 (address)

df %>%
  pivot_longer(cols = c(G1, G2),
               names_to  = "Periode",
               values_to = "Nilai") %>%
  ggplot(aes(x = address, y = Nilai, fill = address)) +
  geom_boxplot(alpha = 0.75, outlier.shape = 21,
               outlier.fill = "white", outlier.color = "gray50") +
  geom_jitter(width = 0.15, alpha = 0.2, size = 1.2, color = "gray40") +
  stat_summary(fun = mean, geom = "point",
               shape = 18, size = 3, color = "black") +
  facet_wrap(~Periode,
             labeller = labeller(Periode = c(G1 = "Y1: G1 — Periode 1",
                                              G2 = "Y2: G2 — Periode 2"))) +
  scale_fill_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  labs(title    = "Distribusi Nilai G1 & G2 berdasarkan Alamat (X2)",
       subtitle = "Berlian hitam = rata-rata kelompok",
       x = "Alamat", y = "Nilai", fill = "Address") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

Boxplot kombinasi X1 × X2

df %>%
  pivot_longer(cols = c(G1, G2),
               names_to  = "Periode",
               values_to = "Nilai") %>%
  ggplot(aes(x = sex, y = Nilai, fill = address)) +
  geom_boxplot(alpha = 0.75, position = position_dodge(width = 0.8)) +
  stat_summary(fun = mean, geom = "point",
               shape = 18, size = 2.5, color = "black",
               position = position_dodge(width = 0.8)) +
  facet_wrap(~Periode,
             labeller = labeller(Periode = c(G1 = "Y1: G1", G2 = "Y2: G2"))) +
  scale_fill_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  labs(title    = "Nilai G1 & G2 berdasarkan Kombinasi Sex × Address",
       subtitle = "Melihat pola interaksi X1 × X2",
       x = "Jenis Kelamin", y = "Nilai", fill = "Address") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

Scatter plot X3 (absences) vs Y1 & Y2

p_abs_G1 <- ggplot(df, aes(x = absences, y = G1, color = sex, shape = address)) +
  geom_point(alpha = 0.55, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 0.9, aes(group = sex)) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(title = "X3 (Absences) vs Y1 (G1)",
       x = "Jumlah Absen", y = "Nilai G1",
       color = "Sex", shape = "Address") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

p_abs_G2 <- ggplot(df, aes(x = absences, y = G2, color = sex, shape = address)) +
  geom_point(alpha = 0.55, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 0.9, aes(group = sex)) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(title = "X3 (Absences) vs Y2 (G2)",
       x = "Jumlah Absen", y = "Nilai G2",
       color = "Sex", shape = "Address") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

grid.arrange(p_abs_G1, p_abs_G2, ncol = 2)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?

Distribusi X3 (absences) per grup

df %>%
  pivot_longer(cols = c(sex, address),
               names_to  = "Faktor",
               values_to = "Grup") %>%
  ggplot(aes(x = Grup, y = absences, fill = Grup)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21,
               outlier.fill = "white") +
  geom_jitter(width = 0.15, alpha = 0.2, size = 1, color = "gray40") +
  facet_wrap(~Faktor, scales = "free_x",
             labeller = labeller(Faktor = c(sex     = "X1: Sex",
                                             address = "X2: Address"))) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Distribusi Covariate X3 (Absences) per Grup",
       x = "Grup", y = "Jumlah Absen") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

Korelasi antar variabel numerik

# Matriks korelasi antara Y1, Y2, dan X3
mat_cor <- cor(df[, c("G1", "G2", "absences")], method = "pearson")

kable(round(mat_cor, 4),
      caption = "Matriks Korelasi Pearson — Y1, Y2, X3") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Matriks Korelasi Pearson — Y1, Y2, X3
	G1	G2	absences
G1	1.0000	0.8521	-0.0310
G2	0.8521	1.0000	-0.0318
absences	-0.0310	-0.0318	1.0000

# Visualisasi korelasi dengan ggplot
cor_data <- as.data.frame(mat_cor) %>%
  rownames_to_column("Var1") %>%
  pivot_longer(-Var1, names_to = "Var2", values_to = "r")

ggplot(cor_data, aes(x = Var1, y = Var2, fill = r)) +
  geom_tile(color = "white", linewidth = 0.8) +
  geom_text(aes(label = round(r, 3)), size = 4.5, color = "white", fontface = "bold") +
  scale_fill_gradient2(low = "#1976D2", mid = "white", high = "#E91E8C",
                       midpoint = 0, limits = c(-1, 1)) +
  labs(title = "Heatmap Korelasi Pearson",
       subtitle = "Y1 (G1), Y2 (G2), X3 (absences)",
       x = NULL, y = NULL, fill = "r") +
  coord_fixed() +
  theme_minimal(base_size = 13) +
  theme(axis.text = element_text(size = 12, face = "bold"))

r_G1_G2  <- round(mat_cor["G1", "G2"],       3)
r_G1_abs <- round(mat_cor["G1", "absences"],  3)
r_G2_abs <- round(mat_cor["G2", "absences"],  3)

cat("===== INTERPRETASI KORELASI =====\n\n")

## ===== INTERPRETASI KORELASI =====

cat("r (G1, G2)       =", r_G1_G2,
    "—", ifelse(abs(r_G1_G2) < 0.90,
                "OK: tidak ada multikolinearitas ekstrem ✓",
                "PERINGATAN: korelasi terlalu tinggi ✗"), "\n")

## r (G1, G2)       = 0.852 — OK: tidak ada multikolinearitas ekstrem ✓

cat("r (G1, absences) =", r_G1_abs, "— hubungan covariate vs Y1\n")

## r (G1, absences) = -0.031 — hubungan covariate vs Y1

cat("r (G2, absences) =", r_G2_abs, "— hubungan covariate vs Y2\n")

## r (G2, absences) = -0.032 — hubungan covariate vs Y2

cat("\n>> Syarat MANOVA/MANCOVA: r antar DV < 0.90\n")

## 
## >> Syarat MANOVA/MANCOVA: r antar DV < 0.90

cat(">> Hasil:", ifelse(abs(r_G1_G2) < 0.90,
                        "Asumsi terpenuhi — lanjut ke uji asumsi berikutnya.",
                        "Pertimbangkan ulang pilihan variabel DV."), "\n")

## >> Hasil: Asumsi terpenuhi — lanjut ke uji asumsi berikutnya.

Profil data otomatis (DataExplorer)

# Menghasilkan laporan ringkas: distribusi, missing, korelasi
introduce(df) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("Info") %>%
  rename(Nilai = V1) %>%
  kable(caption = "Profil Dataset (DataExplorer)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Profil Dataset (DataExplorer)
Info	Nilai
rows	395
columns	5
discrete_columns	2
continuous_columns	3
all_missing_columns	0
total_missing_values	0
complete_rows	395
total_observations	1975
memory_usage	14040

Uji Normalitas

##    UJI NORMALITAS (Univariat & Multivariat)

## Variabel DV : Y1 = G1,  Y2 = G2

## Faktor      : X1 = sex, X2 = address

## Syarat lulus: p > 0.05 → data berdistribusi normal

Uji Shapiro-Wilk Univariat per Variabel Keseluruhan

sw_G1 <- shapiro.test(df$G1)
sw_G2 <- shapiro.test(df$G2)

tabel_sw_global <- data.frame(
  Variabel  = c("Y1 (G1)", "Y2 (G2)"),
  Statistik = c(round(sw_G1$statistic, 4), round(sw_G2$statistic, 4)),
  p_value   = c(round(sw_G1$p.value,   4), round(sw_G2$p.value,   4)),
  Kesimpulan = c(
    ifelse(sw_G1$p.value > 0.05, "Normal (p > 0.05)", "Tidak Normal (p ≤ 0.05)"),
    ifelse(sw_G2$p.value > 0.05, "Normal (p > 0.05)", "Tidak Normal (p ≤ 0.05)")
  )
)

kable(tabel_sw_global,
      caption = "Shapiro-Wilk — Keseluruhan Data (Y1 & Y2)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(4,
    color = ifelse(tabel_sw_global$p_value > 0.05, "darkgreen", "red"),
    bold  = TRUE)

Shapiro-Wilk — Keseluruhan Data (Y1 & Y2)
Variabel	Statistik	p_value	Kesimpulan
Y1 (G1)	0.9749	0	Tidak Normal (p ≤ 0.05)
Y2 (G2)	0.9691	0	Tidak Normal (p ≤ 0.05)

Uji Shapiro-Wilk per Grup X1 (sex)

hasil_sw_sex <- df %>%
  group_by(sex) %>%
  summarise(
    W_G1  = round(shapiro.test(G1)$statistic, 4),
    p_G1  = round(shapiro.test(G1)$p.value,   4),
    W_G2  = round(shapiro.test(G2)$statistic, 4),
    p_G2  = round(shapiro.test(G2)$p.value,   4),
    .groups = "drop"
  ) %>%
  mutate(
    Ket_G1 = ifelse(p_G1 > 0.05, "Normal", "Tidak Normal"),
    Ket_G2 = ifelse(p_G2 > 0.05, "Normal", "Tidak Normal")
  )

kable(hasil_sw_sex,
      caption = "Shapiro-Wilk per Grup X1 (sex) — Y1 (G1) & Y2 (G2)",
      col.names = c("Grup (sex)", "W (G1)", "p (G1)", "W (G2)", "p (G2)",
                    "Ket. G1", "Ket. G2")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(6, color = ifelse(hasil_sw_sex$p_G1 > 0.05, "darkgreen", "red"), bold = TRUE) %>%
  column_spec(7, color = ifelse(hasil_sw_sex$p_G2 > 0.05, "darkgreen", "red"), bold = TRUE)

Shapiro-Wilk per Grup X1 (sex) — Y1 (G1) & Y2 (G2)
Grup (sex)	W (G1)	p (G1)	W (G2)	p (G2)	Ket. G1	Ket. G2
Perempuan	0.9655	0.0001	0.9665	1e-04	Tidak Normal	Tidak Normal
Laki-laki	0.9814	0.0136	0.9680	3e-04	Tidak Normal	Tidak Normal

Uji Shapiro-Wilk per Grup X2 (address)

hasil_sw_address <- df %>%
  group_by(address) %>%
  summarise(
    W_G1  = round(shapiro.test(G1)$statistic, 4),
    p_G1  = round(shapiro.test(G1)$p.value,   4),
    W_G2  = round(shapiro.test(G2)$statistic, 4),
    p_G2  = round(shapiro.test(G2)$p.value,   4),
    .groups = "drop"
  ) %>%
  mutate(
    Ket_G1 = ifelse(p_G1 > 0.05, "Normal", "Tidak Normal"),
    Ket_G2 = ifelse(p_G2 > 0.05, "Normal", "Tidak Normal")
  )

kable(hasil_sw_address,
      caption = "Shapiro-Wilk per Grup X2 (address) — Y1 (G1) & Y2 (G2)",
      col.names = c("Grup (address)", "W (G1)", "p (G1)", "W (G2)", "p (G2)",
                    "Ket. G1", "Ket. G2")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(6, color = ifelse(hasil_sw_address$p_G1 > 0.05, "darkgreen", "red"), bold = TRUE) %>%
  column_spec(7, color = ifelse(hasil_sw_address$p_G2 > 0.05, "darkgreen", "red"), bold = TRUE)

Shapiro-Wilk per Grup X2 (address) — Y1 (G1) & Y2 (G2)
Grup (address)	W (G1)	p (G1)	W (G2)	p (G2)	Ket. G1	Ket. G2
Rural	0.9687	0.0316	0.9698	0.0375	Tidak Normal	Tidak Normal
Urban	0.9740	0.0000	0.9668	0.0000	Tidak Normal	Tidak Normal

QQ-Plot Univariat (Y1 & Y2 per Grup)

p_qq_G1_sex <- ggqqplot(df, x = "G1", color = "sex",
                         palette = c("#E91E8C", "#1976D2"),
                         title   = "QQ-Plot Y1 (G1) per Sex") +
  theme_minimal(base_size = 11)

p_qq_G2_sex <- ggqqplot(df, x = "G2", color = "sex",
                         palette = c("#E91E8C", "#1976D2"),
                         title   = "QQ-Plot Y2 (G2) per Sex") +
  theme_minimal(base_size = 11)

p_qq_G1_addr <- ggqqplot(df, x = "G1", color = "address",
                          palette = c("#FF8F00", "#388E3C"),
                          title   = "QQ-Plot Y1 (G1) per Address") +
  theme_minimal(base_size = 11)

p_qq_G2_addr <- ggqqplot(df, x = "G2", color = "address",
                          palette = c("#FF8F00", "#388E3C"),
                          title   = "QQ-Plot Y2 (G2) per Address") +
  theme_minimal(base_size = 11)

grid.arrange(p_qq_G1_sex, p_qq_G2_sex,
             p_qq_G1_addr, p_qq_G2_addr,
             ncol = 2,
             top  = "QQ-Plot Univariat — Y1 & Y2 per Grup (X1 & X2)")

Uji Normalitas Multivariat (Mardia) dan cbind(G1, G2) keseluruhan

mvn_global <- MVN::mvn(data         = df[, c("G1", "G2")],
                       mvn_test     = "mardia",
                       univariate_test = "SW")

cat("MARDIA'S TEST — Keseluruhan Data\n")

## MARDIA'S TEST — Keseluruhan Data

cat(" Multivariat:\n")

##  Multivariat:

summary(mvn_global, select = "mvn")

##

## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────

##              Test Statistic p.value     Method          MVN
## 1 Mardia Skewness   291.175  <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis    19.052  <0.001 asymptotic ✗ Not normal

cat("\n Univariat (Shapiro-Wilk via MVN):\n")

## 
##  Univariat (Shapiro-Wilk via MVN):

summary(mvn_global, select = "univariate")

##

## ── Univariate Normality Test Results ──────────────────────────────────────────────────────────────────────────────────────────────────────────

##           Test Variable Statistic p.value    Normality
## 1 Shapiro-Wilk       G1     0.975  <0.001 ✗ Not normal
## 2 Shapiro-Wilk       G2     0.969  <0.001 ✗ Not normal

Uji Normalitas Multivariat (Royston)

mvn_royston <- mvn(data    = df[, c("G1", "G2")],
                   mvn_test = "royston")

cat("ROYSTON'S TEST — Keseluruhan Data\n")

## ROYSTON'S TEST — Keseluruhan Data

summary(mvn_royston, select = "mvn")

##

## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────

##      Test Statistic p.value     Method          MVN
## 1 Royston    39.332  <0.001 asymptotic ✗ Not normal

Uji Normalitas Multivariat per Grup X1 (sex)

cat("MARDIA'S TEST per Grup X1 (sex)\n")

## MARDIA'S TEST per Grup X1 (sex)

for (grp in levels(df$sex)) {
  cat(" Grup:", grp, "\n")
  sub_df <- df %>% filter(sex == grp) %>% select(G1, G2)
  
  hasil <- mvn(data    = sub_df,
               mvn_test = "mardia",
               desc    = FALSE)
  
  # print(hasil$multivariateNormality)
  summary(hasil, select = "mvn")
  cat("\n")
}

##  Grup: Perempuan

##

## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────

##              Test Statistic p.value     Method          MVN
## 1 Mardia Skewness   216.849  <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis    17.812  <0.001 asymptotic ✗ Not normal
## 
##  Grup: Laki-laki

## 
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────

##              Test Statistic p.value     Method          MVN
## 1 Mardia Skewness    90.266  <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis     8.687  <0.001 asymptotic ✗ Not normal

Uji Normalitas Multivariat per Grup X2 (address)

# ── Mardia per grup X2 (address) — syarat MANOVA & MANCOVA ──
cat("MARDIA'S TEST per Grup X2 (address)\n")

## MARDIA'S TEST per Grup X2 (address)

for (grp in levels(df$address)) {
  cat("─── Grup:", grp, "───\n")
  sub_df <- df %>% filter(address == grp) %>% select(G1, G2)
  
  hasil <- mvn(data    = sub_df,
               mvn_test = "mardia",
               desc    = FALSE)
  
  # print(hasil$multivariateNormality)
  summary(hasil, select = "mvn")
  cat("\n")
}

## ─── Grup: Rural ───

##

## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────

##              Test Statistic p.value     Method          MVN
## 1 Mardia Skewness    63.074  <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis     6.598  <0.001 asymptotic ✗ Not normal
## 
## ─── Grup: Urban ───

## 
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────

##              Test Statistic p.value     Method          MVN
## 1 Mardia Skewness   193.528  <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis    15.134  <0.001 asymptotic ✗ Not normal

QQ-Plot Multivariat (Chi-Square Plot)

# Keseluruhan
res_global <- MVN::mvn(data     = df[, c("G1", "G2")],
                       mvn_test = "mardia")
p1 <- plot(res_global, diagnostic = "multivariate", type = "qq")
p1 <- p1 + ggplot2::ggtitle("Chi-Square QQ-Plot\n(Keseluruhan Data)")

# Per grup: Perempuan
sub_p <- df %>% filter(sex == "Perempuan") %>% select(G1, G2)
res_p <- MVN::mvn(data     = sub_p,
                  mvn_test = "mardia")
p2 <- plot(res_p, diagnostic = "multivariate", type = "qq")
p2 <- p2 + ggplot2::ggtitle("Chi-Square QQ-Plot\n(Grup: Perempuan)")

gridExtra::grid.arrange(p1, p2, ncol = 2)

Uji Homogenitas Varians & Kovarians

Levene’s Test: Y1 (G1) per X1 (sex)

levene_G1_sex <- leveneTest(G1 ~ sex, data = df, center = mean)

cat("LEVENE'S TEST: Y1 (G1) ~ X1 (sex)\n")

## LEVENE'S TEST: Y1 (G1) ~ X1 (sex)

print(levene_G1_sex)

## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.3946 0.5303
##       393

p_val <- levene_G1_sex$`Pr(>F)`[1]
cat("\nKesimpulan: p =", round(p_val, 4), "→",
    ifelse(p_val > 0.05,
           "Varians HOMOGEN (p > 0.05)",
           "Varians TIDAK HOMOGEN (p ≤ 0.05)"), "\n")

## 
## Kesimpulan: p = 0.5303 → Varians HOMOGEN (p > 0.05)

Levene’s Test: Y2 (G2) per X1 (sex)

levene_G2_sex <- leveneTest(G2 ~ sex, data = df, center = mean)

cat("LEVENE'S TEST: Y2 (G2) ~ X1 (sex)\n")

## LEVENE'S TEST: Y2 (G2) ~ X1 (sex)

print(levene_G2_sex)

## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.6075 0.4362
##       393

p_val <- levene_G2_sex$`Pr(>F)`[1]
cat("\nKesimpulan: p =", round(p_val, 4), "→",
    ifelse(p_val > 0.05,
           "Varians HOMOGEN (p > 0.05)",
           "Varians TIDAK HOMOGEN (p ≤ 0.05)"), "\n")

## 
## Kesimpulan: p = 0.4362 → Varians HOMOGEN (p > 0.05)

Levene’s Test: Y1 (G1) per X2 (address)

levene_G1_addr <- leveneTest(G1 ~ address, data = df, center = mean)

cat("LEVENE'S TEST: Y1 (G1) ~ X2 (address)\n")

## LEVENE'S TEST: Y1 (G1) ~ X2 (address)

print(levene_G1_addr)

## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.0033 0.9543
##       393

p_val <- levene_G1_addr$`Pr(>F)`[1]
cat("\nKesimpulan: p =", round(p_val, 4), "→",
    ifelse(p_val > 0.05,
           "Varians HOMOGEN (p > 0.05)",
           "Varians TIDAK HOMOGEN (p ≤ 0.05)"), "\n")

## 
## Kesimpulan: p = 0.9543 → Varians HOMOGEN (p > 0.05)

Levene’s Test: Y2 (G2) per X2 (address)

levene_G2_addr <- leveneTest(G2 ~ address, data = df, center = mean)

cat("LEVENE'S TEST: Y2 (G2) ~ X2 (address)\n")

## LEVENE'S TEST: Y2 (G2) ~ X2 (address)

print(levene_G2_addr)

## Levene's Test for Homogeneity of Variance (center = mean)
##        Df F value Pr(>F)
## group   1  0.0509 0.8215
##       393

p_val <- levene_G2_addr$`Pr(>F)`[1]
cat("\nKesimpulan: p =", round(p_val, 4), "→",
    ifelse(p_val > 0.05,
           "Varians HOMOGEN (p > 0.05)",
           "Varians TIDAK HOMOGEN (p ≤ 0.05)"), "\n")

## 
## Kesimpulan: p = 0.8215 → Varians HOMOGEN (p > 0.05)

Ringkasan Levene’s Test (Tabel Gabungan)

tabel_levene <- data.frame(
  DV      = c("Y1 (G1)", "Y2 (G2)", "Y1 (G1)", "Y2 (G2)"),
  Faktor  = c("X1 (sex)", "X1 (sex)", "X2 (address)", "X2 (address)"),
  F_stat  = c(
    round(levene_G1_sex$`F value`[1],   4),
    round(levene_G2_sex$`F value`[1],   4),
    round(levene_G1_addr$`F value`[1],  4),
    round(levene_G2_addr$`F value`[1],  4)
  ),
  p_value = c(
    round(levene_G1_sex$`Pr(>F)`[1],    4),
    round(levene_G2_sex$`Pr(>F)`[1],    4),
    round(levene_G1_addr$`Pr(>F)`[1],   4),
    round(levene_G2_addr$`Pr(>F)`[1],   4)
  )
)

tabel_levene$Kesimpulan <- ifelse(
  tabel_levene$p_value > 0.05,
  "Homogen",
  "Tidak Homogen"
)

kable(tabel_levene,
      caption = "Ringkasan Levene's Test — Homogenitas Varians Y1 & Y2") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(5,
    color = ifelse(tabel_levene$p_value > 0.05, "darkgreen", "red"),
    bold  = TRUE)

Ringkasan Levene’s Test — Homogenitas Varians Y1 & Y2
DV	Faktor	F_stat	p_value	Kesimpulan
Y1 (G1)	X1 (sex)	0.3946	0.5303	Homogen
Y2 (G2)	X1 (sex)	0.6075	0.4362	Homogen
Y1 (G1)	X2 (address)	0.0033	0.9543	Homogen
Y2 (G2)	X2 (address)	0.0509	0.8215	Homogen

Visualisasi Homogenitas Varians (Boxplot Levene)

p_var_sex <- df %>%
  pivot_longer(cols = c(G1, G2), names_to = "Periode", values_to = "Nilai") %>%
  ggplot(aes(x = sex, y = Nilai, fill = sex)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21,
               outlier.fill = "white", outlier.color = "gray50") +
  stat_summary(fun = mean, geom = "point",
               shape = 18, size = 3, color = "black") +
  facet_wrap(~Periode,
             labeller = labeller(Periode = c(G1 = "Y1: G1", G2 = "Y2: G2"))) +
  scale_fill_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(title    = "Distribusi Y1 & Y2 per X1 (sex)",
       subtitle = "Berlian = rata-rata | Levene's Test: cek kesamaan penyebaran",
       x = "Jenis Kelamin", y = "Nilai", fill = "Sex") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

p_var_addr <- df %>%
  pivot_longer(cols = c(G1, G2), names_to = "Periode", values_to = "Nilai") %>%
  ggplot(aes(x = address, y = Nilai, fill = address)) +
  geom_boxplot(alpha = 0.7, outlier.shape = 21,
               outlier.fill = "white", outlier.color = "gray50") +
  stat_summary(fun = mean, geom = "point",
               shape = 18, size = 3, color = "black") +
  facet_wrap(~Periode,
             labeller = labeller(Periode = c(G1 = "Y1: G1", G2 = "Y2: G2"))) +
  scale_fill_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  labs(title    = "Distribusi Y1 & Y2 per X2 (address)",
       subtitle = "Berlian = rata-rata | Levene's Test: cek kesamaan penyebaran",
       x = "Alamat", y = "Nilai", fill = "Address") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

grid.arrange(p_var_sex, p_var_addr, ncol = 2,
             top = "Visualisasi Homogenitas Varians — Pendukung Levene's Test")

Box’s M Test per X1 (sex) — untuk MANOVA & MANCOVA

boxm_sex <- boxM(cbind(G1, G2) ~ sex, data = df)

cat("BOX'S M TEST: cbind(G1,G2) ~ X1 (sex)\n")

## BOX'S M TEST: cbind(G1,G2) ~ X1 (sex)

print(boxm_sex)

## 
##  Box's M-test for Homogeneity of Covariance Matrices 
## 
## data:  df 
## Chi-Sq (approx.) = 0.8588, df = 3, p-value = 0.8354

p_val_boxm_sex <- boxm_sex$p.value
cat("\nKesimpulan: p =", round(p_val_boxm_sex, 4), "→",
    ifelse(p_val_boxm_sex > 0.05,
           "Matriks kovarian HOMOGEN (p > 0.05)",
           "Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)"), "\n")

## 
## Kesimpulan: p = 0.8354 → Matriks kovarian HOMOGEN (p > 0.05)

cat("\n[Catatan] Box's M sangat sensitif pada sampel besar.\n")

## 
## [Catatan] Box's M sangat sensitif pada sampel besar.

cat("Jika n besar & p sedikit di bawah 0.05, MANOVA masih bisa\n")

## Jika n besar & p sedikit di bawah 0.05, MANOVA masih bisa

cat("dilanjutkan dengan kehati-hatian (lihat Pillai's trace).\n")

## dilanjutkan dengan kehati-hatian (lihat Pillai's trace).

Box’s M Test per X2 (address) — untuk MANOVA & MANCOVA

boxm_addr <- boxM(cbind(G1, G2) ~ address, data = df)

cat("BOX'S M TEST: cbind(G1,G2) ~ X2 (address)\n")

## BOX'S M TEST: cbind(G1,G2) ~ X2 (address)

print(boxm_addr)

## 
##  Box's M-test for Homogeneity of Covariance Matrices 
## 
## data:  df 
## Chi-Sq (approx.) = 20.035, df = 3, p-value = 0.0001669

p_val_boxm_addr <- boxm_addr$p.value
cat("\nKesimpulan: p =", round(p_val_boxm_addr, 4), "→",
    ifelse(p_val_boxm_addr > 0.05,
           "Matriks kovarian HOMOGEN (p > 0.05)",
           "Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)"), "\n")

## 
## Kesimpulan: p = 2e-04 → Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)

cat("\n[Catatan] Box's M sangat sensitif pada sampel besar.\n")

## 
## [Catatan] Box's M sangat sensitif pada sampel besar.

cat("Jika n besar & p sedikit di bawah 0.05, MANOVA masih bisa\n")

## Jika n besar & p sedikit di bawah 0.05, MANOVA masih bisa

cat("dilanjutkan dengan kehati-hatian (lihat Pillai's trace).\n")

## dilanjutkan dengan kehati-hatian (lihat Pillai's trace).

Box’s M Test gabungan X1 × X2 (kombinasi faktor)

df_gabungan <- df %>%
  mutate(grup = interaction(sex, address, sep = " × "))

boxm_gabungan <- boxM(cbind(G1, G2) ~ grup, data = df_gabungan)

cat("BOX'S M TEST: cbind(G1,G2) ~ sex × address\n")

## BOX'S M TEST: cbind(G1,G2) ~ sex × address

print(boxm_gabungan)

## 
##  Box's M-test for Homogeneity of Covariance Matrices 
## 
## data:  df_gabungan 
## Chi-Sq (approx.) = 27.3438, df = 9, p-value = 0.001227

p_val_boxm_gab <- boxm_gabungan$p.value
cat("\nKesimpulan: p =", round(p_val_boxm_gab, 4), "→",
    ifelse(p_val_boxm_gab > 0.05,
           "Matriks kovarian HOMOGEN (p > 0.05)",
           "Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)"), "\n")

## 
## Kesimpulan: p = 0.0012 → Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)

Visualisasi Box’s M: Heatmap Matriks Korelasi per Grup

# ── Visualisasi: matriks korelasi per grup sebagai proxy kovarian ──

plot_cor_grup <- function(data_sub, label_judul) {
  mat <- cor(data_sub[, c("G1", "G2")], method = "pearson")
  df_cor <- as.data.frame(mat) %>%
    rownames_to_column("Var1") %>%
    pivot_longer(-Var1, names_to = "Var2", values_to = "r")
  
  ggplot(df_cor, aes(x = Var1, y = Var2, fill = r)) +
    geom_tile(color = "white", linewidth = 0.8) +
    geom_text(aes(label = round(r, 3)), size = 5,
              color = "white", fontface = "bold") +
    scale_fill_gradient2(low = "#1976D2", mid = "white", high = "#E91E8C",
                         midpoint = 0, limits = c(-1, 1)) +
    labs(title = label_judul, x = NULL, y = NULL, fill = "r") +
    coord_fixed() +
    theme_minimal(base_size = 11) +
    theme(axis.text = element_text(size = 11, face = "bold"))
}

p_cor_perempuan <- plot_cor_grup(df %>% filter(sex == "Perempuan"),
                                  "Korelasi G1-G2\n(Perempuan)")
p_cor_lakilaki  <- plot_cor_grup(df %>% filter(sex == "Laki-laki"),
                                  "Korelasi G1-G2\n(Laki-laki)")
p_cor_rural     <- plot_cor_grup(df %>% filter(address == "Rural"),
                                  "Korelasi G1-G2\n(Rural)")
p_cor_urban     <- plot_cor_grup(df %>% filter(address == "Urban"),
                                  "Korelasi G1-G2\n(Urban)")

grid.arrange(p_cor_perempuan, p_cor_lakilaki,
             p_cor_rural,     p_cor_urban,
             ncol = 4,
             top  = "Matriks Korelasi G1-G2 per Grup — Proxy Homogenitas Kovarian")

Ringkasan & Kesimpulan Tahap 6

p_lev_G1_sex  <- levene_G1_sex$`Pr(>F)`[1]
p_lev_G2_sex  <- levene_G2_sex$`Pr(>F)`[1]
p_lev_G1_addr <- levene_G1_addr$`Pr(>F)`[1]
p_lev_G2_addr <- levene_G2_addr$`Pr(>F)`[1]

tabel_ringkasan <- data.frame(
  Uji = c(
    "Levene (G1 ~ sex)",
    "Levene (G2 ~ sex)",
    "Levene (G1 ~ address)",
    "Levene (G2 ~ address)",
    "Box's M (G1,G2 ~ sex)",
    "Box's M (G1,G2 ~ address)",
    "Box's M (G1,G2 ~ sex×address)"
  ),
  p_value = c(
    round(p_lev_G1_sex,        4),
    round(p_lev_G2_sex,        4),
    round(p_lev_G1_addr,       4),
    round(p_lev_G2_addr,       4),
    round(p_val_boxm_sex,      4),
    round(p_val_boxm_addr,     4),
    round(p_val_boxm_gab,      4)
  ),
  Berlaku_untuk = c(
    "Semua metode",
    "Semua metode",
    "Semua metode",
    "Semua metode",
    "MANOVA & MANCOVA",
    "MANOVA & MANCOVA",
    "MANOVA & MANCOVA"
  )
)

tabel_ringkasan$Kesimpulan <- ifelse(
  tabel_ringkasan$p_value > 0.05,
  "Homogen",
  "Tidak Homogen"
)

kable(tabel_ringkasan,
      caption = "Ringkasan Tahap 6 — Semua Uji Homogenitas Varians & Kovarians") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(4,
    color = ifelse(tabel_ringkasan$p_value > 0.05, "darkgreen", "red"),
    bold  = TRUE) %>%
  row_spec(5:7, background = "#FFF9C4")

Ringkasan Tahap 6 — Semua Uji Homogenitas Varians & Kovarians
Uji	p_value	Berlaku_untuk	Kesimpulan
Levene (G1 ~ sex)	0.5303	Semua metode	Homogen
Levene (G2 ~ sex)	0.4362	Semua metode	Homogen
Levene (G1 ~ address)	0.9543	Semua metode	Homogen
Levene (G2 ~ address)	0.8215	Semua metode	Homogen
Box’s M (G1,G2 ~ sex)	0.8354	MANOVA & MANCOVA	Homogen
Box’s M (G1,G2 ~ address)	0.0002	MANOVA & MANCOVA	Tidak Homogen
Box’s M (G1,G2 ~ sex×address)	0.0012	MANOVA & MANCOVA	Tidak Homogen

Cek Multikolinearitas antar DV

Hitung Korelasi Pearson Y1 vs Y2

# ── Korelasi Pearson antara Y1 (G1) dan Y2 (G2) ──
r_G1_G2 <- cor(df$G1, df$G2, method = "pearson")

cat("===== KORELASI PEARSON: Y1 (G1) vs Y2 (G2) =====\n\n")

## ===== KORELASI PEARSON: Y1 (G1) vs Y2 (G2) =====

cat("r (G1, G2) =", round(r_G1_G2, 4), "\n\n")

## r (G1, G2) = 0.8521

cat("Kesimpulan:\n")

## Kesimpulan:

if (abs(r_G1_G2) < 0.90) {
  cat("  r =", round(r_G1_G2, 4), "< 0.90\n")
  cat("  → TIDAK ada multikolinearitas ekstrem\n")
  cat("  → G1 dan G2 cukup berbeda sebagai DV\n")
  cat("  → Analisis MANOVA & MANCOVA dapat dilanjutkan\n")
} else {
  cat("  r =", round(r_G1_G2, 4), "≥ 0.90\n")
  cat("  → MULTIKOLINEARITAS EKSTREM\n")
  cat("  → G1 dan G2 hampir identik, pertimbangkan ulang pilihan DV\n")
}

##   r = 0.8521 < 0.90
##   → TIDAK ada multikolinearitas ekstrem
##   → G1 dan G2 cukup berbeda sebagai DV
##   → Analisis MANOVA & MANCOVA dapat dilanjutkan

Uji Signifikansi Korelasi (cor.test)

cor_test_hasil <- cor.test(df$G1, df$G2, method = "pearson")

cat("UJI SIGNIFIKANSI KORELASI: G1 vs G2\n")

## UJI SIGNIFIKANSI KORELASI: G1 vs G2

cat("Statistik t :", round(cor_test_hasil$statistic, 4), "\n")

## Statistik t : 32.2778

cat("df          :", cor_test_hasil$parameter, "\n")

## df          : 393

cat("p-value     :", format(cor_test_hasil$p.value, scientific = TRUE), "\n")

## p-value     : 1.441347e-112

cat("95% CI      : [",
    round(cor_test_hasil$conf.int[1], 4), ",",
    round(cor_test_hasil$conf.int[2], 4), "]\n\n")

## 95% CI      : [ 0.8226 , 0.877 ]

cat("Interpretasi:\n")

## Interpretasi:

cat("  Korelasi r =", round(r_G1_G2, 4), "\n")

##   Korelasi r = 0.8521

if (cor_test_hasil$p.value < 0.05) {
  cat("  Signifikan secara statistik (p < 0.05)\n")
  cat("  → G1 dan G2 memang berkorelasi (bukan kebetulan)\n")
} else {
  cat("  Tidak signifikan (p ≥ 0.05)\n")
}

##   Signifikan secara statistik (p < 0.05)
##   → G1 dan G2 memang berkorelasi (bukan kebetulan)

Matriks Korelasi Lengkap (Y1, Y2, X3)

mat_cor_full <- cor(df[, c("G1", "G2", "absences")], method = "pearson")

cat("MATRIKS KORELASI PEARSON (G1, G2, absences)\n")

## MATRIKS KORELASI PEARSON (G1, G2, absences)

print(round(mat_cor_full, 4))

##               G1      G2 absences
## G1        1.0000  0.8521  -0.0310
## G2        0.8521  1.0000  -0.0318
## absences -0.0310 -0.0318   1.0000

kable(round(mat_cor_full, 4),
      caption = "Matriks Korelasi Pearson — Y1 (G1), Y2 (G2), X3 (absences)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE)

Matriks Korelasi Pearson — Y1 (G1), Y2 (G2), X3 (absences)
	G1	G2	absences
G1	1.0000	0.8521	-0.0310
G2	0.8521	1.0000	-0.0318
absences	-0.0310	-0.0318	1.0000

Visualisasi: Heatmap Korelasi

cor_data <- as.data.frame(mat_cor_full) %>%
  rownames_to_column("Var1") %>%
  pivot_longer(-Var1, names_to = "Var2", values_to = "r")

ggplot(cor_data, aes(x = Var1, y = Var2, fill = r)) +
  geom_tile(color = "white", linewidth = 1) +
  geom_text(aes(label = round(r, 3)),
            size = 5, color = "white", fontface = "bold") +
  scale_fill_gradient2(
    low = "#1976D2", mid = "white", high = "#E91E8C",
    midpoint = 0, limits = c(-1, 1)
  ) +
  labs(
    title    = "Heatmap Korelasi Pearson",
    subtitle = "Y1 (G1), Y2 (G2), X3 (absences)",
    x = NULL, y = NULL, fill = "r"
  ) +
  coord_fixed() +
  theme_minimal(base_size = 13) +
  theme(axis.text = element_text(size = 12, face = "bold"))

Visualisasi: Scatter Plot Y1 vs Y2 dengan Regresi

ggplot(df, aes(x = G1, y = G2, color = sex, shape = address)) +
  geom_point(alpha = 0.5, size = 2.5) +
  geom_smooth(method = "lm", se = TRUE,
              aes(group = 1),
              color = "gray30", linewidth = 1.2) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C",
                                "Laki-laki" = "#1976D2")) +
  annotate("text",
           x = max(df$G1, na.rm = TRUE) * 0.7,
           y = min(df$G2, na.rm = TRUE) * 1.1,
           label = paste0("r = ", round(r_G1_G2, 3)),
           size = 5, color = "gray20", fontface = "bold") +
  labs(
    title    = "Scatter Plot Y1 (G1) vs Y2 (G2)",
    subtitle = paste0("r = ", round(r_G1_G2, 3),
                      " | Korelasi kuat positif — DV berkorelasi tapi tidak identik"),
    x     = "Y1 (G1 — Nilai Periode 1)",
    y     = "Y2 (G2 — Nilai Periode 2)",
    color = "Sex", shape = "Address"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?

Visualisasi: Scatter Plot per Grup

# ── Scatter plot G1 vs G2 per grup (sex dan address) ──
p_scatter_sex <- ggplot(df, aes(x = G1, y = G2, color = sex)) +
  geom_point(alpha = 0.5, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C",
                                "Laki-laki" = "#1976D2")) +
  labs(
    title    = "G1 vs G2 per Grup Sex (X1)",
    x = "Y1 (G1)", y = "Y2 (G2)", color = "Sex"
  ) +
  theme_minimal(base_size = 11) +
  theme(legend.position = "bottom")

p_scatter_addr <- ggplot(df, aes(x = G1, y = G2, color = address)) +
  geom_point(alpha = 0.5, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
  scale_color_manual(values = c("Rural" = "#FF8F00",
                                "Urban" = "#388E3C")) +
  labs(
    title    = "G1 vs G2 per Grup Address (X2)",
    x = "Y1 (G1)", y = "Y2 (G2)", color = "Address"
  ) +
  theme_minimal(base_size = 11) +
  theme(legend.position = "bottom")

grid.arrange(p_scatter_sex, p_scatter_addr, ncol = 2,
             top = "Scatter Plot Y1 vs Y2 per Grup — Cek Multikolinearitas")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Korelasi per Grup (sex × address)

# ── Korelasi G1 vs G2 per setiap grup ──
cat("===== KORELASI G1 vs G2 PER GRUP =====\n\n")

## ===== KORELASI G1 vs G2 PER GRUP =====

# Per sex
cat("--- Per Grup X1 (sex) ---\n")

## --- Per Grup X1 (sex) ---

for (grp in levels(df$sex)) {
  sub <- df %>% filter(sex == grp)
  r   <- round(cor(sub$G1, sub$G2, method = "pearson"), 4)
  cat("  Grup:", grp, "→ r =", r,
      ifelse(abs(r) < 0.90, "✓ OK (< 0.90)", "✗ Terlalu Tinggi (≥ 0.90)"), "\n")
}

##   Grup: Perempuan → r = 0.8395 ✓ OK (< 0.90) 
##   Grup: Laki-laki → r = 0.8623 ✓ OK (< 0.90)

cat("\n--- Per Grup X2 (address) ---\n")

## 
## --- Per Grup X2 (address) ---

for (grp in levels(df$address)) {
  sub <- df %>% filter(address == grp)
  r   <- round(cor(sub$G1, sub$G2, method = "pearson"), 4)
  cat("  Grup:", grp, "→ r =", r,
      ifelse(abs(r) < 0.90, "✓ OK (< 0.90)", "✗ Terlalu Tinggi (≥ 0.90)"), "\n")
}

##   Grup: Rural → r = 0.77 ✓ OK (< 0.90) 
##   Grup: Urban → r = 0.8778 ✓ OK (< 0.90)

cat("\n--- Per Grup Kombinasi sex × address ---\n")

## 
## --- Per Grup Kombinasi sex × address ---

df_cek <- df %>%
  group_by(sex, address) %>%
  summarise(
    n   = n(),
    r   = round(cor(G1, G2, method = "pearson"), 4),
    .groups = "drop"
  ) %>%
  mutate(Status = ifelse(abs(r) < 0.90, "OK ✓", "Terlalu Tinggi ✗"))

print(df_cek)

## # A tibble: 4 × 5
##   sex       address     n     r Status          
##   <fct>     <fct>   <int> <dbl> <chr>           
## 1 Perempuan Rural      44 0.785 OK ✓            
## 2 Perempuan Urban     164 0.855 OK ✓            
## 3 Laki-laki Rural      44 0.758 OK ✓            
## 4 Laki-laki Urban     143 0.900 Terlalu Tinggi ✗

#8A Uji Linearitas: Scatter Plot dengan Garis Regresi

Scatter Plot X3 (absences) vs Y1 (G1) & Y2 (G2) - Keseluruhan

p_lin_G1 <- ggplot(df, aes(x = absences, y = G1)) +
  geom_point(alpha = 0.4, size = 2, color = "#7B1FA2") +
  geom_smooth(method = "lm",  se = TRUE, color = "#7B1FA2",
              linewidth = 1.2, linetype = "solid") +
  geom_smooth(method = "loess", se = FALSE, color = "gray40",
              linewidth = 0.9, linetype = "dashed") +
  labs(
    title    = "Linearitas X3 (absences) → Y1 (G1)",
    subtitle = "Ungu = garis regresi linear | Abu = LOESS (non-linear smoother)",
    x = "X3: Jumlah Absen (absences)",
    y = "Y1: Nilai Periode 1 (G1)"
  ) +
  theme_minimal(base_size = 13)

p_lin_G2 <- ggplot(df, aes(x = absences, y = G2)) +
  geom_point(alpha = 0.4, size = 2, color = "#0288D1") +
  geom_smooth(method = "lm",  se = TRUE, color = "#0288D1",
              linewidth = 1.2, linetype = "solid") +
  geom_smooth(method = "loess", se = FALSE, color = "gray40",
              linewidth = 0.9, linetype = "dashed") +
  labs(
    title    = "Linearitas X3 (absences) → Y2 (G2)",
    subtitle = "Biru = garis regresi linear | Abu = LOESS (non-linear smoother)",
    x = "X3: Jumlah Absen (absences)",
    y = "Y2: Nilai Periode 2 (G2)"
  ) +
  theme_minimal(base_size = 13)

grid.arrange(p_lin_G1, p_lin_G2, ncol = 2,
             top = "Uji Linearitas: X3 (absences) vs Y1 (G1) & Y2 (G2)")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Scatter Plot per Grup X1 (sex)

p_lins_G1 <- ggplot(df, aes(x = absences, y = G1, color = sex)) +
  geom_point(alpha = 0.4, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.1, aes(fill = sex)) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  scale_fill_manual(values  = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(
    title    = "Linearitas per X1 (sex) → Y1 (G1)",
    subtitle = "Garis regresi terpisah per grup",
    x = "X3: Absences", y = "Y1: G1", color = "Sex", fill = "Sex"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

p_lins_G2 <- ggplot(df, aes(x = absences, y = G2, color = sex)) +
  geom_point(alpha = 0.4, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.1, aes(fill = sex)) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  scale_fill_manual(values  = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(
    title    = "Linearitas per X1 (sex) → Y2 (G2)",
    subtitle = "Garis regresi terpisah per grup",
    x = "X3: Absences", y = "Y2: G2", color = "Sex", fill = "Sex"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

grid.arrange(p_lins_G1, p_lins_G2, ncol = 2,
             top = "Linearitas X3 → Y1 & Y2 per Grup X1 (sex)")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Scatter Plot per Grup X2 (address)

p_lina_G1 <- ggplot(df, aes(x = absences, y = G1, color = address)) +
  geom_point(alpha = 0.4, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.1, aes(fill = address)) +
  scale_color_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  scale_fill_manual(values  = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  labs(
    title    = "Linearitas per X2 (address) → Y1 (G1)",
    subtitle = "Garis regresi terpisah per grup",
    x = "X3: Absences", y = "Y1: G1", color = "Address", fill = "Address"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

p_lina_G2 <- ggplot(df, aes(x = absences, y = G2, color = address)) +
  geom_point(alpha = 0.4, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.1, aes(fill = address)) +
  scale_color_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  scale_fill_manual(values  = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  labs(
    title    = "Linearitas per X2 (address) → Y2 (G2)",
    subtitle = "Garis regresi terpisah per grup",
    x = "X3: Absences", y = "Y2: G2", color = "Address", fill = "Address"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

grid.arrange(p_lina_G1, p_lina_G2, ncol = 2,
             top = "Linearitas X3 → Y1 & Y2 per Grup X2 (address)")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

8B Uji Linearitas: Korelasi Pearson X3 vs Y1, Y2

# ── Korelasi Pearson sebagai konfirmasi numerik linearitas ──
kor_global <- data.frame(
  Perbandingan = c("X3 (absences) vs Y1 (G1)", "X3 (absences) vs Y2 (G2)"),
  r            = c(
    round(cor(df$absences, df$G1, method = "pearson"), 4),
    round(cor(df$absences, df$G2, method = "pearson"), 4)
  ),
  p_value      = c(
    round(cor.test(df$absences, df$G1)$p.value, 4),
    round(cor.test(df$absences, df$G2)$p.value, 4)
  )
)

kor_global$Signifikan <- ifelse(kor_global$p_value < 0.05,
                                 "Signifikan (p < 0.05)",
                                 "Tidak Signifikan (p ≥ 0.05)")
kor_global$Keterangan <- ifelse(kor_global$p_value < 0.05,
                                 "Ada hubungan linear — covariate relevan ✓",
                                 "Hubungan linear lemah — covariate kurang kuat")

kable(kor_global,
      caption = "Korelasi Pearson: X3 (absences) vs Y1 (G1) & Y2 (G2)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(5, color = ifelse(kor_global$p_value < 0.05, "darkgreen", "orange"),
              bold = TRUE)

Korelasi Pearson: X3 (absences) vs Y1 (G1) & Y2 (G2)
Perbandingan	r	p_value	Signifikan	Keterangan
X3 (absences) vs Y1 (G1)	-0.0310	0.5390	Tidak Signifikan (p ≥ 0.05)	Hubungan linear lemah — covariate kurang kuat
X3 (absences) vs Y2 (G2)	-0.0318	0.5289	Tidak Signifikan (p ≥ 0.05)	Hubungan linear lemah — covariate kurang kuat

# ── Korelasi per grup ──
grp_kor <- bind_rows(
  df %>%
    group_by(Faktor = "sex", Grup = as.character(sex)) %>%
    summarise(
      r_G1    = round(cor(absences, G1), 4),
      p_G1    = round(cor.test(absences, G1)$p.value, 4),
      r_G2    = round(cor(absences, G2), 4),
      p_G2    = round(cor.test(absences, G2)$p.value, 4),
      .groups = "drop"
    ),
  df %>%
    group_by(Faktor = "address", Grup = as.character(address)) %>%
    summarise(
      r_G1    = round(cor(absences, G1), 4),
      p_G1    = round(cor.test(absences, G1)$p.value, 4),
      r_G2    = round(cor(absences, G2), 4),
      p_G2    = round(cor.test(absences, G2)$p.value, 4),
      .groups = "drop"
    )
)

kable(grp_kor,
      caption = "Korelasi Pearson X3 vs Y1 & Y2 per Grup",
      col.names = c("Faktor", "Grup", "r (G1)", "p (G1)", "r (G2)", "p (G2)")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE)

Korelasi Pearson X3 vs Y1 & Y2 per Grup
Faktor	Grup	r (G1)	p (G1)	r (G2)	p (G2)
sex	Laki-laki	-0.1097	0.1349	-0.1188	0.1055
sex	Perempuan	0.0244	0.7267	0.0290	0.6776
address	Rural	0.0287	0.7905	0.0428	0.6920
address	Urban	-0.0511	0.3719	-0.0559	0.3287

8C Uji Homogenitas Slope (Interaksi Faktor × Covariate)

Model Interaksi: sex × absences → G1

model_int_G1_sex <- lm(G1 ~ sex * absences, data = df)

cat("MODEL: G1 ~ sex * absences\n")

## MODEL: G1 ~ sex * absences

cat(rep("─", 50), "\n", sep = "")

## ──────────────────────────────────────────────────

print(Anova(model_int_G1_sex, type = 2))

## Anova Table (Type II tests)
## 
## Response: G1
##              Sum Sq  Df F value  Pr(>F)  
## sex            35.1   1  3.2118 0.07388 .
## absences        2.7   1  0.2462 0.62003  
## sex:absences   24.4   1  2.2288 0.13627  
## Residuals    4277.0 391                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

p_int_G1_sex <- Anova(model_int_G1_sex, type = 2)["sex:absences", "Pr(>F)"]
cat("\nInteraksi sex × absences → G1: p =", round(p_int_G1_sex, 4), "→",
    ifelse(p_int_G1_sex > 0.05,
           "Slope HOMOGEN — asumsi terpenuhi ✓",
           "Slope TIDAK HOMOGEN — asumsi dilanggar ✗"), "\n")

## 
## Interaksi sex × absences → G1: p = 0.1363 → Slope HOMOGEN — asumsi terpenuhi ✓

Model Interaksi: sex × absences → G2

model_int_G2_sex <- lm(G2 ~ sex * absences, data = df)

cat("MODEL: G2 ~ sex * absences\n")

## MODEL: G2 ~ sex * absences

cat(rep("─", 50), "\n", sep = "")

## ──────────────────────────────────────────────────

print(Anova(model_int_G2_sex, type = 2))

## Anova Table (Type II tests)
## 
## Response: G2
##              Sum Sq  Df F value  Pr(>F)  
## sex            44.3   1  3.1588 0.07629 .
## absences        3.7   1  0.2631 0.60830  
## sex:absences   37.9   1  2.6985 0.10125  
## Residuals    5486.8 391                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

p_int_G2_sex <- Anova(model_int_G2_sex, type = 2)["sex:absences", "Pr(>F)"]
cat("\nInteraksi sex × absences → G2: p =", round(p_int_G2_sex, 4), "→",
    ifelse(p_int_G2_sex > 0.05,
           "Slope HOMOGEN — asumsi terpenuhi ✓",
           "Slope TIDAK HOMOGEN — asumsi dilanggar ✗"), "\n")

## 
## Interaksi sex × absences → G2: p = 0.1012 → Slope HOMOGEN — asumsi terpenuhi ✓

Model Interaksi: address × absences → G1

model_int_G1_addr <- lm(G1 ~ address * absences, data = df)

cat("MODEL: G1 ~ address * absences\n")

## MODEL: G1 ~ address * absences

cat(rep("─", 50), "\n", sep = "")

## ──────────────────────────────────────────────────

print(Anova(model_int_G1_addr, type = 2))

## Anova Table (Type II tests)
## 
## Response: G1
##                  Sum Sq  Df F value Pr(>F)
## address            20.6   1  1.8675 0.1725
## absences            3.7   1  0.3328 0.5644
## address:absences    5.8   1  0.5268 0.4684
## Residuals        4310.2 391

p_int_G1_addr <- Anova(model_int_G1_addr, type = 2)["address:absences", "Pr(>F)"]
cat("\nInteraksi address × absences → G1: p =", round(p_int_G1_addr, 4), "→",
    ifelse(p_int_G1_addr > 0.05,
           "Slope HOMOGEN — asumsi terpenuhi ✓",
           "Slope TIDAK HOMOGEN — asumsi dilanggar ✗"), "\n")

## 
## Interaksi address × absences → G1: p = 0.4684 → Slope HOMOGEN — asumsi terpenuhi ✓

Model Interaksi: address × absences → G2

model_int_G2_addr <- lm(G2 ~ address * absences, data = df)

cat("MODEL: G2 ~ address * absences\n")

## MODEL: G2 ~ address * absences

cat(rep("─", 50), "\n", sep = "")

## ──────────────────────────────────────────────────

print(Anova(model_int_G2_addr, type = 2))

## Anova Table (Type II tests)
## 
## Response: G2
##                  Sum Sq  Df F value  Pr(>F)  
## address            87.4   1  6.2455 0.01286 *
## absences            4.5   1  0.3185 0.57282  
## address:absences   11.0   1  0.7861 0.37581  
## Residuals        5470.7 391                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

p_int_G2_addr <- Anova(model_int_G2_addr, type = 2)["address:absences", "Pr(>F)"]
cat("\nInteraksi address × absences → G2: p =", round(p_int_G2_addr, 4), "→",
    ifelse(p_int_G2_addr > 0.05,
           "Slope HOMOGEN — asumsi terpenuhi ✓",
           "Slope TIDAK HOMOGEN — asumsi dilanggar ✗"), "\n")

## 
## Interaksi address × absences → G2: p = 0.3758 → Slope HOMOGEN — asumsi terpenuhi ✓

8D Visualisasi Homogenitas Slope

# ── Visualisasi slope per grup X1 (sex) ──
slope_sex <- df %>%
  group_by(sex) %>%
  summarise(
    slope_G1 = round(coef(lm(G1 ~ absences))[2], 4),
    slope_G2 = round(coef(lm(G2 ~ absences))[2], 4),
    .groups  = "drop"
  )

cat("Slope X3 → Y1 (G1) per Grup sex:\n")

## Slope X3 → Y1 (G1) per Grup sex:

print(slope_sex)

## # A tibble: 2 × 3
##   sex       slope_G1 slope_G2
##   <fct>        <dbl>    <dbl>
## 1 Perempuan   0.0083   0.0112
## 2 Laki-laki  -0.0623  -0.0768

p_slope_G1_sex <- ggplot(df, aes(x = absences, y = G1, color = sex)) +
  geom_point(alpha = 0.3, size = 1.8) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.4) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  annotate("text", x = 50, y = 18,
           label = paste0("Interaksi: p = ", round(p_int_G1_sex, 3)),
           size = 3.5, color = "gray30") +
  labs(title    = "Slope X3 → Y1 (G1) per Sex",
       subtitle = paste0("p-interaksi = ", round(p_int_G1_sex, 4),
                         ifelse(p_int_G1_sex > 0.05, " → Slope Homogen ✓", " → Tidak Homogen ✗")),
       x = "X3: Absences", y = "Y1: G1", color = "Sex") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

p_slope_G2_sex <- ggplot(df, aes(x = absences, y = G2, color = sex)) +
  geom_point(alpha = 0.3, size = 1.8) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.4) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  annotate("text", x = 50, y = 18,
           label = paste0("Interaksi: p = ", round(p_int_G2_sex, 3)),
           size = 3.5, color = "gray30") +
  labs(title    = "Slope X3 → Y2 (G2) per Sex",
       subtitle = paste0("p-interaksi = ", round(p_int_G2_sex, 4),
                         ifelse(p_int_G2_sex > 0.05, " → Slope Homogen ✓", " → Tidak Homogen ✗")),
       x = "X3: Absences", y = "Y2: G2", color = "Sex") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

grid.arrange(p_slope_G1_sex, p_slope_G2_sex, ncol = 2,
             top = "Homogenitas Slope: X3 (absences) per Grup X1 (sex)")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# ── Visualisasi slope per grup X2 (address) ──
slope_addr <- df %>%
  group_by(address) %>%
  summarise(
    slope_G1 = round(coef(lm(G1 ~ absences))[2], 4),
    slope_G2 = round(coef(lm(G2 ~ absences))[2], 4),
    .groups  = "drop"
  )

cat("Slope X3 → Y1 (G1) per Grup address:\n")

## Slope X3 → Y1 (G1) per Grup address:

print(slope_addr)

## # A tibble: 2 × 3
##   address slope_G1 slope_G2
##   <fct>      <dbl>    <dbl>
## 1 Rural     0.0102   0.0174
## 2 Urban    -0.0224  -0.0275

p_slope_G1_addr <- ggplot(df, aes(x = absences, y = G1, color = address)) +
  geom_point(alpha = 0.3, size = 1.8) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.4) +
  scale_color_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  annotate("text", x = 50, y = 18,
           label = paste0("Interaksi: p = ", round(p_int_G1_addr, 3)),
           size = 3.5, color = "gray30") +
  labs(title    = "Slope X3 → Y1 (G1) per Address",
       subtitle = paste0("p-interaksi = ", round(p_int_G1_addr, 4),
                         ifelse(p_int_G1_addr > 0.05, " → Slope Homogen ✓", " → Tidak Homogen ✗")),
       x = "X3: Absences", y = "Y1: G1", color = "Address") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

p_slope_G2_addr <- ggplot(df, aes(x = absences, y = G2, color = address)) +
  geom_point(alpha = 0.3, size = 1.8) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.4) +
  scale_color_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
  annotate("text", x = 50, y = 18,
           label = paste0("Interaksi: p = ", round(p_int_G2_addr, 3)),
           size = 3.5, color = "gray30") +
  labs(title    = "Slope X3 → Y2 (G2) per Address",
       subtitle = paste0("p-interaksi = ", round(p_int_G2_addr, 4),
                         ifelse(p_int_G2_addr > 0.05, " → Slope Homogen ✓", " → Tidak Homogen ✗")),
       x = "X3: Absences", y = "Y2: G2", color = "Address") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

grid.arrange(p_slope_G1_addr, p_slope_G2_addr, ncol = 2,
             top = "Homogenitas Slope: X3 (absences) per Grup X2 (address)")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

8E Ringkasan Akhir Tahap 8

# ── Tabel ringkasan lengkap ──
tabel_step8 <- data.frame(
  Uji = c(
    "Linearitas (Pearson): X3 vs Y1 — Global",
    "Linearitas (Pearson): X3 vs Y2 — Global",
    "Homogenitas Slope: sex × absences → Y1 (G1)",
    "Homogenitas Slope: sex × absences → Y2 (G2)",
    "Homogenitas Slope: address × absences → Y1 (G1)",
    "Homogenitas Slope: address × absences → Y2 (G2)"
  ),
  p_value = c(
    round(cor.test(df$absences, df$G1)$p.value, 4),
    round(cor.test(df$absences, df$G2)$p.value, 4),
    round(p_int_G1_sex,  4),
    round(p_int_G2_sex,  4),
    round(p_int_G1_addr, 4),
    round(p_int_G2_addr, 4)
  ),
  Berlaku_untuk = c(
    "ANCOVA & MANCOVA",
    "ANCOVA & MANCOVA",
    "ANCOVA & MANCOVA",
    "ANCOVA & MANCOVA",
    "ANCOVA & MANCOVA",
    "ANCOVA & MANCOVA"
  )
)

tabel_step8$Kesimpulan <- c(
  ifelse(tabel_step8$p_value[1] < 0.05, "Hubungan linear signifikan ✓", "Hubungan linear lemah ⚠"),
  ifelse(tabel_step8$p_value[2] < 0.05, "Hubungan linear signifikan ✓", "Hubungan linear lemah ⚠"),
  ifelse(tabel_step8$p_value[3] > 0.05, "Slope Homogen ✓", "Slope TIDAK Homogen ✗"),
  ifelse(tabel_step8$p_value[4] > 0.05, "Slope Homogen ✓", "Slope TIDAK Homogen ✗"),
  ifelse(tabel_step8$p_value[5] > 0.05, "Slope Homogen ✓", "Slope TIDAK Homogen ✗"),
  ifelse(tabel_step8$p_value[6] > 0.05, "Slope Homogen ✓", "Slope TIDAK Homogen ✗")
)

# Warna baris
warna_kes <- c(
  ifelse(tabel_step8$p_value[1] < 0.05, "darkgreen", "orange"),
  ifelse(tabel_step8$p_value[2] < 0.05, "darkgreen", "orange"),
  ifelse(tabel_step8$p_value[3] > 0.05, "darkgreen", "red"),
  ifelse(tabel_step8$p_value[4] > 0.05, "darkgreen", "red"),
  ifelse(tabel_step8$p_value[5] > 0.05, "darkgreen", "red"),
  ifelse(tabel_step8$p_value[6] > 0.05, "darkgreen", "red")
)

kable(tabel_step8,
      caption = "Ringkasan Tahap 8 — Uji Linearitas & Homogenitas Slope") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(4, color = warna_kes, bold = TRUE) %>%
  row_spec(1:2, background = "#F3E5F5") %>%   # Linearitas — ungu muda
  row_spec(3:6, background = "#E3F2FD")         # Slope homogen — biru muda

Ringkasan Tahap 8 — Uji Linearitas & Homogenitas Slope
Uji	p_value	Berlaku_untuk	Kesimpulan
Linearitas (Pearson): X3 vs Y1 — Global	0.5390	ANCOVA & MANCOVA	Hubungan linear lemah ⚠
Linearitas (Pearson): X3 vs Y2 — Global	0.5289	ANCOVA & MANCOVA	Hubungan linear lemah ⚠
Homogenitas Slope: sex × absences → Y1 (G1)	0.1363	ANCOVA & MANCOVA	Slope Homogen ✓
Homogenitas Slope: sex × absences → Y2 (G2)	0.1012	ANCOVA & MANCOVA	Slope Homogen ✓
Homogenitas Slope: address × absences → Y1 (G1)	0.4684	ANCOVA & MANCOVA	Slope Homogen ✓
Homogenitas Slope: address × absences → Y2 (G2)	0.3758	ANCOVA & MANCOVA	Slope Homogen ✓

Tahap 9 — Uji Utama: MANOVA, ANCOVA, dan MANCOVA

9A — MANOVA (Multivariate Analysis of Variance)

Model: Y1 + Y2 ~ X1 + X2 (tanpa covariate)
Statistik uji: Wilks’ Lambda
Pertanyaan: Apakah sex dan/atau address berpengaruh signifikan terhadap kombinasi G1 dan G2?

Fit model MANOVA

if (!require(heplots)) install.packages("heplots")
library(heplots)

# Fit MANOVA
manova_model <- manova(cbind(G1, G2) ~ sex + address, data = df)

cat("===== MANOVA: cbind(G1, G2) ~ sex + address =====\n")

## ===== MANOVA: cbind(G1, G2) ~ sex + address =====

summary(manova_model, test = "Wilks")

##            Df   Wilks approx F num Df den Df  Pr(>F)  
## sex         1 0.99087   1.8022      2    391 0.16630  
## address     1 0.97826   4.3442      2    391 0.01361 *
## Residuals 392                                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Tabel MANOVA — semua statistik multivariat

cat("\n--- Pillai's Trace ---\n")

## 
## --- Pillai's Trace ---

print(summary(manova_model, test = "Pillai"))

##            Df    Pillai approx F num Df den Df  Pr(>F)  
## sex         1 0.0091344   1.8022      2    391 0.16630  
## address     1 0.0217377   4.3442      2    391 0.01361 *
## Residuals 392                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- Hotelling-Lawley Trace ---\n")

## 
## --- Hotelling-Lawley Trace ---

print(summary(manova_model, test = "Hotelling-Lawley"))

##            Df Hotelling-Lawley approx F num Df den Df  Pr(>F)  
## sex         1        0.0092186   1.8022      2    391 0.16630  
## address     1        0.0222208   4.3442      2    391 0.01361 *
## Residuals 392                                                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- Roy's Largest Root ---\n")

## 
## --- Roy's Largest Root ---

print(summary(manova_model, test = "Roy"))

##            Df       Roy approx F num Df den Df  Pr(>F)  
## sex         1 0.0092186   1.8022      2    391 0.16630  
## address     1 0.0222208   4.3442      2    391 0.01361 *
## Residuals 392                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Univariate follow-up MANOVA

cat("===== ANOVA Univariat (follow-up MANOVA) =====\n")

## ===== ANOVA Univariat (follow-up MANOVA) =====

summary.aov(manova_model)

##  Response G1 :
##              Df Sum Sq Mean Sq F value  Pr(>F)  
## sex           1   36.6  36.611  3.3521 0.06788 .
## address       1   22.7  22.723  2.0805 0.14999  
## Residuals   392 4281.4  10.922                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response G2 :
##              Df Sum Sq Mean Sq F value  Pr(>F)  
## sex           1   46.3  46.265  3.3362 0.06853 .
## address       1   92.3  92.318  6.6571 0.01024 *
## Residuals   392 5436.1  13.868                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Ringkasan hasil MANOVA

wil_sex  <- summary(manova_model, test = "Wilks")$stats["sex",     ]
wil_addr <- summary(manova_model, test = "Wilks")$stats["address", ]

tabel_manova <- data.frame(
  Efek    = c("sex (X1)", "address (X2)"),
  Lambda  = round(c(wil_sex["Wilks"], wil_addr["Wilks"]), 4),
  F_value = round(c(wil_sex["approx F"], wil_addr["approx F"]), 4),
  p_value = round(c(wil_sex["Pr(>F)"], wil_addr["Pr(>F)"]), 4)
)
tabel_manova$Kesimpulan <- ifelse(
  tabel_manova$p_value < 0.05,
  "Signifikan — ada perbedaan multivariat ✓",
  "Tidak signifikan ✗"
)

kable(tabel_manova,
      caption = "Ringkasan MANOVA — Wilks' Lambda",
      col.names = c("Efek", "Wilks λ", "F", "p-value", "Kesimpulan")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(5,
    color = ifelse(tabel_manova$p_value < 0.05, "darkgreen", "red"),
    bold  = TRUE)

Ringkasan MANOVA — Wilks’ Lambda
Efek	Wilks λ	F	p-value	Kesimpulan
sex (X1)	0.9909	1.8022	0.1663	Tidak signifikan ✗
address (X2)	0.9783	4.3442	0.0136	Signifikan — ada perbedaan multivariat ✓

Visualisasi HE plot MANOVA

par(mfrow = c(1, 2))

heplot(manova_model,
       main  = "HE Plot: sex (X1)\n[G1 vs G2]",
       xlab  = "Y1: G1 (Nilai Periode 1)",
       ylab  = "Y2: G2 (Nilai Periode 2)",
       col   = c("blue", "red"),
       lwd   = 2)

heplot(manova_model,
       terms = "address",
       main  = "HE Plot: address (X2)\n[G1 vs G2]",
       xlab  = "Y1: G1 (Nilai Periode 1)",
       ylab  = "Y2: G2 (Nilai Periode 2)",
       col   = c("darkgreen", "orange"),
       lwd   = 2)

par(mfrow = c(1, 1))

9B — ANCOVA (Analysis of Covariance)

Model: Y1 ~ X1 + X3 (covariate: absences)
Tujuan: Mengendalikan pengaruh absensi (X3) sebelum menguji efek sex (X1) terhadap G1
Output utama: Adjusted means (rata-rata terkoreksi covariate)

Install package tambahan

if (!require(emmeans))  install.packages("emmeans")

## Loading required package: emmeans

## Warning: package 'emmeans' was built under R version 4.5.3

## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'

if (!require(effectsize)) install.packages("effectsize")

## Loading required package: effectsize

## Warning: package 'effectsize' was built under R version 4.5.3

library(emmeans)
library(effectsize)

Fit model ANCOVA

# ANCOVA: G1 ~ sex + absences
ancova_model <- lm(G1 ~ sex + absences, data = df)

cat("===== ANCOVA: G1 ~ sex + absences =====\n")

## ===== ANCOVA: G1 ~ sex + absences =====

print(Anova(ancova_model, type = 2))

## Anova Table (Type II tests)
## 
## Response: G1
##           Sum Sq  Df F value  Pr(>F)  
## sex         35.1   1  3.2017 0.07433 .
## absences     2.7   1  0.2454 0.62058  
## Residuals 4301.4 392                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Adjusted means (Estimated Marginal Means)

emm_ancova <- emmeans(ancova_model, ~ sex)

cat("===== Adjusted Means (Controlling for absences) =====\n")

## ===== Adjusted Means (Controlling for absences) =====

print(summary(emm_ancova))

##  sex       emmean    SE  df lower.CL upper.CL
##  Perempuan   10.6 0.230 392     10.2     11.1
##  Laki-laki   11.2 0.243 392     10.7     11.7
## 
## Confidence level used: 0.95

# Tabel rapi
emm_df <- as.data.frame(summary(emm_ancova))
kable(emm_df,
      caption = "Adjusted Means G1 per Sex (Setelah Kontrol Absences)",
      digits  = 3,
      col.names = c("Sex", "Adj. Mean", "SE", "df", "Lower CI", "Upper CI")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Adjusted Means G1 per Sex (Setelah Kontrol Absences)
Sex	Adj. Mean	SE	df	Lower CI	Upper CI
Perempuan	10.625	0.230	392	10.173	11.077
Laki-laki	11.224	0.243	392	10.747	11.701

Perbandingan adjusted means (contrast)

cat("===== Pairwise Contrast (Adjusted Means) =====\n")

## ===== Pairwise Contrast (Adjusted Means) =====

pairs(emm_ancova)

##  contrast                estimate    SE  df t.ratio p.value
##  Perempuan - (Laki-laki)   -0.599 0.335 392  -1.789  0.0743

Effect size ANCOVA (Partial η²)

eta_ancova <- eta_squared(Anova(ancova_model, type = 2), partial = TRUE)

kable(eta_ancova,
      caption = "Effect Size ANCOVA — Partial η²",
      digits  = 4) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE)

Effect Size ANCOVA — Partial η²
Parameter	Eta2_partial	CI	CI_low	CI_high
sex	0.0081	0.95	0	1
absences	0.0006	0.95	0	1

Visualisasi ANCOVA — adjusted means dengan CI

emm_plot_df <- as.data.frame(summary(emm_ancova))

ggplot(emm_plot_df, aes(x = sex, y = emmean, fill = sex)) +
  geom_col(alpha = 0.8, width = 0.5, color = "white") +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
                width = 0.15, linewidth = 0.9, color = "gray30") +
  geom_text(aes(label = round(emmean, 2)),
            vjust = -0.6, size = 4.5, fontface = "bold") +
  scale_fill_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(title    = "ANCOVA — Adjusted Means G1 per Sex",
       subtitle = "Setelah dikontrol covariate: Absences (X3)",
       x = "Jenis Kelamin (X1)", y = "Adjusted Mean G1", fill = "Sex") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

Visualisasi ANCOVA — scatter dengan garis regresi

ggplot(df, aes(x = absences, y = G1, color = sex)) +
  geom_point(alpha = 0.35, size = 2) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.3) +
  scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
  labs(title    = "ANCOVA — Hubungan Absences & G1 per Sex",
       subtitle = "Garis regresi paralel = asumsi homogenitas slope terpenuhi",
       x = "X3: Absences", y = "Y1: G1", color = "Sex") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

## `geom_smooth()` using formula = 'y ~ x'

Ringkasan ANCOVA

anv_ancova <- Anova(ancova_model, type = 2)
anv_df <- as.data.frame(anv_ancova)
anv_df$Variabel <- rownames(anv_df)

# Buang baris Residuals yang tidak punya p-value (NA)
anv_df <- anv_df[!is.na(anv_df$`Pr(>F)`), ]

anv_df$Kesimpulan <- ifelse(anv_df$`Pr(>F)` < 0.05,
                             "Signifikan ✓", "Tidak Signifikan ✗")
warna_kes <- ifelse(anv_df$`Pr(>F)` < 0.05, "darkgreen", "red")

kable(anv_df[, c("Variabel", "Sum Sq", "Df", "F value", "Pr(>F)", "Kesimpulan")],
      caption   = "Tabel ANCOVA: G1 ~ sex + absences",
      digits    = 4,
      row.names = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(6, color = warna_kes, bold = TRUE)

Tabel ANCOVA: G1 ~ sex + absences
Variabel	Sum Sq	Df	F value	Pr(>F)	Kesimpulan
sex	35.1325	1	3.2017	0.0743	Tidak Signifikan ✗
absences	2.6933	1	0.2454	0.6206	Tidak Signifikan ✗

9C — MANCOVA (Multivariate Analysis of Covariance)

Fit model MANCOVA

mancova_model <- manova(cbind(G1, G2) ~ sex + address + absences, data = df)

cat("===== MANCOVA: cbind(G1, G2) ~ sex + address + absences =====\n")

## ===== MANCOVA: cbind(G1, G2) ~ sex + address + absences =====

cat("\n--- Pillai's Trace ---\n")

## 
## --- Pillai's Trace ---

print(summary(mancova_model, test = "Pillai"))

##            Df    Pillai approx F num Df den Df  Pr(>F)  
## sex         1 0.0091394   1.7986      2    390 0.16690  
## address     1 0.0217429   4.3341      2    390 0.01375 *
## absences    1 0.0005512   0.1075      2    390 0.89807  
## Residuals 391                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Semua statistik multivariat MANCOVA

cat("\n--- Wilks' Lambda ---\n")

## 
## --- Wilks' Lambda ---

print(summary(mancova_model, test = "Wilks"))

##            Df   Wilks approx F num Df den Df  Pr(>F)  
## sex         1 0.99086   1.7986      2    390 0.16690  
## address     1 0.97826   4.3341      2    390 0.01375 *
## absences    1 0.99945   0.1075      2    390 0.89807  
## Residuals 391                                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- Hotelling-Lawley Trace ---\n")

## 
## --- Hotelling-Lawley Trace ---

print(summary(mancova_model, test = "Hotelling-Lawley"))

##            Df Hotelling-Lawley approx F num Df den Df  Pr(>F)  
## sex         1        0.0092237   1.7986      2    390 0.16690  
## address     1        0.0222262   4.3341      2    390 0.01375 *
## absences    1        0.0005515   0.1075      2    390 0.89807  
## Residuals 391                                                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- Roy's Largest Root ---\n")

## 
## --- Roy's Largest Root ---

print(summary(mancova_model, test = "Roy"))

##            Df       Roy approx F num Df den Df  Pr(>F)  
## sex         1 0.0092237   1.7986      2    390 0.16690  
## address     1 0.0222262   4.3341      2    390 0.01375 *
## absences    1 0.0005515   0.1075      2    390 0.89807  
## Residuals 391                                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Univariate tests per DV setelah kontrol X3

cat("===== ANOVA Univariat per DV (setelah kontrol absences) =====\n")

## ===== ANOVA Univariat per DV (setelah kontrol absences) =====

summary.aov(mancova_model)

##  Response G1 :
##              Df Sum Sq Mean Sq F value  Pr(>F)  
## sex           1   36.6  36.611  3.3453 0.06816 .
## address       1   22.7  22.723  2.0762 0.15041  
## absences      1    2.2   2.248  0.2054 0.65062  
## Residuals   391 4279.1  10.944                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response G2 :
##              Df Sum Sq Mean Sq F value  Pr(>F)  
## sex           1   46.3  46.265  3.3293 0.06882 .
## address       1   92.3  92.318  6.6434 0.01032 *
## absences      1    2.7   2.674  0.1924 0.66115  
## Residuals   391 5433.4  13.896                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size MANCOVA

# Pillai ringkasan per efek
pil <- summary(mancova_model, test = "Pillai")$stats

tabel_mancova <- data.frame(
  Efek    = rownames(pil)[1:(nrow(pil)-1)],
  Pillai  = round(pil[1:(nrow(pil)-1), "Pillai"],      4),
  F_value = round(pil[1:(nrow(pil)-1), "approx F"],    4),
  p_value = round(pil[1:(nrow(pil)-1), "Pr(>F)"],      4)
)
tabel_mancova$Kesimpulan <- ifelse(
  tabel_mancova$p_value < 0.05,
  "Signifikan ✓",
  "Tidak Signifikan ✗"
)

kable(tabel_mancova,
      caption = "Ringkasan MANCOVA — Pillai's Trace",
      col.names = c("Efek", "Pillai", "F", "p-value", "Kesimpulan")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(5,
    color = ifelse(tabel_mancova$p_value < 0.05, "darkgreen", "red"),
    bold  = TRUE)

Ringkasan MANCOVA — Pillai’s Trace
	Efek	Pillai	F	p-value	Kesimpulan
sex	sex	0.0091	1.7986	0.1669	Tidak Signifikan ✗
address	address	0.0217	4.3341	0.0138	Signifikan ✓
absences	absences	0.0006	0.1075	0.8981	Tidak Signifikan ✗

Adjusted means per DV dari MANCOVA (emmeans)

# Model univariat per DV (kontrol absences)
lm_G1 <- lm(G1 ~ sex + address + absences, data = df)
lm_G2 <- lm(G2 ~ sex + address + absences, data = df)

emm_G1_sex  <- emmeans(lm_G1, ~ sex)
emm_G2_sex  <- emmeans(lm_G2, ~ sex)
emm_G1_addr <- emmeans(lm_G1, ~ address)
emm_G2_addr <- emmeans(lm_G2, ~ address)

cat("=== Adjusted Means G1 per sex ===\n")

## === Adjusted Means G1 per sex ===

print(summary(emm_G1_sex))

##  sex       emmean    SE  df lower.CL upper.CL
##  Perempuan   10.5 0.257 391     9.95     11.0
##  Laki-laki   11.1 0.264 391    10.55     11.6
## 
## Results are averaged over the levels of: address 
## Confidence level used: 0.95

cat("\n=== Adjusted Means G2 per sex ===\n")

## 
## === Adjusted Means G2 per sex ===

print(summary(emm_G2_sex))

##  sex       emmean    SE  df lower.CL upper.CL
##  Perempuan   10.1 0.290 391     9.49     10.6
##  Laki-laki   10.8 0.298 391    10.18     11.3
## 
## Results are averaged over the levels of: address 
## Confidence level used: 0.95

cat("\n=== Adjusted Means G1 per address ===\n")

## 
## === Adjusted Means G1 per address ===

print(summary(emm_G1_addr))

##  address emmean    SE  df lower.CL upper.CL
##  Rural     10.5 0.353 391     9.79     11.2
##  Urban     11.1 0.189 391    10.68     11.4
## 
## Results are averaged over the levels of: sex 
## Confidence level used: 0.95

cat("\n=== Adjusted Means G2 per address ===\n")

## 
## === Adjusted Means G2 per address ===

print(summary(emm_G2_addr))

##  address emmean    SE  df lower.CL upper.CL
##  Rural     9.83 0.398 391     9.05     10.6
##  Urban    10.99 0.213 391    10.57     11.4
## 
## Results are averaged over the levels of: sex 
## Confidence level used: 0.95

Visualisasi adjusted means MANCOVA

make_emm_plot <- function(emm_obj, fill_col, title_str, y_lab) {
  df_plot <- as.data.frame(summary(emm_obj))
  x_var   <- names(df_plot)[1]   # kolom pertama = faktor

  ggplot(df_plot, aes_string(x = x_var, y = "emmean", fill = x_var)) +
    geom_col(alpha = 0.82, width = 0.5, color = "white") +
    geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
                  width = 0.15, linewidth = 0.8, color = "gray30") +
    geom_text(aes(label = round(emmean, 2)),
              vjust = -0.55, size = 4, fontface = "bold") +
    scale_fill_manual(values = fill_col) +
    labs(title = title_str,
         x = x_var, y = y_lab) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "none")
}

p1 <- make_emm_plot(emm_G1_sex,
  c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2"),
  "Adj. Mean G1 per Sex\n(dikontrol absences)", "Adj. Mean G1")

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

p2 <- make_emm_plot(emm_G2_sex,
  c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2"),
  "Adj. Mean G2 per Sex\n(dikontrol absences)", "Adj. Mean G2")

p3 <- make_emm_plot(emm_G1_addr,
  c("Rural" = "#FF8F00", "Urban" = "#388E3C"),
  "Adj. Mean G1 per Address\n(dikontrol absences)", "Adj. Mean G1")

p4 <- make_emm_plot(emm_G2_addr,
  c("Rural" = "#FF8F00", "Urban" = "#388E3C"),
  "Adj. Mean G2 per Address\n(dikontrol absences)", "Adj. Mean G2")

grid.arrange(p1, p2, p3, p4, ncol = 2,
             top = "MANCOVA — Adjusted Means (G1 & G2) setelah Kontrol Absences")

Perbandingan MANOVA vs MANCOVA

wil_manova <- summary(manova_model,  test = "Wilks")$stats
pil_mancova <- summary(mancova_model, test = "Pillai")$stats

efek_names <- c("sex", "address")
rows_manova  <- efek_names[efek_names %in% rownames(wil_manova)]
rows_mancova <- efek_names[efek_names %in% rownames(pil_mancova)]

compare_df <- data.frame(
  Efek          = efek_names,
  MANOVA_p      = round(wil_manova[rows_manova,  "Pr(>F)"], 4),
  MANCOVA_p     = round(pil_mancova[rows_mancova, "Pr(>F)"], 4)
)
compare_df$Perubahan <- ifelse(
  (compare_df$MANOVA_p < 0.05) == (compare_df$MANCOVA_p < 0.05),
  "Konsisten ✓", "Berubah setelah kontrol covariate ⚠"
)

kable(compare_df,
      caption = "Perbandingan Signifikansi MANOVA vs MANCOVA",
      col.names = c("Efek", "p-value MANOVA\n(Wilks)", "p-value MANCOVA\n(Pillai)", "Perubahan")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(4, bold = TRUE)

Perbandingan Signifikansi MANOVA vs MANCOVA
	Efek	p-value MANOVA (Wilks	p-value MANCOVA (Pilla	)\|Perubahan
sex	sex	0.1663	0.1669	Konsisten ✓
address	address	0.0136	0.0138	Konsisten ✓

Tahap 10 — Post-hoc & Interpretasi Hasil

10A — Post-hoc MANOVA: Tukey pairwise per DV

Post-hoc dilakukan pada variabel yang signifikan di MANOVA.
Karena X1 (sex) hanya 2 level, pairwise langsung informatif.

if (!require(emmeans))   install.packages("emmeans")
if (!require(multcomp))  install.packages("multcomp")

## Loading required package: multcomp

## Warning: package 'multcomp' was built under R version 4.5.3

## Loading required package: mvtnorm

## Warning: package 'mvtnorm' was built under R version 4.5.3

## 
## Attaching package: 'mvtnorm'

## The following object is masked from 'package:effectsize':
## 
##     standardize

## Loading required package: survival

## 
## Attaching package: 'survival'

## The following object is masked from 'package:MVN':
## 
##     royston

## Loading required package: TH.data

## Warning: package 'TH.data' was built under R version 4.5.3

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## 
## Attaching package: 'TH.data'

## The following object is masked from 'package:MASS':
## 
##     geyser

library(emmeans)
library(multcomp)

Post-hoc sex × G1

lm_G1_sex <- lm(G1 ~ sex, data = df)
emm_ph_G1_sex <- emmeans(lm_G1_sex, ~ sex)

cat("===== Post-hoc: sex → G1 (Tukey) =====\n")

## ===== Post-hoc: sex → G1 (Tukey) =====

print(pairs(emm_ph_G1_sex, adjust = "tukey"))

##  contrast                estimate    SE  df t.ratio p.value
##  Perempuan - (Laki-laki)    -0.61 0.333 393  -1.828  0.0683

Post-hoc sex × G2

lm_G2_sex <- lm(G2 ~ sex, data = df)
emm_ph_G2_sex <- emmeans(lm_G2_sex, ~ sex)

cat("===== Post-hoc: sex → G2 (Tukey) =====\n")

## ===== Post-hoc: sex → G2 (Tukey) =====

print(pairs(emm_ph_G2_sex, adjust = "tukey"))

##  contrast                estimate    SE  df t.ratio p.value
##  Perempuan - (Laki-laki)   -0.685 0.378 393  -1.814  0.0705

Post-hoc address × G1

lm_G1_addr <- lm(G1 ~ address, data = df)
emm_ph_G1_addr <- emmeans(lm_G1_addr, ~ address)

cat("===== Post-hoc: address → G1 (Tukey) =====\n")

## ===== Post-hoc: address → G1 (Tukey) =====

print(pairs(emm_ph_G1_addr, adjust = "tukey"))

##  contrast      estimate    SE  df t.ratio p.value
##  Rural - Urban   -0.555 0.401 393  -1.385  0.1668

Post-hoc address × G2

lm_G2_addr <- lm(G2 ~ address, data = df)
emm_ph_G2_addr <- emmeans(lm_G2_addr, ~ address)

cat("===== Post-hoc: address → G2 (Tukey) =====\n")

## ===== Post-hoc: address → G2 (Tukey) =====

print(pairs(emm_ph_G2_addr, adjust = "tukey"))

##  contrast      estimate    SE  df t.ratio p.value
##  Rural - Urban    -1.14 0.452 393  -2.519  0.0122

Visualisasi post-hoc: means + CI per grup

plot_posthoc <- function(emm_obj, fill_vals, title_str, y_lbl) {
  df_p <- as.data.frame(summary(emm_obj))
  xv   <- names(df_p)[1]
  ggplot(df_p, aes_string(x = xv, y = "emmean", fill = xv)) +
    geom_col(alpha = 0.8, width = 0.5, color = "white") +
    geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
                  width = 0.15, linewidth = 0.8, color = "gray40") +
    geom_text(aes(label = round(emmean, 2)),
              vjust = -0.5, size = 4.2, fontface = "bold") +
    scale_fill_manual(values = fill_vals) +
    labs(title = title_str, x = xv, y = y_lbl) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "none")
}

p_ph1 <- plot_posthoc(emm_ph_G1_sex,
  c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2"),
  "Post-hoc: sex → G1", "Mean G1")

p_ph2 <- plot_posthoc(emm_ph_G2_sex,
  c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2"),
  "Post-hoc: sex → G2", "Mean G2")

p_ph3 <- plot_posthoc(emm_ph_G1_addr,
  c("Rural" = "#FF8F00", "Urban" = "#388E3C"),
  "Post-hoc: address → G1", "Mean G1")

p_ph4 <- plot_posthoc(emm_ph_G2_addr,
  c("Rural" = "#FF8F00", "Urban" = "#388E3C"),
  "Post-hoc: address → G2", "Mean G2")

grid.arrange(p_ph1, p_ph2, p_ph3, p_ph4, ncol = 2,
             top = "Post-hoc Pairwise Comparisons (Tukey) — MANOVA")

10B — Effect Size (Partial η²) Semua Model

library(effectsize)

# MANOVA — univariate follow-up
aov_manova_G1 <- aov(G1 ~ sex + address, data = df)
aov_manova_G2 <- aov(G2 ~ sex + address, data = df)

eta_G1 <- eta_squared(aov_manova_G1, partial = TRUE)
eta_G2 <- eta_squared(aov_manova_G2, partial = TRUE)

cat("===== Effect Size MANOVA — Y1 (G1) =====\n")

## ===== Effect Size MANOVA — Y1 (G1) =====

print(eta_G1)

## # Effect Size for ANOVA (Type I)
## 
## Parameter | Eta2 (partial) |       95% CI
## -----------------------------------------
## sex       |       8.48e-03 | [0.00, 1.00]
## address   |       5.28e-03 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

cat("\n===== Effect Size MANOVA — Y2 (G2) =====\n")

## 
## ===== Effect Size MANOVA — Y2 (G2) =====

print(eta_G2)

## # Effect Size for ANOVA (Type I)
## 
## Parameter | Eta2 (partial) |       95% CI
## -----------------------------------------
## sex       |       8.44e-03 | [0.00, 1.00]
## address   |           0.02 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

# ANCOVA
cat("\n===== Effect Size ANCOVA — G1 ~ sex + absences =====\n")

## 
## ===== Effect Size ANCOVA — G1 ~ sex + absences =====

print(eta_squared(Anova(ancova_model, type = 2), partial = TRUE))

## # Effect Size for ANOVA (Type II)
## 
## Parameter | Eta2 (partial) |       95% CI
## -----------------------------------------
## sex       |       8.10e-03 | [0.00, 1.00]
## absences  |       6.26e-04 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

# MANCOVA univariate
aov_mc_G1 <- aov(G1 ~ sex + address + absences, data = df)
aov_mc_G2 <- aov(G2 ~ sex + address + absences, data = df)

cat("\n===== Effect Size MANCOVA — Y1 (G1) =====\n")

## 
## ===== Effect Size MANCOVA — Y1 (G1) =====

print(eta_squared(aov_mc_G1, partial = TRUE))

## # Effect Size for ANOVA (Type I)
## 
## Parameter | Eta2 (partial) |       95% CI
## -----------------------------------------
## sex       |       8.48e-03 | [0.00, 1.00]
## address   |       5.28e-03 | [0.00, 1.00]
## absences  |       5.25e-04 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

cat("\n===== Effect Size MANCOVA — Y2 (G2) =====\n")

## 
## ===== Effect Size MANCOVA — Y2 (G2) =====

print(eta_squared(aov_mc_G2, partial = TRUE))

## # Effect Size for ANOVA (Type I)
## 
## Parameter | Eta2 (partial) |       95% CI
## -----------------------------------------
## sex       |       8.44e-03 | [0.00, 1.00]
## address   |           0.02 | [0.00, 1.00]
## absences  |       4.92e-04 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

Tabel gabungan effect size

# Ambil nilai partial eta squared
get_eta <- function(model, dv_name, model_name) {
  eta_obj <- tryCatch(
    eta_squared(model, partial = TRUE),
    error = function(e) NULL
  )
  if (is.null(eta_obj)) return(NULL)
  df_eta <- as.data.frame(eta_obj)
  df_eta$DV    <- dv_name
  df_eta$Model <- model_name
  df_eta
}

eta_rows <- rbind(
  get_eta(aov_manova_G1, "G1", "MANOVA"),
  get_eta(aov_manova_G2, "G2", "MANOVA"),
  get_eta(aov_mc_G1,     "G1", "MANCOVA"),
  get_eta(aov_mc_G2,     "G2", "MANCOVA")
)

if (!is.null(eta_rows)) {
  eta_rows$Interpretasi <- ifelse(
    eta_rows$Eta2_partial >= 0.14, "Besar (≥.14)",
    ifelse(eta_rows$Eta2_partial >= 0.06, "Sedang (≥.06)",
    ifelse(eta_rows$Eta2_partial >= 0.01, "Kecil (≥.01)", "Trivial (<.01)"))
  )

  kable(eta_rows[, c("Model", "DV", "Parameter", "Eta2_partial", "CI_low", "CI_high", "Interpretasi")],
        caption = "Tabel Effect Size (Partial η²) — MANOVA & MANCOVA",
        digits  = 4,
        row.names = FALSE) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                  full_width = TRUE) %>%
    column_spec(7, bold = TRUE,
      color = ifelse(eta_rows$Eta2_partial >= 0.06, "darkgreen",
              ifelse(eta_rows$Eta2_partial >= 0.01, "orange", "red")))
}

Tabel Effect Size (Partial η²) — MANOVA & MANCOVA
Model	DV	Parameter	Eta2_partial	CI_low	CI_high	Interpretasi
MANOVA	G1	sex	0.0085	0.0000	1	Trivial (<.01)
MANOVA	G1	address	0.0053	0.0000	1	Trivial (<.01)
MANOVA	G2	sex	0.0084	0.0000	1	Trivial (<.01)
MANOVA	G2	address	0.0167	0.0022	1	Kecil (≥.01)
MANCOVA	G1	sex	0.0085	0.0000	1	Trivial (<.01)
MANCOVA	G1	address	0.0053	0.0000	1	Trivial (<.01)
MANCOVA	G1	absences	0.0005	0.0000	1	Trivial (<.01)
MANCOVA	G2	sex	0.0084	0.0000	1	Trivial (<.01)
MANCOVA	G2	address	0.0167	0.0022	1	Kecil (≥.01)
MANCOVA	G2	absences	0.0005	0.0000	1	Trivial (<.01)

MODUL MANOVA

Darista Wardhani/146 Dan Argya Ishmat/042 Dan Yuli Inka V./118

2026-04-19