library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'ggplot2' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.3
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ─────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(knitr)
## Warning: package 'knitr' was built under R version 4.5.3
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.5.3
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.5.3
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(skimr)
## Warning: package 'skimr' was built under R version 4.5.3
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 4.5.3
if (!require(MVN)) install.packages("MVN")
## Loading required package: MVN
## Warning: package 'MVN' was built under R version 4.5.3
## Registered S3 method overwritten by 'lme4':
## method from
## na.action.merMod car
if (!require(ggpubr)) install.packages("ggpubr")
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.5.3
if (!require(car)) install.packages("car")
## Loading required package: car
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.3
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
if (!require(heplots)) install.packages("heplots")
## Loading required package: heplots
## Warning: package 'heplots' was built under R version 4.5.3
## Loading required package: broom
## Warning: package 'broom' was built under R version 4.5.3
library(car)
library(heplots)
library(MVN)
library(ggpubr)
df_raw <- read.csv("student-mat.csv", sep = ";", stringsAsFactors = FALSE)
cat("Jumlah baris :", nrow(df_raw), "\n")
## Jumlah baris : 395
cat("Jumlah kolom :", ncol(df_raw), "\n")
## Jumlah kolom : 33
cat("Nama kolom :\n")
## Nama kolom :
print(names(df_raw))
## [1] "school" "sex" "age" "address" "famsize"
## [6] "Pstatus" "Medu" "Fedu" "Mjob" "Fjob"
## [11] "reason" "guardian" "traveltime" "studytime" "failures"
## [16] "schoolsup" "famsup" "paid" "activities" "nursery"
## [21] "higher" "internet" "romantic" "famrel" "freetime"
## [26] "goout" "Dalc" "Walc" "health" "absences"
## [31] "G1" "G2" "G3"
str(df_raw)
## 'data.frame': 395 obs. of 33 variables:
## $ school : chr "GP" "GP" "GP" "GP" ...
## $ sex : chr "F" "F" "F" "F" ...
## $ age : int 18 17 15 15 16 16 16 17 15 15 ...
## $ address : chr "U" "U" "U" "U" ...
## $ famsize : chr "GT3" "GT3" "LE3" "GT3" ...
## $ Pstatus : chr "A" "T" "T" "T" ...
## $ Medu : int 4 1 1 4 3 4 2 4 3 3 ...
## $ Fedu : int 4 1 1 2 3 3 2 4 2 4 ...
## $ Mjob : chr "at_home" "at_home" "at_home" "health" ...
## $ Fjob : chr "teacher" "other" "other" "services" ...
## $ reason : chr "course" "course" "other" "home" ...
## $ guardian : chr "mother" "father" "mother" "mother" ...
## $ traveltime: int 2 1 1 1 1 1 1 2 1 1 ...
## $ studytime : int 2 2 2 3 2 2 2 2 2 2 ...
## $ failures : int 0 0 3 0 0 0 0 0 0 0 ...
## $ schoolsup : chr "yes" "no" "yes" "no" ...
## $ famsup : chr "no" "yes" "no" "yes" ...
## $ paid : chr "no" "no" "yes" "yes" ...
## $ activities: chr "no" "no" "no" "yes" ...
## $ nursery : chr "yes" "no" "yes" "yes" ...
## $ higher : chr "yes" "yes" "yes" "yes" ...
## $ internet : chr "no" "yes" "yes" "yes" ...
## $ romantic : chr "no" "no" "no" "yes" ...
## $ famrel : int 4 5 4 3 4 5 4 4 4 5 ...
## $ freetime : int 3 3 3 2 3 4 4 1 2 5 ...
## $ goout : int 4 3 2 2 2 2 4 4 2 1 ...
## $ Dalc : int 1 1 2 1 1 1 1 1 1 1 ...
## $ Walc : int 1 1 3 1 2 2 1 1 1 1 ...
## $ health : int 3 3 3 5 5 5 3 1 1 5 ...
## $ absences : int 6 4 10 2 4 10 0 6 0 0 ...
## $ G1 : int 5 5 7 15 6 15 12 6 16 14 ...
## $ G2 : int 6 5 8 14 10 15 12 5 18 15 ...
## $ G3 : int 6 6 10 15 10 15 11 6 19 15 ...
kable(head(df_raw, 10),
caption = "Preview 10 Baris Pertama Dataset") %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
font_size = 12
) %>%
scroll_box(width = "100%", height = "320px")
| school | sex | age | address | famsize | Pstatus | Medu | Fedu | Mjob | Fjob | reason | guardian | traveltime | studytime | failures | schoolsup | famsup | paid | activities | nursery | higher | internet | romantic | famrel | freetime | goout | Dalc | Walc | health | absences | G1 | G2 | G3 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| GP | F | 18 | U | GT3 | A | 4 | 4 | at_home | teacher | course | mother | 2 | 2 | 0 | yes | no | no | no | yes | yes | no | no | 4 | 3 | 4 | 1 | 1 | 3 | 6 | 5 | 6 | 6 |
| GP | F | 17 | U | GT3 | T | 1 | 1 | at_home | other | course | father | 1 | 2 | 0 | no | yes | no | no | no | yes | yes | no | 5 | 3 | 3 | 1 | 1 | 3 | 4 | 5 | 5 | 6 |
| GP | F | 15 | U | LE3 | T | 1 | 1 | at_home | other | other | mother | 1 | 2 | 3 | yes | no | yes | no | yes | yes | yes | no | 4 | 3 | 2 | 2 | 3 | 3 | 10 | 7 | 8 | 10 |
| GP | F | 15 | U | GT3 | T | 4 | 2 | health | services | home | mother | 1 | 3 | 0 | no | yes | yes | yes | yes | yes | yes | yes | 3 | 2 | 2 | 1 | 1 | 5 | 2 | 15 | 14 | 15 |
| GP | F | 16 | U | GT3 | T | 3 | 3 | other | other | home | father | 1 | 2 | 0 | no | yes | yes | no | yes | yes | no | no | 4 | 3 | 2 | 1 | 2 | 5 | 4 | 6 | 10 | 10 |
| GP | M | 16 | U | LE3 | T | 4 | 3 | services | other | reputation | mother | 1 | 2 | 0 | no | yes | yes | yes | yes | yes | yes | no | 5 | 4 | 2 | 1 | 2 | 5 | 10 | 15 | 15 | 15 |
| GP | M | 16 | U | LE3 | T | 2 | 2 | other | other | home | mother | 1 | 2 | 0 | no | no | no | no | yes | yes | yes | no | 4 | 4 | 4 | 1 | 1 | 3 | 0 | 12 | 12 | 11 |
| GP | F | 17 | U | GT3 | A | 4 | 4 | other | teacher | home | mother | 2 | 2 | 0 | yes | yes | no | no | yes | yes | no | no | 4 | 1 | 4 | 1 | 1 | 1 | 6 | 6 | 5 | 6 |
| GP | M | 15 | U | LE3 | A | 3 | 2 | services | other | home | mother | 1 | 2 | 0 | no | yes | yes | no | yes | yes | yes | no | 4 | 2 | 2 | 1 | 1 | 1 | 0 | 16 | 18 | 19 |
| GP | M | 15 | U | GT3 | T | 3 | 4 | other | other | home | mother | 1 | 2 | 0 | no | yes | yes | yes | yes | yes | yes | no | 5 | 5 | 1 | 1 | 1 | 5 | 0 | 14 | 15 | 15 |
tipe_df <- data.frame(
Kolom = names(df_raw),
Tipe = sapply(df_raw, class),
Contoh_Isi = sapply(df_raw, function(x) paste(head(unique(x), 3), collapse = " | ")),
row.names = NULL
)
kable(tipe_df, caption = "Tipe Data Setiap Kolom") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE, font_size = 12) %>%
scroll_box(width = "100%", height = "400px")
| Kolom | Tipe | Contoh_Isi |
|---|---|---|
| school | character | GP | MS |
| sex | character | F | M |
| age | integer | 18 | 17 | 15 |
| address | character | U | R |
| famsize | character | GT3 | LE3 |
| Pstatus | character | A | T |
| Medu | integer | 4 | 1 | 3 |
| Fedu | integer | 4 | 1 | 2 |
| Mjob | character | at_home | health | other |
| Fjob | character | teacher | other | services |
| reason | character | course | other | home |
| guardian | character | mother | father | other |
| traveltime | integer | 2 | 1 | 3 |
| studytime | integer | 2 | 3 | 1 |
| failures | integer | 0 | 3 | 2 |
| schoolsup | character | yes | no |
| famsup | character | no | yes |
| paid | character | no | yes |
| activities | character | no | yes |
| nursery | character | yes | no |
| higher | character | yes | no |
| internet | character | no | yes |
| romantic | character | no | yes |
| famrel | integer | 4 | 5 | 3 |
| freetime | integer | 3 | 2 | 4 |
| goout | integer | 4 | 3 | 2 |
| Dalc | integer | 1 | 2 | 5 |
| Walc | integer | 1 | 3 | 2 |
| health | integer | 3 | 5 | 1 |
| absences | integer | 6 | 4 | 10 |
| G1 | integer | 5 | 7 | 15 |
| G2 | integer | 6 | 5 | 8 |
| G3 | integer | 6 | 10 | 15 |
# Kolom yang bertipe karakter (kandidat kategorikal)
kolom_char <- names(df_raw)[sapply(df_raw, is.character)]
cat("Kolom kategorikal ditemukan:", paste(kolom_char, collapse = ", "), "\n\n")
## Kolom kategorikal ditemukan: school, sex, address, famsize, Pstatus, Mjob, Fjob, reason, guardian, schoolsup, famsup, paid, activities, nursery, higher, internet, romantic
for (kol in kolom_char) {
cat(">> Kolom:", kol, "\n")
print(table(df_raw[[kol]]))
cat("\n")
}
## >> Kolom: school
##
## GP MS
## 349 46
##
## >> Kolom: sex
##
## F M
## 208 187
##
## >> Kolom: address
##
## R U
## 88 307
##
## >> Kolom: famsize
##
## GT3 LE3
## 281 114
##
## >> Kolom: Pstatus
##
## A T
## 41 354
##
## >> Kolom: Mjob
##
## at_home health other services teacher
## 59 34 141 103 58
##
## >> Kolom: Fjob
##
## at_home health other services teacher
## 20 18 217 111 29
##
## >> Kolom: reason
##
## course home other reputation
## 145 109 36 105
##
## >> Kolom: guardian
##
## father mother other
## 90 273 32
##
## >> Kolom: schoolsup
##
## no yes
## 344 51
##
## >> Kolom: famsup
##
## no yes
## 153 242
##
## >> Kolom: paid
##
## no yes
## 214 181
##
## >> Kolom: activities
##
## no yes
## 194 201
##
## >> Kolom: nursery
##
## no yes
## 81 314
##
## >> Kolom: higher
##
## no yes
## 20 375
##
## >> Kolom: internet
##
## no yes
## 66 329
##
## >> Kolom: romantic
##
## no yes
## 263 132
# Ringkasan lengkap: mean, sd, min, max, histogram mini, % missing
skim(df_raw)
| Name | df_raw |
| Number of rows | 395 |
| Number of columns | 33 |
| _______________________ | |
| Column type frequency: | |
| character | 17 |
| numeric | 16 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| school | 0 | 1 | 2 | 2 | 0 | 2 | 0 |
| sex | 0 | 1 | 1 | 1 | 0 | 2 | 0 |
| address | 0 | 1 | 1 | 1 | 0 | 2 | 0 |
| famsize | 0 | 1 | 3 | 3 | 0 | 2 | 0 |
| Pstatus | 0 | 1 | 1 | 1 | 0 | 2 | 0 |
| Mjob | 0 | 1 | 5 | 8 | 0 | 5 | 0 |
| Fjob | 0 | 1 | 5 | 8 | 0 | 5 | 0 |
| reason | 0 | 1 | 4 | 10 | 0 | 4 | 0 |
| guardian | 0 | 1 | 5 | 6 | 0 | 3 | 0 |
| schoolsup | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| famsup | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| paid | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| activities | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| nursery | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| higher | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| internet | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| romantic | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 0 | 1 | 16.70 | 1.28 | 15 | 16 | 17 | 18 | 22 | ▇▅▅▁▁ |
| Medu | 0 | 1 | 2.75 | 1.09 | 0 | 2 | 3 | 4 | 4 | ▁▃▆▆▇ |
| Fedu | 0 | 1 | 2.52 | 1.09 | 0 | 2 | 2 | 3 | 4 | ▁▆▇▇▇ |
| traveltime | 0 | 1 | 1.45 | 0.70 | 1 | 1 | 1 | 2 | 4 | ▇▃▁▁▁ |
| studytime | 0 | 1 | 2.04 | 0.84 | 1 | 1 | 2 | 2 | 4 | ▅▇▁▂▁ |
| failures | 0 | 1 | 0.33 | 0.74 | 0 | 0 | 0 | 0 | 3 | ▇▁▁▁▁ |
| famrel | 0 | 1 | 3.94 | 0.90 | 1 | 4 | 4 | 5 | 5 | ▁▁▃▇▅ |
| freetime | 0 | 1 | 3.24 | 1.00 | 1 | 3 | 3 | 4 | 5 | ▁▃▇▆▂ |
| goout | 0 | 1 | 3.11 | 1.11 | 1 | 2 | 3 | 4 | 5 | ▂▆▇▅▃ |
| Dalc | 0 | 1 | 1.48 | 0.89 | 1 | 1 | 1 | 2 | 5 | ▇▂▁▁▁ |
| Walc | 0 | 1 | 2.29 | 1.29 | 1 | 1 | 2 | 3 | 5 | ▇▅▅▃▂ |
| health | 0 | 1 | 3.55 | 1.39 | 1 | 3 | 4 | 5 | 5 | ▂▂▅▃▇ |
| absences | 0 | 1 | 5.71 | 8.00 | 0 | 0 | 4 | 8 | 75 | ▇▁▁▁▁ |
| G1 | 0 | 1 | 10.91 | 3.32 | 3 | 8 | 11 | 13 | 19 | ▂▇▇▆▂ |
| G2 | 0 | 1 | 10.71 | 3.76 | 0 | 9 | 11 | 13 | 19 | ▁▂▇▆▂ |
| G3 | 0 | 1 | 10.42 | 4.58 | 0 | 8 | 11 | 14 | 20 | ▂▃▇▅▁ |
| Kode | Kolom | Tipe | Deskripsi | Dipakai_di |
|---|---|---|---|---|
| Y1 | G1 | Kontinu | Nilai periode 1 (0–20) | MANOVA, MANCOVA |
| Y2 | G2 | Kontinu | Nilai periode 2 (0–20) | MANOVA, MANCOVA |
| X1 | sex | Kategorikal | Jenis kelamin: F = Perempuan, M = Laki-laki | MANOVA, ANCOVA, MANCOVA |
| X2 | address | Kategorikal | Alamat rumah: U = Urban, R = Rural | MANOVA, MANCOVA |
| X3 | absences | Numerik (covariate) | Jumlah hari tidak masuk sekolah | ANCOVA, MANCOVA |
df <- df_raw %>%
select(
sex, # X1 — faktor kategorikal
address, # X2 — faktor kategorikal
absences, # X3 — covariate numerik
G1, # Y1 — dependent variable 1
G2 # Y2 — dependent variable 2
) %>%
mutate(
sex = factor(sex, levels = c("F", "M"),
labels = c("Perempuan", "Laki-laki")),
address = factor(address, levels = c("R", "U"),
labels = c("Rural", "Urban")),
G1 = as.numeric(G1),
G2 = as.numeric(G2)
)
cat("===== DATASET SETELAH SELEKSI =====\n")
## ===== DATASET SETELAH SELEKSI =====
cat("Dimensi:", nrow(df), "baris x", ncol(df), "kolom\n\n")
## Dimensi: 395 baris x 5 kolom
str(df)
## 'data.frame': 395 obs. of 5 variables:
## $ sex : Factor w/ 2 levels "Perempuan","Laki-laki": 1 1 1 1 1 2 2 1 2 2 ...
## $ address : Factor w/ 2 levels "Rural","Urban": 2 2 2 2 2 2 2 2 2 2 ...
## $ absences: int 6 4 10 2 4 10 0 6 0 0 ...
## $ G1 : num 5 5 7 15 6 15 12 6 16 14 ...
## $ G2 : num 6 5 8 14 10 15 12 5 18 15 ...
cat("Level X1 (sex) :", levels(df$sex), "\n")
## Level X1 (sex) : Perempuan Laki-laki
cat("Level X2 (address):", levels(df$address), "\n")
## Level X2 (address): Rural Urban
cat("\nJumlah per grup sex:\n")
##
## Jumlah per grup sex:
print(table(df$sex))
##
## Perempuan Laki-laki
## 208 187
cat("\nJumlah per grup address:\n")
##
## Jumlah per grup address:
print(table(df$address))
##
## Rural Urban
## 88 307
cat("\nKombinasi X1 x X2:\n")
##
## Kombinasi X1 x X2:
print(table(df$sex, df$address))
##
## Rural Urban
## Perempuan 44 164
## Laki-laki 44 143
kable(head(df, 10),
caption = "Preview Dataset Setelah Seleksi Variabel") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| sex | address | absences | G1 | G2 |
|---|---|---|---|---|
| Perempuan | Urban | 6 | 5 | 6 |
| Perempuan | Urban | 4 | 5 | 5 |
| Perempuan | Urban | 10 | 7 | 8 |
| Perempuan | Urban | 2 | 15 | 14 |
| Perempuan | Urban | 4 | 6 | 10 |
| Laki-laki | Urban | 10 | 15 | 15 |
| Laki-laki | Urban | 0 | 12 | 12 |
| Perempuan | Urban | 6 | 6 | 5 |
| Laki-laki | Urban | 0 | 16 | 18 |
| Laki-laki | Urban | 0 | 14 | 15 |
mv_count <- colSums(is.na(df))
mv_pct <- round(colMeans(is.na(df)) * 100, 2)
mv_tabel <- data.frame(
Variabel = names(mv_count),
Jumlah_NA = as.integer(mv_count),
Persen_NA = paste0(mv_pct, "%"),
Status = ifelse(mv_count == 0, "Lengkap ✓", "Ada missing ✗")
)
kable(mv_tabel, caption = "Ringkasan Missing Values per Variabel") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE) %>%
column_spec(4,
color = ifelse(mv_tabel$Jumlah_NA == 0, "darkgreen", "red"),
bold = TRUE
)
| Variabel | Jumlah_NA | Persen_NA | Status | |
|---|---|---|---|---|
| sex | sex | 0 | 0% | Lengkap ✓ |
| address | address | 0 | 0% | Lengkap ✓ |
| absences | absences | 0 | 0% | Lengkap ✓ |
| G1 | G1 | 0 | 0% | Lengkap ✓ |
| G2 | G2 | 0 | 0% | Lengkap ✓ |
total_mv <- sum(is.na(df))
cat("Total keseluruhan missing values:", total_mv, "\n")
## Total keseluruhan missing values: 0
if (total_mv == 0) {
cat(">> Dataset BERSIH — tidak ada missing values.\n")
cat(">> Analisis dapat langsung dilanjutkan ke tahap EDA.\n")
} else {
cat(">> Ditemukan missing values! Lakukan penanganan di bawah.\n")
}
## >> Dataset BERSIH — tidak ada missing values.
## >> Analisis dapat langsung dilanjutkan ke tahap EDA.
plot_missing(df,
title = "Persentase Missing Values per Variabel",
ggtheme = theme_minimal(base_size = 13),
theme_config = list(legend.position = "bottom"))
# --- Opsi A: Hapus baris yang mengandung NA (listwise deletion) ---
# Aman jika jumlah NA < 5% dari total data
df_clean <- df %>% drop_na()
cat("Baris setelah drop_na:", nrow(df_clean), "\n")
# --- Opsi B: Imputasi median untuk variabel NUMERIK ---
# Digunakan untuk: absences, G1, G2
df_imputasi <- df %>%
mutate(
absences = ifelse(is.na(absences), median(absences, na.rm = TRUE), absences),
G1 = ifelse(is.na(G1), median(G1, na.rm = TRUE), G1),
G2 = ifelse(is.na(G2), median(G2, na.rm = TRUE), G2)
)
# --- Opsi C: Imputasi modus untuk variabel KATEGORIKAL ---
# Digunakan untuk: sex, address
modus <- function(x) {
ux <- unique(x[!is.na(x)])
ux[which.max(tabulate(match(x, ux)))]
}
df_imputasi <- df_imputasi %>%
mutate(
sex = ifelse(is.na(sex), modus(sex), sex),
address = ifelse(is.na(address), modus(address), address)
)
# --- Verifikasi setelah imputasi ---
cat("Missing values setelah imputasi:", sum(is.na(df_imputasi)), "\n")
cat("===== KONFIRMASI DATASET FINAL =====\n")
## ===== KONFIRMASI DATASET FINAL =====
cat("Dimensi akhir :", nrow(df), "baris x", ncol(df), "kolom\n")
## Dimensi akhir : 395 baris x 5 kolom
cat("Missing values :", sum(is.na(df)), "\n")
## Missing values : 0
cat("Variabel :", paste(names(df), collapse = ", "), "\n")
## Variabel : sex, address, absences, G1, G2
cat("\nDataset siap untuk EDA dan uji asumsi.\n")
##
## Dataset siap untuk EDA dan uji asumsi.
deskriptif <- df %>%
summarise(
across(
c(G1, G2, absences),
list(
N = ~n(),
Mean = ~round(mean(.), 2),
SD = ~round(sd(.), 2),
Median = ~round(median(.), 2),
Min = ~min(.),
Max = ~max(.)
),
.names = "{.col}__{.fn}"
)
) %>%
pivot_longer(everything(),
names_to = c("Variabel", "Statistik"),
names_sep = "__") %>%
pivot_wider(names_from = Statistik, values_from = value)
kable(deskriptif,
caption = "Statistik Deskriptif Variabel Numerik (Seluruh Data)") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Variabel | N | Mean | SD | Median | Min | Max |
|---|---|---|---|---|---|---|
| G1 | 395 | 10.91 | 3.32 | 11 | 3 | 19 |
| G2 | 395 | 10.71 | 3.76 | 11 | 0 | 19 |
| absences | 395 | 5.71 | 8.00 | 4 | 0 | 75 |
df %>%
group_by(sex) %>%
summarise(
N = n(),
Mean_G1 = round(mean(G1), 2),
SD_G1 = round(sd(G1), 2),
Mean_G2 = round(mean(G2), 2),
SD_G2 = round(sd(G2), 2),
Mean_Abs = round(mean(absences), 2),
SD_Abs = round(sd(absences), 2)
) %>%
kable(caption = "Statistik Deskriptif per Kelompok Jenis Kelamin (X1)") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| sex | N | Mean_G1 | SD_G1 | Mean_G2 | SD_G2 | Mean_Abs | SD_Abs |
|---|---|---|---|---|---|---|---|
| Perempuan | 208 | 10.62 | 3.23 | 10.39 | 3.64 | 6.22 | 9.45 |
| Laki-laki | 187 | 11.23 | 3.39 | 11.07 | 3.87 | 5.14 | 5.98 |
df %>%
group_by(address) %>%
summarise(
N = n(),
Mean_G1 = round(mean(G1), 2),
SD_G1 = round(sd(G1), 2),
Mean_G2 = round(mean(G2), 2),
SD_G2 = round(sd(G2), 2),
Mean_Abs = round(mean(absences), 2),
SD_Abs = round(sd(absences), 2)
) %>%
kable(caption = "Statistik Deskriptif per Kelompok Alamat (X2)") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| address | N | Mean_G1 | SD_G1 | Mean_G2 | SD_G2 | Mean_Abs | SD_Abs |
|---|---|---|---|---|---|---|---|
| Rural | 88 | 10.48 | 3.42 | 9.83 | 3.89 | 6.12 | 9.57 |
| Urban | 307 | 11.03 | 3.29 | 10.97 | 3.69 | 5.59 | 7.51 |
df %>%
group_by(sex, address) %>%
summarise(
N = n(),
Mean_G1 = round(mean(G1), 2),
SD_G1 = round(sd(G1), 2),
Mean_G2 = round(mean(G2), 2),
SD_G2 = round(sd(G2), 2),
.groups = "drop"
) %>%
kable(caption = "Statistik Deskriptif Kombinasi X1 × X2") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| sex | address | N | Mean_G1 | SD_G1 | Mean_G2 | SD_G2 |
|---|---|---|---|---|---|---|
| Perempuan | Rural | 44 | 10.30 | 3.14 | 9.64 | 3.72 |
| Perempuan | Urban | 164 | 10.71 | 3.26 | 10.59 | 3.61 |
| Laki-laki | Rural | 44 | 10.66 | 3.70 | 10.02 | 4.09 |
| Laki-laki | Urban | 143 | 11.41 | 3.29 | 11.40 | 3.75 |
df_long_hist <- df %>%
pivot_longer(cols = c(G1, G2),
names_to = "Periode",
values_to = "Nilai")
mean_lines <- df_long_hist %>%
group_by(Periode) %>%
summarise(mean_val = mean(Nilai, na.rm = TRUE))
df_long_hist %>%
ggplot(aes(x = Nilai, fill = Periode)) +
geom_histogram(binwidth = 1, color = "white", alpha = 0.85) +
geom_vline(data = mean_lines,
aes(xintercept = mean_val),
linetype = "dashed", color = "gray40", linewidth = 0.5) +
facet_wrap(~Periode,
labeller = labeller(Periode = c(G1 = "Y1: G1 — Nilai Periode 1",
G2 = "Y2: G2 — Nilai Periode 2"))) +
scale_fill_manual(values = c("G1" = "#7B1FA2", "G2" = "#0288D1")) +
labs(title = "Distribusi Nilai G1 (Y1) dan G2 (Y2)",
x = "Nilai (0–20)", y = "Frekuensi", fill = "DV") +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
df %>%
pivot_longer(cols = c(G1, G2),
names_to = "Periode",
values_to = "Nilai") %>%
ggplot(aes(x = sex, y = Nilai, fill = sex)) +
geom_boxplot(alpha = 0.75, outlier.shape = 21,
outlier.fill = "white", outlier.color = "gray50") +
geom_jitter(width = 0.15, alpha = 0.2, size = 1.2, color = "gray40") +
stat_summary(fun = mean, geom = "point",
shape = 18, size = 3, color = "black") +
facet_wrap(~Periode,
labeller = labeller(Periode = c(G1 = "Y1: G1 — Periode 1",
G2 = "Y2: G2 — Periode 2"))) +
scale_fill_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
labs(title = "Distribusi Nilai G1 & G2 berdasarkan Jenis Kelamin (X1)",
subtitle = "Berlian hitam = rata-rata kelompok",
x = "Jenis Kelamin", y = "Nilai", fill = "Sex") +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
df %>%
pivot_longer(cols = c(G1, G2),
names_to = "Periode",
values_to = "Nilai") %>%
ggplot(aes(x = address, y = Nilai, fill = address)) +
geom_boxplot(alpha = 0.75, outlier.shape = 21,
outlier.fill = "white", outlier.color = "gray50") +
geom_jitter(width = 0.15, alpha = 0.2, size = 1.2, color = "gray40") +
stat_summary(fun = mean, geom = "point",
shape = 18, size = 3, color = "black") +
facet_wrap(~Periode,
labeller = labeller(Periode = c(G1 = "Y1: G1 — Periode 1",
G2 = "Y2: G2 — Periode 2"))) +
scale_fill_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
labs(title = "Distribusi Nilai G1 & G2 berdasarkan Alamat (X2)",
subtitle = "Berlian hitam = rata-rata kelompok",
x = "Alamat", y = "Nilai", fill = "Address") +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
df %>%
pivot_longer(cols = c(G1, G2),
names_to = "Periode",
values_to = "Nilai") %>%
ggplot(aes(x = sex, y = Nilai, fill = address)) +
geom_boxplot(alpha = 0.75, position = position_dodge(width = 0.8)) +
stat_summary(fun = mean, geom = "point",
shape = 18, size = 2.5, color = "black",
position = position_dodge(width = 0.8)) +
facet_wrap(~Periode,
labeller = labeller(Periode = c(G1 = "Y1: G1", G2 = "Y2: G2"))) +
scale_fill_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
labs(title = "Nilai G1 & G2 berdasarkan Kombinasi Sex × Address",
subtitle = "Melihat pola interaksi X1 × X2",
x = "Jenis Kelamin", y = "Nilai", fill = "Address") +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
p_abs_G1 <- ggplot(df, aes(x = absences, y = G1, color = sex, shape = address)) +
geom_point(alpha = 0.55, size = 2) +
geom_smooth(method = "lm", se = TRUE, linewidth = 0.9, aes(group = sex)) +
scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
labs(title = "X3 (Absences) vs Y1 (G1)",
x = "Jumlah Absen", y = "Nilai G1",
color = "Sex", shape = "Address") +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
p_abs_G2 <- ggplot(df, aes(x = absences, y = G2, color = sex, shape = address)) +
geom_point(alpha = 0.55, size = 2) +
geom_smooth(method = "lm", se = TRUE, linewidth = 0.9, aes(group = sex)) +
scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
labs(title = "X3 (Absences) vs Y2 (G2)",
x = "Jumlah Absen", y = "Nilai G2",
color = "Sex", shape = "Address") +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
grid.arrange(p_abs_G1, p_abs_G2, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?
df %>%
pivot_longer(cols = c(sex, address),
names_to = "Faktor",
values_to = "Grup") %>%
ggplot(aes(x = Grup, y = absences, fill = Grup)) +
geom_boxplot(alpha = 0.7, outlier.shape = 21,
outlier.fill = "white") +
geom_jitter(width = 0.15, alpha = 0.2, size = 1, color = "gray40") +
facet_wrap(~Faktor, scales = "free_x",
labeller = labeller(Faktor = c(sex = "X1: Sex",
address = "X2: Address"))) +
scale_fill_brewer(palette = "Set2") +
labs(title = "Distribusi Covariate X3 (Absences) per Grup",
x = "Grup", y = "Jumlah Absen") +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
# Matriks korelasi antara Y1, Y2, dan X3
mat_cor <- cor(df[, c("G1", "G2", "absences")], method = "pearson")
kable(round(mat_cor, 4),
caption = "Matriks Korelasi Pearson — Y1, Y2, X3") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| G1 | G2 | absences | |
|---|---|---|---|
| G1 | 1.0000 | 0.8521 | -0.0310 |
| G2 | 0.8521 | 1.0000 | -0.0318 |
| absences | -0.0310 | -0.0318 | 1.0000 |
# Visualisasi korelasi dengan ggplot
cor_data <- as.data.frame(mat_cor) %>%
rownames_to_column("Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "r")
ggplot(cor_data, aes(x = Var1, y = Var2, fill = r)) +
geom_tile(color = "white", linewidth = 0.8) +
geom_text(aes(label = round(r, 3)), size = 4.5, color = "white", fontface = "bold") +
scale_fill_gradient2(low = "#1976D2", mid = "white", high = "#E91E8C",
midpoint = 0, limits = c(-1, 1)) +
labs(title = "Heatmap Korelasi Pearson",
subtitle = "Y1 (G1), Y2 (G2), X3 (absences)",
x = NULL, y = NULL, fill = "r") +
coord_fixed() +
theme_minimal(base_size = 13) +
theme(axis.text = element_text(size = 12, face = "bold"))
r_G1_G2 <- round(mat_cor["G1", "G2"], 3)
r_G1_abs <- round(mat_cor["G1", "absences"], 3)
r_G2_abs <- round(mat_cor["G2", "absences"], 3)
cat("===== INTERPRETASI KORELASI =====\n\n")
## ===== INTERPRETASI KORELASI =====
cat("r (G1, G2) =", r_G1_G2,
"—", ifelse(abs(r_G1_G2) < 0.90,
"OK: tidak ada multikolinearitas ekstrem ✓",
"PERINGATAN: korelasi terlalu tinggi ✗"), "\n")
## r (G1, G2) = 0.852 — OK: tidak ada multikolinearitas ekstrem ✓
cat("r (G1, absences) =", r_G1_abs, "— hubungan covariate vs Y1\n")
## r (G1, absences) = -0.031 — hubungan covariate vs Y1
cat("r (G2, absences) =", r_G2_abs, "— hubungan covariate vs Y2\n")
## r (G2, absences) = -0.032 — hubungan covariate vs Y2
cat("\n>> Syarat MANOVA/MANCOVA: r antar DV < 0.90\n")
##
## >> Syarat MANOVA/MANCOVA: r antar DV < 0.90
cat(">> Hasil:", ifelse(abs(r_G1_G2) < 0.90,
"Asumsi terpenuhi — lanjut ke uji asumsi berikutnya.",
"Pertimbangkan ulang pilihan variabel DV."), "\n")
## >> Hasil: Asumsi terpenuhi — lanjut ke uji asumsi berikutnya.
# Menghasilkan laporan ringkas: distribusi, missing, korelasi
introduce(df) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("Info") %>%
rename(Nilai = V1) %>%
kable(caption = "Profil Dataset (DataExplorer)") %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Info | Nilai |
|---|---|
| rows | 395 |
| columns | 5 |
| discrete_columns | 2 |
| continuous_columns | 3 |
| all_missing_columns | 0 |
| total_missing_values | 0 |
| complete_rows | 395 |
| total_observations | 1975 |
| memory_usage | 14040 |
## UJI NORMALITAS (Univariat & Multivariat)
## Variabel DV : Y1 = G1, Y2 = G2
## Faktor : X1 = sex, X2 = address
## Syarat lulus: p > 0.05 → data berdistribusi normal
sw_G1 <- shapiro.test(df$G1)
sw_G2 <- shapiro.test(df$G2)
tabel_sw_global <- data.frame(
Variabel = c("Y1 (G1)", "Y2 (G2)"),
Statistik = c(round(sw_G1$statistic, 4), round(sw_G2$statistic, 4)),
p_value = c(round(sw_G1$p.value, 4), round(sw_G2$p.value, 4)),
Kesimpulan = c(
ifelse(sw_G1$p.value > 0.05, "Normal (p > 0.05)", "Tidak Normal (p ≤ 0.05)"),
ifelse(sw_G2$p.value > 0.05, "Normal (p > 0.05)", "Tidak Normal (p ≤ 0.05)")
)
)
kable(tabel_sw_global,
caption = "Shapiro-Wilk — Keseluruhan Data (Y1 & Y2)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) %>%
column_spec(4,
color = ifelse(tabel_sw_global$p_value > 0.05, "darkgreen", "red"),
bold = TRUE)
| Variabel | Statistik | p_value | Kesimpulan |
|---|---|---|---|
| Y1 (G1) | 0.9749 | 0 | Tidak Normal (p ≤ 0.05) |
| Y2 (G2) | 0.9691 | 0 | Tidak Normal (p ≤ 0.05) |
hasil_sw_sex <- df %>%
group_by(sex) %>%
summarise(
W_G1 = round(shapiro.test(G1)$statistic, 4),
p_G1 = round(shapiro.test(G1)$p.value, 4),
W_G2 = round(shapiro.test(G2)$statistic, 4),
p_G2 = round(shapiro.test(G2)$p.value, 4),
.groups = "drop"
) %>%
mutate(
Ket_G1 = ifelse(p_G1 > 0.05, "Normal", "Tidak Normal"),
Ket_G2 = ifelse(p_G2 > 0.05, "Normal", "Tidak Normal")
)
kable(hasil_sw_sex,
caption = "Shapiro-Wilk per Grup X1 (sex) — Y1 (G1) & Y2 (G2)",
col.names = c("Grup (sex)", "W (G1)", "p (G1)", "W (G2)", "p (G2)",
"Ket. G1", "Ket. G2")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) %>%
column_spec(6, color = ifelse(hasil_sw_sex$p_G1 > 0.05, "darkgreen", "red"), bold = TRUE) %>%
column_spec(7, color = ifelse(hasil_sw_sex$p_G2 > 0.05, "darkgreen", "red"), bold = TRUE)
| Grup (sex) | W (G1) | p (G1) | W (G2) | p (G2) | Ket. G1 | Ket. G2 |
|---|---|---|---|---|---|---|
| Perempuan | 0.9655 | 0.0001 | 0.9665 | 1e-04 | Tidak Normal | Tidak Normal |
| Laki-laki | 0.9814 | 0.0136 | 0.9680 | 3e-04 | Tidak Normal | Tidak Normal |
hasil_sw_address <- df %>%
group_by(address) %>%
summarise(
W_G1 = round(shapiro.test(G1)$statistic, 4),
p_G1 = round(shapiro.test(G1)$p.value, 4),
W_G2 = round(shapiro.test(G2)$statistic, 4),
p_G2 = round(shapiro.test(G2)$p.value, 4),
.groups = "drop"
) %>%
mutate(
Ket_G1 = ifelse(p_G1 > 0.05, "Normal", "Tidak Normal"),
Ket_G2 = ifelse(p_G2 > 0.05, "Normal", "Tidak Normal")
)
kable(hasil_sw_address,
caption = "Shapiro-Wilk per Grup X2 (address) — Y1 (G1) & Y2 (G2)",
col.names = c("Grup (address)", "W (G1)", "p (G1)", "W (G2)", "p (G2)",
"Ket. G1", "Ket. G2")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) %>%
column_spec(6, color = ifelse(hasil_sw_address$p_G1 > 0.05, "darkgreen", "red"), bold = TRUE) %>%
column_spec(7, color = ifelse(hasil_sw_address$p_G2 > 0.05, "darkgreen", "red"), bold = TRUE)
| Grup (address) | W (G1) | p (G1) | W (G2) | p (G2) | Ket. G1 | Ket. G2 |
|---|---|---|---|---|---|---|
| Rural | 0.9687 | 0.0316 | 0.9698 | 0.0375 | Tidak Normal | Tidak Normal |
| Urban | 0.9740 | 0.0000 | 0.9668 | 0.0000 | Tidak Normal | Tidak Normal |
p_qq_G1_sex <- ggqqplot(df, x = "G1", color = "sex",
palette = c("#E91E8C", "#1976D2"),
title = "QQ-Plot Y1 (G1) per Sex") +
theme_minimal(base_size = 11)
p_qq_G2_sex <- ggqqplot(df, x = "G2", color = "sex",
palette = c("#E91E8C", "#1976D2"),
title = "QQ-Plot Y2 (G2) per Sex") +
theme_minimal(base_size = 11)
p_qq_G1_addr <- ggqqplot(df, x = "G1", color = "address",
palette = c("#FF8F00", "#388E3C"),
title = "QQ-Plot Y1 (G1) per Address") +
theme_minimal(base_size = 11)
p_qq_G2_addr <- ggqqplot(df, x = "G2", color = "address",
palette = c("#FF8F00", "#388E3C"),
title = "QQ-Plot Y2 (G2) per Address") +
theme_minimal(base_size = 11)
grid.arrange(p_qq_G1_sex, p_qq_G2_sex,
p_qq_G1_addr, p_qq_G2_addr,
ncol = 2,
top = "QQ-Plot Univariat — Y1 & Y2 per Grup (X1 & X2)")
mvn_global <- MVN::mvn(data = df[, c("G1", "G2")],
mvn_test = "mardia",
univariate_test = "SW")
cat("MARDIA'S TEST — Keseluruhan Data\n")
## MARDIA'S TEST — Keseluruhan Data
cat(" Multivariat:\n")
## Multivariat:
summary(mvn_global, select = "mvn")
##
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────
## Test Statistic p.value Method MVN
## 1 Mardia Skewness 291.175 <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis 19.052 <0.001 asymptotic ✗ Not normal
cat("\n Univariat (Shapiro-Wilk via MVN):\n")
##
## Univariat (Shapiro-Wilk via MVN):
summary(mvn_global, select = "univariate")
##
## ── Univariate Normality Test Results ──────────────────────────────────────────────────────────────────────────────────────────────────────────
## Test Variable Statistic p.value Normality
## 1 Shapiro-Wilk G1 0.975 <0.001 ✗ Not normal
## 2 Shapiro-Wilk G2 0.969 <0.001 ✗ Not normal
mvn_royston <- mvn(data = df[, c("G1", "G2")],
mvn_test = "royston")
cat("ROYSTON'S TEST — Keseluruhan Data\n")
## ROYSTON'S TEST — Keseluruhan Data
summary(mvn_royston, select = "mvn")
##
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────
## Test Statistic p.value Method MVN
## 1 Royston 39.332 <0.001 asymptotic ✗ Not normal
cat("MARDIA'S TEST per Grup X1 (sex)\n")
## MARDIA'S TEST per Grup X1 (sex)
for (grp in levels(df$sex)) {
cat(" Grup:", grp, "\n")
sub_df <- df %>% filter(sex == grp) %>% select(G1, G2)
hasil <- mvn(data = sub_df,
mvn_test = "mardia",
desc = FALSE)
# print(hasil$multivariateNormality)
summary(hasil, select = "mvn")
cat("\n")
}
## Grup: Perempuan
##
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────
## Test Statistic p.value Method MVN
## 1 Mardia Skewness 216.849 <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis 17.812 <0.001 asymptotic ✗ Not normal
##
## Grup: Laki-laki
##
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────
## Test Statistic p.value Method MVN
## 1 Mardia Skewness 90.266 <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis 8.687 <0.001 asymptotic ✗ Not normal
# ── Mardia per grup X2 (address) — syarat MANOVA & MANCOVA ──
cat("MARDIA'S TEST per Grup X2 (address)\n")
## MARDIA'S TEST per Grup X2 (address)
for (grp in levels(df$address)) {
cat("─── Grup:", grp, "───\n")
sub_df <- df %>% filter(address == grp) %>% select(G1, G2)
hasil <- mvn(data = sub_df,
mvn_test = "mardia",
desc = FALSE)
# print(hasil$multivariateNormality)
summary(hasil, select = "mvn")
cat("\n")
}
## ─── Grup: Rural ───
##
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────
## Test Statistic p.value Method MVN
## 1 Mardia Skewness 63.074 <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis 6.598 <0.001 asymptotic ✗ Not normal
##
## ─── Grup: Urban ───
##
## ── Multivariate Normality Test Results ────────────────────────────────────────────────────────────────────────────────────────────────────────
## Test Statistic p.value Method MVN
## 1 Mardia Skewness 193.528 <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis 15.134 <0.001 asymptotic ✗ Not normal
# Keseluruhan
res_global <- MVN::mvn(data = df[, c("G1", "G2")],
mvn_test = "mardia")
p1 <- plot(res_global, diagnostic = "multivariate", type = "qq")
p1 <- p1 + ggplot2::ggtitle("Chi-Square QQ-Plot\n(Keseluruhan Data)")
# Per grup: Perempuan
sub_p <- df %>% filter(sex == "Perempuan") %>% select(G1, G2)
res_p <- MVN::mvn(data = sub_p,
mvn_test = "mardia")
p2 <- plot(res_p, diagnostic = "multivariate", type = "qq")
p2 <- p2 + ggplot2::ggtitle("Chi-Square QQ-Plot\n(Grup: Perempuan)")
gridExtra::grid.arrange(p1, p2, ncol = 2)
levene_G1_sex <- leveneTest(G1 ~ sex, data = df, center = mean)
cat("LEVENE'S TEST: Y1 (G1) ~ X1 (sex)\n")
## LEVENE'S TEST: Y1 (G1) ~ X1 (sex)
print(levene_G1_sex)
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 1 0.3946 0.5303
## 393
p_val <- levene_G1_sex$`Pr(>F)`[1]
cat("\nKesimpulan: p =", round(p_val, 4), "→",
ifelse(p_val > 0.05,
"Varians HOMOGEN (p > 0.05)",
"Varians TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
##
## Kesimpulan: p = 0.5303 → Varians HOMOGEN (p > 0.05)
levene_G2_sex <- leveneTest(G2 ~ sex, data = df, center = mean)
cat("LEVENE'S TEST: Y2 (G2) ~ X1 (sex)\n")
## LEVENE'S TEST: Y2 (G2) ~ X1 (sex)
print(levene_G2_sex)
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 1 0.6075 0.4362
## 393
p_val <- levene_G2_sex$`Pr(>F)`[1]
cat("\nKesimpulan: p =", round(p_val, 4), "→",
ifelse(p_val > 0.05,
"Varians HOMOGEN (p > 0.05)",
"Varians TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
##
## Kesimpulan: p = 0.4362 → Varians HOMOGEN (p > 0.05)
levene_G1_addr <- leveneTest(G1 ~ address, data = df, center = mean)
cat("LEVENE'S TEST: Y1 (G1) ~ X2 (address)\n")
## LEVENE'S TEST: Y1 (G1) ~ X2 (address)
print(levene_G1_addr)
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 1 0.0033 0.9543
## 393
p_val <- levene_G1_addr$`Pr(>F)`[1]
cat("\nKesimpulan: p =", round(p_val, 4), "→",
ifelse(p_val > 0.05,
"Varians HOMOGEN (p > 0.05)",
"Varians TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
##
## Kesimpulan: p = 0.9543 → Varians HOMOGEN (p > 0.05)
levene_G2_addr <- leveneTest(G2 ~ address, data = df, center = mean)
cat("LEVENE'S TEST: Y2 (G2) ~ X2 (address)\n")
## LEVENE'S TEST: Y2 (G2) ~ X2 (address)
print(levene_G2_addr)
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 1 0.0509 0.8215
## 393
p_val <- levene_G2_addr$`Pr(>F)`[1]
cat("\nKesimpulan: p =", round(p_val, 4), "→",
ifelse(p_val > 0.05,
"Varians HOMOGEN (p > 0.05)",
"Varians TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
##
## Kesimpulan: p = 0.8215 → Varians HOMOGEN (p > 0.05)
tabel_levene <- data.frame(
DV = c("Y1 (G1)", "Y2 (G2)", "Y1 (G1)", "Y2 (G2)"),
Faktor = c("X1 (sex)", "X1 (sex)", "X2 (address)", "X2 (address)"),
F_stat = c(
round(levene_G1_sex$`F value`[1], 4),
round(levene_G2_sex$`F value`[1], 4),
round(levene_G1_addr$`F value`[1], 4),
round(levene_G2_addr$`F value`[1], 4)
),
p_value = c(
round(levene_G1_sex$`Pr(>F)`[1], 4),
round(levene_G2_sex$`Pr(>F)`[1], 4),
round(levene_G1_addr$`Pr(>F)`[1], 4),
round(levene_G2_addr$`Pr(>F)`[1], 4)
)
)
tabel_levene$Kesimpulan <- ifelse(
tabel_levene$p_value > 0.05,
"Homogen",
"Tidak Homogen"
)
kable(tabel_levene,
caption = "Ringkasan Levene's Test — Homogenitas Varians Y1 & Y2") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) %>%
column_spec(5,
color = ifelse(tabel_levene$p_value > 0.05, "darkgreen", "red"),
bold = TRUE)
| DV | Faktor | F_stat | p_value | Kesimpulan |
|---|---|---|---|---|
| Y1 (G1) | X1 (sex) | 0.3946 | 0.5303 | Homogen |
| Y2 (G2) | X1 (sex) | 0.6075 | 0.4362 | Homogen |
| Y1 (G1) | X2 (address) | 0.0033 | 0.9543 | Homogen |
| Y2 (G2) | X2 (address) | 0.0509 | 0.8215 | Homogen |
p_var_sex <- df %>%
pivot_longer(cols = c(G1, G2), names_to = "Periode", values_to = "Nilai") %>%
ggplot(aes(x = sex, y = Nilai, fill = sex)) +
geom_boxplot(alpha = 0.7, outlier.shape = 21,
outlier.fill = "white", outlier.color = "gray50") +
stat_summary(fun = mean, geom = "point",
shape = 18, size = 3, color = "black") +
facet_wrap(~Periode,
labeller = labeller(Periode = c(G1 = "Y1: G1", G2 = "Y2: G2"))) +
scale_fill_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
labs(title = "Distribusi Y1 & Y2 per X1 (sex)",
subtitle = "Berlian = rata-rata | Levene's Test: cek kesamaan penyebaran",
x = "Jenis Kelamin", y = "Nilai", fill = "Sex") +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
p_var_addr <- df %>%
pivot_longer(cols = c(G1, G2), names_to = "Periode", values_to = "Nilai") %>%
ggplot(aes(x = address, y = Nilai, fill = address)) +
geom_boxplot(alpha = 0.7, outlier.shape = 21,
outlier.fill = "white", outlier.color = "gray50") +
stat_summary(fun = mean, geom = "point",
shape = 18, size = 3, color = "black") +
facet_wrap(~Periode,
labeller = labeller(Periode = c(G1 = "Y1: G1", G2 = "Y2: G2"))) +
scale_fill_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
labs(title = "Distribusi Y1 & Y2 per X2 (address)",
subtitle = "Berlian = rata-rata | Levene's Test: cek kesamaan penyebaran",
x = "Alamat", y = "Nilai", fill = "Address") +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
grid.arrange(p_var_sex, p_var_addr, ncol = 2,
top = "Visualisasi Homogenitas Varians — Pendukung Levene's Test")
boxm_sex <- boxM(cbind(G1, G2) ~ sex, data = df)
cat("BOX'S M TEST: cbind(G1,G2) ~ X1 (sex)\n")
## BOX'S M TEST: cbind(G1,G2) ~ X1 (sex)
print(boxm_sex)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: df
## Chi-Sq (approx.) = 0.8588, df = 3, p-value = 0.8354
p_val_boxm_sex <- boxm_sex$p.value
cat("\nKesimpulan: p =", round(p_val_boxm_sex, 4), "→",
ifelse(p_val_boxm_sex > 0.05,
"Matriks kovarian HOMOGEN (p > 0.05)",
"Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
##
## Kesimpulan: p = 0.8354 → Matriks kovarian HOMOGEN (p > 0.05)
cat("\n[Catatan] Box's M sangat sensitif pada sampel besar.\n")
##
## [Catatan] Box's M sangat sensitif pada sampel besar.
cat("Jika n besar & p sedikit di bawah 0.05, MANOVA masih bisa\n")
## Jika n besar & p sedikit di bawah 0.05, MANOVA masih bisa
cat("dilanjutkan dengan kehati-hatian (lihat Pillai's trace).\n")
## dilanjutkan dengan kehati-hatian (lihat Pillai's trace).
boxm_addr <- boxM(cbind(G1, G2) ~ address, data = df)
cat("BOX'S M TEST: cbind(G1,G2) ~ X2 (address)\n")
## BOX'S M TEST: cbind(G1,G2) ~ X2 (address)
print(boxm_addr)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: df
## Chi-Sq (approx.) = 20.035, df = 3, p-value = 0.0001669
p_val_boxm_addr <- boxm_addr$p.value
cat("\nKesimpulan: p =", round(p_val_boxm_addr, 4), "→",
ifelse(p_val_boxm_addr > 0.05,
"Matriks kovarian HOMOGEN (p > 0.05)",
"Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
##
## Kesimpulan: p = 2e-04 → Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)
cat("\n[Catatan] Box's M sangat sensitif pada sampel besar.\n")
##
## [Catatan] Box's M sangat sensitif pada sampel besar.
cat("Jika n besar & p sedikit di bawah 0.05, MANOVA masih bisa\n")
## Jika n besar & p sedikit di bawah 0.05, MANOVA masih bisa
cat("dilanjutkan dengan kehati-hatian (lihat Pillai's trace).\n")
## dilanjutkan dengan kehati-hatian (lihat Pillai's trace).
df_gabungan <- df %>%
mutate(grup = interaction(sex, address, sep = " × "))
boxm_gabungan <- boxM(cbind(G1, G2) ~ grup, data = df_gabungan)
cat("BOX'S M TEST: cbind(G1,G2) ~ sex × address\n")
## BOX'S M TEST: cbind(G1,G2) ~ sex × address
print(boxm_gabungan)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: df_gabungan
## Chi-Sq (approx.) = 27.3438, df = 9, p-value = 0.001227
p_val_boxm_gab <- boxm_gabungan$p.value
cat("\nKesimpulan: p =", round(p_val_boxm_gab, 4), "→",
ifelse(p_val_boxm_gab > 0.05,
"Matriks kovarian HOMOGEN (p > 0.05)",
"Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)"), "\n")
##
## Kesimpulan: p = 0.0012 → Matriks kovarian TIDAK HOMOGEN (p ≤ 0.05)
# ── Visualisasi: matriks korelasi per grup sebagai proxy kovarian ──
plot_cor_grup <- function(data_sub, label_judul) {
mat <- cor(data_sub[, c("G1", "G2")], method = "pearson")
df_cor <- as.data.frame(mat) %>%
rownames_to_column("Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "r")
ggplot(df_cor, aes(x = Var1, y = Var2, fill = r)) +
geom_tile(color = "white", linewidth = 0.8) +
geom_text(aes(label = round(r, 3)), size = 5,
color = "white", fontface = "bold") +
scale_fill_gradient2(low = "#1976D2", mid = "white", high = "#E91E8C",
midpoint = 0, limits = c(-1, 1)) +
labs(title = label_judul, x = NULL, y = NULL, fill = "r") +
coord_fixed() +
theme_minimal(base_size = 11) +
theme(axis.text = element_text(size = 11, face = "bold"))
}
p_cor_perempuan <- plot_cor_grup(df %>% filter(sex == "Perempuan"),
"Korelasi G1-G2\n(Perempuan)")
p_cor_lakilaki <- plot_cor_grup(df %>% filter(sex == "Laki-laki"),
"Korelasi G1-G2\n(Laki-laki)")
p_cor_rural <- plot_cor_grup(df %>% filter(address == "Rural"),
"Korelasi G1-G2\n(Rural)")
p_cor_urban <- plot_cor_grup(df %>% filter(address == "Urban"),
"Korelasi G1-G2\n(Urban)")
grid.arrange(p_cor_perempuan, p_cor_lakilaki,
p_cor_rural, p_cor_urban,
ncol = 4,
top = "Matriks Korelasi G1-G2 per Grup — Proxy Homogenitas Kovarian")
p_lev_G1_sex <- levene_G1_sex$`Pr(>F)`[1]
p_lev_G2_sex <- levene_G2_sex$`Pr(>F)`[1]
p_lev_G1_addr <- levene_G1_addr$`Pr(>F)`[1]
p_lev_G2_addr <- levene_G2_addr$`Pr(>F)`[1]
tabel_ringkasan <- data.frame(
Uji = c(
"Levene (G1 ~ sex)",
"Levene (G2 ~ sex)",
"Levene (G1 ~ address)",
"Levene (G2 ~ address)",
"Box's M (G1,G2 ~ sex)",
"Box's M (G1,G2 ~ address)",
"Box's M (G1,G2 ~ sex×address)"
),
p_value = c(
round(p_lev_G1_sex, 4),
round(p_lev_G2_sex, 4),
round(p_lev_G1_addr, 4),
round(p_lev_G2_addr, 4),
round(p_val_boxm_sex, 4),
round(p_val_boxm_addr, 4),
round(p_val_boxm_gab, 4)
),
Berlaku_untuk = c(
"Semua metode",
"Semua metode",
"Semua metode",
"Semua metode",
"MANOVA & MANCOVA",
"MANOVA & MANCOVA",
"MANOVA & MANCOVA"
)
)
tabel_ringkasan$Kesimpulan <- ifelse(
tabel_ringkasan$p_value > 0.05,
"Homogen",
"Tidak Homogen"
)
kable(tabel_ringkasan,
caption = "Ringkasan Tahap 6 — Semua Uji Homogenitas Varians & Kovarians") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = TRUE) %>%
column_spec(4,
color = ifelse(tabel_ringkasan$p_value > 0.05, "darkgreen", "red"),
bold = TRUE) %>%
row_spec(5:7, background = "#FFF9C4")
| Uji | p_value | Berlaku_untuk | Kesimpulan |
|---|---|---|---|
| Levene (G1 ~ sex) | 0.5303 | Semua metode | Homogen |
| Levene (G2 ~ sex) | 0.4362 | Semua metode | Homogen |
| Levene (G1 ~ address) | 0.9543 | Semua metode | Homogen |
| Levene (G2 ~ address) | 0.8215 | Semua metode | Homogen |
| Box’s M (G1,G2 ~ sex) | 0.8354 | MANOVA & MANCOVA | Homogen |
| Box’s M (G1,G2 ~ address) | 0.0002 | MANOVA & MANCOVA | Tidak Homogen |
| Box’s M (G1,G2 ~ sex×address) | 0.0012 | MANOVA & MANCOVA | Tidak Homogen |
# ── Korelasi Pearson antara Y1 (G1) dan Y2 (G2) ──
r_G1_G2 <- cor(df$G1, df$G2, method = "pearson")
cat("===== KORELASI PEARSON: Y1 (G1) vs Y2 (G2) =====\n\n")
## ===== KORELASI PEARSON: Y1 (G1) vs Y2 (G2) =====
cat("r (G1, G2) =", round(r_G1_G2, 4), "\n\n")
## r (G1, G2) = 0.8521
cat("Kesimpulan:\n")
## Kesimpulan:
if (abs(r_G1_G2) < 0.90) {
cat(" r =", round(r_G1_G2, 4), "< 0.90\n")
cat(" → TIDAK ada multikolinearitas ekstrem\n")
cat(" → G1 dan G2 cukup berbeda sebagai DV\n")
cat(" → Analisis MANOVA & MANCOVA dapat dilanjutkan\n")
} else {
cat(" r =", round(r_G1_G2, 4), "≥ 0.90\n")
cat(" → MULTIKOLINEARITAS EKSTREM\n")
cat(" → G1 dan G2 hampir identik, pertimbangkan ulang pilihan DV\n")
}
## r = 0.8521 < 0.90
## → TIDAK ada multikolinearitas ekstrem
## → G1 dan G2 cukup berbeda sebagai DV
## → Analisis MANOVA & MANCOVA dapat dilanjutkan
cor_test_hasil <- cor.test(df$G1, df$G2, method = "pearson")
cat("UJI SIGNIFIKANSI KORELASI: G1 vs G2\n")
## UJI SIGNIFIKANSI KORELASI: G1 vs G2
cat("Statistik t :", round(cor_test_hasil$statistic, 4), "\n")
## Statistik t : 32.2778
cat("df :", cor_test_hasil$parameter, "\n")
## df : 393
cat("p-value :", format(cor_test_hasil$p.value, scientific = TRUE), "\n")
## p-value : 1.441347e-112
cat("95% CI : [",
round(cor_test_hasil$conf.int[1], 4), ",",
round(cor_test_hasil$conf.int[2], 4), "]\n\n")
## 95% CI : [ 0.8226 , 0.877 ]
cat("Interpretasi:\n")
## Interpretasi:
cat(" Korelasi r =", round(r_G1_G2, 4), "\n")
## Korelasi r = 0.8521
if (cor_test_hasil$p.value < 0.05) {
cat(" Signifikan secara statistik (p < 0.05)\n")
cat(" → G1 dan G2 memang berkorelasi (bukan kebetulan)\n")
} else {
cat(" Tidak signifikan (p ≥ 0.05)\n")
}
## Signifikan secara statistik (p < 0.05)
## → G1 dan G2 memang berkorelasi (bukan kebetulan)
mat_cor_full <- cor(df[, c("G1", "G2", "absences")], method = "pearson")
cat("MATRIKS KORELASI PEARSON (G1, G2, absences)\n")
## MATRIKS KORELASI PEARSON (G1, G2, absences)
print(round(mat_cor_full, 4))
## G1 G2 absences
## G1 1.0000 0.8521 -0.0310
## G2 0.8521 1.0000 -0.0318
## absences -0.0310 -0.0318 1.0000
kable(round(mat_cor_full, 4),
caption = "Matriks Korelasi Pearson — Y1 (G1), Y2 (G2), X3 (absences)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE)
| G1 | G2 | absences | |
|---|---|---|---|
| G1 | 1.0000 | 0.8521 | -0.0310 |
| G2 | 0.8521 | 1.0000 | -0.0318 |
| absences | -0.0310 | -0.0318 | 1.0000 |
cor_data <- as.data.frame(mat_cor_full) %>%
rownames_to_column("Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "r")
ggplot(cor_data, aes(x = Var1, y = Var2, fill = r)) +
geom_tile(color = "white", linewidth = 1) +
geom_text(aes(label = round(r, 3)),
size = 5, color = "white", fontface = "bold") +
scale_fill_gradient2(
low = "#1976D2", mid = "white", high = "#E91E8C",
midpoint = 0, limits = c(-1, 1)
) +
labs(
title = "Heatmap Korelasi Pearson",
subtitle = "Y1 (G1), Y2 (G2), X3 (absences)",
x = NULL, y = NULL, fill = "r"
) +
coord_fixed() +
theme_minimal(base_size = 13) +
theme(axis.text = element_text(size = 12, face = "bold"))
ggplot(df, aes(x = G1, y = G2, color = sex, shape = address)) +
geom_point(alpha = 0.5, size = 2.5) +
geom_smooth(method = "lm", se = TRUE,
aes(group = 1),
color = "gray30", linewidth = 1.2) +
scale_color_manual(values = c("Perempuan" = "#E91E8C",
"Laki-laki" = "#1976D2")) +
annotate("text",
x = max(df$G1, na.rm = TRUE) * 0.7,
y = min(df$G2, na.rm = TRUE) * 1.1,
label = paste0("r = ", round(r_G1_G2, 3)),
size = 5, color = "gray20", fontface = "bold") +
labs(
title = "Scatter Plot Y1 (G1) vs Y2 (G2)",
subtitle = paste0("r = ", round(r_G1_G2, 3),
" | Korelasi kuat positif — DV berkorelasi tapi tidak identik"),
x = "Y1 (G1 — Nilai Periode 1)",
y = "Y2 (G2 — Nilai Periode 2)",
color = "Sex", shape = "Address"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: shape.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?
# ── Scatter plot G1 vs G2 per grup (sex dan address) ──
p_scatter_sex <- ggplot(df, aes(x = G1, y = G2, color = sex)) +
geom_point(alpha = 0.5, size = 2) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
scale_color_manual(values = c("Perempuan" = "#E91E8C",
"Laki-laki" = "#1976D2")) +
labs(
title = "G1 vs G2 per Grup Sex (X1)",
x = "Y1 (G1)", y = "Y2 (G2)", color = "Sex"
) +
theme_minimal(base_size = 11) +
theme(legend.position = "bottom")
p_scatter_addr <- ggplot(df, aes(x = G1, y = G2, color = address)) +
geom_point(alpha = 0.5, size = 2) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
scale_color_manual(values = c("Rural" = "#FF8F00",
"Urban" = "#388E3C")) +
labs(
title = "G1 vs G2 per Grup Address (X2)",
x = "Y1 (G1)", y = "Y2 (G2)", color = "Address"
) +
theme_minimal(base_size = 11) +
theme(legend.position = "bottom")
grid.arrange(p_scatter_sex, p_scatter_addr, ncol = 2,
top = "Scatter Plot Y1 vs Y2 per Grup — Cek Multikolinearitas")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# ── Korelasi G1 vs G2 per setiap grup ──
cat("===== KORELASI G1 vs G2 PER GRUP =====\n\n")
## ===== KORELASI G1 vs G2 PER GRUP =====
# Per sex
cat("--- Per Grup X1 (sex) ---\n")
## --- Per Grup X1 (sex) ---
for (grp in levels(df$sex)) {
sub <- df %>% filter(sex == grp)
r <- round(cor(sub$G1, sub$G2, method = "pearson"), 4)
cat(" Grup:", grp, "→ r =", r,
ifelse(abs(r) < 0.90, "✓ OK (< 0.90)", "✗ Terlalu Tinggi (≥ 0.90)"), "\n")
}
## Grup: Perempuan → r = 0.8395 ✓ OK (< 0.90)
## Grup: Laki-laki → r = 0.8623 ✓ OK (< 0.90)
cat("\n--- Per Grup X2 (address) ---\n")
##
## --- Per Grup X2 (address) ---
for (grp in levels(df$address)) {
sub <- df %>% filter(address == grp)
r <- round(cor(sub$G1, sub$G2, method = "pearson"), 4)
cat(" Grup:", grp, "→ r =", r,
ifelse(abs(r) < 0.90, "✓ OK (< 0.90)", "✗ Terlalu Tinggi (≥ 0.90)"), "\n")
}
## Grup: Rural → r = 0.77 ✓ OK (< 0.90)
## Grup: Urban → r = 0.8778 ✓ OK (< 0.90)
cat("\n--- Per Grup Kombinasi sex × address ---\n")
##
## --- Per Grup Kombinasi sex × address ---
df_cek <- df %>%
group_by(sex, address) %>%
summarise(
n = n(),
r = round(cor(G1, G2, method = "pearson"), 4),
.groups = "drop"
) %>%
mutate(Status = ifelse(abs(r) < 0.90, "OK ✓", "Terlalu Tinggi ✗"))
print(df_cek)
## # A tibble: 4 × 5
## sex address n r Status
## <fct> <fct> <int> <dbl> <chr>
## 1 Perempuan Rural 44 0.785 OK ✓
## 2 Perempuan Urban 164 0.855 OK ✓
## 3 Laki-laki Rural 44 0.758 OK ✓
## 4 Laki-laki Urban 143 0.900 Terlalu Tinggi ✗
#8A Uji Linearitas: Scatter Plot dengan Garis Regresi
Scatter Plot X3 (absences) vs Y1 (G1) & Y2 (G2) - Keseluruhan
p_lin_G1 <- ggplot(df, aes(x = absences, y = G1)) +
geom_point(alpha = 0.4, size = 2, color = "#7B1FA2") +
geom_smooth(method = "lm", se = TRUE, color = "#7B1FA2",
linewidth = 1.2, linetype = "solid") +
geom_smooth(method = "loess", se = FALSE, color = "gray40",
linewidth = 0.9, linetype = "dashed") +
labs(
title = "Linearitas X3 (absences) → Y1 (G1)",
subtitle = "Ungu = garis regresi linear | Abu = LOESS (non-linear smoother)",
x = "X3: Jumlah Absen (absences)",
y = "Y1: Nilai Periode 1 (G1)"
) +
theme_minimal(base_size = 13)
p_lin_G2 <- ggplot(df, aes(x = absences, y = G2)) +
geom_point(alpha = 0.4, size = 2, color = "#0288D1") +
geom_smooth(method = "lm", se = TRUE, color = "#0288D1",
linewidth = 1.2, linetype = "solid") +
geom_smooth(method = "loess", se = FALSE, color = "gray40",
linewidth = 0.9, linetype = "dashed") +
labs(
title = "Linearitas X3 (absences) → Y2 (G2)",
subtitle = "Biru = garis regresi linear | Abu = LOESS (non-linear smoother)",
x = "X3: Jumlah Absen (absences)",
y = "Y2: Nilai Periode 2 (G2)"
) +
theme_minimal(base_size = 13)
grid.arrange(p_lin_G1, p_lin_G2, ncol = 2,
top = "Uji Linearitas: X3 (absences) vs Y1 (G1) & Y2 (G2)")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Scatter Plot per Grup X1 (sex)
p_lins_G1 <- ggplot(df, aes(x = absences, y = G1, color = sex)) +
geom_point(alpha = 0.4, size = 2) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1.1, aes(fill = sex)) +
scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
scale_fill_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
labs(
title = "Linearitas per X1 (sex) → Y1 (G1)",
subtitle = "Garis regresi terpisah per grup",
x = "X3: Absences", y = "Y1: G1", color = "Sex", fill = "Sex"
) +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
p_lins_G2 <- ggplot(df, aes(x = absences, y = G2, color = sex)) +
geom_point(alpha = 0.4, size = 2) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1.1, aes(fill = sex)) +
scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
scale_fill_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
labs(
title = "Linearitas per X1 (sex) → Y2 (G2)",
subtitle = "Garis regresi terpisah per grup",
x = "X3: Absences", y = "Y2: G2", color = "Sex", fill = "Sex"
) +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
grid.arrange(p_lins_G1, p_lins_G2, ncol = 2,
top = "Linearitas X3 → Y1 & Y2 per Grup X1 (sex)")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Scatter Plot per Grup X2 (address)
p_lina_G1 <- ggplot(df, aes(x = absences, y = G1, color = address)) +
geom_point(alpha = 0.4, size = 2) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1.1, aes(fill = address)) +
scale_color_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
scale_fill_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
labs(
title = "Linearitas per X2 (address) → Y1 (G1)",
subtitle = "Garis regresi terpisah per grup",
x = "X3: Absences", y = "Y1: G1", color = "Address", fill = "Address"
) +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
p_lina_G2 <- ggplot(df, aes(x = absences, y = G2, color = address)) +
geom_point(alpha = 0.4, size = 2) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1.1, aes(fill = address)) +
scale_color_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
scale_fill_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
labs(
title = "Linearitas per X2 (address) → Y2 (G2)",
subtitle = "Garis regresi terpisah per grup",
x = "X3: Absences", y = "Y2: G2", color = "Address", fill = "Address"
) +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
grid.arrange(p_lina_G1, p_lina_G2, ncol = 2,
top = "Linearitas X3 → Y1 & Y2 per Grup X2 (address)")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
8B Uji Linearitas: Korelasi Pearson X3 vs Y1, Y2
# ── Korelasi Pearson sebagai konfirmasi numerik linearitas ──
kor_global <- data.frame(
Perbandingan = c("X3 (absences) vs Y1 (G1)", "X3 (absences) vs Y2 (G2)"),
r = c(
round(cor(df$absences, df$G1, method = "pearson"), 4),
round(cor(df$absences, df$G2, method = "pearson"), 4)
),
p_value = c(
round(cor.test(df$absences, df$G1)$p.value, 4),
round(cor.test(df$absences, df$G2)$p.value, 4)
)
)
kor_global$Signifikan <- ifelse(kor_global$p_value < 0.05,
"Signifikan (p < 0.05)",
"Tidak Signifikan (p ≥ 0.05)")
kor_global$Keterangan <- ifelse(kor_global$p_value < 0.05,
"Ada hubungan linear — covariate relevan ✓",
"Hubungan linear lemah — covariate kurang kuat")
kable(kor_global,
caption = "Korelasi Pearson: X3 (absences) vs Y1 (G1) & Y2 (G2)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) %>%
column_spec(5, color = ifelse(kor_global$p_value < 0.05, "darkgreen", "orange"),
bold = TRUE)
| Perbandingan | r | p_value | Signifikan | Keterangan |
|---|---|---|---|---|
| X3 (absences) vs Y1 (G1) | -0.0310 | 0.5390 | Tidak Signifikan (p ≥ 0.05) | Hubungan linear lemah — covariate kurang kuat |
| X3 (absences) vs Y2 (G2) | -0.0318 | 0.5289 | Tidak Signifikan (p ≥ 0.05) | Hubungan linear lemah — covariate kurang kuat |
# ── Korelasi per grup ──
grp_kor <- bind_rows(
df %>%
group_by(Faktor = "sex", Grup = as.character(sex)) %>%
summarise(
r_G1 = round(cor(absences, G1), 4),
p_G1 = round(cor.test(absences, G1)$p.value, 4),
r_G2 = round(cor(absences, G2), 4),
p_G2 = round(cor.test(absences, G2)$p.value, 4),
.groups = "drop"
),
df %>%
group_by(Faktor = "address", Grup = as.character(address)) %>%
summarise(
r_G1 = round(cor(absences, G1), 4),
p_G1 = round(cor.test(absences, G1)$p.value, 4),
r_G2 = round(cor(absences, G2), 4),
p_G2 = round(cor.test(absences, G2)$p.value, 4),
.groups = "drop"
)
)
kable(grp_kor,
caption = "Korelasi Pearson X3 vs Y1 & Y2 per Grup",
col.names = c("Faktor", "Grup", "r (G1)", "p (G1)", "r (G2)", "p (G2)")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE)
| Faktor | Grup | r (G1) | p (G1) | r (G2) | p (G2) |
|---|---|---|---|---|---|
| sex | Laki-laki | -0.1097 | 0.1349 | -0.1188 | 0.1055 |
| sex | Perempuan | 0.0244 | 0.7267 | 0.0290 | 0.6776 |
| address | Rural | 0.0287 | 0.7905 | 0.0428 | 0.6920 |
| address | Urban | -0.0511 | 0.3719 | -0.0559 | 0.3287 |
8C Uji Homogenitas Slope (Interaksi Faktor × Covariate)
Model Interaksi: sex × absences → G1
model_int_G1_sex <- lm(G1 ~ sex * absences, data = df)
cat("MODEL: G1 ~ sex * absences\n")
## MODEL: G1 ~ sex * absences
cat(rep("─", 50), "\n", sep = "")
## ──────────────────────────────────────────────────
print(Anova(model_int_G1_sex, type = 2))
## Anova Table (Type II tests)
##
## Response: G1
## Sum Sq Df F value Pr(>F)
## sex 35.1 1 3.2118 0.07388 .
## absences 2.7 1 0.2462 0.62003
## sex:absences 24.4 1 2.2288 0.13627
## Residuals 4277.0 391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p_int_G1_sex <- Anova(model_int_G1_sex, type = 2)["sex:absences", "Pr(>F)"]
cat("\nInteraksi sex × absences → G1: p =", round(p_int_G1_sex, 4), "→",
ifelse(p_int_G1_sex > 0.05,
"Slope HOMOGEN — asumsi terpenuhi ✓",
"Slope TIDAK HOMOGEN — asumsi dilanggar ✗"), "\n")
##
## Interaksi sex × absences → G1: p = 0.1363 → Slope HOMOGEN — asumsi terpenuhi ✓
Model Interaksi: sex × absences → G2
model_int_G2_sex <- lm(G2 ~ sex * absences, data = df)
cat("MODEL: G2 ~ sex * absences\n")
## MODEL: G2 ~ sex * absences
cat(rep("─", 50), "\n", sep = "")
## ──────────────────────────────────────────────────
print(Anova(model_int_G2_sex, type = 2))
## Anova Table (Type II tests)
##
## Response: G2
## Sum Sq Df F value Pr(>F)
## sex 44.3 1 3.1588 0.07629 .
## absences 3.7 1 0.2631 0.60830
## sex:absences 37.9 1 2.6985 0.10125
## Residuals 5486.8 391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p_int_G2_sex <- Anova(model_int_G2_sex, type = 2)["sex:absences", "Pr(>F)"]
cat("\nInteraksi sex × absences → G2: p =", round(p_int_G2_sex, 4), "→",
ifelse(p_int_G2_sex > 0.05,
"Slope HOMOGEN — asumsi terpenuhi ✓",
"Slope TIDAK HOMOGEN — asumsi dilanggar ✗"), "\n")
##
## Interaksi sex × absences → G2: p = 0.1012 → Slope HOMOGEN — asumsi terpenuhi ✓
Model Interaksi: address × absences → G1
model_int_G1_addr <- lm(G1 ~ address * absences, data = df)
cat("MODEL: G1 ~ address * absences\n")
## MODEL: G1 ~ address * absences
cat(rep("─", 50), "\n", sep = "")
## ──────────────────────────────────────────────────
print(Anova(model_int_G1_addr, type = 2))
## Anova Table (Type II tests)
##
## Response: G1
## Sum Sq Df F value Pr(>F)
## address 20.6 1 1.8675 0.1725
## absences 3.7 1 0.3328 0.5644
## address:absences 5.8 1 0.5268 0.4684
## Residuals 4310.2 391
p_int_G1_addr <- Anova(model_int_G1_addr, type = 2)["address:absences", "Pr(>F)"]
cat("\nInteraksi address × absences → G1: p =", round(p_int_G1_addr, 4), "→",
ifelse(p_int_G1_addr > 0.05,
"Slope HOMOGEN — asumsi terpenuhi ✓",
"Slope TIDAK HOMOGEN — asumsi dilanggar ✗"), "\n")
##
## Interaksi address × absences → G1: p = 0.4684 → Slope HOMOGEN — asumsi terpenuhi ✓
Model Interaksi: address × absences → G2
model_int_G2_addr <- lm(G2 ~ address * absences, data = df)
cat("MODEL: G2 ~ address * absences\n")
## MODEL: G2 ~ address * absences
cat(rep("─", 50), "\n", sep = "")
## ──────────────────────────────────────────────────
print(Anova(model_int_G2_addr, type = 2))
## Anova Table (Type II tests)
##
## Response: G2
## Sum Sq Df F value Pr(>F)
## address 87.4 1 6.2455 0.01286 *
## absences 4.5 1 0.3185 0.57282
## address:absences 11.0 1 0.7861 0.37581
## Residuals 5470.7 391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p_int_G2_addr <- Anova(model_int_G2_addr, type = 2)["address:absences", "Pr(>F)"]
cat("\nInteraksi address × absences → G2: p =", round(p_int_G2_addr, 4), "→",
ifelse(p_int_G2_addr > 0.05,
"Slope HOMOGEN — asumsi terpenuhi ✓",
"Slope TIDAK HOMOGEN — asumsi dilanggar ✗"), "\n")
##
## Interaksi address × absences → G2: p = 0.3758 → Slope HOMOGEN — asumsi terpenuhi ✓
8D Visualisasi Homogenitas Slope
# ── Visualisasi slope per grup X1 (sex) ──
slope_sex <- df %>%
group_by(sex) %>%
summarise(
slope_G1 = round(coef(lm(G1 ~ absences))[2], 4),
slope_G2 = round(coef(lm(G2 ~ absences))[2], 4),
.groups = "drop"
)
cat("Slope X3 → Y1 (G1) per Grup sex:\n")
## Slope X3 → Y1 (G1) per Grup sex:
print(slope_sex)
## # A tibble: 2 × 3
## sex slope_G1 slope_G2
## <fct> <dbl> <dbl>
## 1 Perempuan 0.0083 0.0112
## 2 Laki-laki -0.0623 -0.0768
p_slope_G1_sex <- ggplot(df, aes(x = absences, y = G1, color = sex)) +
geom_point(alpha = 0.3, size = 1.8) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1.4) +
scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
annotate("text", x = 50, y = 18,
label = paste0("Interaksi: p = ", round(p_int_G1_sex, 3)),
size = 3.5, color = "gray30") +
labs(title = "Slope X3 → Y1 (G1) per Sex",
subtitle = paste0("p-interaksi = ", round(p_int_G1_sex, 4),
ifelse(p_int_G1_sex > 0.05, " → Slope Homogen ✓", " → Tidak Homogen ✗")),
x = "X3: Absences", y = "Y1: G1", color = "Sex") +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
p_slope_G2_sex <- ggplot(df, aes(x = absences, y = G2, color = sex)) +
geom_point(alpha = 0.3, size = 1.8) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1.4) +
scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
annotate("text", x = 50, y = 18,
label = paste0("Interaksi: p = ", round(p_int_G2_sex, 3)),
size = 3.5, color = "gray30") +
labs(title = "Slope X3 → Y2 (G2) per Sex",
subtitle = paste0("p-interaksi = ", round(p_int_G2_sex, 4),
ifelse(p_int_G2_sex > 0.05, " → Slope Homogen ✓", " → Tidak Homogen ✗")),
x = "X3: Absences", y = "Y2: G2", color = "Sex") +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
grid.arrange(p_slope_G1_sex, p_slope_G2_sex, ncol = 2,
top = "Homogenitas Slope: X3 (absences) per Grup X1 (sex)")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# ── Visualisasi slope per grup X2 (address) ──
slope_addr <- df %>%
group_by(address) %>%
summarise(
slope_G1 = round(coef(lm(G1 ~ absences))[2], 4),
slope_G2 = round(coef(lm(G2 ~ absences))[2], 4),
.groups = "drop"
)
cat("Slope X3 → Y1 (G1) per Grup address:\n")
## Slope X3 → Y1 (G1) per Grup address:
print(slope_addr)
## # A tibble: 2 × 3
## address slope_G1 slope_G2
## <fct> <dbl> <dbl>
## 1 Rural 0.0102 0.0174
## 2 Urban -0.0224 -0.0275
p_slope_G1_addr <- ggplot(df, aes(x = absences, y = G1, color = address)) +
geom_point(alpha = 0.3, size = 1.8) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1.4) +
scale_color_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
annotate("text", x = 50, y = 18,
label = paste0("Interaksi: p = ", round(p_int_G1_addr, 3)),
size = 3.5, color = "gray30") +
labs(title = "Slope X3 → Y1 (G1) per Address",
subtitle = paste0("p-interaksi = ", round(p_int_G1_addr, 4),
ifelse(p_int_G1_addr > 0.05, " → Slope Homogen ✓", " → Tidak Homogen ✗")),
x = "X3: Absences", y = "Y1: G1", color = "Address") +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
p_slope_G2_addr <- ggplot(df, aes(x = absences, y = G2, color = address)) +
geom_point(alpha = 0.3, size = 1.8) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1.4) +
scale_color_manual(values = c("Rural" = "#FF8F00", "Urban" = "#388E3C")) +
annotate("text", x = 50, y = 18,
label = paste0("Interaksi: p = ", round(p_int_G2_addr, 3)),
size = 3.5, color = "gray30") +
labs(title = "Slope X3 → Y2 (G2) per Address",
subtitle = paste0("p-interaksi = ", round(p_int_G2_addr, 4),
ifelse(p_int_G2_addr > 0.05, " → Slope Homogen ✓", " → Tidak Homogen ✗")),
x = "X3: Absences", y = "Y2: G2", color = "Address") +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
grid.arrange(p_slope_G1_addr, p_slope_G2_addr, ncol = 2,
top = "Homogenitas Slope: X3 (absences) per Grup X2 (address)")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# ── Tabel ringkasan lengkap ──
tabel_step8 <- data.frame(
Uji = c(
"Linearitas (Pearson): X3 vs Y1 — Global",
"Linearitas (Pearson): X3 vs Y2 — Global",
"Homogenitas Slope: sex × absences → Y1 (G1)",
"Homogenitas Slope: sex × absences → Y2 (G2)",
"Homogenitas Slope: address × absences → Y1 (G1)",
"Homogenitas Slope: address × absences → Y2 (G2)"
),
p_value = c(
round(cor.test(df$absences, df$G1)$p.value, 4),
round(cor.test(df$absences, df$G2)$p.value, 4),
round(p_int_G1_sex, 4),
round(p_int_G2_sex, 4),
round(p_int_G1_addr, 4),
round(p_int_G2_addr, 4)
),
Berlaku_untuk = c(
"ANCOVA & MANCOVA",
"ANCOVA & MANCOVA",
"ANCOVA & MANCOVA",
"ANCOVA & MANCOVA",
"ANCOVA & MANCOVA",
"ANCOVA & MANCOVA"
)
)
tabel_step8$Kesimpulan <- c(
ifelse(tabel_step8$p_value[1] < 0.05, "Hubungan linear signifikan ✓", "Hubungan linear lemah ⚠"),
ifelse(tabel_step8$p_value[2] < 0.05, "Hubungan linear signifikan ✓", "Hubungan linear lemah ⚠"),
ifelse(tabel_step8$p_value[3] > 0.05, "Slope Homogen ✓", "Slope TIDAK Homogen ✗"),
ifelse(tabel_step8$p_value[4] > 0.05, "Slope Homogen ✓", "Slope TIDAK Homogen ✗"),
ifelse(tabel_step8$p_value[5] > 0.05, "Slope Homogen ✓", "Slope TIDAK Homogen ✗"),
ifelse(tabel_step8$p_value[6] > 0.05, "Slope Homogen ✓", "Slope TIDAK Homogen ✗")
)
# Warna baris
warna_kes <- c(
ifelse(tabel_step8$p_value[1] < 0.05, "darkgreen", "orange"),
ifelse(tabel_step8$p_value[2] < 0.05, "darkgreen", "orange"),
ifelse(tabel_step8$p_value[3] > 0.05, "darkgreen", "red"),
ifelse(tabel_step8$p_value[4] > 0.05, "darkgreen", "red"),
ifelse(tabel_step8$p_value[5] > 0.05, "darkgreen", "red"),
ifelse(tabel_step8$p_value[6] > 0.05, "darkgreen", "red")
)
kable(tabel_step8,
caption = "Ringkasan Tahap 8 — Uji Linearitas & Homogenitas Slope") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = TRUE) %>%
column_spec(4, color = warna_kes, bold = TRUE) %>%
row_spec(1:2, background = "#F3E5F5") %>% # Linearitas — ungu muda
row_spec(3:6, background = "#E3F2FD") # Slope homogen — biru muda
| Uji | p_value | Berlaku_untuk | Kesimpulan |
|---|---|---|---|
| Linearitas (Pearson): X3 vs Y1 — Global | 0.5390 | ANCOVA & MANCOVA | Hubungan linear lemah ⚠ |
| Linearitas (Pearson): X3 vs Y2 — Global | 0.5289 | ANCOVA & MANCOVA | Hubungan linear lemah ⚠ |
| Homogenitas Slope: sex × absences → Y1 (G1) | 0.1363 | ANCOVA & MANCOVA | Slope Homogen ✓ |
| Homogenitas Slope: sex × absences → Y2 (G2) | 0.1012 | ANCOVA & MANCOVA | Slope Homogen ✓ |
| Homogenitas Slope: address × absences → Y1 (G1) | 0.4684 | ANCOVA & MANCOVA | Slope Homogen ✓ |
| Homogenitas Slope: address × absences → Y2 (G2) | 0.3758 | ANCOVA & MANCOVA | Slope Homogen ✓ |
Model: Y1 + Y2 ~ X1 + X2 (tanpa covariate)
Statistik uji: Wilks’ Lambda
Pertanyaan: Apakah sex dan/atau address berpengaruh signifikan terhadap kombinasi G1 dan G2?
if (!require(heplots)) install.packages("heplots")
library(heplots)
# Fit MANOVA
manova_model <- manova(cbind(G1, G2) ~ sex + address, data = df)
cat("===== MANOVA: cbind(G1, G2) ~ sex + address =====\n")
## ===== MANOVA: cbind(G1, G2) ~ sex + address =====
summary(manova_model, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## sex 1 0.99087 1.8022 2 391 0.16630
## address 1 0.97826 4.3442 2 391 0.01361 *
## Residuals 392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Pillai's Trace ---\n")
##
## --- Pillai's Trace ---
print(summary(manova_model, test = "Pillai"))
## Df Pillai approx F num Df den Df Pr(>F)
## sex 1 0.0091344 1.8022 2 391 0.16630
## address 1 0.0217377 4.3442 2 391 0.01361 *
## Residuals 392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Hotelling-Lawley Trace ---\n")
##
## --- Hotelling-Lawley Trace ---
print(summary(manova_model, test = "Hotelling-Lawley"))
## Df Hotelling-Lawley approx F num Df den Df Pr(>F)
## sex 1 0.0092186 1.8022 2 391 0.16630
## address 1 0.0222208 4.3442 2 391 0.01361 *
## Residuals 392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Roy's Largest Root ---\n")
##
## --- Roy's Largest Root ---
print(summary(manova_model, test = "Roy"))
## Df Roy approx F num Df den Df Pr(>F)
## sex 1 0.0092186 1.8022 2 391 0.16630
## address 1 0.0222208 4.3442 2 391 0.01361 *
## Residuals 392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("===== ANOVA Univariat (follow-up MANOVA) =====\n")
## ===== ANOVA Univariat (follow-up MANOVA) =====
summary.aov(manova_model)
## Response G1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 36.6 36.611 3.3521 0.06788 .
## address 1 22.7 22.723 2.0805 0.14999
## Residuals 392 4281.4 10.922
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response G2 :
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 46.3 46.265 3.3362 0.06853 .
## address 1 92.3 92.318 6.6571 0.01024 *
## Residuals 392 5436.1 13.868
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
wil_sex <- summary(manova_model, test = "Wilks")$stats["sex", ]
wil_addr <- summary(manova_model, test = "Wilks")$stats["address", ]
tabel_manova <- data.frame(
Efek = c("sex (X1)", "address (X2)"),
Lambda = round(c(wil_sex["Wilks"], wil_addr["Wilks"]), 4),
F_value = round(c(wil_sex["approx F"], wil_addr["approx F"]), 4),
p_value = round(c(wil_sex["Pr(>F)"], wil_addr["Pr(>F)"]), 4)
)
tabel_manova$Kesimpulan <- ifelse(
tabel_manova$p_value < 0.05,
"Signifikan — ada perbedaan multivariat ✓",
"Tidak signifikan ✗"
)
kable(tabel_manova,
caption = "Ringkasan MANOVA — Wilks' Lambda",
col.names = c("Efek", "Wilks λ", "F", "p-value", "Kesimpulan")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = TRUE) %>%
column_spec(5,
color = ifelse(tabel_manova$p_value < 0.05, "darkgreen", "red"),
bold = TRUE)
| Efek | Wilks λ | F | p-value | Kesimpulan |
|---|---|---|---|---|
| sex (X1) | 0.9909 | 1.8022 | 0.1663 | Tidak signifikan ✗ |
| address (X2) | 0.9783 | 4.3442 | 0.0136 | Signifikan — ada perbedaan multivariat ✓ |
par(mfrow = c(1, 2))
heplot(manova_model,
main = "HE Plot: sex (X1)\n[G1 vs G2]",
xlab = "Y1: G1 (Nilai Periode 1)",
ylab = "Y2: G2 (Nilai Periode 2)",
col = c("blue", "red"),
lwd = 2)
heplot(manova_model,
terms = "address",
main = "HE Plot: address (X2)\n[G1 vs G2]",
xlab = "Y1: G1 (Nilai Periode 1)",
ylab = "Y2: G2 (Nilai Periode 2)",
col = c("darkgreen", "orange"),
lwd = 2)
par(mfrow = c(1, 1))
Model: Y1 ~ X1 + X3 (covariate: absences)
Tujuan: Mengendalikan pengaruh absensi (X3) sebelum menguji efek sex (X1) terhadap G1
Output utama: Adjusted means (rata-rata terkoreksi covariate)
if (!require(emmeans)) install.packages("emmeans")
## Loading required package: emmeans
## Warning: package 'emmeans' was built under R version 4.5.3
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
if (!require(effectsize)) install.packages("effectsize")
## Loading required package: effectsize
## Warning: package 'effectsize' was built under R version 4.5.3
library(emmeans)
library(effectsize)
# ANCOVA: G1 ~ sex + absences
ancova_model <- lm(G1 ~ sex + absences, data = df)
cat("===== ANCOVA: G1 ~ sex + absences =====\n")
## ===== ANCOVA: G1 ~ sex + absences =====
print(Anova(ancova_model, type = 2))
## Anova Table (Type II tests)
##
## Response: G1
## Sum Sq Df F value Pr(>F)
## sex 35.1 1 3.2017 0.07433 .
## absences 2.7 1 0.2454 0.62058
## Residuals 4301.4 392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emm_ancova <- emmeans(ancova_model, ~ sex)
cat("===== Adjusted Means (Controlling for absences) =====\n")
## ===== Adjusted Means (Controlling for absences) =====
print(summary(emm_ancova))
## sex emmean SE df lower.CL upper.CL
## Perempuan 10.6 0.230 392 10.2 11.1
## Laki-laki 11.2 0.243 392 10.7 11.7
##
## Confidence level used: 0.95
# Tabel rapi
emm_df <- as.data.frame(summary(emm_ancova))
kable(emm_df,
caption = "Adjusted Means G1 per Sex (Setelah Kontrol Absences)",
digits = 3,
col.names = c("Sex", "Adj. Mean", "SE", "df", "Lower CI", "Upper CI")) %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Sex | Adj. Mean | SE | df | Lower CI | Upper CI |
|---|---|---|---|---|---|
| Perempuan | 10.625 | 0.230 | 392 | 10.173 | 11.077 |
| Laki-laki | 11.224 | 0.243 | 392 | 10.747 | 11.701 |
cat("===== Pairwise Contrast (Adjusted Means) =====\n")
## ===== Pairwise Contrast (Adjusted Means) =====
pairs(emm_ancova)
## contrast estimate SE df t.ratio p.value
## Perempuan - (Laki-laki) -0.599 0.335 392 -1.789 0.0743
eta_ancova <- eta_squared(Anova(ancova_model, type = 2), partial = TRUE)
kable(eta_ancova,
caption = "Effect Size ANCOVA — Partial η²",
digits = 4) %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = FALSE)
| Parameter | Eta2_partial | CI | CI_low | CI_high |
|---|---|---|---|---|
| sex | 0.0081 | 0.95 | 0 | 1 |
| absences | 0.0006 | 0.95 | 0 | 1 |
emm_plot_df <- as.data.frame(summary(emm_ancova))
ggplot(emm_plot_df, aes(x = sex, y = emmean, fill = sex)) +
geom_col(alpha = 0.8, width = 0.5, color = "white") +
geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
width = 0.15, linewidth = 0.9, color = "gray30") +
geom_text(aes(label = round(emmean, 2)),
vjust = -0.6, size = 4.5, fontface = "bold") +
scale_fill_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
labs(title = "ANCOVA — Adjusted Means G1 per Sex",
subtitle = "Setelah dikontrol covariate: Absences (X3)",
x = "Jenis Kelamin (X1)", y = "Adjusted Mean G1", fill = "Sex") +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
ggplot(df, aes(x = absences, y = G1, color = sex)) +
geom_point(alpha = 0.35, size = 2) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1.3) +
scale_color_manual(values = c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2")) +
labs(title = "ANCOVA — Hubungan Absences & G1 per Sex",
subtitle = "Garis regresi paralel = asumsi homogenitas slope terpenuhi",
x = "X3: Absences", y = "Y1: G1", color = "Sex") +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
## `geom_smooth()` using formula = 'y ~ x'
anv_ancova <- Anova(ancova_model, type = 2)
anv_df <- as.data.frame(anv_ancova)
anv_df$Variabel <- rownames(anv_df)
# Buang baris Residuals yang tidak punya p-value (NA)
anv_df <- anv_df[!is.na(anv_df$`Pr(>F)`), ]
anv_df$Kesimpulan <- ifelse(anv_df$`Pr(>F)` < 0.05,
"Signifikan ✓", "Tidak Signifikan ✗")
warna_kes <- ifelse(anv_df$`Pr(>F)` < 0.05, "darkgreen", "red")
kable(anv_df[, c("Variabel", "Sum Sq", "Df", "F value", "Pr(>F)", "Kesimpulan")],
caption = "Tabel ANCOVA: G1 ~ sex + absences",
digits = 4,
row.names = FALSE) %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) %>%
column_spec(6, color = warna_kes, bold = TRUE)
| Variabel | Sum Sq | Df | F value | Pr(>F) | Kesimpulan |
|---|---|---|---|---|---|
| sex | 35.1325 | 1 | 3.2017 | 0.0743 | Tidak Signifikan ✗ |
| absences | 2.6933 | 1 | 0.2454 | 0.6206 | Tidak Signifikan ✗ |
mancova_model <- manova(cbind(G1, G2) ~ sex + address + absences, data = df)
cat("===== MANCOVA: cbind(G1, G2) ~ sex + address + absences =====\n")
## ===== MANCOVA: cbind(G1, G2) ~ sex + address + absences =====
cat("\n--- Pillai's Trace ---\n")
##
## --- Pillai's Trace ---
print(summary(mancova_model, test = "Pillai"))
## Df Pillai approx F num Df den Df Pr(>F)
## sex 1 0.0091394 1.7986 2 390 0.16690
## address 1 0.0217429 4.3341 2 390 0.01375 *
## absences 1 0.0005512 0.1075 2 390 0.89807
## Residuals 391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Wilks' Lambda ---\n")
##
## --- Wilks' Lambda ---
print(summary(mancova_model, test = "Wilks"))
## Df Wilks approx F num Df den Df Pr(>F)
## sex 1 0.99086 1.7986 2 390 0.16690
## address 1 0.97826 4.3341 2 390 0.01375 *
## absences 1 0.99945 0.1075 2 390 0.89807
## Residuals 391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Hotelling-Lawley Trace ---\n")
##
## --- Hotelling-Lawley Trace ---
print(summary(mancova_model, test = "Hotelling-Lawley"))
## Df Hotelling-Lawley approx F num Df den Df Pr(>F)
## sex 1 0.0092237 1.7986 2 390 0.16690
## address 1 0.0222262 4.3341 2 390 0.01375 *
## absences 1 0.0005515 0.1075 2 390 0.89807
## Residuals 391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Roy's Largest Root ---\n")
##
## --- Roy's Largest Root ---
print(summary(mancova_model, test = "Roy"))
## Df Roy approx F num Df den Df Pr(>F)
## sex 1 0.0092237 1.7986 2 390 0.16690
## address 1 0.0222262 4.3341 2 390 0.01375 *
## absences 1 0.0005515 0.1075 2 390 0.89807
## Residuals 391
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("===== ANOVA Univariat per DV (setelah kontrol absences) =====\n")
## ===== ANOVA Univariat per DV (setelah kontrol absences) =====
summary.aov(mancova_model)
## Response G1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 36.6 36.611 3.3453 0.06816 .
## address 1 22.7 22.723 2.0762 0.15041
## absences 1 2.2 2.248 0.2054 0.65062
## Residuals 391 4279.1 10.944
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response G2 :
## Df Sum Sq Mean Sq F value Pr(>F)
## sex 1 46.3 46.265 3.3293 0.06882 .
## address 1 92.3 92.318 6.6434 0.01032 *
## absences 1 2.7 2.674 0.1924 0.66115
## Residuals 391 5433.4 13.896
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pillai ringkasan per efek
pil <- summary(mancova_model, test = "Pillai")$stats
tabel_mancova <- data.frame(
Efek = rownames(pil)[1:(nrow(pil)-1)],
Pillai = round(pil[1:(nrow(pil)-1), "Pillai"], 4),
F_value = round(pil[1:(nrow(pil)-1), "approx F"], 4),
p_value = round(pil[1:(nrow(pil)-1), "Pr(>F)"], 4)
)
tabel_mancova$Kesimpulan <- ifelse(
tabel_mancova$p_value < 0.05,
"Signifikan ✓",
"Tidak Signifikan ✗"
)
kable(tabel_mancova,
caption = "Ringkasan MANCOVA — Pillai's Trace",
col.names = c("Efek", "Pillai", "F", "p-value", "Kesimpulan")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = TRUE) %>%
column_spec(5,
color = ifelse(tabel_mancova$p_value < 0.05, "darkgreen", "red"),
bold = TRUE)
| Efek | Pillai | F | p-value | Kesimpulan | |
|---|---|---|---|---|---|
| sex | sex | 0.0091 | 1.7986 | 0.1669 | Tidak Signifikan ✗ |
| address | address | 0.0217 | 4.3341 | 0.0138 | Signifikan ✓ |
| absences | absences | 0.0006 | 0.1075 | 0.8981 | Tidak Signifikan ✗ |
# Model univariat per DV (kontrol absences)
lm_G1 <- lm(G1 ~ sex + address + absences, data = df)
lm_G2 <- lm(G2 ~ sex + address + absences, data = df)
emm_G1_sex <- emmeans(lm_G1, ~ sex)
emm_G2_sex <- emmeans(lm_G2, ~ sex)
emm_G1_addr <- emmeans(lm_G1, ~ address)
emm_G2_addr <- emmeans(lm_G2, ~ address)
cat("=== Adjusted Means G1 per sex ===\n")
## === Adjusted Means G1 per sex ===
print(summary(emm_G1_sex))
## sex emmean SE df lower.CL upper.CL
## Perempuan 10.5 0.257 391 9.95 11.0
## Laki-laki 11.1 0.264 391 10.55 11.6
##
## Results are averaged over the levels of: address
## Confidence level used: 0.95
cat("\n=== Adjusted Means G2 per sex ===\n")
##
## === Adjusted Means G2 per sex ===
print(summary(emm_G2_sex))
## sex emmean SE df lower.CL upper.CL
## Perempuan 10.1 0.290 391 9.49 10.6
## Laki-laki 10.8 0.298 391 10.18 11.3
##
## Results are averaged over the levels of: address
## Confidence level used: 0.95
cat("\n=== Adjusted Means G1 per address ===\n")
##
## === Adjusted Means G1 per address ===
print(summary(emm_G1_addr))
## address emmean SE df lower.CL upper.CL
## Rural 10.5 0.353 391 9.79 11.2
## Urban 11.1 0.189 391 10.68 11.4
##
## Results are averaged over the levels of: sex
## Confidence level used: 0.95
cat("\n=== Adjusted Means G2 per address ===\n")
##
## === Adjusted Means G2 per address ===
print(summary(emm_G2_addr))
## address emmean SE df lower.CL upper.CL
## Rural 9.83 0.398 391 9.05 10.6
## Urban 10.99 0.213 391 10.57 11.4
##
## Results are averaged over the levels of: sex
## Confidence level used: 0.95
make_emm_plot <- function(emm_obj, fill_col, title_str, y_lab) {
df_plot <- as.data.frame(summary(emm_obj))
x_var <- names(df_plot)[1] # kolom pertama = faktor
ggplot(df_plot, aes_string(x = x_var, y = "emmean", fill = x_var)) +
geom_col(alpha = 0.82, width = 0.5, color = "white") +
geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
width = 0.15, linewidth = 0.8, color = "gray30") +
geom_text(aes(label = round(emmean, 2)),
vjust = -0.55, size = 4, fontface = "bold") +
scale_fill_manual(values = fill_col) +
labs(title = title_str,
x = x_var, y = y_lab) +
theme_minimal(base_size = 12) +
theme(legend.position = "none")
}
p1 <- make_emm_plot(emm_G1_sex,
c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2"),
"Adj. Mean G1 per Sex\n(dikontrol absences)", "Adj. Mean G1")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
p2 <- make_emm_plot(emm_G2_sex,
c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2"),
"Adj. Mean G2 per Sex\n(dikontrol absences)", "Adj. Mean G2")
p3 <- make_emm_plot(emm_G1_addr,
c("Rural" = "#FF8F00", "Urban" = "#388E3C"),
"Adj. Mean G1 per Address\n(dikontrol absences)", "Adj. Mean G1")
p4 <- make_emm_plot(emm_G2_addr,
c("Rural" = "#FF8F00", "Urban" = "#388E3C"),
"Adj. Mean G2 per Address\n(dikontrol absences)", "Adj. Mean G2")
grid.arrange(p1, p2, p3, p4, ncol = 2,
top = "MANCOVA — Adjusted Means (G1 & G2) setelah Kontrol Absences")
wil_manova <- summary(manova_model, test = "Wilks")$stats
pil_mancova <- summary(mancova_model, test = "Pillai")$stats
efek_names <- c("sex", "address")
rows_manova <- efek_names[efek_names %in% rownames(wil_manova)]
rows_mancova <- efek_names[efek_names %in% rownames(pil_mancova)]
compare_df <- data.frame(
Efek = efek_names,
MANOVA_p = round(wil_manova[rows_manova, "Pr(>F)"], 4),
MANCOVA_p = round(pil_mancova[rows_mancova, "Pr(>F)"], 4)
)
compare_df$Perubahan <- ifelse(
(compare_df$MANOVA_p < 0.05) == (compare_df$MANCOVA_p < 0.05),
"Konsisten ✓", "Berubah setelah kontrol covariate ⚠"
)
kable(compare_df,
caption = "Perbandingan Signifikansi MANOVA vs MANCOVA",
col.names = c("Efek", "p-value MANOVA\n(Wilks)", "p-value MANCOVA\n(Pillai)", "Perubahan")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) %>%
column_spec(4, bold = TRUE)
| Efek | p-value MANOVA (Wilks |
p-value MANCOVA
(Pilla
|
)|Perubahan | |
|---|---|---|---|---|
| sex | sex | 0.1663 | 0.1669 | Konsisten ✓ |
| address | address | 0.0136 | 0.0138 | Konsisten ✓ |
Post-hoc dilakukan pada variabel yang signifikan di MANOVA.
Karena X1 (sex) hanya 2 level, pairwise langsung informatif.
if (!require(emmeans)) install.packages("emmeans")
if (!require(multcomp)) install.packages("multcomp")
## Loading required package: multcomp
## Warning: package 'multcomp' was built under R version 4.5.3
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 4.5.3
##
## Attaching package: 'mvtnorm'
## The following object is masked from 'package:effectsize':
##
## standardize
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:MVN':
##
## royston
## Loading required package: TH.data
## Warning: package 'TH.data' was built under R version 4.5.3
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
library(emmeans)
library(multcomp)
lm_G1_sex <- lm(G1 ~ sex, data = df)
emm_ph_G1_sex <- emmeans(lm_G1_sex, ~ sex)
cat("===== Post-hoc: sex → G1 (Tukey) =====\n")
## ===== Post-hoc: sex → G1 (Tukey) =====
print(pairs(emm_ph_G1_sex, adjust = "tukey"))
## contrast estimate SE df t.ratio p.value
## Perempuan - (Laki-laki) -0.61 0.333 393 -1.828 0.0683
lm_G2_sex <- lm(G2 ~ sex, data = df)
emm_ph_G2_sex <- emmeans(lm_G2_sex, ~ sex)
cat("===== Post-hoc: sex → G2 (Tukey) =====\n")
## ===== Post-hoc: sex → G2 (Tukey) =====
print(pairs(emm_ph_G2_sex, adjust = "tukey"))
## contrast estimate SE df t.ratio p.value
## Perempuan - (Laki-laki) -0.685 0.378 393 -1.814 0.0705
lm_G1_addr <- lm(G1 ~ address, data = df)
emm_ph_G1_addr <- emmeans(lm_G1_addr, ~ address)
cat("===== Post-hoc: address → G1 (Tukey) =====\n")
## ===== Post-hoc: address → G1 (Tukey) =====
print(pairs(emm_ph_G1_addr, adjust = "tukey"))
## contrast estimate SE df t.ratio p.value
## Rural - Urban -0.555 0.401 393 -1.385 0.1668
lm_G2_addr <- lm(G2 ~ address, data = df)
emm_ph_G2_addr <- emmeans(lm_G2_addr, ~ address)
cat("===== Post-hoc: address → G2 (Tukey) =====\n")
## ===== Post-hoc: address → G2 (Tukey) =====
print(pairs(emm_ph_G2_addr, adjust = "tukey"))
## contrast estimate SE df t.ratio p.value
## Rural - Urban -1.14 0.452 393 -2.519 0.0122
plot_posthoc <- function(emm_obj, fill_vals, title_str, y_lbl) {
df_p <- as.data.frame(summary(emm_obj))
xv <- names(df_p)[1]
ggplot(df_p, aes_string(x = xv, y = "emmean", fill = xv)) +
geom_col(alpha = 0.8, width = 0.5, color = "white") +
geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
width = 0.15, linewidth = 0.8, color = "gray40") +
geom_text(aes(label = round(emmean, 2)),
vjust = -0.5, size = 4.2, fontface = "bold") +
scale_fill_manual(values = fill_vals) +
labs(title = title_str, x = xv, y = y_lbl) +
theme_minimal(base_size = 12) +
theme(legend.position = "none")
}
p_ph1 <- plot_posthoc(emm_ph_G1_sex,
c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2"),
"Post-hoc: sex → G1", "Mean G1")
p_ph2 <- plot_posthoc(emm_ph_G2_sex,
c("Perempuan" = "#E91E8C", "Laki-laki" = "#1976D2"),
"Post-hoc: sex → G2", "Mean G2")
p_ph3 <- plot_posthoc(emm_ph_G1_addr,
c("Rural" = "#FF8F00", "Urban" = "#388E3C"),
"Post-hoc: address → G1", "Mean G1")
p_ph4 <- plot_posthoc(emm_ph_G2_addr,
c("Rural" = "#FF8F00", "Urban" = "#388E3C"),
"Post-hoc: address → G2", "Mean G2")
grid.arrange(p_ph1, p_ph2, p_ph3, p_ph4, ncol = 2,
top = "Post-hoc Pairwise Comparisons (Tukey) — MANOVA")
library(effectsize)
# MANOVA — univariate follow-up
aov_manova_G1 <- aov(G1 ~ sex + address, data = df)
aov_manova_G2 <- aov(G2 ~ sex + address, data = df)
eta_G1 <- eta_squared(aov_manova_G1, partial = TRUE)
eta_G2 <- eta_squared(aov_manova_G2, partial = TRUE)
cat("===== Effect Size MANOVA — Y1 (G1) =====\n")
## ===== Effect Size MANOVA — Y1 (G1) =====
print(eta_G1)
## # Effect Size for ANOVA (Type I)
##
## Parameter | Eta2 (partial) | 95% CI
## -----------------------------------------
## sex | 8.48e-03 | [0.00, 1.00]
## address | 5.28e-03 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
cat("\n===== Effect Size MANOVA — Y2 (G2) =====\n")
##
## ===== Effect Size MANOVA — Y2 (G2) =====
print(eta_G2)
## # Effect Size for ANOVA (Type I)
##
## Parameter | Eta2 (partial) | 95% CI
## -----------------------------------------
## sex | 8.44e-03 | [0.00, 1.00]
## address | 0.02 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
# ANCOVA
cat("\n===== Effect Size ANCOVA — G1 ~ sex + absences =====\n")
##
## ===== Effect Size ANCOVA — G1 ~ sex + absences =====
print(eta_squared(Anova(ancova_model, type = 2), partial = TRUE))
## # Effect Size for ANOVA (Type II)
##
## Parameter | Eta2 (partial) | 95% CI
## -----------------------------------------
## sex | 8.10e-03 | [0.00, 1.00]
## absences | 6.26e-04 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
# MANCOVA univariate
aov_mc_G1 <- aov(G1 ~ sex + address + absences, data = df)
aov_mc_G2 <- aov(G2 ~ sex + address + absences, data = df)
cat("\n===== Effect Size MANCOVA — Y1 (G1) =====\n")
##
## ===== Effect Size MANCOVA — Y1 (G1) =====
print(eta_squared(aov_mc_G1, partial = TRUE))
## # Effect Size for ANOVA (Type I)
##
## Parameter | Eta2 (partial) | 95% CI
## -----------------------------------------
## sex | 8.48e-03 | [0.00, 1.00]
## address | 5.28e-03 | [0.00, 1.00]
## absences | 5.25e-04 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
cat("\n===== Effect Size MANCOVA — Y2 (G2) =====\n")
##
## ===== Effect Size MANCOVA — Y2 (G2) =====
print(eta_squared(aov_mc_G2, partial = TRUE))
## # Effect Size for ANOVA (Type I)
##
## Parameter | Eta2 (partial) | 95% CI
## -----------------------------------------
## sex | 8.44e-03 | [0.00, 1.00]
## address | 0.02 | [0.00, 1.00]
## absences | 4.92e-04 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
# Ambil nilai partial eta squared
get_eta <- function(model, dv_name, model_name) {
eta_obj <- tryCatch(
eta_squared(model, partial = TRUE),
error = function(e) NULL
)
if (is.null(eta_obj)) return(NULL)
df_eta <- as.data.frame(eta_obj)
df_eta$DV <- dv_name
df_eta$Model <- model_name
df_eta
}
eta_rows <- rbind(
get_eta(aov_manova_G1, "G1", "MANOVA"),
get_eta(aov_manova_G2, "G2", "MANOVA"),
get_eta(aov_mc_G1, "G1", "MANCOVA"),
get_eta(aov_mc_G2, "G2", "MANCOVA")
)
if (!is.null(eta_rows)) {
eta_rows$Interpretasi <- ifelse(
eta_rows$Eta2_partial >= 0.14, "Besar (≥.14)",
ifelse(eta_rows$Eta2_partial >= 0.06, "Sedang (≥.06)",
ifelse(eta_rows$Eta2_partial >= 0.01, "Kecil (≥.01)", "Trivial (<.01)"))
)
kable(eta_rows[, c("Model", "DV", "Parameter", "Eta2_partial", "CI_low", "CI_high", "Interpretasi")],
caption = "Tabel Effect Size (Partial η²) — MANOVA & MANCOVA",
digits = 4,
row.names = FALSE) %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = TRUE) %>%
column_spec(7, bold = TRUE,
color = ifelse(eta_rows$Eta2_partial >= 0.06, "darkgreen",
ifelse(eta_rows$Eta2_partial >= 0.01, "orange", "red")))
}
| Model | DV | Parameter | Eta2_partial | CI_low | CI_high | Interpretasi |
|---|---|---|---|---|---|---|
| MANOVA | G1 | sex | 0.0085 | 0.0000 | 1 | Trivial (<.01) |
| MANOVA | G1 | address | 0.0053 | 0.0000 | 1 | Trivial (<.01) |
| MANOVA | G2 | sex | 0.0084 | 0.0000 | 1 | Trivial (<.01) |
| MANOVA | G2 | address | 0.0167 | 0.0022 | 1 | Kecil (≥.01) |
| MANCOVA | G1 | sex | 0.0085 | 0.0000 | 1 | Trivial (<.01) |
| MANCOVA | G1 | address | 0.0053 | 0.0000 | 1 | Trivial (<.01) |
| MANCOVA | G1 | absences | 0.0005 | 0.0000 | 1 | Trivial (<.01) |
| MANCOVA | G2 | sex | 0.0084 | 0.0000 | 1 | Trivial (<.01) |
| MANCOVA | G2 | address | 0.0167 | 0.0022 | 1 | Kecil (≥.01) |
| MANCOVA | G2 | absences | 0.0005 | 0.0000 | 1 | Trivial (<.01) |