Load package

packages <- c(
  "tidyverse",    
  "haven",        
  "skimr",        
  "naniar",       
  "ggcorrplot::ggcorrplot()",   
  "GGally::ggpairs()",
  "GGally::wrap()",       
  "mvnormtest",   
  "MVN::mvn()",          
  "car::leveneTest()",          
  "biotools::boxM()",
  "heplots::manova()",
  "psych::describe()",        
  "gridExtra::grid.arrange()",    
  "ggpubr",       
  "mice",         
  "VIM::aggr()",          
  "moments::skewness()", 
  "moments::kurtosis()",
  "rstatix"       
)

cat("semua package berhasil dimuat.\n\n")

## semua package berhasil dimuat.

Load data

gss <- read.csv("GSS2024.csv")

gss_raw <- read.csv("GSS2024.csv", stringsAsFactors = FALSE, na.strings = c("", "NA", "NaN"))
 
cat(sprintf("Dataset dimuat: %d baris × %d kolom\n\n", nrow(gss_raw), ncol(gss_raw)))

## Dataset dimuat: 3986 baris × 980 kolom

Seleksi variabel relevan

library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.5.3

## Warning: package 'dplyr' was built under R version 4.5.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

dv_wellbeing <- c(
  "happy",   
  "health",   
  "life",     
  "satfin"    
)

dv_trust <- c(
  "trust",   
  "helpful",  
  "fair"      
)

iv_categorical <- c(
  "sex",     
  "race",    
  "marital",  
  "degree"   
) 
covariates <- c(
  "age",    
  "educ",   
  "income"  
)

all_vars <- c(dv_wellbeing, dv_trust, iv_categorical, covariates)
gss <- gss_raw %>%
  select(all_of(all_vars)) %>%
  as.data.frame()

cat("Variabel yang dipilih:\n")

## Variabel yang dipilih:

cat("  DV Kesejahteraan :", paste(dv_wellbeing, collapse = ", "), "\n")

##   DV Kesejahteraan : happy, health, life, satfin

cat("  DV Kepercayaan   :", paste(dv_trust,    collapse = ", "), "\n")

##   DV Kepercayaan   : trust, helpful, fair

cat("  IV Kategorik     :", paste(iv_categorical, collapse = ", "), "\n")

##   IV Kategorik     : sex, race, marital, degree

cat("  Kovariat         :", paste(covariates, collapse = ", "), "\n")

##   Kovariat         : age, educ, income

cat(sprintf("\n  Subset: %d baris × %d kolom\n\n", nrow(gss), ncol(gss)))

## 
##   Subset: 3986 baris × 14 kolom

EDA AWAL

Struktur data

cat("─── Struktur Data ───\n")

## ─── Struktur Data ───

str(gss)

## 'data.frame':    3986 obs. of  14 variables:
##  $ happy  : int  3 2 2 2 3 2 2 3 1 2 ...
##  $ health : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ life   : int  NA NA 2 2 2 NA NA 1 2 2 ...
##  $ satfin : int  3 2 1 2 2 3 2 3 2 2 ...
##  $ trust  : int  NA 2 NA NA NA NA NA 2 NA NA ...
##  $ helpful: int  NA 3 NA NA NA NA NA 2 NA NA ...
##  $ fair   : int  NA 2 NA NA NA NA NA 1 NA NA ...
##  $ sex    : int  1 1 2 1 2 1 2 2 2 1 ...
##  $ race   : int  2 1 1 3 1 3 1 1 2 2 ...
##  $ marital: int  5 5 1 5 3 1 1 3 1 5 ...
##  $ degree : int  3 4 2 1 1 2 1 1 2 1 ...
##  $ age    : int  33 64 69 19 70 53 48 30 60 25 ...
##  $ educ   : int  16 16 14 12 13 14 13 14 14 12 ...
##  $ income : int  12 12 12 12 12 7 12 8 12 NA ...

Statistik deskriptif

library(skimr)

## Warning: package 'skimr' was built under R version 4.5.3

cat("\n─── Ringkasan Statistik (skimr) ───\n")

## 
## ─── Ringkasan Statistik (skimr) ───

skim_result <- skim(gss)
print(skim_result)

## ── Data Summary ────────────────────────
##                            Values
## Name                       gss   
## Number of rows             3986  
## Number of columns          14    
## _______________________          
## Column type frequency:           
##   numeric                  14    
## ________________________         
## Group variables            None  
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable n_missing complete_rate  mean     sd p0 p25 p50 p75 p100 hist 
##  1 happy                32         0.992  2.01  0.646  1   2   2   2    3 ▃▁▇▁▃
##  2 health               22         0.994  2.17  0.749  1   2   2   3    4 ▂▇▁▃▁
##  3 life               1320         0.669  1.70  0.584  1   1   2   2    3 ▅▁▇▁▁
##  4 satfin               22         0.994  2.10  0.731  1   2   2   3    3 ▃▁▇▁▆
##  5 trust              3040         0.237  1.86  0.587  1   2   2   2    3 ▃▁▇▁▂
##  6 helpful            3047         0.236  1.76  0.688  1   1   2   2    3 ▆▁▇▁▂
##  7 fair               3046         0.236  1.66  0.684  1   1   2   2    3 ▇▁▇▁▂
##  8 sex                  30         0.992  1.55  0.498  1   1   2   2    2 ▆▁▁▁▇
##  9 race                 80         0.980  1.54  0.764  1   1   1   2    3 ▇▁▂▁▂
## 10 marital              15         0.996  2.80  1.75   1   1   3   5    5 ▇▁▃▁▆
## 11 degree                7         0.998  1.87  1.27   0   1   1   3    4 ▂▇▂▃▂
## 12 age                 127         0.968 49.0  17.7   18  34  48  64   89 ▆▇▆▇▂
## 13 educ                 34         0.991 14.2   2.92   0  12  14  16   20 ▁▁▅▇▃
## 14 income              423         0.894 11.0   2.48   1  12  12  12   12 ▁▁▁▁▇

Statistik deskriptif per variabel

cat("\n─── Statistik Deskriptif Detil (psych) ───\n")

## 
## ─── Statistik Deskriptif Detil (psych) ───

desc_stats <- psych::describe(gss)
print(desc_stats)

##         vars    n  mean    sd median trimmed   mad min max range  skew kurtosis
## happy      1 3954  2.01  0.65      2    2.02  0.00   1   3     2 -0.01    -0.61
## health     2 3964  2.17  0.75      2    2.15  0.00   1   4     3  0.34    -0.07
## life       3 2666  1.70  0.58      2    1.67  0.00   1   3     2  0.17    -0.60
## satfin     4 3964  2.10  0.73      2    2.12  1.48   1   3     2 -0.15    -1.12
## trust      5  946  1.86  0.59      2    1.83  0.00   1   3     2  0.03    -0.24
## helpful    6  939  1.76  0.69      2    1.70  1.48   1   3     2  0.35    -0.89
## fair       7  940  1.66  0.68      2    1.58  1.48   1   3     2  0.54    -0.79
## sex        8 3956  1.55  0.50      2    1.56  0.00   1   2     1 -0.19    -1.96
## race       9 3906  1.54  0.76      1    1.43  0.00   1   3     2  0.98    -0.59
## marital   10 3971  2.80  1.75      3    2.75  2.97   1   5     4  0.21    -1.70
## degree    11 3979  1.87  1.27      1    1.82  1.48   0   4     4  0.41    -1.16
## age       12 3859 49.03 17.66     48   48.68 22.24  18  89    71  0.16    -1.00
## educ      13 3952 14.24  2.92     14   14.27  2.97   0  20    20 -0.65     2.38
## income    14 3563 11.02  2.48     12   11.72  0.00   1  12    11 -2.95     7.95
##           se
## happy   0.01
## health  0.01
## life    0.01
## satfin  0.01
## trust   0.02
## helpful 0.02
## fair    0.02
## sex     0.01
## race    0.01
## marital 0.03
## degree  0.02
## age     0.28
## educ    0.05
## income  0.04

ANALISIS MISSING VALUE

jumlah dan persentase missing

cat("─── Missing Values per Variabel ───\n")

## ─── Missing Values per Variabel ───

missing_summary <- data.frame(
  Variabel    = names(gss),
  N_Missing   = colSums(is.na(gss)),
  Pct_Missing = round(colMeans(is.na(gss)) * 100, 2)
) %>% arrange(desc(Pct_Missing))
print(missing_summary)

##         Variabel N_Missing Pct_Missing
## helpful  helpful      3047       76.44
## fair        fair      3046       76.42
## trust      trust      3040       76.27
## life        life      1320       33.12
## income    income       423       10.61
## age          age       127        3.19
## race        race        80        2.01
## educ        educ        34        0.85
## happy      happy        32        0.80
## sex          sex        30        0.75
## health    health        22        0.55
## satfin    satfin        22        0.55
## marital  marital        15        0.38
## degree    degree         7        0.18

Total missing value

cat(sprintf("Total missing values : %d\n", sum(is.na(gss))))

## Total missing values : 11245

cat(sprintf("Total sel data       : %d\n", nrow(gss) * ncol(gss)))

## Total sel data       : 55804

cat(sprintf("Persentase missing   : %.2f%%\n\n", mean(is.na(gss)) * 100))

## Persentase missing   : 20.15%

PREPROCESSING

Recording nilai valid

cat("─── Recoding Nilai Tidak Valid ───\n")

## ─── Recoding Nilai Tidak Valid ───

recode_gss_na <- function(x, invalid_codes = c(8, 9, 98, 99)) {
  x[x %in% invalid_codes] <- NA
  return(x)
}
 
ordinal_vars <- c(dv_wellbeing, dv_trust, iv_categorical)
gss_clean <- gss
gss_clean[ordinal_vars] <- lapply(gss[ordinal_vars], recode_gss_na)
 
gss_clean$income[gss_clean$income > 12] <- NA
 
cat("  Recoding selesai. Missing values setelah recoding:\n")

##   Recoding selesai. Missing values setelah recoding:

print(colSums(is.na(gss_clean)))

##   happy  health    life  satfin   trust helpful    fair     sex    race marital 
##      32      22    1320      22    3040    3047    3046      30      80      15 
##  degree     age    educ  income 
##       7     127      34     423

Konversi tipe variabel

cat("\n─── Konversi Tipe Variabel ───\n")

## 
## ─── Konversi Tipe Variabel ───

# Variabel kategorik → factor dengan label
gss_clean$sex <- factor(gss_clean$sex,
                         levels = 1:2,
                         labels = c("Pria", "Wanita"))
 
gss_clean$race <- factor(gss_clean$race,
                          levels = 1:3,
                          labels = c("Putih", "Hitam", "Lainnya"))
 
gss_clean$marital <- factor(gss_clean$marital,
                             levels = 1:5,
                             labels = c("Menikah", "Janda/Duda", "Cerai",
                                        "Pisah", "Belum Menikah"))
 
gss_clean$degree <- factor(gss_clean$degree,
                            levels = 0:4,
                            labels = c("< SMA", "SMA", "Diploma",
                                       "Sarjana", "Pascasarjana"),
                            ordered = TRUE)

# Variabel kontinu → numeric
gss_clean$age    <- as.numeric(gss_clean$age)
gss_clean$educ   <- as.numeric(gss_clean$educ)
gss_clean$income <- as.numeric(gss_clean$income)
 
# Variabel DV → numeric
gss_clean[c(dv_wellbeing, dv_trust)] <- lapply(
  gss_clean[c(dv_wellbeing, dv_trust)], as.numeric
)
 
cat("  Tipe variabel setelah konversi:\n")

##   Tipe variabel setelah konversi:

str(gss_clean)

## 'data.frame':    3986 obs. of  14 variables:
##  $ happy  : num  3 2 2 2 3 2 2 3 1 2 ...
##  $ health : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ life   : num  NA NA 2 2 2 NA NA 1 2 2 ...
##  $ satfin : num  3 2 1 2 2 3 2 3 2 2 ...
##  $ trust  : num  NA 2 NA NA NA NA NA 2 NA NA ...
##  $ helpful: num  NA 3 NA NA NA NA NA 2 NA NA ...
##  $ fair   : num  NA 2 NA NA NA NA NA 1 NA NA ...
##  $ sex    : Factor w/ 2 levels "Pria","Wanita": 1 1 2 1 2 1 2 2 2 1 ...
##  $ race   : Factor w/ 3 levels "Putih","Hitam",..: 2 1 1 3 1 3 1 1 2 2 ...
##  $ marital: Factor w/ 5 levels "Menikah","Janda/Duda",..: 5 5 1 5 3 1 1 3 1 5 ...
##  $ degree : Ord.factor w/ 5 levels "< SMA"<"SMA"<..: 4 5 3 2 2 3 2 2 3 2 ...
##  $ age    : num  33 64 69 19 70 53 48 30 60 25 ...
##  $ educ   : num  16 16 14 12 13 14 13 14 14 12 ...
##  $ income : num  12 12 12 12 12 7 12 8 12 NA ...

Handling missing value

cat("\n─── Penanganan Missing Values ───\n")

## 
## ─── Penanganan Missing Values ───

# Hitung baris dengan data lengkap
n_complete <- sum(complete.cases(gss_clean))
n_total    <- nrow(gss_clean)
cat(sprintf("  Baris complete cases: %d dari %d (%.1f%%)\n\n",
            n_complete, n_total, n_complete / n_total * 100))

##   Baris complete cases: 398 dari 3986 (10.0%)

gss_listwise <- gss_clean %>% drop_na()
cat(sprintf("  [Opsi A] Listwise deletion: %d observasi tersisa\n", nrow(gss_listwise)))

##   [Opsi A] Listwise deletion: 398 observasi tersisa

get_mode <- function(x) {
  ux <- unique(x[!is.na(x)])
  ux[which.max(tabulate(match(x, ux)))]
}
 
gss_imputed <- gss_clean

#Imputasi kovariat kontinu dengan median
for (var in covariates) {
  if (is.numeric(gss_imputed[[var]])) {
    gss_imputed[[var]][is.na(gss_imputed[[var]])] <-
      median(gss_imputed[[var]], na.rm = TRUE)
  }
}
 
# Imputasi DV dengan median
for (var in c(dv_wellbeing, dv_trust)) {
  if (is.numeric(gss_imputed[[var]])) {
    gss_imputed[[var]][is.na(gss_imputed[[var]])] <-
      median(gss_imputed[[var]], na.rm = TRUE)
  }
}
 
# Imputasi variabel kategorik dengan modus
for (var in iv_categorical) {
  if (is.factor(gss_imputed[[var]])) {
    mode_val <- get_mode(gss_imputed[[var]])
    gss_imputed[[var]][is.na(gss_imputed[[var]])] <- mode_val
  }
}
 
cat(sprintf("  [Opsi B] Imputasi median/modus: %d observasi (tidak ada baris hilang)\n",
            nrow(gss_imputed)))

##   [Opsi B] Imputasi median/modus: 3986 observasi (tidak ada baris hilang)

gss_final <- gss_listwise
cat(sprintf("\nDataset final (listwise): %d observasi × %d variabel\n",
            nrow(gss_final), ncol(gss_final)))

## 
## Dataset final (listwise): 398 observasi × 14 variabel

write.csv(gss_final, "GSS2024_clean.csv", row.names = FALSE)
cat(sprintf(" Checkpoint: GSS2024_clean.csv disimpan ke %s\n\n", getwd()))

##  Checkpoint: GSS2024_clean.csv disimpan ke C:/Users/Asus/ANMUL

# ─────────────────────────────────────────────────────────────────────────────

Deteksi dan penanganan outlier

cat("\n─── Deteksi Outlier Univariat ───\n")

## 
## ─── Deteksi Outlier Univariat ───

detect_outlier_iqr <- function(x, multiplier = 1.5) {
  Q1 <- quantile(x, 0.25, na.rm = TRUE)
  Q3 <- quantile(x, 0.75, na.rm = TRUE)
  IQR_val <- Q3 - Q1
  lower <- Q1 - multiplier * IQR_val
  upper <- Q3 + multiplier * IQR_val
  return(x < lower | x > upper)
}
 
outlier_summary <- data.frame()
for (var in c(dv_wellbeing, dv_trust, covariates)) {
  if (is.numeric(gss_final[[var]])) {
    n_out <- sum(detect_outlier_iqr(gss_final[[var]]), na.rm = TRUE)
    pct_out <- round(n_out / nrow(gss_final) * 100, 2)
    outlier_summary <- rbind(outlier_summary,
                              data.frame(Variabel = var,
                                         N_Outlier = n_out,
                                         Pct_Outlier = pct_out))
  }
}
print(outlier_summary)

##    Variabel N_Outlier Pct_Outlier
## 1     happy         0        0.00
## 2    health         0        0.00
## 3      life         0        0.00
## 4    satfin         0        0.00
## 5     trust         0        0.00
## 6   helpful         0        0.00
## 7      fair         0        0.00
## 8       age         0        0.00
## 9      educ         1        0.25
## 10   income        32        8.04

# Winsorizing outlier 
winsorize <- function(x, multiplier = 1.5) {
  Q1 <- quantile(x, 0.25, na.rm = TRUE)
  Q3 <- quantile(x, 0.75, na.rm = TRUE)
  IQR_val <- Q3 - Q1
  lower <- Q1 - multiplier * IQR_val
  upper <- Q3 + multiplier * IQR_val
  x <- pmin(pmax(x, lower), upper)
  return(x)
}

for (var in covariates) {
  if (is.numeric(gss_final[[var]])) {
    gss_final[[var]] <- winsorize(gss_final[[var]])
  }
}
cat("Winsorizing diterapkan pada kovariat kontinu\n")

## Winsorizing diterapkan pada kovariat kontinu

EDA LANJUTAN

Distribusi variabel kategorik

cat("─── Distribusi Variabel Kategorik (Faktor) ───\n")

## ─── Distribusi Variabel Kategorik (Faktor) ───

plot_list_cat <- list()
for (var in iv_categorical) {
  if (is.factor(gss_final[[var]])) {
    freq_tbl <- as.data.frame(table(gss_final[[var]]))
    names(freq_tbl) <- c("Kategori", "Frekuensi")
    freq_tbl$Persen <- round(freq_tbl$Frekuensi / sum(freq_tbl$Frekuensi) * 100, 1)
 
    cat(sprintf("  Variabel: %s\n", var))
    print(freq_tbl)
    cat("\n")
 
    p <- ggplot(freq_tbl, aes(x = Kategori, y = Frekuensi, fill = Kategori)) +
      geom_col(alpha = 0.85) +
      geom_text(aes(label = paste0(Persen, "%")),
                vjust = -0.5, size = 3.5) +
      scale_fill_brewer(palette = "Set2") +
      labs(title = paste("Distribusi:", var),
           x = NULL, y = "Frekuensi") +
      theme_minimal(base_size = 11) +
      theme(legend.position = "none",
            axis.text.x = element_text(angle = 30, hjust = 1))
    plot_list_cat[[var]] <- p
  }
}

##   Variabel: sex
##   Kategori Frekuensi Persen
## 1     Pria       187     47
## 2   Wanita       211     53
## 
##   Variabel: race
##   Kategori Frekuensi Persen
## 1    Putih       232   58.3
## 2    Hitam       112   28.1
## 3  Lainnya        54   13.6
## 
##   Variabel: marital
##        Kategori Frekuensi Persen
## 1       Menikah       140   35.2
## 2    Janda/Duda        45   11.3
## 3         Cerai        70   17.6
## 4         Pisah        15    3.8
## 5 Belum Menikah       128   32.2
## 
##   Variabel: degree
##       Kategori Frekuensi Persen
## 1        < SMA        45   11.3
## 2          SMA       203   51.0
## 3      Diploma        45   11.3
## 4      Sarjana        70   17.6
## 5 Pascasarjana        35    8.8

do.call(gridExtra::grid.arrange, c(plot_list_cat, ncol = 2))

Distribusi variabel kontinu (kovariat)

cat("\n─── Distribusi Kovariat Kontinu ───\n")

## 
## ─── Distribusi Kovariat Kontinu ───

plot_list_cont <- list()
for (var in covariates) {
  if (is.numeric(gss_final[[var]])) {
    skew_val <- round(moments::skewness(gss_final[[var]], na.rm = TRUE), 3)
    kurt_val <- round(moments::kurtosis(gss_final[[var]], na.rm = TRUE), 3)
    cat(sprintf("  %s → Skewness: %.3f, Kurtosis: %.3f\n", var, skew_val, kurt_val))
 
    p <- ggplot(gss_final, aes_string(x = var)) +
      geom_histogram(aes(y = ..density..),
                     bins = 30, fill = "#3498db", color = "white", alpha = 0.7) +
      geom_density(color = "#e74c3c", size = 1.2) +
      labs(title = paste("Distribusi:", var),
           subtitle = paste("Skewness:", skew_val, "| Kurtosis:", kurt_val),
           x = var, y = "Densitas") +
      theme_minimal(base_size = 11)
    plot_list_cont[[var]] <- p
  }
}

##   age → Skewness: -0.005, Kurtosis: 2.014

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

##   educ → Skewness: 0.122, Kurtosis: 3.197
##   income → Skewness: -1.487, Kurtosis: 3.800

do.call(gridExtra::grid.arrange, c(plot_list_cont, ncol = 2))

## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Distribusi variabel dependen

cat("\n─── Distribusi Variabel Dependen ───\n")

## 
## ─── Distribusi Variabel Dependen ───

all_dvs <- c(dv_wellbeing, dv_trust)
plot_list_dv <- list()
for (var in all_dvs) {
  if (is.numeric(gss_final[[var]])) {
    freq_tbl <- as.data.frame(table(gss_final[[var]]))
    names(freq_tbl) <- c("Nilai", "Frekuensi")
    freq_tbl$Persen <- round(freq_tbl$Frekuensi / sum(freq_tbl$Frekuensi) * 100, 1)
 
    p <- ggplot(freq_tbl, aes(x = Nilai, y = Frekuensi)) +
      geom_col(fill = "#9b59b6", alpha = 0.8) +
      geom_text(aes(label = paste0(Persen, "%")),
                vjust = -0.4, size = 3) +
      labs(title = paste("DV:", var), x = "Nilai", y = "Frekuensi") +
      theme_minimal(base_size = 10)
    plot_list_dv[[var]] <- p
  }
}
do.call(gridExtra::grid.arrange, c(plot_list_dv, ncol = 3))

Matriks korelasi variabel dependen

dv_data <- gss_final[, all_dvs]
cor_matrix <- cor(dv_data, use = "complete.obs", method = "pearson")
 
cat("─── Matriks Korelasi antar DV ───\n")

## ─── Matriks Korelasi antar DV ───

print(round(cor_matrix, 3))

##          happy health   life satfin  trust helpful   fair
## happy    1.000  0.221  0.318  0.195  0.006   0.098 -0.108
## health   0.221  1.000  0.196  0.261  0.057   0.106 -0.147
## life     0.318  0.196  1.000  0.180  0.044   0.051 -0.101
## satfin   0.195  0.261  0.180  1.000  0.180   0.179 -0.151
## trust    0.006  0.057  0.044  0.180  1.000   0.353 -0.113
## helpful  0.098  0.106  0.051  0.179  0.353   1.000 -0.163
## fair    -0.108 -0.147 -0.101 -0.151 -0.113  -0.163  1.000

# Heatmap korelasi
p_corr <- ggcorrplot::ggcorrplot(cor_matrix,
                      method     = "circle",
                      type       = "lower",
                      lab        = TRUE,
                      lab_size   = 3.5,
                      colors     = c("#e74c3c", "white", "#3498db"),
                      title      = "Korelasi antar Variabel Dependen (DV)",
                      ggtheme    = theme_minimal()) +
  labs(subtitle = "GSS 2024 | Pearson Correlation")
print(p_corr)

Korelasi variabel dependen dengan variabel kontinu

cat("\n─── Korelasi DV dengan Kovariat ───\n")

## 
## ─── Korelasi DV dengan Kovariat ───

cov_data <- gss_final[, c(all_dvs, covariates)]
cor_dv_cov <- cor(cov_data, use = "complete.obs")[all_dvs, covariates]
print(round(cor_dv_cov, 3))

##            age   educ income
## happy   -0.057 -0.105 -0.124
## health   0.032 -0.153 -0.238
## life    -0.039 -0.088 -0.073
## satfin  -0.188 -0.158 -0.095
## trust   -0.235 -0.077 -0.090
## helpful -0.209 -0.060 -0.092
## fair     0.156  0.089  0.141

UJI ASUMSI MANOVA

Uji normalitas multivariat

cat("─── Uji Normalitas Multivariat (Mardia's Test) ───\n")

## ─── Uji Normalitas Multivariat (Mardia's Test) ───

cat("  H0: Data berdistribusi multivariat normal\n\n")

##   H0: Data berdistribusi multivariat normal

dv_matrix <- as.matrix(gss_final[, all_dvs])
dv_complete_mat <- dv_matrix[complete.cases(dv_matrix), ]
 
mardia_test <- function(X) {
  X   <- scale(X, center = TRUE, scale = FALSE)
  n   <- nrow(X)
  p   <- ncol(X)
  S   <- cov(X)
  S_inv <- solve(S)
  D   <- X %*% S_inv %*% t(X)          # matriks jarak Mahalanobis n×n
 
  # Skewness multivariat
  b1p <- sum(D^3) / n^2
  k_skew <- n * b1p / 6
  df_skew <- p * (p + 1) * (p + 2) / 6
  p_skew  <- pchisq(k_skew, df = df_skew, lower.tail = FALSE)
  
    # Kurtosis multivariat
  b2p  <- sum(diag(D)^2) / n
  mean_b2p <- p * (p + 2)
  se_b2p   <- sqrt(8 * p * (p + 2) / n)
  z_kurt   <- (b2p - mean_b2p) / se_b2p
  p_kurt   <- 2 * pnorm(abs(z_kurt), lower.tail = FALSE)
 
  list(
    skewness   = b1p,  stat_skew = k_skew,  df_skew = df_skew, p_skew = p_skew,
    kurtosis   = b2p,  z_kurt    = z_kurt,               p_kurt = p_kurt,
    n = n, p = p
  )
}
 
mvn_res <- mardia_test(dv_complete_mat)
 
cat(sprintf("  n = %d observasi, p = %d variabel\n\n", mvn_res$n, mvn_res$p))

##   n = 398 observasi, p = 7 variabel

cat("  ┌─────────────────────────────────────────────────────────┐\n")

##   ┌─────────────────────────────────────────────────────────┐

cat("  │              Hasil Mardia's Test                        │\n")

##   │              Hasil Mardia's Test                        │

cat("  ├──────────────┬──────────┬────────┬──────────┬───────────┤\n")

##   ├──────────────┬──────────┬────────┬──────────┬───────────┤

cat("  │ Uji          │ Statistik│   df   │  p-value │ Kesimpulan│\n")

##   │ Uji          │ Statistik│   df   │  p-value │ Kesimpulan│

cat("  ├──────────────┼──────────┼────────┼──────────┼───────────┤\n")

##   ├──────────────┼──────────┼────────┼──────────┼───────────┤

cat(sprintf("  │ Skewness     │ %8.3f │ %6d │  %.4f  │ %s │\n",
            mvn_res$stat_skew, mvn_res$df_skew, mvn_res$p_skew,
            ifelse(mvn_res$p_skew > 0.05, "Normal", "Tdk Normal")))

##   │ Skewness     │  234.714 │     84 │  0.0000  │ Tdk Normal │

cat(sprintf("  │ Kurtosis (z) │ %8.3f │   -    │  %.4f  │ %s │\n",
            mvn_res$z_kurt, mvn_res$p_kurt,
            ifelse(mvn_res$p_kurt > 0.05, "Normal", "Tdk Normal")))

##   │ Kurtosis (z) │   -1.088 │   -    │  0.2768  │ Normal │

cat("  └──────────────┴──────────┴────────┴──────────┴───────────┘\n\n")

##   └──────────────┴──────────┴────────┴──────────┴───────────┘

cat(sprintf("Mardia Skewness  → b1p = %.4f | χ²(%d) = %.4f | p = %.4f  %s\n",
            mvn_res$skewness, mvn_res$df_skew, mvn_res$stat_skew, mvn_res$p_skew,
            ifelse(mvn_res$p_skew > 0.05, "→ Normal Multivariat",
                   "→ TIDAK Normal Multivariat")))

## Mardia Skewness  → b1p = 3.5384 | χ²(84) = 234.7140 | p = 0.0000  → TIDAK Normal Multivariat

cat(sprintf("Mardia Kurtosis  → b2p = %.4f | z = %.4f | p = %.4f  %s\n",
            mvn_res$kurtosis, mvn_res$z_kurt, mvn_res$p_kurt,
            ifelse(mvn_res$p_kurt > 0.05, "→ Normal Multivariat",
                   "→ TIDAK Normal Multivariat")))

## Mardia Kurtosis  → b2p = 61.7762 | z = -1.0875 | p = 0.2768  → Normal Multivariat

cat("Jika tidak normal: MANOVA tetap robust untuk N besar (N > 20 per grup)\n\n")

## Jika tidak normal: MANOVA tetap robust untuk N besar (N > 20 per grup)

# Q-Q Plot Mahalanobis untuk cek normalitas multivariat secara visual
X_c   <- scale(dv_complete_mat, center = TRUE, scale = FALSE)
S_inv <- solve(cov(X_c))
mah_norm <- rowSums((X_c %*% S_inv) * X_c)
df_qq_norm <- data.frame(
  observed = sort(mah_norm),
  expected = qchisq(ppoints(length(mah_norm)), df = ncol(dv_complete_mat))
)
p_qq_norm <- ggplot(df_qq_norm, aes(x = expected, y = observed)) +
  geom_point(alpha = 0.4, color = "#2980b9", size = 1.5) +
  geom_abline(color = "#e74c3c", linewidth = 1, linetype = "dashed") +
  labs(title    = "Q-Q Plot Normalitas Multivariat (Mahalanobis vs Chi-Square)",
       subtitle = paste("Mardia Skewness p =", round(mvn_res$p_skew, 4),
                        "| Kurtosis p =", round(mvn_res$p_kurt, 4)),
       x = "Chi-Square Quantile", y = "Mahalanobis Distance") +
  theme_minimal(base_size = 12)
print(p_qq_norm)

# Uji normalitas univariat per DV (Shapiro-Wilk)
cat("\n  Uji Normalitas Univariat (Shapiro-Wilk) per DV:\n")

## 
##   Uji Normalitas Univariat (Shapiro-Wilk) per DV:

cat(sprintf("  %-10s | %8s | %8s | %s\n", "Variabel", "W", "p-value", "Kesimpulan"))

##   Variabel   |        W |  p-value | Kesimpulan

cat(paste0("  ", strrep("-", 50), "\n"))

##   --------------------------------------------------

for (dv in all_dvs) {
  vals <- gss_final[[dv]][!is.na(gss_final[[dv]])]
 
  if (length(vals) > 5000) vals <- sample(vals, 5000)
  if (length(vals) >= 3) {
    sw <- shapiro.test(vals)
    cat(sprintf("  %-10s | %8.4f | %8.4f | %s\n",
                dv, sw$statistic, sw$p.value,
                ifelse(sw$p.value > 0.05, "Normal", "Tidak Normal️")))
  }
}

##   happy      |   0.7980 |   0.0000 | Tidak Normal️
##   health     |   0.8463 |   0.0000 | Tidak Normal️
##   life       |   0.7280 |   0.0000 | Tidak Normal️
##   satfin     |   0.8038 |   0.0000 | Tidak Normal️
##   trust      |   0.7458 |   0.0000 | Tidak Normal️
##   helpful    |   0.7775 |   0.0000 | Tidak Normal️
##   fair       |   0.7516 |   0.0000 | Tidak Normal️

Uji homogenitas Matrix Varians-Kovarians (Box’s M Test)

cat("\n─── Uji Homogenitas Varians Univariat (Levene's Test) ───\n")

## 
## ─── Uji Homogenitas Varians Univariat (Levene's Test) ───

cat("  H0: Varians antar kelompok sama untuk setiap DV\n\n")

##   H0: Varians antar kelompok sama untuk setiap DV

for (iv in c("sex", "race")) {
  cat(sprintf("  Kelompok berdasarkan: %s\n", iv))
  for (dv in all_dvs) {
    formula_str <- as.formula(paste(dv, "~", iv))
    tryCatch({
      lev_test <- car::leveneTest(formula_str, data = gss_final, center = median)
      p_val    <- lev_test$`Pr(>F)`[1]
      cat(sprintf("    %-10s | F = %6.3f | p = %.4f %s\n",
                  dv,
                  lev_test$`F value`[1],
                  p_val,
                  ifelse(p_val > 0.05, "Homogen", "Tidak homogen")))
    }, error = function(e) {
      cat(sprintf("    %-10s | Error: %s\n", dv, conditionMessage(e)))
    })
  }
  cat("\n")
}

##   Kelompok berdasarkan: sex
##     happy      | F =  0.140 | p = 0.7088 Homogen
##     health     | F =  4.737 | p = 0.0301 Tidak homogen
##     life       | F =  1.880 | p = 0.1712 Homogen
##     satfin     | F =  3.274 | p = 0.0711 Homogen
##     trust      | F =  5.246 | p = 0.0225 Tidak homogen
##     helpful    | F =  0.098 | p = 0.7547 Homogen
##     fair       | F =  0.176 | p = 0.6754 Homogen
## 
##   Kelompok berdasarkan: race
##     happy      | F =  0.307 | p = 0.7356 Homogen
##     health     | F =  0.620 | p = 0.5384 Homogen
##     life       | F =  0.629 | p = 0.5338 Homogen
##     satfin     | F =  1.832 | p = 0.1614 Homogen
##     trust      | F = 11.898 | p = 0.0000 Tidak homogen
##     helpful    | F =  5.484 | p = 0.0045 Tidak homogen
##     fair       | F =  3.354 | p = 0.0359 Tidak homogen

Uji Homogenitas varians univariat (Levene’s Test)

cat("\n─── Uji Homogenitas Matriks Kovarians (Box's M Test) ───\n")

## 
## ─── Uji Homogenitas Matriks Kovarians (Box's M Test) ───

cat("  H0: Matriks varians-kovarians antar kelompok sama (homogen)\n\n")

##   H0: Matriks varians-kovarians antar kelompok sama (homogen)

boxM_manual <- function(data, group) {
  data  <- as.matrix(data[complete.cases(data), ])
  group <- group[complete.cases(data)]
  group <- droplevels(as.factor(group))
  groups <- levels(group)
  p      <- ncol(data)
  n_tot  <- nrow(data)
  k      <- length(groups)
 
  ns   <- tapply(group, group, length)
  covs <- lapply(groups, function(g) cov(data[group == g, , drop = FALSE]))

  S_pool <- Reduce("+", mapply(function(S, n) (n - 1) * S,
                                covs, ns, SIMPLIFY = FALSE)) / (n_tot - k)
 
  # Statistik Box's M
  M <- (n_tot - k) * log(det(S_pool)) -
    sum(mapply(function(S, n) (n - 1) * log(det(S)), covs, ns))
  
    # Koreksi chi-square
  c1 <- (sum(1 / (ns - 1)) - 1 / (n_tot - k)) *
    (2 * p^2 + 3 * p - 1) / (6 * (p + 1) * (k - 1))
  chi_sq <- M * (1 - c1)
  df_chi <- p * (p + 1) * (k - 1) / 2
  p_val  <- pchisq(chi_sq, df = df_chi, lower.tail = FALSE)
 
  list(M = M, chi_sq = chi_sq, df = df_chi, p.value = p_val,
       n_groups = k, n_total = n_tot)
}
 
# Jalankan Box's M untuk sex dan race
for (iv in c("sex", "race")) {
  grp_var  <- gss_final[[iv]]
  dv_sub   <- gss_final[, all_dvs]
  idx_ok   <- complete.cases(dv_sub) & !is.na(grp_var)
  dv_sub   <- dv_sub[idx_ok, ]
  grp_var  <- grp_var[idx_ok]
 
  bm <- boxM_manual(dv_sub, grp_var)
  cat(sprintf("  Box's M Test (berdasarkan %s):\n", iv))
  cat(sprintf("    M = %.4f | χ²(%d) = %.4f | p = %.6f\n",
              bm$M, bm$df, bm$chi_sq, bm$p.value))
  cat(sprintf("     %s\n",
              ifelse(bm$p.value > 0.001,
                     "Matriks kovarians HOMOGEN  (p > 0.001)",
                     "Matriks kovarians TIDAK homogen ️ (p ≤ 0.001)")))
  cat("    💡 Box's M sensitif terhadap N besar; pelanggaran minor masih dapat diterima\n\n")
}

##   Box's M Test (berdasarkan sex):
##     M = 21.6191 | χ²(28) = 21.2145 | p = 0.816291
##      Matriks kovarians HOMOGEN  (p > 0.001)
##     💡 Box's M sensitif terhadap N besar; pelanggaran minor masih dapat diterima
## 
##   Box's M Test (berdasarkan race):
##     M = 67.2356 | χ²(56) = 64.7832 | p = 0.196999
##      Matriks kovarians HOMOGEN  (p > 0.001)
##     💡 Box's M sensitif terhadap N besar; pelanggaran minor masih dapat diterima

Uji normalitas Univariat per Kelompok (Shapiro-Wilk)

cat("─── Uji Normalitas Univariat per Kelompok (Shapiro-Wilk) ───\n")

## ─── Uji Normalitas Univariat per Kelompok (Shapiro-Wilk) ───

cat("  H0: Distribusi normal dalam setiap sel kelompok\n\n")

##   H0: Distribusi normal dalam setiap sel kelompok

for (iv in c("sex", "race")) {
  cat(sprintf("  Kelompok: %s\n", iv))
  groups <- levels(gss_final[[iv]])
  for (grp in groups) {
    subset_data <- gss_final[gss_final[[iv]] == grp, all_dvs, drop = FALSE]
    cat(sprintf("    [%s] n = %d\n", grp, nrow(subset_data)))
    for (dv in all_dvs) {
      vals <- subset_data[[dv]][!is.na(subset_data[[dv]])]
      if (length(vals) >= 3 && length(vals) <= 5000) {
        sw <- shapiro.test(vals)
        cat(sprintf("      %-10s | W = %.4f | p = %.4f %s\n",
                    dv, sw$statistic, sw$p.value,
                    ifelse(sw$p.value > 0.05, "✅", "⚠️")))
      } else {
        cat(sprintf("      %-10s | N = %d (di luar rentang SW)\n", dv, length(vals)))
      }
    }
  }
  cat("\n")
}

##   Kelompok: sex
##     [Pria] n = 187
##       happy      | W = 0.7989 | p = 0.0000 ⚠️
##       health     | W = 0.8623 | p = 0.0000 ⚠️
##       life       | W = 0.7309 | p = 0.0000 ⚠️
##       satfin     | W = 0.8047 | p = 0.0000 ⚠️
##       trust      | W = 0.7549 | p = 0.0000 ⚠️
##       helpful    | W = 0.7724 | p = 0.0000 ⚠️
##       fair       | W = 0.7645 | p = 0.0000 ⚠️
##     [Wanita] n = 211
##       happy      | W = 0.7958 | p = 0.0000 ⚠️
##       health     | W = 0.8246 | p = 0.0000 ⚠️
##       life       | W = 0.7223 | p = 0.0000 ⚠️
##       satfin     | W = 0.7974 | p = 0.0000 ⚠️
##       trust      | W = 0.7263 | p = 0.0000 ⚠️
##       helpful    | W = 0.7814 | p = 0.0000 ⚠️
##       fair       | W = 0.7314 | p = 0.0000 ⚠️
## 
##   Kelompok: race
##     [Putih] n = 232
##       happy      | W = 0.8003 | p = 0.0000 ⚠️
##       health     | W = 0.8345 | p = 0.0000 ⚠️
##       life       | W = 0.7144 | p = 0.0000 ⚠️
##       satfin     | W = 0.8056 | p = 0.0000 ⚠️
##       trust      | W = 0.7736 | p = 0.0000 ⚠️
##       helpful    | W = 0.7647 | p = 0.0000 ⚠️
##       fair       | W = 0.7779 | p = 0.0000 ⚠️
##     [Hitam] n = 112
##       happy      | W = 0.7892 | p = 0.0000 ⚠️
##       health     | W = 0.8502 | p = 0.0000 ⚠️
##       life       | W = 0.7447 | p = 0.0000 ⚠️
##       satfin     | W = 0.8038 | p = 0.0000 ⚠️
##       trust      | W = 0.5866 | p = 0.0000 ⚠️
##       helpful    | W = 0.7525 | p = 0.0000 ⚠️
##       fair       | W = 0.6322 | p = 0.0000 ⚠️
##     [Lainnya] n = 54
##       happy      | W = 0.7885 | p = 0.0000 ⚠️
##       health     | W = 0.8571 | p = 0.0000 ⚠️
##       life       | W = 0.7387 | p = 0.0000 ⚠️
##       satfin     | W = 0.7513 | p = 0.0000 ⚠️
##       trust      | W = 0.7487 | p = 0.0000 ⚠️
##       helpful    | W = 0.7961 | p = 0.0000 ⚠️
##       fair       | W = 0.7381 | p = 0.0000 ⚠️

Uji Multikolinearitas antar variabel dependen

cat("─── Uji Multikolinearitas antar DV ───\n")

## ─── Uji Multikolinearitas antar DV ───

high_corr <- which(abs(cor_matrix) > 0.85 & abs(cor_matrix) < 1.00,
                    arr.ind = TRUE)
if (nrow(high_corr) > 0) {
  cat("️ Pasangan DV dengan korelasi tinggi (|r| > 0.85):\n")
  for (i in seq_len(nrow(high_corr))) {
    r1 <- rownames(cor_matrix)[high_corr[i, 1]]
    c1 <- colnames(cor_matrix)[high_corr[i, 2]]
    if (r1 != c1) {
      cat(sprintf("    %s ~ %s: r = %.3f\n",
                  r1, c1, cor_matrix[high_corr[i, 1], high_corr[i, 2]]))
    }
  }
} else {
  cat("  Tidak ada pasangan DV dengan korelasi terlalu tinggi (|r| > 0.85)\n")
}

##   Tidak ada pasangan DV dengan korelasi terlalu tinggi (|r| > 0.85)

Deteksi Outlier Multivariat (Mahalanobis Distance)

cat("\n─── Deteksi Outlier Multivariat (Mahalanobis Distance) ───\n")

## 
## ─── Deteksi Outlier Multivariat (Mahalanobis Distance) ───

cat("  Kriteria: p < 0.001 berdasarkan distribusi Chi-square (df = jumlah DV)\n\n")

##   Kriteria: p < 0.001 berdasarkan distribusi Chi-square (df = jumlah DV)

dv_complete <- gss_final[complete.cases(gss_final[, all_dvs]), all_dvs]
mah_dist    <- mahalanobis(dv_complete,
                            colMeans(dv_complete),
                            cov(dv_complete))
p_mah       <- pchisq(mah_dist, df = length(all_dvs), lower.tail = FALSE)
n_outlier   <- sum(p_mah < 0.001)
pct_outlier <- round(n_outlier / nrow(dv_complete) * 100, 2)
 
cat(sprintf("  Jumlah outlier multivariat : %d dari %d (%.2f%%)\n",
            n_outlier, nrow(dv_complete), pct_outlier))

##   Jumlah outlier multivariat : 0 dari 398 (0.00%)

# Q-Q Plot Mahalanobis
df_mah <- data.frame(observed = sort(mah_dist),
                      expected  = qchisq(ppoints(length(mah_dist)), df = length(all_dvs)))
p_mah_qq <- ggplot(df_mah, aes(x = expected, y = observed)) +
  geom_point(alpha = 0.4, color = "#2980b9") +
  geom_abline(color = "#e74c3c", size = 1, linetype = "dashed") +
  labs(title    = "Q-Q Plot: Mahalanobis Distance vs Chi-Square",
       subtitle = paste("df =", length(all_dvs), "| Outlier multivariat (p<0.001):", n_outlier),
       x = "Chi-Square Quantile", y = "Mahalanobis Distance") +
  theme_minimal(base_size = 12)
print(p_mah_qq)

gss_final$mahalaobis_dist <- NA
idx_complete <- which(complete.cases(gss_final[, all_dvs]))
gss_final$mahalaobis_dist[idx_complete] <- mah_dist
gss_final$is_mv_outlier <- gss_final$mahalaobis_dist > qchisq(0.999, df = length(all_dvs))
 
cat(sprintf("  Kolom 'is_mv_outlier' ditambahkan ke dataset\n"))

##   Kolom 'is_mv_outlier' ditambahkan ke dataset

gss_no_outlier <- gss_final %>%
  filter(!is_mv_outlier | is.na(is_mv_outlier)) %>%
  select(-mahalaobis_dist, -is_mv_outlier)
cat(sprintf("  Dataset tanpa outlier multivariat: %d observasi\n", nrow(gss_no_outlier)))

##   Dataset tanpa outlier multivariat: 398 observasi

MANOVA

load data clean

packages <- c(
  "tidyverse",   
  "emmeans",     
  "effectsize",  
  "car",         
  "ggpubr",      
  "gridExtra",   
  "rstatix",     
  "knitr",       
  "broom"        
)

cat("Package berhasil dimuat.\n\n")

## Package berhasil dimuat.

gss_export <- read.csv("GSS2024_clean.csv", stringsAsFactors = FALSE)
 
# Konversi ulang ke factor (read.csv tidak menyimpan factor)
gss_export$sex <- factor(gss_export$sex,
                          levels = c("Pria", "Wanita"))
 
gss_export$race <- factor(gss_export$race,
                           levels = c("Putih", "Hitam", "Lainnya"))
 
gss_export$marital <- factor(gss_export$marital,
                              levels = c("Menikah", "Janda/Duda", "Cerai",
                                         "Pisah", "Belum Menikah"))
 
gss_export$degree <- factor(gss_export$degree,
                             levels = c("< SMA", "SMA", "Diploma",
                                        "Sarjana", "Pascasarjana"),
                             ordered = TRUE)

# Definisi variabel
dv_wellbeing    <- c("happy", "health", "life", "satfin")
dv_trust        <- c("trust", "helpful", "fair")
all_dvs         <- c(dv_wellbeing, dv_trust)
iv_categorical  <- c("sex", "race", "marital", "degree")
covariates      <- c("age", "educ", "income")

gss_export[all_dvs]  <- lapply(gss_export[all_dvs], as.numeric)
gss_export[covariates] <- lapply(gss_export[covariates], as.numeric)

analisis_vars <- c(all_dvs, "sex", "race", "age", "educ", "income")
df <- gss_export[complete.cases(gss_export[, analisis_vars]), ]
 
cat(sprintf("  Dataset siap: %d observasi × %d variabel\n", nrow(df), ncol(df)))

##   Dataset siap: 398 observasi × 14 variabel

cat(sprintf("  DV           : %s\n", paste(all_dvs, collapse = ", ")))

##   DV           : happy, health, life, satfin, trust, helpful, fair

cat(sprintf("  IV           : sex, race\n"))

##   IV           : sex, race

cat(sprintf("  Kovariat     : %s\n\n", paste(covariates, collapse = ", ")))

##   Kovariat     : age, educ, income

dv_cbind <- paste0("cbind(", paste(all_dvs, collapse = ", "), ")")
 
make_manova_formula <- function(rhs) {
  as.formula(paste(dv_cbind, "~", rhs))
}

fungsi helper

partial_eta2_manova <- function(model) {
  ss  <- summary(model, test = "Pillai")
  tbl <- ss$stats
  
  aov_list <- summary.aov(model)
  results  <- list()
  for (dv_name in names(aov_list)) {
    aov_tbl <- aov_list[[dv_name]]
    ss_vals <- aov_tbl[, "Sum Sq"]
    df_vals <- aov_tbl[, "Df"]
    terms   <- rownames(aov_tbl)
    for (i in seq_along(terms)) {
      if (terms[i] != "Residuals") {
        ss_effect <- ss_vals[i]
        ss_resid  <- ss_vals[grep("Residuals", terms)]
        pe2 <- ss_effect / (ss_effect + ss_resid)
        results[[paste(dv_name, terms[i], sep = "|")]] <- round(pe2, 4)
      }
    }
  }
  return(results)
}
 
interpret_pe2 <- function(pe2) {
  ifelse(pe2 < 0.01, "Sangat Kecil",
  ifelse(pe2 < 0.06, "Kecil",
  ifelse(pe2 < 0.14, "Sedang", "Besar")))
}


print_manova_table <- function(model, label = "") {
  cat(sprintf("\n  ┌─────────────────────────────────────────────────────────────────────┐\n"))
  cat(sprintf("  │  MANOVA: %-60s│\n", label))
  cat(sprintf("  ├───────────────────┬──────────┬───────┬────────┬──────────┬──────────┤\n"))
  cat(sprintf("  │ Uji               │ Stat     │ df1   │ df2    │ F        │ p-value  │\n"))
  cat(sprintf("  ├───────────────────┼──────────┼───────┼────────┼──────────┼──────────┤\n"))
 
  tests <- c("Pillai", "Wilks", "Hotelling-Lawley", "Roy")
  for (test in tests) {
    s    <- summary(model, test = test)$stats
    nrow_s <- nrow(s) - 1   # baris terakhir adalah Residuals
    for (i in seq_len(nrow_s)) {
      effect_name <- rownames(s)[i]
      stat_val    <- s[i, 2]
      df1         <- s[i, 3]
      df2         <- s[i, 4]
      f_val       <- s[i, 5]
      p_val       <- s[i, 6]
      sig         <- ifelse(is.na(p_val), "",
                     ifelse(p_val < 0.001, "***",
                     ifelse(p_val < 0.01,  "**",
                     ifelse(p_val < 0.05,  "*", ""))))
      cat(sprintf("  │ %-6s %-12s│ %8.4f │ %5.0f │ %6.2f │ %8.4f │ %6.4f%s  │\n",
                  test, effect_name, 
                  stat_val, df1, df2, f_val, ifelse(is.na(p_val), 0, p_val), sig))
    }
    if (test != "Roy") cat("  │                   │          │       │        │          │          │\n")
  }
  cat("  └───────────────────┴──────────┴───────┴────────┴──────────┴──────────┘\n")
  cat("  Signifikansi: *** p<0.001 | ** p<0.01 | * p<0.05\n\n")
}

MANOVA ONE WAY

sex

cat("\n─── MANOVA: DV ~ sex ───\n")

## 
## ─── MANOVA: DV ~ sex ───

manova_sex <- manova(make_manova_formula("sex"), data = df)
print_manova_table(manova_sex, "DV ~ sex")

## 
##   ┌─────────────────────────────────────────────────────────────────────┐
##   │  MANOVA: DV ~ sex                                                    │
##   ├───────────────────┬──────────┬───────┬────────┬──────────┬──────────┤
##   │ Uji               │ Stat     │ df1   │ df2    │ F        │ p-value  │
##   ├───────────────────┼──────────┼───────┼────────┼──────────┼──────────┤
##   │ Pillai sex         │   0.0335 │     2 │   7.00 │ 390.0000 │ 0.0638  │
##   │                   │          │       │        │          │          │
##   │ Wilks  sex         │   0.9665 │     2 │   7.00 │ 390.0000 │ 0.0638  │
##   │                   │          │       │        │          │          │
##   │ Hotelling-Lawley sex         │   0.0346 │     2 │   7.00 │ 390.0000 │ 0.0638  │
##   │                   │          │       │        │          │          │
##   │ Roy    sex         │   0.0346 │     2 │   7.00 │ 390.0000 │ 0.0638  │
##   └───────────────────┴──────────┴───────┴────────┴──────────┴──────────┘
##   Signifikansi: *** p<0.001 | ** p<0.01 | * p<0.05

# Follow-up: ANOVA univariat per DV
cat("  Follow-up ANOVA Univariat (DV ~ sex):\n")

##   Follow-up ANOVA Univariat (DV ~ sex):

cat(sprintf("  %-10s | %8s | %5s | %8s | %-12s\n",
            "DV", "F", "df", "p-value", "Signifikan"))

##   DV         |        F |    df |  p-value | Signifikan

cat(paste0("  ", strrep("-", 55), "\n"))

##   -------------------------------------------------------

aov_sex <- summary.aov(manova_sex)
for (dv_name in all_dvs) {
  tbl   <- aov_sex[[dv_name]]
  f_val <- tbl["sex", "F value"]
  p_val <- tbl["sex", "Pr(>F)"]
  sig   <- ifelse(p_val < 0.001, "***",
           ifelse(p_val < 0.01,  "**",
           ifelse(p_val < 0.05,  "*",
           ifelse(p_val < 0.10,  ".",  "ns"))))
  cat(sprintf("  %-10s | %8.4f | %5.0f | %8.4f | %s\n",
              dv_name, f_val, tbl["sex", "Df"], p_val, sig))
}
cat("  Signifikansi: *** p<0.001 | ** p<0.01 | * p<0.05 | . p<0.10 | ns = tidak signifikan\n\n")

##   Signifikansi: *** p<0.001 | ** p<0.01 | * p<0.05 | . p<0.10 | ns = tidak signifikan

race

cat("─── MANOVA: DV ~ race ───\n")

## ─── MANOVA: DV ~ race ───

manova_race <- manova(make_manova_formula("race"), data = df)
print_manova_table(manova_race, "DV ~ race")

## 
##   ┌─────────────────────────────────────────────────────────────────────┐
##   │  MANOVA: DV ~ race                                                   │
##   ├───────────────────┬──────────┬───────┬────────┬──────────┬──────────┤
##   │ Uji               │ Stat     │ df1   │ df2    │ F        │ p-value  │
##   ├───────────────────┼──────────┼───────┼────────┼──────────┼──────────┤
##   │ Pillai race        │   0.1050 │     3 │  14.00 │ 780.0000 │ 0.0001***  │
##   │                   │          │       │        │          │          │
##   │ Wilks  race        │   0.8972 │     3 │  14.00 │ 778.0000 │ 0.0001***  │
##   │                   │          │       │        │          │          │
##   │ Hotelling-Lawley race        │   0.1122 │     3 │  14.00 │ 776.0000 │ 0.0001***  │
##   │                   │          │       │        │          │          │
##   │ Roy    race        │   0.0829 │     5 │   7.00 │ 390.0000 │ 0.0001***  │
##   └───────────────────┴──────────┴───────┴────────┴──────────┴──────────┘
##   Signifikansi: *** p<0.001 | ** p<0.01 | * p<0.05

cat("  Follow-up ANOVA Univariat (DV ~ race):\n")

##   Follow-up ANOVA Univariat (DV ~ race):

cat(sprintf("  %-10s | %8s | %5s | %8s | %-12s\n",
            "DV", "F", "df", "p-value", "Signifikan"))

##   DV         |        F |    df |  p-value | Signifikan

cat(paste0("  ", strrep("-", 55), "\n"))

##   -------------------------------------------------------

aov_race <- summary.aov(manova_race)
for (dv_name in all_dvs) {
  tbl   <- aov_race[[dv_name]]
  f_val <- tbl["race", "F value"]
  p_val <- tbl["race", "Pr(>F)"]
  sig   <- ifelse(p_val < 0.001, "***",
           ifelse(p_val < 0.01,  "**",
           ifelse(p_val < 0.05,  "*",
           ifelse(p_val < 0.10,  ".",  "ns"))))
  cat(sprintf("  %-10s | %8.4f | %5.0f | %8.4f | %s\n",
              dv_name, f_val, tbl["race", "Df"], p_val, sig))
}
cat("\n")

degree

cat("─── MANOVA: DV ~ degree ───\n")

## ─── MANOVA: DV ~ degree ───

manova_degree <- manova(make_manova_formula("degree"), data = df)
print_manova_table(manova_degree, "DV ~ degree")

## 
##   ┌─────────────────────────────────────────────────────────────────────┐
##   │  MANOVA: DV ~ degree                                                 │
##   ├───────────────────┬──────────┬───────┬────────┬──────────┬──────────┤
##   │ Uji               │ Stat     │ df1   │ df2    │ F        │ p-value  │
##   ├───────────────────┼──────────┼───────┼────────┼──────────┼──────────┤
##   │ Pillai degree      │   0.1182 │     2 │  28.00 │ 1560.0000 │ 0.0131*  │
##   │                   │          │       │        │          │          │
##   │ Wilks  degree      │   0.8846 │     2 │  28.00 │ 1396.7705 │ 0.0109*  │
##   │                   │          │       │        │          │          │
##   │ Hotelling-Lawley degree      │   0.1272 │     2 │  28.00 │ 1542.0000 │ 0.0091**  │
##   │                   │          │       │        │          │          │
##   │ Roy    degree      │   0.0967 │     5 │   7.00 │ 390.0000 │ 0.0000***  │
##   └───────────────────┴──────────┴───────┴────────┴──────────┴──────────┘
##   Signifikansi: *** p<0.001 | ** p<0.01 | * p<0.05

cat("  Follow-up ANOVA Univariat (DV ~ degree):\n")

##   Follow-up ANOVA Univariat (DV ~ degree):

cat(sprintf("  %-10s | %8s | %5s | %8s | %-12s\n",
            "DV", "F", "df", "p-value", "Signifikan"))

##   DV         |        F |    df |  p-value | Signifikan

cat(paste0("  ", strrep("-", 55), "\n"))

##   -------------------------------------------------------

aov_degree <- summary.aov(manova_degree)
for (dv_name in all_dvs) {
  tbl   <- aov_degree[[dv_name]]
  f_val <- tbl["degree.L", "F value"]   # .L = linear trend (ordered factor)
  p_val <- tbl["degree.L", "Pr(>F)"]
  sig   <- ifelse(p_val < 0.001, "***",
           ifelse(p_val < 0.01,  "**",
           ifelse(p_val < 0.05,  "*",
           ifelse(p_val < 0.10,  ".",  "ns"))))
  cat(sprintf("  %-10s | %8.4f | %5.0f | %8.4f | %s\n",
              dv_name, f_val, tbl["degree.L", "Df"], p_val, sig))
}
cat("\n")

MANOVA TWO WAY

sex vs race

cat("═══════════════════════════════════════════════════════════\n")

## ═══════════════════════════════════════════════════════════

cat("  BAGIAN 3: MANOVA DUA FAKTOR (sex × race)\n")

##   BAGIAN 3: MANOVA DUA FAKTOR (sex × race)

cat("═══════════════════════════════════════════════════════════\n\n")

## ═══════════════════════════════════════════════════════════

manova_2way <- manova(make_manova_formula("sex * race"), data = df)
 
cat("─── Hasil Pillai's Trace (sex × race) ───\n")

## ─── Hasil Pillai's Trace (sex × race) ───

sum_2way <- summary(manova_2way, test = "Pillai")
print(sum_2way)

##            Df   Pillai approx F num Df den Df    Pr(>F)    
## sex         1 0.034773   1.9865      7    386   0.05583 .  
## race        2 0.108020   3.1565     14    774 7.653e-05 ***
## sex:race    2 0.044932   1.2706     14    774   0.21971    
## Residuals 392                                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n─── Follow-up ANOVA Univariat (sex × race) ───\n")

## 
## ─── Follow-up ANOVA Univariat (sex × race) ───

cat(sprintf("  %-10s | %-12s | %8s | %5s | %8s | %s\n",
            "DV", "Efek", "F", "df", "p-value", "Sig"))

##   DV         | Efek         |        F |    df |  p-value | Sig

cat(paste0("  ", strrep("-", 65), "\n"))

##   -----------------------------------------------------------------

aov_2way <- summary.aov(manova_2way)
efek_list <- c("sex", "race", "sex:race")
for (dv_name in all_dvs) {
  tbl <- aov_2way[[dv_name]]
  for (efek in efek_list) {
    if (efek %in% rownames(tbl)) {
      f_val <- tbl[efek, "F value"]
      p_val <- tbl[efek, "Pr(>F)"]
      df_v  <- tbl[efek, "Df"]
      sig   <- ifelse(p_val < 0.001, "***",
               ifelse(p_val < 0.01,  "**",
               ifelse(p_val < 0.05,  "*",
               ifelse(p_val < 0.10,  ".",  "ns"))))
      cat(sprintf("  %-10s | %-12s | %8.4f | %5.0f | %8.4f | %s\n",
                  dv_name, efek, f_val, df_v, p_val, sig))
    }
  }
  cat(paste0("  ", strrep("·", 65), "\n"))
}

##   ·································································
##   ·································································
##   ·································································
##   ·································································
##   ·································································
##   ·································································
##   ·································································

# Interaksi plot (sex × race) untuk setiap DV
cat("\n─── Interaksi Plot: sex × race ───\n")

## 
## ─── Interaksi Plot: sex × race ───

interaction_plots <- list()
for (dv in all_dvs) {
  mean_int <- df %>%
    group_by(sex, race) %>%
    summarise(mean_dv = mean(.data[[dv]], na.rm = TRUE),
              se_dv   = sd(.data[[dv]], na.rm = TRUE) / sqrt(n()),
              .groups = "drop")
 
  p_int <- ggplot(mean_int, aes(x = race, y = mean_dv, color = sex, group = sex)) +
    geom_line(linewidth = 1) + # Sedikit tipiskan garis
    geom_point(size = 2) +      # Sedikit kecilkan titik
    geom_errorbar(aes(ymin = mean_dv - se_dv, ymax = mean_dv + se_dv),
                  width = 0.1, linewidth = 0.5) +
    scale_color_manual(values = c("#2980b9", "#e74c3c")) +
    labs(title = dv,           # Judul cukup nama variabelnya saja
         x = "",               # Kosongkan label X agar tidak penuh
         y = "Mean") +         # Persingkat label Y
    theme_minimal(base_size = 9) +
    theme(legend.position = "none") # Sembunyikan legenda individual
  interaction_plots[[dv]] <- p_int
}
do.call(gridExtra::grid.arrange,
        c(interaction_plots, ncol = 3,
          top = "Interaksi Plot MANOVA Dua Faktor: sex × race"))

MANCOVA

load data clean

packages <- c(
  "tidyverse",   
  "emmeans",     
  "effectsize",  
  "car",         
  "ggpubr",      
  "gridExtra",   
  "rstatix",     
  "knitr",       
  "broom"        
)

cat("Package berhasil dimuat.\n\n")

## Package berhasil dimuat.

gss_export <- read.csv("GSS2024_clean.csv", stringsAsFactors = FALSE)

gss_export$sex <- factor(gss_export$sex,
                          levels = c("Pria", "Wanita"))

gss_export$race <- factor(gss_export$race,
                           levels = c("Putih", "Hitam", "Lainnya"))

gss_export$marital <- factor(gss_export$marital,
                              levels = c("Menikah", "Janda/Duda", "Cerai",
                                         "Pisah", "Belum Menikah"))

gss_export$degree <- factor(gss_export$degree,
                             levels = c("< SMA", "SMA", "Diploma",
                                        "Sarjana", "Pascasarjana"),
                             ordered = TRUE)

# Definisi variabel
dv_wellbeing    <- c("happy", "health", "life", "satfin")
dv_trust        <- c("trust", "helpful", "fair")
all_dvs         <- c(dv_wellbeing, dv_trust)
iv_categorical  <- c("sex", "race", "marital", "degree")
covariates      <- c("age", "educ", "income")

gss_export[all_dvs]    <- lapply(gss_export[all_dvs], as.numeric)
gss_export[covariates] <- lapply(gss_export[covariates], as.numeric)

analisis_vars <- c(all_dvs, "sex", "race", "age", "educ", "income")
df <- gss_export[complete.cases(gss_export[, analisis_vars]), ]

cat(sprintf("  Dataset siap: %d observasi x %d variabel\n", nrow(df), ncol(df)))

##   Dataset siap: 398 observasi x 14 variabel

cat(sprintf("  DV           : %s\n", paste(all_dvs, collapse = ", ")))

##   DV           : happy, health, life, satfin, trust, helpful, fair

cat(sprintf("  IV           : sex, race\n"))

##   IV           : sex, race

cat(sprintf("  Kovariat     : %s\n\n", paste(covariates, collapse = ", ")))

##   Kovariat     : age, educ, income

dv_cbind <- paste0("cbind(", paste(all_dvs, collapse = ", "), ")")

make_mancova_formula <- function(rhs) {
  as.formula(paste(dv_cbind, "~", rhs))
}

fungsi helper

# Hitung partial eta-squared dari objek mancova
partial_eta2_mancova <- function(model) {
  aov_list <- summary.aov(model)
  results  <- list()
  for (dv_name in names(aov_list)) {
    aov_tbl <- aov_list[[dv_name]]
    ss_vals <- aov_tbl[, "Sum Sq"]
    terms   <- rownames(aov_tbl)
    for (i in seq_along(terms)) {
      if (terms[i] != "Residuals") {
        ss_effect <- ss_vals[i]
        ss_resid  <- ss_vals[grep("Residuals", terms)]
        pe2 <- ss_effect / (ss_effect + ss_resid)
        results[[paste(dv_name, terms[i], sep = "|")]] <- round(pe2, 4)
      }
    }
  }
  return(results)
}

# Interpretasi partial eta-squared
interpret_pe2 <- function(pe2) {
  ifelse(pe2 < 0.01, "Sangat Kecil",
  ifelse(pe2 < 0.06, "Kecil",
  ifelse(pe2 < 0.14, "Sedang", "Besar")))
}

print_mancova_table <- function(model, label = "") {
  cat(sprintf("\n  +-------------------------------------------------------------------------+\n"))
  cat(sprintf("  |  MANCOVA: %-60s|\n", label))
  cat(sprintf("  +-------------------+----------+-------+--------+----------+----------+\n"))
  cat(sprintf("  | Uji               | Stat     | df1   | df2    | F        | p-value  |\n"))
  cat(sprintf("  +-------------------+----------+-------+--------+----------+----------+\n"))

  tests <- c("Pillai", "Wilks", "Hotelling-Lawley", "Roy")
  for (test in tests) {
    s      <- summary(model, test = test)$stats
    nrow_s <- nrow(s) - 1   # baris terakhir adalah Residuals
    for (i in seq_len(nrow_s)) {
      effect_name <- rownames(s)[i]
      stat_val    <- s[i, 2]
      df1         <- s[i, 3]
      df2         <- s[i, 4]
      f_val       <- s[i, 5]
      p_val       <- s[i, 6]
      sig         <- ifelse(is.na(p_val), "",
                     ifelse(p_val < 0.001, "***",
                     ifelse(p_val < 0.01,  "**",
                     ifelse(p_val < 0.05,  "*", ""))))
      cat(sprintf("  | %-6s %-12s| %8.4f | %5.0f | %6.2f | %8.4f | %6.4f%s  |\n",
                  test, effect_name,
                  stat_val, df1, df2, f_val,
                  ifelse(is.na(p_val), 0, p_val), sig))
    }
    if (test != "Roy") cat("  |                   |          |       |        |          |          |\n")
  }
  cat("  +-------------------+----------+-------+--------+----------+----------+\n")
  cat("  Signifikansi: *** p<0.001 | ** p<0.01 | * p<0.05\n\n")
}

MANCOVA ONE WAY

sex

cat("\n --- MANCOVA: DV ~ sex + age + educ + income ---\n")

## 
##  --- MANCOVA: DV ~ sex + age + educ + income ---

mancova_sex <- manova(make_mancova_formula("sex + age + educ + income"), data = df)
print_mancova_table(mancova_sex, "DV ~ sex + age + educ + income")

## 
##   +-------------------------------------------------------------------------+
##   |  MANCOVA: DV ~ sex + age + educ + income                              |
##   +-------------------+----------+-------+--------+----------+----------+
##   | Uji               | Stat     | df1   | df2    | F        | p-value  |
##   +-------------------+----------+-------+--------+----------+----------+
##   | Pillai sex         |   0.0364 |     2 |   7.00 | 387.0000 | 0.0436*  |
##   | Pillai age         |   0.1143 |     7 |   7.00 | 387.0000 | 0.0000***  |
##   | Pillai educ        |   0.0463 |     3 |   7.00 | 387.0000 | 0.0100*  |
##   | Pillai income      |   0.0557 |     3 |   7.00 | 387.0000 | 0.0022**  |
##   |                   |          |       |        |          |          |
##   | Wilks  sex         |   0.9636 |     2 |   7.00 | 387.0000 | 0.0436*  |
##   | Wilks  age         |   0.8857 |     7 |   7.00 | 387.0000 | 0.0000***  |
##   | Wilks  educ        |   0.9537 |     3 |   7.00 | 387.0000 | 0.0100*  |
##   | Wilks  income      |   0.9443 |     3 |   7.00 | 387.0000 | 0.0022**  |
##   |                   |          |       |        |          |          |
##   | Hotelling-Lawley sex         |   0.0378 |     2 |   7.00 | 387.0000 | 0.0436*  |
##   | Hotelling-Lawley age         |   0.1291 |     7 |   7.00 | 387.0000 | 0.0000***  |
##   | Hotelling-Lawley educ        |   0.0486 |     3 |   7.00 | 387.0000 | 0.0100*  |
##   | Hotelling-Lawley income      |   0.0590 |     3 |   7.00 | 387.0000 | 0.0022**  |
##   |                   |          |       |        |          |          |
##   | Roy    sex         |   0.0378 |     2 |   7.00 | 387.0000 | 0.0436*  |
##   | Roy    age         |   0.1291 |     7 |   7.00 | 387.0000 | 0.0000***  |
##   | Roy    educ        |   0.0486 |     3 |   7.00 | 387.0000 | 0.0100*  |
##   | Roy    income      |   0.0590 |     3 |   7.00 | 387.0000 | 0.0022**  |
##   +-------------------+----------+-------+--------+----------+----------+
##   Signifikansi: *** p<0.001 | ** p<0.01 | * p<0.05

cat("  Follow-up ANCOVA Univariat (DV ~ sex + kovariat):\n")

##   Follow-up ANCOVA Univariat (DV ~ sex + kovariat):

cat(sprintf("  %-10s | %8s | %5s | %8s | %-12s\n",
            "DV", "F", "df", "p-value", "Signifikan"))

##   DV         |        F |    df |  p-value | Signifikan

cat(paste0("  ", strrep("-", 55), "\n"))

##   -------------------------------------------------------

aov_mancova_sex <- summary.aov(mancova_sex)
for (dv_name in all_dvs) {
  tbl   <- aov_mancova_sex[[dv_name]]
  f_val <- tbl["sex", "F value"]
  p_val <- tbl["sex", "Pr(>F)"]
  sig   <- ifelse(p_val < 0.001, "***",
           ifelse(p_val < 0.01,  "**",
           ifelse(p_val < 0.05,  "*",
           ifelse(p_val < 0.10,  ".",  "ns"))))
  cat(sprintf("  %-10s | %8.4f | %5.0f | %8.4f | %s\n",
              dv_name, f_val, tbl["sex", "Df"], p_val, sig))
}
cat("  Signifikansi: *** p<0.001 | ** p<0.01 | * p<0.05 | . p<0.10 | ns = tidak signifikan\n\n")

##   Signifikansi: *** p<0.001 | ** p<0.01 | * p<0.05 | . p<0.10 | ns = tidak signifikan

race

cat("--- MANCOVA: DV ~ race + age + educ + income ---\n")

## --- MANCOVA: DV ~ race + age + educ + income ---

mancova_race <- manova(make_mancova_formula("race + age + educ + income"), data = df)
print_mancova_table(mancova_race, "DV ~ race + age + educ + income")

## 
##   +-------------------------------------------------------------------------+
##   |  MANCOVA: DV ~ race + age + educ + income                             |
##   +-------------------+----------+-------+--------+----------+----------+
##   | Uji               | Stat     | df1   | df2    | F        | p-value  |
##   +-------------------+----------+-------+--------+----------+----------+
##   | Pillai race        |   0.1094 |     3 |  14.00 | 774.0000 | 0.0001***  |
##   | Pillai age         |   0.0967 |     6 |   7.00 | 386.0000 | 0.0000***  |
##   | Pillai educ        |   0.0427 |     2 |   7.00 | 386.0000 | 0.0177*  |
##   | Pillai income      |   0.0528 |     3 |   7.00 | 386.0000 | 0.0037**  |
##   |                   |          |       |        |          |          |
##   | Wilks  race        |   0.8929 |     3 |  14.00 | 772.0000 | 0.0001***  |
##   | Wilks  age         |   0.9033 |     6 |   7.00 | 386.0000 | 0.0000***  |
##   | Wilks  educ        |   0.9573 |     2 |   7.00 | 386.0000 | 0.0177*  |
##   | Wilks  income      |   0.9472 |     3 |   7.00 | 386.0000 | 0.0037**  |
##   |                   |          |       |        |          |          |
##   | Hotelling-Lawley race        |   0.1173 |     3 |  14.00 | 770.0000 | 0.0001***  |
##   | Hotelling-Lawley age         |   0.1070 |     6 |   7.00 | 386.0000 | 0.0000***  |
##   | Hotelling-Lawley educ        |   0.0446 |     2 |   7.00 | 386.0000 | 0.0177*  |
##   | Hotelling-Lawley income      |   0.0558 |     3 |   7.00 | 386.0000 | 0.0037**  |
##   |                   |          |       |        |          |          |
##   | Roy    race        |   0.0869 |     5 |   7.00 | 387.0000 | 0.0000***  |
##   | Roy    age         |   0.1070 |     6 |   7.00 | 386.0000 | 0.0000***  |
##   | Roy    educ        |   0.0446 |     2 |   7.00 | 386.0000 | 0.0177*  |
##   | Roy    income      |   0.0558 |     3 |   7.00 | 386.0000 | 0.0037**  |
##   +-------------------+----------+-------+--------+----------+----------+
##   Signifikansi: *** p<0.001 | ** p<0.01 | * p<0.05

cat("  Follow-up ANCOVA Univariat (DV ~ race + kovariat):\n")

##   Follow-up ANCOVA Univariat (DV ~ race + kovariat):

cat(sprintf("  %-10s | %8s | %5s | %8s | %-12s\n",
            "DV", "F", "df", "p-value", "Signifikan"))

##   DV         |        F |    df |  p-value | Signifikan

cat(paste0("  ", strrep("-", 55), "\n"))

##   -------------------------------------------------------

aov_mancova_race <- summary.aov(mancova_race)
for (dv_name in all_dvs) {
  tbl   <- aov_mancova_race[[dv_name]]
  f_val <- tbl["race", "F value"]
  p_val <- tbl["race", "Pr(>F)"]
  sig   <- ifelse(p_val < 0.001, "***",
           ifelse(p_val < 0.01,  "**",
           ifelse(p_val < 0.05,  "*",
           ifelse(p_val < 0.10,  ".",  "ns"))))
  cat(sprintf("  %-10s | %8.4f | %5.0f | %8.4f | %s\n",
              dv_name, f_val, tbl["race", "Df"], p_val, sig))
}
cat("\n")

degree

cat("--- MANCOVA: DV ~ degree + age + educ + income ---\n")

## --- MANCOVA: DV ~ degree + age + educ + income ---

mancova_degree <- manova(make_mancova_formula("degree + age + educ + income"), data = df)
print_mancova_table(mancova_degree, "DV ~ degree + age + educ + income")

## 
##   +-------------------------------------------------------------------------+
##   |  MANCOVA: DV ~ degree + age + educ + income                           |
##   +-------------------+----------+-------+--------+----------+----------+
##   | Uji               | Stat     | df1   | df2    | F        | p-value  |
##   +-------------------+----------+-------+--------+----------+----------+
##   | Pillai degree      |   0.1261 |     2 |  28.00 | 1548.0000 | 0.0065**  |
##   | Pillai age         |   0.1049 |     6 |   7.00 | 384.0000 | 0.0000***  |
##   | Pillai educ        |   0.0046 |     0 |   7.00 | 384.0000 | 0.9718  |
##   | Pillai income      |   0.0534 |     3 |   7.00 | 384.0000 | 0.0035**  |
##   |                   |          |       |        |          |          |
##   | Wilks  degree      |   0.8771 |     2 |  28.00 | 1385.9539 | 0.0051**  |
##   | Wilks  age         |   0.8951 |     6 |   7.00 | 384.0000 | 0.0000***  |
##   | Wilks  educ        |   0.9954 |     0 |   7.00 | 384.0000 | 0.9718  |
##   | Wilks  income      |   0.9466 |     3 |   7.00 | 384.0000 | 0.0035**  |
##   |                   |          |       |        |          |          |
##   | Hotelling-Lawley degree      |   0.1366 |     2 |  28.00 | 1530.0000 | 0.0040**  |
##   | Hotelling-Lawley age         |   0.1172 |     6 |   7.00 | 384.0000 | 0.0000***  |
##   | Hotelling-Lawley educ        |   0.0046 |     0 |   7.00 | 384.0000 | 0.9718  |
##   | Hotelling-Lawley income      |   0.0564 |     3 |   7.00 | 384.0000 | 0.0035**  |
##   |                   |          |       |        |          |          |
##   | Roy    degree      |   0.1049 |     6 |   7.00 | 387.0000 | 0.0000***  |
##   | Roy    age         |   0.1172 |     6 |   7.00 | 384.0000 | 0.0000***  |
##   | Roy    educ        |   0.0046 |     0 |   7.00 | 384.0000 | 0.9718  |
##   | Roy    income      |   0.0564 |     3 |   7.00 | 384.0000 | 0.0035**  |
##   +-------------------+----------+-------+--------+----------+----------+
##   Signifikansi: *** p<0.001 | ** p<0.01 | * p<0.05

cat("  Follow-up ANCOVA Univariat (DV ~ degree + kovariat):\n")

##   Follow-up ANCOVA Univariat (DV ~ degree + kovariat):

cat(sprintf("  %-10s | %8s | %5s | %8s | %-12s\n",
            "DV", "F", "df", "p-value", "Signifikan"))

##   DV         |        F |    df |  p-value | Signifikan

cat(paste0("  ", strrep("-", 55), "\n"))

##   -------------------------------------------------------

aov_mancova_degree <- summary.aov(mancova_degree)
for (dv_name in all_dvs) {
  tbl   <- aov_mancova_degree[[dv_name]]
  f_val <- tbl["degree.L", "F value"]   # .L = linear trend (ordered factor)
  p_val <- tbl["degree.L", "Pr(>F)"]
  sig   <- ifelse(p_val < 0.001, "***",
           ifelse(p_val < 0.01,  "**",
           ifelse(p_val < 0.05,  "*",
           ifelse(p_val < 0.10,  ".",  "ns"))))
  cat(sprintf("  %-10s | %8.4f | %5.0f | %8.4f | %s\n",
              dv_name, f_val, tbl["degree.L", "Df"], p_val, sig))
}
cat("\n")

MANCOVA TWO WAY

sex vs race

cat("===========================================================\n")

## ===========================================================

cat("  BAGIAN 3: MANCOVA DUA FAKTOR (sex x race + Kovariat)\n")

##   BAGIAN 3: MANCOVA DUA FAKTOR (sex x race + Kovariat)

cat("===========================================================\n\n")

## ===========================================================

mancova_2way <- manova(make_mancova_formula("sex * race + age + educ + income"), data = df)

cat("--- Hasil Pillai's Trace (sex x race + kovariat) ---\n")

## --- Hasil Pillai's Trace (sex x race + kovariat) ---

sum_2way_cov <- summary(mancova_2way, test = "Pillai")
print(sum_2way_cov)

##            Df   Pillai approx F num Df den Df    Pr(>F)    
## sex         1 0.037283   2.1189      7    383  0.040803 *  
## race        2 0.112710   3.2761     14    768 4.244e-05 ***
## age         1 0.097004   5.8777      7    383 1.689e-06 ***
## educ        1 0.042428   2.4243      7    383  0.019346 *  
## income      1 0.054255   3.1388      7    383  0.003092 ** 
## sex:race    2 0.043434   1.2178     14    768  0.256521    
## Residuals 389                                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- Follow-up ANCOVA Univariat (sex x race + kovariat) ---\n")

## 
## --- Follow-up ANCOVA Univariat (sex x race + kovariat) ---

cat(sprintf("  %-10s | %-12s | %8s | %5s | %8s | %s\n",
            "DV", "Efek", "F", "df", "p-value", "Sig"))

##   DV         | Efek         |        F |    df |  p-value | Sig

cat(paste0("  ", strrep("-", 65), "\n"))

##   -----------------------------------------------------------------

aov_2way_cov <- summary.aov(mancova_2way)
efek_list    <- c("sex", "race", "sex:race", "age", "educ", "income")

for (dv_name in all_dvs) {
  tbl <- aov_2way_cov[[dv_name]]
  for (efek in efek_list) {
    if (efek %in% rownames(tbl)) {
      f_val <- tbl[efek, "F value"]
      p_val <- tbl[efek, "Pr(>F)"]
      df_v  <- tbl[efek, "Df"]
      sig   <- ifelse(p_val < 0.001, "***",
               ifelse(p_val < 0.01,  "**",
               ifelse(p_val < 0.05,  "*",
               ifelse(p_val < 0.10,  ".",  "ns"))))
      cat(sprintf("  %-10s | %-12s | %8.4f | %5.0f | %8.4f | %s\n",
                  dv_name, efek, f_val, df_v, p_val, sig))
    }
  }
  cat(paste0("  ", strrep(".", 65), "\n"))
}

##   .................................................................
##   .................................................................
##   .................................................................
##   .................................................................
##   .................................................................
##   .................................................................
##   .................................................................

# Interaksi plot (sex x race) 
cat("\n--- Interaksi Plot: sex x race (Adjusted Means) ---\n")

## 
## --- Interaksi Plot: sex x race (Adjusted Means) ---

interaction_plots_cov <- list()
for (dv in all_dvs) {
  form_ancova <- as.formula(
    paste(dv, "~ sex * race + age + educ + income")
  )
  fit_ancova <- lm(form_ancova, data = df)
  emm_int    <- as.data.frame(emmeans::emmeans(fit_ancova, ~ sex * race))

  p_int <- ggplot(emm_int,
                  aes(x = race, y = emmean,
                      color = sex, group = sex,
                      ymin = lower.CL, ymax = upper.CL)) +
    geom_line(linewidth = 1.2) +
    geom_point(size = 3.5) +
    geom_errorbar(width = 0.15, linewidth = 0.8) +
    scale_color_manual(values = c("#2980b9", "#e74c3c")) +
    labs(title    = paste("Interaksi:", dv),
         subtitle = "sex x race (adjusted, error bar = 95% CI)",
         x = "Ras", y = paste("Adj. Mean", dv),
         color = "Jenis Kelamin") +
    theme_minimal(base_size = 10) +
    theme(legend.position = "bottom")
  interaction_plots_cov[[dv]] <- p_int
}
do.call(gridExtra::grid.arrange,
        c(interaction_plots_cov, ncol = 3,
          top = "Interaksi Plot MANCOVA Dua Faktor: sex x race (Adjusted Means)"))

UJI ASUMSI ANCOVA

packages <- c(
  "tidyverse",  
  "emmeans",    
  "effectsize", 
  "car",        
  "ggpubr",     
  "gridExtra",  
  "rstatix",    
  "broom"       
)

new_pkg <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new_pkg) > 0) install.packages(new_pkg, dependencies = TRUE)
 
library(tidyverse)
library(emmeans)

## Warning: package 'emmeans' was built under R version 4.5.3

## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'

library(effectsize)

## Warning: package 'effectsize' was built under R version 4.5.3

library(car)

## Warning: package 'car' was built under R version 4.5.3

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.5.3

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:purrr':
## 
##     some

library(ggpubr)

## Warning: package 'ggpubr' was built under R version 4.5.3

library(gridExtra)

## Warning: package 'gridExtra' was built under R version 4.5.3

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

library(rstatix)

## Warning: package 'rstatix' was built under R version 4.5.3

## 
## Attaching package: 'rstatix'

## The following object is masked _by_ '.GlobalEnv':
## 
##     get_mode

## The following objects are masked from 'package:effectsize':
## 
##     cohens_d, eta_squared

## The following object is masked from 'package:stats':
## 
##     filter

library(broom)

cat(" Package berhasil dimuat.\n\n")

##  Package berhasil dimuat.

if (exists("df") && is.data.frame(df) && nrow(df) > 0) {
  cat("Menggunakan objek 'df' dari environment (session aktif).\n\n")
} else if (exists("gss_export") && is.data.frame(gss_export)) {
  cat("Menggunakan 'gss_export', membuat ulang 'df'...\n\n")
  analisis_vars <- c("happy","health","life","satfin",
                     "trust","helpful","fair",
                     "sex","race","age","educ","income")
  df <- gss_export[complete.cases(gss_export[, analisis_vars]), ]
} else {
  cat(" Memuat dari GSS2024_clean.csv...\n\n")
  if (!file.exists("GSS2024_clean.csv")) {
    stop(paste0(
      " File 'GSS2024_clean.csv' tidak ditemukan!\n",
      "   Jalankan dulu skrip EDA kemudian skrip MANOVA.\n",
      "   Working directory: ", getwd()
    ))
  }
  gss_export <- read.csv("GSS2024_clean.csv", stringsAsFactors = FALSE)
  gss_export$sex    <- factor(gss_export$sex,
                               levels = c("Pria","Wanita"))
  gss_export$race   <- factor(gss_export$race,
                               levels = c("Putih","Hitam","Lainnya"))
  gss_export$degree <- factor(gss_export$degree,
                               levels = c("< SMA","SMA","Diploma",
                                          "Sarjana","Pascasarjana"),
                               ordered = TRUE)
  analisis_vars <- c("happy","health","life","satfin",
                     "trust","helpful","fair",
                     "sex","race","age","educ","income")
  gss_export[c("happy","health","life","satfin",
               "trust","helpful","fair",
               "age","educ","income")] <-
    lapply(gss_export[c("happy","health","life","satfin",
                         "trust","helpful","fair",
                         "age","educ","income")], as.numeric)
  df <- gss_export[complete.cases(gss_export[, analisis_vars]), ]
}

## Menggunakan objek 'df' dari environment (session aktif).

dv_wellbeing <- c("happy", "health", "life", "satfin")
dv_trust     <- c("trust", "helpful", "fair")
all_dvs      <- c(dv_wellbeing, dv_trust)
covariates   <- c("age", "educ", "income")
 
# Koreksi alpha Bonferroni untuk follow-up ANCOVA
alpha_bonf <- 0.05 / length(all_dvs)
 
cat(sprintf("  Dataset     : %d observasi\n", nrow(df)))

##   Dataset     : 398 observasi

cat(sprintf("  DV          : %s\n", paste(all_dvs, collapse = ", ")))

##   DV          : happy, health, life, satfin, trust, helpful, fair

cat(sprintf("  Kovariat    : %s\n", paste(covariates, collapse = ", ")))

##   Kovariat    : age, educ, income

cat(sprintf("  α Bonferroni: %.4f (0.05 / %d DV)\n\n", alpha_bonf, length(all_dvs)))

##   α Bonferroni: 0.0071 (0.05 / 7 DV)

get_pe2 <- function(model, term) {
  a <- car::Anova(model, type = 3)
  if (!term %in% rownames(a)) return(NA)
  ss_e <- a[term, "Sum Sq"]
  ss_r <- a["Residuals", "Sum Sq"]
  round(ss_e / (ss_e + ss_r), 4)
}
 
# Interpretasi partial eta-squared (Cohen, 1988)
interp_pe2 <- function(x) {
  ifelse(is.na(x), "NA",
  ifelse(x < 0.01, "Sangat Kecil",
  ifelse(x < 0.06, "Kecil",
  ifelse(x < 0.14, "Sedang", "Besar"))))
}
 
# Interpretasi Cohen's d
interp_d <- function(d) {
  d <- abs(d)
  ifelse(is.na(d), "NA",
  ifelse(d < 0.20, "Sangat Kecil",
  ifelse(d < 0.50, "Kecil",
  ifelse(d < 0.80, "Sedang", "Besar"))))
}

# Cetak header bagian
print_header <- function(judul) {
  cat(sprintf("\n%s\n  %s\n%s\n",
              strrep("═", 60), judul, strrep("═", 60)))
}
 
# Cetak sub-header
print_sub <- function(judul) {
  cat(sprintf("\n─── %s ───\n", judul))
}

Normalitas Residual

print_sub("Normalitas Residual (Shapiro-Wilk per DV)")

## 
## ─── Normalitas Residual (Shapiro-Wilk per DV) ───

cat("  H0: Residual berdistribusi normal\n")

##   H0: Residual berdistribusi normal

cat("  Model: DV ~ sex + race + age + educ + income\n\n")

##   Model: DV ~ sex + race + age + educ + income

cat(sprintf("  %-10s | %8s | %8s | %-14s | %s\n",
            "DV", "W", "p-value", "Kesimpulan", "Catatan"))

##   DV         |        W |  p-value | Kesimpulan     | Catatan

cat(paste0("  ", strrep("-", 65), "\n"))

##   -----------------------------------------------------------------

normality_results <- data.frame()
for (dv in all_dvs) {
  model_tmp <- lm(as.formula(
    paste(dv, "~ sex + race + age + educ + income")), data = df)
  resid_vals <- residuals(model_tmp)
 
  # SW max 5000; subsampel jika lebih
  resid_sw <- if (length(resid_vals) > 5000) {
    set.seed(42); sample(resid_vals, 5000)
  } else resid_vals
 
  sw  <- shapiro.test(resid_sw)
  sig <- ifelse(sw$p.value > 0.05, "Normal", "Tidak Normal️")
  cat_note <- ifelse(sw$p.value > 0.05, "",
              ifelse(nrow(df) > 200,
                     "N besar → robust", "Periksa Q-Q plot"))
    cat(sprintf("  %-10s | %8.4f | %8.4f | %-14s | %s\n",
              dv, sw$statistic, sw$p.value, sig, cat_note))
 
  normality_results <- rbind(normality_results, data.frame(
    DV = dv, W = round(sw$statistic, 4),
    p_value = round(sw$p.value, 4), Normal = sw$p.value > 0.05
  ))
}

##   happy      |   0.9067 |   0.0000 | Tidak Normal️ | N besar → robust
##   health     |   0.9613 |   0.0000 | Tidak Normal️ | N besar → robust
##   life       |   0.8255 |   0.0000 | Tidak Normal️ | N besar → robust
##   satfin     |   0.9495 |   0.0000 | Tidak Normal️ | N besar → robust
##   trust      |   0.9271 |   0.0000 | Tidak Normal️ | N besar → robust
##   helpful    |   0.9354 |   0.0000 | Tidak Normal️ | N besar → robust
##   fair       |   0.9240 |   0.0000 | Tidak Normal️ | N besar → robust

cat(" ANCOVA robust terhadap non-normalitas residual jika N > 200 per kelompok\n")

##  ANCOVA robust terhadap non-normalitas residual jika N > 200 per kelompok

# Q-Q plot residual
qqplot_list <- list()
for (dv in all_dvs) {
  model_tmp <- lm(as.formula(
    paste(dv, "~ sex + race + age + educ + income")), data = df)
  resid_df  <- data.frame(resid = residuals(model_tmp))
  p_qq <- ggplot(resid_df, aes(sample = resid)) +
    stat_qq(alpha = 0.4, color = "#2980b9", size = 0.8) +
    stat_qq_line(color = "#e74c3c", linewidth = 0.9) +
    labs(title = paste("Q-Q:", dv), x = "Theoretical", y = "Sample") +
    theme_minimal(base_size = 9)
  qqplot_list[[dv]] <- p_qq
}
do.call(gridExtra::grid.arrange,
        c(qqplot_list, ncol = 4,
          top = "Q-Q Plot Residual ANCOVA (DV ~ sex + race + kovariat)"))

Homogenitas Varians (Levene’s Test)

print_sub("Homogenitas Varians Residual (Levene's Test)")

## 
## ─── Homogenitas Varians Residual (Levene's Test) ───

cat("  H0: Varians residual sama di semua kelompok\n\n")

##   H0: Varians residual sama di semua kelompok

levene_results <- data.frame()
for (iv in c("sex", "race")) {
  cat(sprintf("  Kelompok: %s\n", iv))
  cat(sprintf("  %-10s | %8s | %5s | %8s | %s\n",
              "DV", "F", "df", "p-value", "Kesimpulan"))
  cat(paste0("  ", strrep("-", 50), "\n"))
 
  for (dv in all_dvs) {
    lev <- car::leveneTest(
      as.formula(paste(dv, "~", iv)), data = df, center = median)
    p_v <- lev$`Pr(>F)`[1]
    f_v <- lev$`F value`[1]
    sig <- ifelse(p_v > 0.05, "Homogen", "Tidak Homogen️")
    cat(sprintf("  %-10s | %8.4f | %5.0f | %8.4f | %s\n",
                dv, f_v, lev$Df[1], p_v, sig))
    levene_results <- rbind(levene_results, data.frame(
      IV = iv, DV = dv,
      F_val   = round(f_v, 4),
      p_value = round(p_v, 4),
      Homogen = p_v > 0.05
    ))
  }
  cat("\n")
}

##   Kelompok: sex
##   DV         |        F |    df |  p-value | Kesimpulan
##   --------------------------------------------------
##   happy      |   0.1397 |     1 |   0.7088 | Homogen
##   health     |   4.7374 |     1 |   0.0301 | Tidak Homogen️
##   life       |   1.8795 |     1 |   0.1712 | Homogen
##   satfin     |   3.2744 |     1 |   0.0711 | Homogen
##   trust      |   5.2464 |     1 |   0.0225 | Tidak Homogen️
##   helpful    |   0.0978 |     1 |   0.7547 | Homogen
##   fair       |   0.1756 |     1 |   0.6754 | Homogen
## 
##   Kelompok: race
##   DV         |        F |    df |  p-value | Kesimpulan
##   --------------------------------------------------
##   happy      |   0.3073 |     2 |   0.7356 | Homogen
##   health     |   0.6201 |     2 |   0.5384 | Homogen
##   life       |   0.6288 |     2 |   0.5338 | Homogen
##   satfin     |   1.8322 |     2 |   0.1614 | Homogen
##   trust      |  11.8977 |     2 |   0.0000 | Tidak Homogen️
##   helpful    |   5.4842 |     2 |   0.0045 | Tidak Homogen️
##   fair       |   3.3543 |     2 |   0.0359 | Tidak Homogen️

cat("Jika tidak homogen: gunakan Welch's ANCOVA atau transformasi data\n")

## Jika tidak homogen: gunakan Welch's ANCOVA atau transformasi data

Homogenitas Slope Regresi

print_sub("1C. Homogenitas Slope Regresi (Uji Interaksi IV × Kovariat)")

## 
## ─── 1C. Homogenitas Slope Regresi (Uji Interaksi IV × Kovariat) ───

cat("  H0: Slope regresi kovariat sama di semua kelompok (tidak ada interaksi)\n")

##   H0: Slope regresi kovariat sama di semua kelompok (tidak ada interaksi)

homoslope_results <- data.frame()
for (iv in c("sex", "race")) {
  cat(sprintf("  ─ Faktor: %s ─\n", iv))
  cat(sprintf("  %-10s | %-14s | %8s | %5s | %8s | %s\n",
              "DV", "Interaksi", "F", "df", "p-value", "Kesimpulan"))
  cat(paste0("  ", strrep("-", 70), "\n"))
 
  for (dv in all_dvs) {
    # Model dengan interaksi IV × setiap kovariat
    formula_int <- as.formula(paste(
      dv, "~", iv, "+",
      paste(covariates, collapse = " + "), "+",
      paste(iv, "*", covariates, collapse = " + ")
    ))
    model_int <- lm(formula_int, data = df)
    aov_int   <- car::Anova(model_int, type = 3)
 
    # Cek setiap interaksi IV:kovariat
    for (cov in covariates) {
      term <- paste0(iv, ":", cov)
      if (!term %in% rownames(aov_int)) term <- paste0(cov, ":", iv)
      if (term %in% rownames(aov_int)) {
        f_v  <- aov_int[term, "F value"]
        df_v <- aov_int[term, "Df"]
        p_v  <- aov_int[term, "Pr(>F)"]
        sig  <- ifelse(p_v > 0.05, "Homogen",
                       "Tidak Homogen")
        cat(sprintf("  %-10s | %-14s | %8.4f | %5.0f | %8.4f | %s\n",
                    dv, term, f_v, df_v, p_v, sig))
        homoslope_results <- rbind(homoslope_results, data.frame(
          IV = iv, DV = dv, Interaksi = term,
          F_val   = round(f_v, 4),
          p_value = round(p_v, 4),
          Homogen = p_v > 0.05
        ))
      }
    }
  }
  cat("\n")
}

##   ─ Faktor: sex ─
##   DV         | Interaksi      |        F |    df |  p-value | Kesimpulan
##   ----------------------------------------------------------------------
##   happy      | sex:age        |   0.4522 |     1 |   0.5017 | Homogen
##   happy      | sex:educ       |   1.0464 |     1 |   0.3070 | Homogen
##   happy      | sex:income     |   0.4646 |     1 |   0.4959 | Homogen
##   health     | sex:age        |   2.1771 |     1 |   0.1409 | Homogen
##   health     | sex:educ       |   1.5738 |     1 |   0.2104 | Homogen
##   health     | sex:income     |   0.7260 |     1 |   0.3947 | Homogen
##   life       | sex:age        |   1.0544 |     1 |   0.3051 | Homogen
##   life       | sex:educ       |   0.1717 |     1 |   0.6789 | Homogen
##   life       | sex:income     |   0.8625 |     1 |   0.3536 | Homogen
##   satfin     | sex:age        |   4.0199 |     1 |   0.0457 | Tidak Homogen
##   satfin     | sex:educ       |   0.2007 |     1 |   0.6544 | Homogen
##   satfin     | sex:income     |   1.6979 |     1 |   0.1933 | Homogen
##   trust      | sex:age        |   1.3772 |     1 |   0.2413 | Homogen
##   trust      | sex:educ       |   0.5974 |     1 |   0.4400 | Homogen
##   trust      | sex:income     |   1.3119 |     1 |   0.2527 | Homogen
##   helpful    | sex:age        |   0.5217 |     1 |   0.4705 | Homogen
##   helpful    | sex:educ       |   0.2195 |     1 |   0.6397 | Homogen
##   helpful    | sex:income     |   3.3791 |     1 |   0.0668 | Homogen
##   fair       | sex:age        |   1.1769 |     1 |   0.2786 | Homogen
##   fair       | sex:educ       |   0.6792 |     1 |   0.4104 | Homogen
##   fair       | sex:income     |   0.0497 |     1 |   0.8238 | Homogen
## 
##   ─ Faktor: race ─
##   DV         | Interaksi      |        F |    df |  p-value | Kesimpulan
##   ----------------------------------------------------------------------
##   happy      | race:age       |   0.4050 |     2 |   0.6673 | Homogen
##   happy      | race:educ      |   0.9489 |     2 |   0.3881 | Homogen
##   happy      | race:income    |   1.4838 |     2 |   0.2281 | Homogen
##   health     | race:age       |   0.0993 |     2 |   0.9055 | Homogen
##   health     | race:educ      |   0.8056 |     2 |   0.4476 | Homogen
##   health     | race:income    |   2.2561 |     2 |   0.1061 | Homogen
##   life       | race:age       |   0.5025 |     2 |   0.6054 | Homogen
##   life       | race:educ      |   0.0034 |     2 |   0.9966 | Homogen
##   life       | race:income    |   0.1242 |     2 |   0.8832 | Homogen
##   satfin     | race:age       |   1.8377 |     2 |   0.1606 | Homogen
##   satfin     | race:educ      |   1.6516 |     2 |   0.1931 | Homogen
##   satfin     | race:income    |   4.0611 |     2 |   0.0180 | Tidak Homogen
##   trust      | race:age       |   0.7487 |     2 |   0.4737 | Homogen
##   trust      | race:educ      |   0.3098 |     2 |   0.7338 | Homogen
##   trust      | race:income    |   0.6399 |     2 |   0.5279 | Homogen
##   helpful    | race:age       |   0.4096 |     2 |   0.6642 | Homogen
##   helpful    | race:educ      |   1.2549 |     2 |   0.2863 | Homogen
##   helpful    | race:income    |   3.2470 |     2 |   0.0400 | Tidak Homogen
##   fair       | race:age       |   1.0448 |     2 |   0.3527 | Homogen
##   fair       | race:educ      |   0.2526 |     2 |   0.7769 | Homogen
##   fair       | race:income    |   1.0324 |     2 |   0.3571 | Homogen

Linearitas Kovariat terhadap variabel dependen

print_sub("1D. Linearitas Kovariat terhadap DV")

## 
## ─── 1D. Linearitas Kovariat terhadap DV ───

cat("  Cek visual: scatter plot kovariat vs DV per kelompok\n\n")

##   Cek visual: scatter plot kovariat vs DV per kelompok

# Plot linearitas untuk kovariat age (contoh representatif)
lin_plots <- list()
for (dv in dv_wellbeing) {
  p_lin <- ggplot(df, aes(x = age, y = .data[[dv]], color = sex)) +
    geom_point(alpha = 0.2, size = 0.8) +
    geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
    scale_color_manual(values = c("#2980b9", "#e74c3c")) +
    labs(title   = paste("Linearitas: age →", dv),
         x = "Age", y = dv, color = "Sex") +
    theme_minimal(base_size = 9) +
    theme(legend.position = "bottom")
  lin_plots[[dv]] <- p_lin
}
do.call(gridExtra::grid.arrange,
        c(lin_plots, ncol = 4,
          top = "Uji Linearitas: age vs DV (slope per kelompok sex)"))

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Independensi kovariat

print_sub("1E. Independensi Kovariat dari IV (Kovariat tidak berkorelasi dengan IV)")

## 
## ─── 1E. Independensi Kovariat dari IV (Kovariat tidak berkorelasi dengan IV) ───

cat("  H0: Tidak ada perbedaan mean kovariat antar kelompok\n\n")

##   H0: Tidak ada perbedaan mean kovariat antar kelompok

cat(sprintf("  %-12s | %-8s | %8s | %5s | %8s | %s\n",
            "Kovariat", "IV", "F", "df", "p-value", "Kesimpulan"))

##   Kovariat     | IV       |        F |    df |  p-value | Kesimpulan

cat(paste0("  ", strrep("-", 65), "\n"))

##   -----------------------------------------------------------------

for (cov in covariates) {
  for (iv in c("sex", "race")) {
    aov_cov <- aov(as.formula(paste(cov, "~", iv)), data = df)
    f_v     <- summary(aov_cov)[[1]]["F value"][1, 1]
    p_v     <- summary(aov_cov)[[1]]["Pr(>F)"][1, 1]
    df_v    <- summary(aov_cov)[[1]]["Df"][1, 1]
    sig     <- ifelse(p_v > 0.05,
                      "Independen",
                      "Berkorelasi")
    cat(sprintf("  %-12s | %-8s | %8.4f | %5.0f | %8.4f | %s\n",
                cov, iv, f_v, df_v, p_v, sig))
  }
}

##   age          | sex      |   0.1504 |     1 |   0.6983 | Independen
##   age          | race     |   8.2279 |     2 |   0.0003 | Berkorelasi
##   educ         | sex      |   0.3857 |     1 |   0.5349 | Independen
##   educ         | race     |   5.9087 |     2 |   0.0030 | Berkorelasi
##   income       | sex      |   2.8854 |     1 |   0.0902 | Independen
##   income       | race     |   3.0911 |     2 |   0.0466 | Berkorelasi

cat("  💡 Jika kovariat berkorelasi dengan IV → ANCOVA masih valid, namun\n")

##   💡 Jika kovariat berkorelasi dengan IV → ANCOVA masih valid, namun

cat("     interpretasi 'mengontrol kovariat' perlu kehati-hatian\n")

##      interpretasi 'mengontrol kovariat' perlu kehati-hatian

ANCOVA

sex

print_header("ANCOVA ONE WAY — sex")

## 
## ════════════════════════════════════════════════════════════
##   ANCOVA ONE WAY — sex
## ════════════════════════════════════════════════════════════

cat(sprintf("  Model: DV ~ sex + age + educ + income\n"))

##   Model: DV ~ sex + age + educ + income

cat(sprintf("  α Bonferroni = %.4f\n\n", alpha_bonf))

##   α Bonferroni = 0.0071

ancova_sex_models  <- list()
ancova_sex_results <- data.frame()
 
for (dv in all_dvs) {
  formula_ancova <- as.formula(
    paste(dv, "~ sex + age + educ + income"))
  model <- lm(formula_ancova, data = df)
  ancova_sex_models[[dv]] <- model
 
  # Type III SS (kontrol urutan variabel)
  aov3 <- car::Anova(model, type = 3)
 
  # Ambil statistik untuk faktor sex
  f_sex <- aov3["sex", "F value"]
  p_sex <- aov3["sex", "Pr(>F)"]
  df_sex <- aov3["sex", "Df"]
 
  # partial eta-squared
  pe2_sex <- get_pe2(model, "sex")
 
  # Kovariat
  p_age   <- aov3["age",    "Pr(>F)"]
  p_educ  <- aov3["educ",   "Pr(>F)"]
  p_income<- aov3["income", "Pr(>F)"]
 
  sig_bonf <- ifelse(p_sex < alpha_bonf, "Signifikan", "Tidak Signifikan")
  sig_star <- ifelse(p_sex < 0.001, "***",
              ifelse(p_sex < 0.01,  "**",
              ifelse(p_sex < 0.05,  "*",  "ns")))
 
  ancova_sex_results <- rbind(ancova_sex_results, data.frame(
    DV            = dv,
    F_sex         = round(f_sex,  4),
    df_sex        = df_sex,
    p_sex         = round(p_sex,  4),
    Sig           = sig_star,
    Sig_Bonf      = sig_bonf,
    Partial_eta2  = pe2_sex,
    Interpretasi  = interp_pe2(pe2_sex),
    p_age         = round(p_age,   4),
    p_educ        = round(p_educ,  4),
    p_income      = round(p_income,4)
  ))
}
 
cat("  Hasil ANCOVA: DV ~ sex + age + educ + income (Type III SS)\n\n")

##   Hasil ANCOVA: DV ~ sex + age + educ + income (Type III SS)

cat(sprintf("  %-10s | %8s | %5s | %8s | %4s | %-18s | %6s | %-12s\n",
            "DV", "F(sex)", "df", "p-value", "Sig",
            "Sig Bonferroni", "η²_p", "Interpretasi"))

##   DV         |   F(sex) |    df |  p-value |  Sig | Sig Bonferroni     | η²_p | Interpretasi

cat(paste0("  ", strrep("-", 90), "\n"))

##   ------------------------------------------------------------------------------------------

for (i in seq_len(nrow(ancova_sex_results))) {
  r <- ancova_sex_results[i, ]
  cat(sprintf("  %-10s | %8.4f | %5.0f | %8.4f | %4s | %-18s | %6.4f | %s\n",
              r$DV, r$F_sex, r$df_sex, r$p_sex, r$Sig,
              r$Sig_Bonf, r$Partial_eta2, r$Interpretasi))
}

##   happy      |   0.7096 |     1 |   0.4001 |   ns | Tidak Signifikan   | 0.0018 | Sangat Kecil
##   health     |   2.7917 |     1 |   0.0955 |   ns | Tidak Signifikan   | 0.0071 | Sangat Kecil
##   life       |   1.0570 |     1 |   0.3045 |   ns | Tidak Signifikan   | 0.0027 | Sangat Kecil
##   satfin     |   1.0827 |     1 |   0.2987 |   ns | Tidak Signifikan   | 0.0027 | Sangat Kecil
##   trust      |   5.1652 |     1 |   0.0236 |    * | Tidak Signifikan   | 0.0130 | Kecil
##   helpful    |   0.2501 |     1 |   0.6173 |   ns | Tidak Signifikan   | 0.0006 | Sangat Kecil
##   fair       |   1.9853 |     1 |   0.1596 |   ns | Tidak Signifikan   | 0.0050 | Sangat Kecil

cat(sprintf("\n  Kovariat — p-value (signifikansi pengaruh kovariat per DV):\n"))

## 
##   Kovariat — p-value (signifikansi pengaruh kovariat per DV):

cat(sprintf("  %-10s | %8s | %8s | %8s\n", "DV", "p_age", "p_educ", "p_income"))

##   DV         |    p_age |   p_educ | p_income

cat(paste0("  ", strrep("-", 45), "\n"))

##   ---------------------------------------------

for (i in seq_len(nrow(ancova_sex_results))) {
  r <- ancova_sex_results[i, ]
  cat(sprintf("  %-10s | %8.4f | %8.4f | %8.4f\n",
              r$DV, r$p_age, r$p_educ, r$p_income))
}

##   happy      |   0.3556 |   0.1993 |   0.0758
##   health     |   0.2948 |   0.0989 |   0.0000
##   life       |   0.5231 |   0.2195 |   0.3152
##   satfin     |   0.0003 |   0.0082 |   0.4629
##   trust      |   0.0000 |   0.3235 |   0.3539
##   helpful    |   0.0001 |   0.6366 |   0.0946
##   fair       |   0.0035 |   0.3162 |   0.0728

race

print_header("ANCOVA ONE WAY — race")

## 
## ════════════════════════════════════════════════════════════
##   ANCOVA ONE WAY — race
## ════════════════════════════════════════════════════════════

cat(sprintf("  Model: DV ~ race + age + educ + income\n"))

##   Model: DV ~ race + age + educ + income

cat(sprintf("  α Bonferroni = %.4f\n\n", alpha_bonf))

##   α Bonferroni = 0.0071

ancova_race_models  <- list()
ancova_race_results <- data.frame()
 
for (dv in all_dvs) {
  formula_ancova <- as.formula(
    paste(dv, "~ race + age + educ + income"))
  model <- lm(formula_ancova, data = df)
  ancova_race_models[[dv]] <- model
 
  aov3   <- car::Anova(model, type = 3)
  f_race <- aov3["race",   "F value"]
  p_race <- aov3["race",   "Pr(>F)"]
  df_race<- aov3["race",   "Df"]
  pe2    <- get_pe2(model, "race")
 
  p_age   <- aov3["age",    "Pr(>F)"]
  p_educ  <- aov3["educ",   "Pr(>F)"]
  p_income<- aov3["income", "Pr(>F)"]
 
  sig_star <- ifelse(p_race < 0.001, "***",
              ifelse(p_race < 0.01,  "**",
              ifelse(p_race < 0.05,  "*", "ns")))
    ancova_race_results <- rbind(ancova_race_results, data.frame(
    DV           = dv,
    F_race       = round(f_race, 4),
    df_race      = df_race,
    p_race       = round(p_race, 4),
    Sig          = sig_star,
    Sig_Bonf     = ifelse(p_race < alpha_bonf,
                          "Signifikan", "Tidak Signifikan"),
    Partial_eta2 = pe2,
    Interpretasi = interp_pe2(pe2),
    p_age        = round(p_age,    4),
    p_educ       = round(p_educ,   4),
    p_income     = round(p_income, 4)
  ))
}
 
cat("  Hasil ANCOVA: DV ~ race + age + educ + income (Type III SS)\n\n")

##   Hasil ANCOVA: DV ~ race + age + educ + income (Type III SS)

cat(sprintf("  %-10s | %8s | %5s | %8s | %4s | %-18s | %6s | %-12s\n",
            "DV", "F(race)", "df", "p-value", "Sig",
            "Sig Bonferroni", "η²_p", "Interpretasi"))

##   DV         |  F(race) |    df |  p-value |  Sig | Sig Bonferroni     | η²_p | Interpretasi

cat(paste0("  ", strrep("-", 90), "\n"))

##   ------------------------------------------------------------------------------------------

for (i in seq_len(nrow(ancova_race_results))) {
  r <- ancova_race_results[i, ]
  cat(sprintf("  %-10s | %8.4f | %5.0f | %8.4f | %4s | %-18s | %6.4f | %s\n",
              r$DV, r$F_race, r$df_race, r$p_race, r$Sig,
              r$Sig_Bonf, r$Partial_eta2, r$Interpretasi))
}

##   happy      |   1.1033 |     2 |   0.3328 |   ns | Tidak Signifikan   | 0.0056 | Sangat Kecil
##   health     |   0.8356 |     2 |   0.4344 |   ns | Tidak Signifikan   | 0.0042 | Sangat Kecil
##   life       |   0.2872 |     2 |   0.7505 |   ns | Tidak Signifikan   | 0.0015 | Sangat Kecil
##   satfin     |   1.2060 |     2 |   0.3005 |   ns | Tidak Signifikan   | 0.0061 | Sangat Kecil
##   trust      |   0.5566 |     2 |   0.5736 |   ns | Tidak Signifikan   | 0.0028 | Sangat Kecil
##   helpful    |   2.4783 |     2 |   0.0852 |   ns | Tidak Signifikan   | 0.0125 | Kecil
##   fair       |   8.4154 |     2 |   0.0003 |  *** | Signifikan         | 0.0412 | Kecil

sex dan race

print_header("ANCOVA TWO WAY — sex + race + kovariat")

## 
## ════════════════════════════════════════════════════════════
##   ANCOVA TWO WAY — sex + race + kovariat
## ════════════════════════════════════════════════════════════

cat("  Model: DV ~ sex + race + sex:race + age + educ + income\n\n")

##   Model: DV ~ sex + race + sex:race + age + educ + income

ancova_2way_models  <- list()
ancova_2way_results <- data.frame()
 
for (dv in all_dvs) {
  formula_2way <- as.formula(
    paste(dv, "~ sex * race + age + educ + income"))
  model <- lm(formula_2way, data = df)
  ancova_2way_models[[dv]] <- model
 
  aov3 <- car::Anova(model, type = 3)
 
  get_stat <- function(term) {
    if (!term %in% rownames(aov3)) return(c(NA, NA, NA))
    c(aov3[term, "F value"], aov3[term, "Df"], aov3[term, "Pr(>F)"])
  }
 
  st_sex  <- get_stat("sex")
  st_race <- get_stat("race")
  st_int  <- get_stat("sex:race")
 
  pe2_sex  <- get_pe2(model, "sex")
  pe2_race <- get_pe2(model, "race")
  pe2_int  <- get_pe2(model, "sex:race")
  
    ancova_2way_results <- rbind(ancova_2way_results, data.frame(
    DV             = dv,
    F_sex          = round(st_sex[1],  4),
    p_sex          = round(st_sex[3],  4),
    sig_sex        = ifelse(st_sex[3] < 0.001, "***",
                    ifelse(st_sex[3] < 0.01,  "**",
                    ifelse(st_sex[3] < 0.05,  "*", "ns"))),
    pe2_sex        = round(pe2_sex,    4),
    F_race         = round(st_race[1], 4),
    p_race         = round(st_race[3], 4),
    sig_race       = ifelse(st_race[3] < 0.001, "***",
                    ifelse(st_race[3] < 0.01,  "**",
                    ifelse(st_race[3] < 0.05,  "*", "ns"))),
    pe2_race       = round(pe2_race,   4),
    F_interaksi    = round(st_int[1],  4),
    p_interaksi    = round(st_int[3],  4),
    sig_int        = ifelse(st_int[3] < 0.001, "***",
                    ifelse(st_int[3] < 0.01,  "**",
                    ifelse(st_int[3] < 0.05,  "*", "ns"))),
    pe2_interaksi  = round(pe2_int,    4)
  ))
}
 
cat("  Hasil ANCOVA Dua Faktor (Type III SS):\n\n")

##   Hasil ANCOVA Dua Faktor (Type III SS):

cat(sprintf("  %-10s | %-16s | %-16s | %-16s\n",
            "DV", "sex", "race", "sex:race (interaksi)"))

##   DV         | sex              | race             | sex:race (interaksi)

cat(sprintf("  %-10s | %6s %5s %4s | %6s %5s %4s | %6s %5s %4s\n",
            "", "F", "p", "Sig", "F", "p", "Sig", "F", "p", "Sig"))

##              |      F     p  Sig |      F     p  Sig |      F     p  Sig

cat(paste0("  ", strrep("-", 75), "\n"))

##   ---------------------------------------------------------------------------

for (i in seq_len(nrow(ancova_2way_results))) {
  r <- ancova_2way_results[i, ]
  cat(sprintf(
    "  %-10s | %6.3f %5.3f %4s | %6.3f %5.3f %4s | %6.3f %5.3f %4s\n",
    r$DV,
    r$F_sex,  r$p_sex,  r$sig_sex,
    r$F_race, r$p_race, r$sig_race,
    r$F_interaksi, r$p_interaksi, r$sig_int))
}

##   happy      |  1.496 0.222   ns |  1.317 0.269   ns |  0.737 0.479   ns
##   health     |  1.912 0.168   ns |  1.273 0.281   ns |  0.259 0.772   ns
##   life       |  0.941 0.333   ns |  0.346 0.708   ns |  1.519 0.220   ns
##   satfin     |  2.969 0.086   ns |  1.442 0.238   ns |  0.864 0.422   ns
##   trust      |  5.088 0.025    * |  0.835 0.435   ns |  0.394 0.675   ns
##   helpful    |  0.002 0.963   ns |  0.335 0.715   ns |  2.130 0.120   ns
##   fair       |  3.448 0.064   ns |  5.877 0.003   ** |  1.076 0.342   ns

cat("  Sig: *** p<.001 | ** p<.01 | * p<.05 | ns = tidak signifikan\n")

##   Sig: *** p<.001 | ** p<.01 | * p<.05 | ns = tidak signifikan

# Interaksi plot jika ada DV dengan interaksi signifikan
dv_interaksi_sig <- ancova_2way_results$DV[
  !is.na(ancova_2way_results$p_interaksi) &
    ancova_2way_results$p_interaksi < 0.05]
 
if (length(dv_interaksi_sig) > 0) {
  cat(sprintf("\n DV dengan interaksi sex × race signifikan: %s\n",
              paste(dv_interaksi_sig, collapse = ", ")))
  int_plots <- list()
  for (dv in dv_interaksi_sig) {
    mean_int <- df %>%
      group_by(sex, race) %>%
      summarise(M = mean(.data[[dv]], na.rm = TRUE),
                SE = sd(.data[[dv]], na.rm = TRUE) / sqrt(n()),
                .groups = "drop")
    p_int <- ggplot(mean_int,
                    aes(x = race, y = M, color = sex, group = sex)) +
      geom_line(linewidth = 1.2) +
      geom_point(size = 3.5) +
      geom_errorbar(aes(ymin = M - SE, ymax = M + SE),
                    width = 0.15, linewidth = 0.8) +
      scale_color_manual(values = c("#2980b9", "#e74c3c")) +
      labs(title    = paste("Interaksi:", dv),
           subtitle = "sex × race (±1 SE)",
           x = "Ras", y = paste("Mean", dv),
           color = "Sex") +
      theme_minimal(base_size = 10) +
      theme(legend.position = "bottom")
    int_plots[[dv]] <- p_int
  }
  do.call(gridExtra::grid.arrange,
          c(int_plots, ncol = min(3, length(int_plots)),
            top = "Interaksi Plot: sex × race (ANCOVA)"))
} else {
  cat(" Tidak ada interaksi sex × race yang signifikan\n")
}

##  Tidak ada interaksi sex × race yang signifikan

Effect size

print_header("EFFECT SIZE")

## 
## ════════════════════════════════════════════════════════════
##   EFFECT SIZE
## ════════════════════════════════════════════════════════════

cat("  Partial η²: Cohen (1988) | Omega-squared: Olejnik & Algina (2003)\n")

##   Partial η²: Cohen (1988) | Omega-squared: Olejnik & Algina (2003)

cat("  Panduan: Kecil ≥ 0.01 | Sedang ≥ 0.06 | Besar ≥ 0.14\n\n")

##   Panduan: Kecil ≥ 0.01 | Sedang ≥ 0.06 | Besar ≥ 0.14

es_table <- data.frame()
for (dv in all_dvs) {
  # Partial eta-squared
  pe2_sex  <- get_pe2(ancova_sex_models[[dv]],  "sex")
  pe2_race <- get_pe2(ancova_race_models[[dv]], "race")
  pe2_age_s  <- get_pe2(ancova_sex_models[[dv]], "age")
  pe2_educ_s <- get_pe2(ancova_sex_models[[dv]], "educ")
  pe2_inc_s  <- get_pe2(ancova_sex_models[[dv]], "income")
 
  # Omega-squared
  tryCatch({
    om_sex  <- effectsize::omega_squared(
      ancova_sex_models[[dv]],  partial = TRUE)
    om_race <- effectsize::omega_squared(
      ancova_race_models[[dv]], partial = TRUE)
    omg_sex  <- round(om_sex$Omega2_partial[
      which(om_sex$Parameter == "sex")], 4)
    omg_race <- round(om_race$Omega2_partial[
      which(om_race$Parameter == "race")], 4)
  }, error = function(e) {
    omg_sex  <<- NA
    omg_race <<- NA
  })
  
    es_table <- rbind(es_table, data.frame(
    DV           = dv,
    pe2_sex      = pe2_sex,
    interp_sex   = interp_pe2(pe2_sex),
    omg2_sex     = if (exists("omg_sex"))  omg_sex  else NA,
    pe2_race     = pe2_race,
    interp_race  = interp_pe2(pe2_race),
    omg2_race    = if (exists("omg_race")) omg_race else NA,
    pe2_age      = pe2_age_s,
    pe2_educ     = pe2_educ_s,
    pe2_income   = pe2_inc_s
  ))
}
 
print_sub("Tabel Effect Size: Partial η² dan Omega-squared")

## 
## ─── Tabel Effect Size: Partial η² dan Omega-squared ───

print(es_table, row.names = FALSE)

##       DV pe2_sex   interp_sex omg2_sex pe2_race  interp_race omg2_race pe2_age
##    happy  0.0018 Sangat Kecil   0.0003   0.0056 Sangat Kecil    0.0059  0.0022
##   health  0.0071 Sangat Kecil   0.0014   0.0042 Sangat Kecil    0.0011  0.0028
##     life  0.0027 Sangat Kecil   0.0010   0.0015 Sangat Kecil    0.0000  0.0010
##   satfin  0.0027 Sangat Kecil   0.0015   0.0061 Sangat Kecil    0.0023  0.0330
##    trust  0.0130        Kecil   0.0129   0.0028 Sangat Kecil    0.0065  0.0529
##  helpful  0.0006 Sangat Kecil   0.0000   0.0125        Kecil    0.0184  0.0408
##     fair  0.0050 Sangat Kecil   0.0045   0.0412        Kecil    0.0537  0.0215
##  pe2_educ pe2_income
##    0.0042     0.0080
##    0.0069     0.0463
##    0.0038     0.0026
##    0.0177     0.0014
##    0.0025     0.0022
##    0.0006     0.0071
##    0.0026     0.0082

# Visualisasi effect size
es_long <- es_table %>%
  select(DV, pe2_sex, pe2_race, pe2_age, pe2_educ, pe2_income) %>%
  pivot_longer(-DV, names_to = "Prediktor", values_to = "PE2") %>%
  mutate(Prediktor = case_when(
  Prediktor == "pe2_sex"  ~ "sex",
  Prediktor == "pe2_race" ~ "race",
  TRUE ~ Prediktor  
))
 
p_es <- ggplot(es_long, aes(x = reorder(DV, PE2), y = PE2,
                              fill = Prediktor)) +
  geom_col(position = "dodge", alpha = 0.85) +
  geom_hline(yintercept = c(0.01, 0.06, 0.14),
             linetype = "dashed", color = "gray50", linewidth = 0.5) +
  annotate("text", x = 0.4,
           y    = c(0.013, 0.063, 0.145),
           label = c("Kecil", "Sedang", "Besar"),
           hjust = 0, size = 2.8, color = "gray40") +
  scale_fill_brewer(palette = "Set2") +
  coord_flip() +
  labs(title    = "Effect Size Partial η² — ANCOVA",
       subtitle = "Perbandingan efek IV (sex, race) dan kovariat per DV",
       x = "Variabel Dependen", y = "Partial η²",
       fill = "Prediktor") +
  theme_minimal(base_size = 11) +
  theme(legend.position = "right")
print(p_es)

Post-hoc EMMEANS

print_header("POST-HOC EMMEANS (Adjusted Means)")

## 
## ════════════════════════════════════════════════════════════
##   POST-HOC EMMEANS (Adjusted Means)
## ════════════════════════════════════════════════════════════

cat("  EMM = rata-rata yang sudah dikontrol kovariat (age, educ, income)\n")

##   EMM = rata-rata yang sudah dikontrol kovariat (age, educ, income)

cat("  Koreksi: Bonferroni\n\n")

##   Koreksi: Bonferroni

print_sub("Post-hoc sex (Welch t-test, Bonferroni-adjusted)")

## 
## ─── Post-hoc sex (Welch t-test, Bonferroni-adjusted) ───

dv_sex_sig <- ancova_sex_results$DV[
  ancova_sex_results$p_sex < alpha_bonf]
 
if (length(dv_sex_sig) == 0) {
  dv_sex_sig <- ancova_sex_results$DV[
    ancova_sex_results$p_sex < 0.05]
  cat("  ℹ️  Tidak ada DV signifikan setelah Bonferroni;\n")
  cat("      menampilkan DV dengan p < 0.05 (sebelum koreksi)\n\n")
}

##   ℹ️  Tidak ada DV signifikan setelah Bonferroni;
##       menampilkan DV dengan p < 0.05 (sebelum koreksi)

ph_sex_table <- data.frame()
for (dv in all_dvs) {
  emm_obj <- emmeans::emmeans(
    ancova_sex_models[[dv]], ~ sex, adjust = "bonferroni")
  ct  <- emmeans::contrast(emm_obj, method = "pairwise",
                            adjust = "bonferroni")
  ct_df <- as.data.frame(summary(ct))
 
  # Cohen's d dari EMM
  emm_sum <- as.data.frame(summary(emm_obj))
  em_col  <- ifelse("emmean" %in% names(emm_sum), "emmean", "response")
  m_pria  <- emm_sum[emm_sum$sex == "Pria",   em_col]
  m_wanita<- emm_sum[emm_sum$sex == "Wanita", em_col]
  pooled_sd <- sd(df[[dv]], na.rm = TRUE)
  cohens_d  <- round((m_pria - m_wanita) / pooled_sd, 4)
  
    ph_sex_table <- rbind(ph_sex_table, data.frame(
    DV           = dv,
    EMM_Pria     = round(m_pria,         3),
    EMM_Wanita   = round(m_wanita,       3),
    Diff         = round(ct_df$estimate, 4),
    SE           = round(ct_df$SE,       4),
    t_ratio      = round(ct_df$t.ratio,  4),
    df           = round(ct_df$df,       1),
    p_adj        = round(ct_df$p.value,  4),
    Sig          = ifelse(ct_df$p.value < 0.001, "***",
                   ifelse(ct_df$p.value < 0.01,  "**",
                   ifelse(ct_df$p.value < 0.05,  "*", "ns"))),
    Cohens_d     = cohens_d,
    Interp_d     = interp_d(cohens_d)
  ))
}
 
cat("  Estimated Marginal Means & Kontras Pria vs Wanita:\n\n")

##   Estimated Marginal Means & Kontras Pria vs Wanita:

cat(sprintf("  %-10s | %8s | %8s | %8s | %8s | %4s | %8s | %-12s\n",
            "DV", "EMM_Pria", "EMM_Wanita", "Diff",
            "p_adj", "Sig", "Cohen's d", "Interpretasi"))

##   DV         | EMM_Pria | EMM_Wanita |     Diff |    p_adj |  Sig | Cohen's d | Interpretasi

cat(paste0("  ", strrep("-", 80), "\n"))

##   --------------------------------------------------------------------------------

for (i in seq_len(nrow(ph_sex_table))) {
  r <- ph_sex_table[i, ]
  cat(sprintf("  %-10s | %8.3f | %8.3f | %8.4f | %8.4f | %4s | %8.4f | %s\n",
              r$DV, r$EMM_Pria, r$EMM_Wanita, r$Diff,
              r$p_adj, r$Sig, r$Cohens_d, r$Interp_d))
}

##   happy      |    2.083 |    2.140 |  -0.0571 |   0.4001 |   ns |  -0.0843 | Sangat Kecil
##   health     |    2.370 |    2.246 |   0.1240 |   0.0955 |   ns |   0.1629 | Sangat Kecil
##   life       |    1.625 |    1.683 |  -0.0587 |   0.3045 |   ns |  -0.1034 | Sangat Kecil
##   satfin     |    2.089 |    2.163 |  -0.0743 |   0.2987 |   ns |  -0.1021 | Sangat Kecil
##   trust      |    1.733 |    1.862 |  -0.1283 |   0.0236 |    * |  -0.2214 | Kecil
##   helpful    |    1.709 |    1.741 |  -0.0324 |   0.6173 |   ns |  -0.0493 | Sangat Kecil
##   fair       |    1.654 |    1.562 |   0.0921 |   0.1596 |   ns |   0.1395 | Sangat Kecil

print_sub("Post-hoc race (Pairwise EMM, Bonferroni-adjusted)")

## 
## ─── Post-hoc race (Pairwise EMM, Bonferroni-adjusted) ───

ph_race_table <- data.frame()
for (dv in all_dvs) {
  emm_obj <- emmeans::emmeans(
    ancova_race_models[[dv]], ~ race, adjust = "bonferroni")
  ct    <- emmeans::contrast(emm_obj, method = "pairwise",
                              adjust = "bonferroni")
  ct_df <- as.data.frame(summary(ct))
  ct_df$DV <- dv
  ph_race_table <- rbind(ph_race_table, ct_df)
}

cat("  Pairwise kontras antar kelompok ras (adjusted means):\n\n")

##   Pairwise kontras antar kelompok ras (adjusted means):

cat(sprintf("  %-10s | %-28s | %8s | %8s | %4s\n",
            "DV", "Kontras", "Estimate", "p_adj", "Sig"))

##   DV         | Kontras                      | Estimate |    p_adj |  Sig

cat(paste0("  ", strrep("-", 65), "\n"))

##   -----------------------------------------------------------------

for (i in seq_len(nrow(ph_race_table))) {
  r   <- ph_race_table[i, ]
  sig <- ifelse(r$p.value < 0.001, "***",
         ifelse(r$p.value < 0.01,  "**",
         ifelse(r$p.value < 0.05,  "*", "ns")))
  cat(sprintf("  %-10s | %-28s | %8.4f | %8.4f | %s\n",
              r$DV, r$contrast, r$estimate, r$p.value, sig))
}

##   happy      | Putih - Hitam                |  -0.1165 |   0.4360 | ns
##   happy      | Putih - Lainnya              |  -0.0682 |   1.0000 | ns
##   happy      | Hitam - Lainnya              |   0.0483 |   1.0000 | ns
##   health     | Putih - Hitam                |  -0.0209 |   1.0000 | ns
##   health     | Putih - Lainnya              |   0.1307 |   0.7420 | ns
##   health     | Hitam - Lainnya              |   0.1515 |   0.6524 | ns
##   life       | Putih - Hitam                |   0.0509 |   1.0000 | ns
##   life       | Putih - Lainnya              |   0.0128 |   1.0000 | ns
##   life       | Hitam - Lainnya              |  -0.0381 |   1.0000 | ns
##   satfin     | Putih - Hitam                |   0.0731 |   1.0000 | ns
##   satfin     | Putih - Lainnya              |  -0.1086 |   0.9487 | ns
##   satfin     | Hitam - Lainnya              |  -0.1817 |   0.3703 | ns
##   trust      | Putih - Hitam                |  -0.0679 |   0.9333 | ns
##   trust      | Putih - Lainnya              |   0.0004 |   1.0000 | ns
##   trust      | Hitam - Lainnya              |   0.0683 |   1.0000 | ns
##   helpful    | Putih - Hitam                |  -0.0654 |   1.0000 | ns
##   helpful    | Putih - Lainnya              |  -0.2156 |   0.0832 | ns
##   helpful    | Hitam - Lainnya              |  -0.1502 |   0.4737 | ns
##   fair       | Putih - Hitam                |   0.3106 |   0.0001 | ***
##   fair       | Putih - Lainnya              |   0.1138 |   0.7303 | ns
##   fair       | Hitam - Lainnya              |  -0.1969 |   0.1914 | ns

print_sub("Plot Adjusted Means per DV")

## 
## ─── Plot Adjusted Means per DV ───

# Sex
emm_sex_plot <- data.frame()
for (dv in all_dvs) {
  emm_df <- as.data.frame(summary(
    emmeans::emmeans(ancova_sex_models[[dv]], ~ sex)))
  em_col <- ifelse("emmean" %in% names(emm_df), "emmean", "response")
  emm_df$DV <- dv
  emm_df$emmean_val <- emm_df[[em_col]]
  emm_sex_plot <- rbind(emm_sex_plot,
                         emm_df[, c("sex", "DV", "emmean_val",
                                    "lower.CL", "upper.CL")])
}
p_emm_sex <- ggplot(emm_sex_plot,
                     aes(x = DV, y = emmean_val,
                         color = sex, group = sex)) +
  geom_point(size = 3.5, position = position_dodge(0.4)) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
                width = 0.2, linewidth = 0.9,
                position = position_dodge(0.4)) +
  scale_color_manual(values = c("#2980b9", "#e74c3c")) +
  labs(title    = "Adjusted Means (EMM) berdasarkan Sex",
       subtitle = "Dikontrol: age, educ, income | 95% CI",
       x = "Variabel Dependen", y = "Estimated Marginal Mean",
       color = "Sex") +
  theme_minimal(base_size = 11) +
  theme(axis.text.x  = element_text(angle = 30, hjust = 1),
        legend.position = "bottom")
print(p_emm_sex)

# Race
emm_race_plot <- data.frame()
for (dv in all_dvs) {
  emm_df <- as.data.frame(summary(
    emmeans::emmeans(ancova_race_models[[dv]], ~ race)))
  em_col <- ifelse("emmean" %in% names(emm_df), "emmean", "response")
  emm_df$DV <- dv
  emm_df$emmean_val <- emm_df[[em_col]]
  emm_race_plot <- rbind(emm_race_plot,
                          emm_df[, c("race", "DV", "emmean_val",
                                     "lower.CL", "upper.CL")])
}

p_emm_race <- ggplot(emm_race_plot,
                      aes(x = DV, y = emmean_val,
                          color = race, group = race)) +
  geom_point(size = 3.5, position = position_dodge(0.4)) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL),
                width = 0.2, linewidth = 0.9,
                position = position_dodge(0.4)) +
  scale_color_brewer(palette = "Set1") +
  labs(title    = "Adjusted Means (EMM) berdasarkan Race",
       subtitle = "Dikontrol: age, educ, income | 95% CI",
       x = "Variabel Dependen", y = "Estimated Marginal Mean",
       color = "Ras") +
  theme_minimal(base_size = 11) +
  theme(axis.text.x  = element_text(angle = 30, hjust = 1),
        legend.position = "bottom")
print(p_emm_race)

print_header("VISUALISASI DIAGNOSTIK MODEL")

## 
## ════════════════════════════════════════════════════════════
##   VISUALISASI DIAGNOSTIK MODEL
## ════════════════════════════════════════════════════════════

dv_plot <- if (length(dv_sex_sig) > 0) dv_sex_sig else all_dvs[1:2]
 
for (dv in dv_plot[1:min(2, length(dv_plot))]) {
  model <- ancova_sex_models[[dv]]
  par(mfrow = c(2, 2))
  plot(model, main = paste("Diagnostik ANCOVA:", dv))
  par(mfrow = c(1, 1))
}

MODUL 2

Fitri Saidatur Hanifah (035) dan Royhan Jinan (167)

2026-04-22

Load package

Load data

Seleksi variabel relevan

EDA AWAL

Struktur data

Statistik deskriptif

Statistik deskriptif per variabel

ANALISIS MISSING VALUE

jumlah dan persentase missing

Total missing value

PREPROCESSING

Recording nilai valid

Konversi tipe variabel

Handling missing value

Deteksi dan penanganan outlier

EDA LANJUTAN

Distribusi variabel kategorik

Distribusi variabel kontinu (kovariat)

Distribusi variabel dependen

Matriks korelasi variabel dependen

Korelasi variabel dependen dengan variabel kontinu

UJI ASUMSI MANOVA

Uji normalitas multivariat

Uji homogenitas Matrix Varians-Kovarians (Box’s M Test)

Uji Homogenitas varians univariat (Levene’s Test)

Uji normalitas Univariat per Kelompok (Shapiro-Wilk)

Uji Multikolinearitas antar variabel dependen

Deteksi Outlier Multivariat (Mahalanobis Distance)

MANOVA

load data clean

fungsi helper

MANOVA ONE WAY

sex

race

degree

MANOVA TWO WAY

sex vs race

MANCOVA

load data clean

fungsi helper

MANCOVA ONE WAY

sex

race

degree

MANCOVA TWO WAY

sex vs race

UJI ASUMSI ANCOVA

Normalitas Residual

Homogenitas Varians (Levene’s Test)

Homogenitas Slope Regresi

Linearitas Kovariat terhadap variabel dependen

Independensi kovariat

ANCOVA

sex

race

sex dan race

Effect size

Post-hoc EMMEANS