FFCWS update 3

# ============================================================
# FFCWS: Admixture Regression Analysis
# African Ancestry Sample (Both-Black + BW Parents)
# City-Weighted, HC1 Robust SEs
#
# Updates:
#  - WAIS: conservative filter (drop only confirmed FB/Spanish)
#  - Average WAIS = inclusive average of valid parent scores
#  - Child TVIP at age 3 exclusion
#  - 3-test g (PPVT, WJ9, WJ10); DS in LSEM/CFA only
#  - Individual parent WAIS: parent self-ID, full APA sample
#  - Average WAIS: couple SIRE, excl BW
#  - 3-component SES (education, poverty, income)
#  - LSEM + multi-group CFA measurement invariance
# ============================================================

library(haven)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(sandwich)
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(mice)

## 
## Attaching package: 'mice'

## The following object is masked from 'package:stats':
## 
##     filter

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

library(sirt)

## Warning: package 'sirt' was built under R version 4.5.3

## - sirt 4.2-133 (2025-09-27 12:57:51)

library(lavaan)

## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.5.2

library(mediation)

## Warning: package 'mediation' was built under R version 4.5.2

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: Matrix

## Loading required package: mvtnorm

## mediation: Causal Mediation Analysis
## Version: 4.5.1

# ==============================================================
# 1. Load Data
# ==============================================================

d <- read_sav("C:/Users/mh198/Documents/Data/FFCWS/DS0001/31622-0001-Data.sav")
PGS <- read_sav("C:/Users/mh198/Documents/Data/FFCWS//DS0007/31622-0007-Data.sav")

d <- merge(d, PGS[, c("IDNUM", "K5ADMIXA", "K5PC1A", "K5PC2A",
                       "K5PC3A", "K5PC4A", "K5PC5A")],
           by = "IDNUM", all.x = TRUE)

# ==============================================================
# 2. African Ancestry PGS Sample
# ==============================================================

d <- d[d$K5ADMIXA %in% c(0, 1), ]

# ==============================================================
# 3. Black Parent Identification
# ==============================================================

d$father_black_nh <- d$CF1ETHRACE == 2
d$mother_black_nh <- d$CM1ETHRACE == 2

d$father_black_hisp <- (d$CF1ETHRACE == 3) & (d$F1H3 == 2)
d$mother_black_hisp <- (d$CM1ETHRACE == 3) & (d$M1H3 == 2)
d$father_black_hisp[is.na(d$father_black_hisp)] <- FALSE
d$mother_black_hisp[is.na(d$mother_black_hisp)] <- FALSE

d$father_any_black <- d$father_black_nh | d$father_black_hisp
d$mother_any_black <- d$mother_black_nh | d$mother_black_hisp
d$any_black_parent <- d$father_any_black | d$mother_any_black

# ==============================================================
# 4. Continental African Exclusion
# ==============================================================

d$mother_african_born <- (d$M1H2 == 2) & (d$M1H2B == 1)
d$father_african_born <- (d$F1H2 == 2) & (d$F1H2B == 1)
d$mother_african_born[is.na(d$mother_african_born)] <- FALSE
d$father_african_born[is.na(d$father_african_born)] <- FALSE

d$gp_african <- (d$F3H1A == 101) | (d$F3H1B == 101) |
                (d$M3H1A == 101) | (d$M3H1B == 101)
d$gp_african[is.na(d$gp_african)] <- FALSE

d$african_origin <- d$mother_african_born | d$father_african_born | d$gp_african

# ==============================================================
# 5. Generation & SIRE Dummies
# ==============================================================

d$parent_fb <- (d$M1H2 == 2) | (d$F1H2 == 2)
d$parent_fb[is.na(d$parent_fb)] <- FALSE

d$gp_fb <- (d$F3H1A >= 101) | (d$F3H1B >= 101) |
           (d$M3H1A >= 101) | (d$M3H1B >= 101)
d$gp_fb[is.na(d$gp_fb)] <- FALSE

d$gen_2nd <- d$parent_fb & !d$african_origin
d$gen_3rd <- (!d$parent_fb) & d$gp_fb & !d$african_origin

d$sire <- NA_character_
d$sire[d$father_any_black & d$mother_any_black] <- "BB"
d$sire[(d$father_any_black & d$CM1ETHRACE == 1) |
       (d$CF1ETHRACE == 1 & d$mother_any_black)] <- "BW"
d$sire[(d$father_any_black & d$CM1ETHRACE == 3 & !d$mother_black_hisp) |
       (d$CF1ETHRACE == 3 & !d$father_black_hisp & d$mother_any_black)] <- "BH"
d$sire[d$any_black_parent & is.na(d$sire)] <- "BO"
d$sire[!d$any_black_parent] <- "None"
d$sire <- factor(d$sire, levels = c("BB", "BW", "BH", "BO", "None"))

# ==============================================================
# 6. Child TVIP Exclusion Flag
# ==============================================================

d$ch_tvip3_score <- ifelse(d$CH3TVIPSTD >= 0, d$CH3TVIPSTD, NA)
d$took_tvip3 <- as.numeric(!is.na(d$ch_tvip3_score))
cat("Child TVIP at age 3:", sum(d$took_tvip3), "cases flagged for exclusion\n")

## Child TVIP at age 3: 1 cases flagged for exclusion

# ==============================================================
# 7. Demographics & SES (3-component)
# ==============================================================

d$sex    <- ifelse(d$CM1BSEX %in% c(1, 2), d$CM1BSEX - 1, NA)
d$age    <- ifelse(d$CH5AGEM >= 0, d$CH5AGEM, NA)
d$single <- ifelse(d$CM1MARF == 0 & d$CM1COHF == 0, 1,
                   ifelse(d$CM1MARF %in% c(0, 1) & d$CM1COHF %in% c(0, 1), 0, NA))

d$M_edu <- NA
for (col in c("CM5EDU", "CM4EDU", "CM3EDU", "CM2EDU", "CM1EDU")) {
  d$M_edu <- ifelse(is.na(d$M_edu) & d[[col]] >= 1, d[[col]], d$M_edu)
}
d$F_edu <- NA
for (col in c("CF5EDU", "CF4EDU", "CF3EDU", "CF2EDU", "CF1EDU")) {
  d$F_edu <- ifelse(is.na(d$F_edu) & d[[col]] >= 1, d[[col]], d$F_edu)
}
d$mid_edu <- rowMeans(cbind(d$F_edu, d$M_edu), na.rm = TRUE)
d$mid_edu[is.na(d$F_edu) & is.na(d$M_edu)] <- NA

d$pov_bl  <- ifelse(d$CM1INPOV >= 0, d$CM1INPOV, NA)
d$pov_yr1 <- ifelse(d$CM2POVCO >= 0, d$CM2POVCO, NA)
d$pov_yr3 <- ifelse(d$CM3POVCO >= 0, d$CM3POVCO, NA)
d$pov_yr5 <- ifelse(d$CM4POVCO >= 0, d$CM4POVCO, NA)
d$pov_yr9 <- ifelse(d$CM5POVCO >= 0, d$CM5POVCO, NA)
d$ave_pov <- rowMeans(cbind(d$pov_bl, d$pov_yr1, d$pov_yr3, d$pov_yr5, d$pov_yr9), na.rm = TRUE)
d$ave_pov[is.na(d$pov_bl) & is.na(d$pov_yr1) & is.na(d$pov_yr3) & is.na(d$pov_yr5) & is.na(d$pov_yr9)] <- NA

inc_vars <- c("CM1HHINC", "CM2HHINC", "CM3HHINC", "CM4HHINC", "CM5HHINC")
inc_names <- c("inc_bl", "inc_yr1", "inc_yr3", "inc_yr5", "inc_yr9")
for (i in seq_along(inc_vars)) {
  d[[inc_names[i]]] <- ifelse(d[[inc_vars[i]]] >= 0, d[[inc_vars[i]]], NA)
  d[[paste0("log_", inc_names[i])]] <- log(pmax(d[[inc_names[i]]], 1))
}
d$ave_log_inc <- rowMeans(d[, paste0("log_", inc_names)], na.rm = TRUE)
d$ave_log_inc[rowSums(!is.na(d[, inc_names])) == 0] <- NA

ses_indicators <- data.frame(
  mid_edu     = as.vector(scale(d$mid_edu)),
  ave_pov     = as.vector(scale(d$ave_pov)),
  ave_log_inc = as.vector(scale(d$ave_log_inc))
)
n_ses_valid <- rowSums(!is.na(ses_indicators))
set.seed(54321)
ses_imp <- mice(ses_indicators, m = 5, method = "pmm", maxit = 10, printFlag = FALSE)
ses_imputed <- complete(ses_imp, action = 1)
d$SES_raw <- rowMeans(ses_imputed, na.rm = FALSE)
d$SES_raw[n_ses_valid == 0] <- NA

d$wt <- ifelse(d$K5CITYWT > 0, d$K5CITYWT, NA)

# ==============================================================
# 8. Child Cognitive Variables: g
# ==============================================================

d$PPVT <- ifelse(d$CH5PPVTSS >= 0, d$CH5PPVTSS, NA)
d$WJ9  <- ifelse(d$CH5WJ9SS >= 0, d$CH5WJ9SS, NA)
d$WJ10 <- ifelse(d$CH5WJ10SS >= 0, d$CH5WJ10SS, NA)
d$DS   <- ifelse(d$CH5DSSS >= 0, d$CH5DSSS, NA)

resid_fun <- function(y, data) {
  fit <- lm(y ~ age + I(age^2) + sex, data = data, na.action = na.exclude)
  as.vector(residuals(fit))
}

d$PPVT_r <- resid_fun(d$PPVT, d)
d$WJ9_r  <- resid_fun(d$WJ9, d)
d$WJ10_r <- resid_fun(d$WJ10, d)
d$DS_r   <- resid_fun(d$DS, d)

d$PPVT_z <- as.vector(scale(d$PPVT_r))
d$WJ9_z  <- as.vector(scale(d$WJ9_r))
d$WJ10_z <- as.vector(scale(d$WJ10_r))
d$DS_z   <- as.vector(scale(d$DS_r))

# 3-test g (DS kept for LSEM/CFA only)
cog_imp_data <- data.frame(
  PPVT_z = d$PPVT_z,
  WJ9_z  = d$WJ9_z,
  WJ10_z = d$WJ10_z
)

has_any <- rowSums(!is.na(cog_imp_data)) >= 1
set.seed(12345)
imp <- mice(cog_imp_data, m = 5, method = "norm", maxit = 10, printFlag = FALSE)
cog_imputed <- complete(imp, action = 1)
cog_for_pca <- as.matrix(cog_imputed[has_any, ])
pca_out <- prcomp(cog_for_pca, center = FALSE, scale. = FALSE)
d$g_raw <- NA
d$g_raw[has_any] <- pca_out$x[, 1]

cat("PCA loadings (3-test g):", round(pca_out$rotation[, 1], 3), "\n")

## PCA loadings (3-test g): 0.564 0.588 0.58

cat("Variance explained:", round(summary(pca_out)$importance[2, 1] * 100, 1), "%\n")

## Variance explained: 71.7 %

# ==============================================================
# 9. Parental WAIS: Conservative USB + English Filter
# ==============================================================

d$mother_wais_raw <- ifelse(d$CM3COGSC >= 0 & d$CM3COGSC <= 15, d$CM3COGSC, NA)
d$father_wais_raw <- ifelse(d$CF3COGSC >= 0 & d$CF3COGSC <= 15, d$CF3COGSC, NA)

d$mother_confirmed_fb   <- as.numeric(d$M1H2B %in% c(1, 2, 3, 4, 5))
d$father_confirmed_fb   <- as.numeric(d$F1H2B %in% c(1, 2, 3, 4, 5))
d$mother_confirmed_span <- as.numeric(d$CM3SPAN == 1)
d$father_confirmed_span <- as.numeric(d$CF3SPAN == 1)

d$mother_wais <- ifelse(d$mother_confirmed_fb == 1 | d$mother_confirmed_span == 1,
                        NA, d$mother_wais_raw)
d$father_wais <- ifelse(d$father_confirmed_fb == 1 | d$father_confirmed_span == 1,
                        NA, d$father_wais_raw)

cat("\nMother WAIS: total =", sum(!is.na(d$mother_wais_raw)),
    ", after filter =", sum(!is.na(d$mother_wais)),
    ", dropped =", sum(!is.na(d$mother_wais_raw)) - sum(!is.na(d$mother_wais)), "\n")

## 
## Mother WAIS: total = 1536 , after filter = 1480 , dropped = 56

cat("Father WAIS: total =", sum(!is.na(d$father_wais_raw)),
    ", after filter =", sum(!is.na(d$father_wais)),
    ", dropped =", sum(!is.na(d$father_wais_raw)) - sum(!is.na(d$father_wais)), "\n")

## Father WAIS: total = 1276 , after filter = 1231 , dropped = 45

# Age-residualize
d$mother_age <- ifelse(d$CM3AGE > 0, d$CM3AGE, NA)
d$father_age <- ifelse(d$CF3AGE > 0, d$CF3AGE, NA)

m_mask <- !is.na(d$mother_wais) & !is.na(d$mother_age)
if (sum(m_mask) > 10) {
  d$mother_age_c  <- d$mother_age - mean(d$mother_age[m_mask])
  d$mother_age_c2 <- d$mother_age_c^2
  d$M_WAIS_res <- NA_real_
  d$M_WAIS_res[m_mask] <- residuals(lm(mother_wais ~ mother_age_c + mother_age_c2,
                                        data = d[m_mask, ]))
}

f_mask <- !is.na(d$father_wais) & !is.na(d$father_age)
if (sum(f_mask) > 10) {
  d$father_age_c  <- d$father_age - mean(d$father_age[f_mask])
  d$father_age_c2 <- d$father_age_c^2
  d$F_WAIS_res <- NA_real_
  d$F_WAIS_res[f_mask] <- residuals(lm(father_wais ~ father_age_c + father_age_c2,
                                        data = d[f_mask, ]))
}

d$ave_WAIS_raw <- rowMeans(cbind(d$M_WAIS_res, d$F_WAIS_res), na.rm = TRUE)
d$ave_WAIS_raw[is.na(d$M_WAIS_res) & is.na(d$F_WAIS_res)] <- NA

cat("Average WAIS (filtered): N =", sum(!is.na(d$ave_WAIS_raw)), "\n")

## Average WAIS (filtered): N = 1533

# ==============================================================
# 10. Build Subsets (after TVIP exclusion)
# ==============================================================

d <- d[d$took_tvip3 == 0, ]
cat("After TVIP exclusion: N =", nrow(d), "\n")

## After TVIP exclusion: N = 1639

d$PC1A_z <- as.vector(scale(d$K5PC1A))

d_apa <- d[d$any_black_parent & !d$african_origin, ]
d_apa$sire <- droplevels(d_apa$sire)

# Parent self-ID flags for individual WAIS models
d_apa$mother_black_sire <- as.numeric(d_apa$CM1ETHRACE == 2)
d_apa$father_black_sire <- as.numeric(d_apa$CF1ETHRACE == 2)

d_apa_noBW <- d_apa[d_apa$sire != "BW", ]
d_apa_noBW$sire <- droplevels(d_apa_noBW$sire)

d_haa <- d[d$CF1ETHRACE == 2 & d$CM1ETHRACE == 2 &
           !d$african_origin & !d$parent_fb & !d$gp_fb, ]

cat("APA:", nrow(d_apa), " | APA excl BW:", nrow(d_apa_noBW), " | HAA:", nrow(d_haa), "\n")

## APA: 1588  | APA excl BW: 1516  | HAA: 1323

# ==============================================================
# 11. LSEM Analysis — Liu et al. (2025) approach
# Progressive invariance testing with bootstrap
# ==============================================================



# DS included for model identification (4 indicators, 2 df)
# Run on post-TVIP APA sample to match regression analyses
d_apa_cog <- d_apa[rowSums(!is.na(d_apa[, c("PPVT_z", "WJ9_z", "WJ10_z", "DS_z")])) >= 1, ]

grid_points <- quantile(d_apa_cog$PC1A_z,
                        probs = c(0.20, 0.35, 0.50, 0.65, 0.80),
                        na.rm = TRUE)

lsem_model <- 'g =~ PPVT_z + WJ9_z + WJ10_z + DS_z'

# ── Define invariance constraints ────────────────────────────────────────

# Metric (weak) — loadings invariant
weak_constraints <- c("g=~PPVT_z", "g=~WJ9_z", "g=~WJ10_z", "g=~DS_z")

# Scalar (strong) — loadings + intercepts invariant
strong_constraints <- c(weak_constraints,
                        "PPVT_z~1", "WJ9_z~1", "WJ10_z~1", "DS_z~1")

# Strict — loadings + intercepts + residual variances
strict_constraints <- c(strong_constraints,
                        "PPVT_z~~PPVT_z", "WJ9_z~~WJ9_z",
                        "WJ10_z~~WJ10_z", "DS_z~~DS_z")

# ── Configural ───────────────────────────────────────────────────────────

parallel::detectCores()

## [1] 32

lsem_config <- lsem.estimate(
  data = as.data.frame(d_apa_cog),
  moderator = "PC1A_z",
  moderator.grid = grid_points,
  lavmodel = lsem_model,
  h = 1.5,
  estimator = "ML",
  meanstructure = TRUE,
  standardized = TRUE,
  est_joint = TRUE
)

## ** Residualize Data
## ** Fit lavaan model
## |*****|
## |lavoptions         ... done.
## lavdata            ... done.
##   Number of observations per group:                   
##     Group 1                                        415
##     Group 2                                        673
##     Group 3                                        782
##     Group 4                                        793
##     Group 5                                        707
## lavsamplestats ... done.
## lavh1              ... start:
## lavh1              ... done.
## lavpartable bounds ... done.
## lavstart           ... done.
## lavmodel           ... done.
## lavoptim           ... start:
## attempt 1 -- default options
##   objective function  = 0.0738287418959242
##   quasi-Newton steps using NLMINB:
##   objective function  = 0.0738287418959242
##   objective function  = 0.0598101428080710
##   objective function  = 0.0272600001103062
##   objective function  = 0.0212466671924765
##   objective function  = 0.0164817105615486
##   objective function  = 0.0146555981910506
##   objective function  = 0.0134640527587602
##   objective function  = 0.0120450778257396
##   objective function  = 0.0111114082610116
##   objective function  = 0.0103842179662563
##   objective function  = 0.0099169893262600
##   objective function  = 0.0095414554179040
##   objective function  = 0.0092043224558610
##   objective function  = 0.0088560381300777
##   objective function  = 0.0086710435845395
##   objective function  = 0.0085470757202159
##   objective function  = 0.0084747339400978
##   objective function  = 0.0084164424263215
##   objective function  = 0.0083712735479592
##   objective function  = 0.0083354584727222
##   objective function  = 0.0083055661345523
##   objective function  = 0.0082787958134606
##   objective function  = 0.0082528837858022
##   objective function  = 0.0082279510463489
##   objective function  = 0.0082056437113315
##   objective function  = 0.0081888682705170
##   objective function  = 0.0081774151733347
##   objective function  = 0.0081670357714019
##   objective function  = 0.0081578120412134
##   objective function  = 0.0081502376072646
##   objective function  = 0.0081457014215777
##   objective function  = 0.0081429285617339
##   objective function  = 0.0081405099179554
##   objective function  = 0.0081382019354723
##   objective function  = 0.0081356263182086
##   objective function  = 0.0081341831132384
##   objective function  = 0.0081330758252586
##   objective function  = 0.0081323577340175
##   objective function  = 0.0081317153966488
##   objective function  = 0.0081311599940430
##   objective function  = 0.0081305173603691
##   objective function  = 0.0081300265687728
##   objective function  = 0.0081295696584909
##   objective function  = 0.0081292733296530
##   objective function  = 0.0081290723806016
##   objective function  = 0.0081289199258578
##   objective function  = 0.0081288115527348
##   objective function  = 0.0081287394835312
##   objective function  = 0.0081286894269027
##   objective function  = 0.0081286501022387
##   objective function  = 0.0081286206606698
##   objective function  = 0.0081286017375926
##   objective function  = 0.0081285895849612
##   objective function  = 0.0081285794627587
##   objective function  = 0.0081285704435491
##   objective function  = 0.0081285603182290
##   objective function  = 0.0081285549490478
##   objective function  = 0.0081285517222526
##   objective function  = 0.0081285502788854
##   objective function  = 0.0081285491366399
##   objective function  = 0.0081285482035613
##   objective function  = 0.0081285473129795
##   objective function  = 0.0081285467962137
##   objective function  = 0.0081285464321888
##   objective function  = 0.0081285462418201
##   objective function  = 0.0081285461180620
##   objective function  = 0.0081285460319544
##   objective function  = 0.0081285459611770
##   objective function  = 0.0081285459050836
##   objective function  = 0.0081285458698432
##   objective function  = 0.0081285458488869
##   objective function  = 0.0081285458365467
##   objective function  = 0.0081285458285124
##   objective function  = 0.0081285458235066
##   objective function  = 0.0081285458201861
##   objective function  = 0.0081285458177103
##   objective function  = 0.0081285458159828
##   objective function  = 0.0081285458151327
##   objective function  = 0.0081285458151327
##   convergence status (0=ok):  0 
##   nlminb message says:  relative convergence (4) 
##   number of iterations:  77 
##   number of function evaluations [objective, gradient]:  78 78 
## lavoptim    ... done.
## lavimplied  ... done.
## lavloglik   ... done.
## computing TEST for test(s) = standard ... done.
## lavbaseline ... done.
## post check  ... done.
## 
## Use lsem.bootstrap() for computing standard errors!
## 
## -|
## ** Parameter summary

summary(lsem_config)

## -----------------------------------------------------------------
## Local Structural Equation Model 
## 
## sirt 4.2-133 (2025-09-27 12:57:51) 
## lavaan 0.6-19 (2024-09-26 15:50:14 UTC) 
## 
## R version 4.5.1 (2025-06-13 ucrt) x86_64, mingw32 | nodename=HUANG | login=mh198 
## 
## Function 'sirt::lsem.estimate', type='LSEM' 
## 
## 
## Call:
## lsem.estimate(data = as.data.frame(d_apa_cog), moderator = "PC1A_z", 
##     moderator.grid = grid_points, lavmodel = lsem_model, h = 1.5, 
##     standardized = TRUE, est_joint = TRUE, estimator = "ML", 
##     meanstructure = TRUE)
## 
## Date of Analysis: 2026-06-06 23:46:19.630104 
## Time difference of 4.896395 secs
## Computation Time: 4.896395 
## 
## Number of observations in datasets = 1538 
## Used observations in analysis = 1524 
## Used sampling weights: FALSE 
## Bandwidth factor = 1.5 
## Bandwidth = 0.294 
## Number of focal points for moderator = 5 
## 
## Used joint estimation: TRUE 
## Used sufficient statistics: TRUE 
## Used local linear smoothing: TRUE 
## Used pseudo weights: FALSE 
## Used lavaan package: TRUE 
## Used lavaan.survey package: FALSE 
## 
## Mean structure modelled: TRUE 
## 
## lavaan Model
## g =~ PPVT_z + WJ9_z + WJ10_z + DS_z
## 
## Global Fit Statistics for Joint Estimation
## 
##    stat value
## 1 rmsea 0.082
## 2   cfi 0.989
## 3   tli 0.967
## 4   gfi 0.992
## 5  srmr 0.020
## 
## 
## Parameter Estimate Summary
## 
##                    par parindex      M    SD   MAD    Min   Max lin_int lin_slo
## 1            g=~PPVT_z        1  1.000 0.000 0.000  1.000 1.000   1.000   0.000
## 2             g=~WJ9_z        2  1.312 0.086 0.078  1.210 1.435   1.410  -0.311
## 3            g=~WJ10_z        3  1.256 0.057 0.044  1.206 1.401   1.322  -0.211
## 4              g=~DS_z        4  0.875 0.031 0.023  0.856 0.958   0.908  -0.105
## 5       PPVT_z~~PPVT_z        5  0.568 0.087 0.075  0.475 0.736   0.671  -0.326
## 6         WJ9_z~~WJ9_z        6  0.322 0.023 0.014  0.301 0.386   0.347  -0.081
## 7       WJ10_z~~WJ10_z        7  0.403 0.021 0.018  0.354 0.421   0.399   0.012
## 8           DS_z~~DS_z        8  0.718 0.023 0.019  0.668 0.741   0.692   0.081
## 9                 g~~g        9  0.398 0.036 0.030  0.326 0.435   0.356   0.134
## 10            PPVT_z~1       10 -0.001 0.004 0.003 -0.008 0.003  -0.001  -0.001
## 11             WJ9_z~1       11  0.001 0.003 0.002 -0.002 0.008   0.004  -0.007
## 12            WJ10_z~1       12 -0.002 0.005 0.004 -0.007 0.004   0.001  -0.009
## 13              DS_z~1       13 -0.001 0.002 0.001 -0.004 0.001  -0.001   0.000
## 14                 g~1       14  0.000 0.000 0.000  0.000 0.000   0.000   0.000
## 15      std__g=~PPVT_z       15  0.643 0.045 0.039  0.554 0.691   0.589   0.171
## 16       std__g=~WJ9_z       16  0.824 0.010 0.005  0.797 0.832   0.819   0.017
## 17      std__g=~WJ10_z       17  0.780 0.011 0.009  0.769 0.802   0.780  -0.001
## 18        std__g=~DS_z       18  0.545 0.006 0.005  0.538 0.556   0.545   0.000
## 19 std__PPVT_z~~PPVT_z       19  0.585 0.057 0.050  0.522 0.693   0.653  -0.215
## 20   std__WJ9_z~~WJ9_z       20  0.321 0.016 0.009  0.308 0.366   0.329  -0.027
## 21 std__WJ10_z~~WJ10_z       21  0.392 0.017 0.014  0.357 0.409   0.391   0.002
## 22     std__DS_z~~DS_z       22  0.703 0.006 0.006  0.691 0.710   0.703   0.000
## 23           std__g~~g       23  1.000 0.000 0.000  1.000 1.000   1.000   0.000
## 24       std__PPVT_z~1       24 -0.001 0.003 0.003 -0.008 0.003  -0.001  -0.001
## 25        std__WJ9_z~1       25  0.001 0.003 0.002 -0.002 0.008   0.004  -0.007
## 26       std__WJ10_z~1       26 -0.002 0.005 0.004 -0.007 0.004   0.001  -0.008
## 27         std__DS_z~1       27 -0.001 0.002 0.001 -0.004 0.001  -0.001   0.000
## 28            std__g~1       28  0.000 0.000 0.000  0.000 0.000   0.000   0.000
##    SD_nonlin
## 1      0.000
## 2      0.025
## 3      0.015
## 4      0.013
## 5      0.013
## 6      0.010
## 7      0.021
## 8      0.009
## 9      0.004
## 10     0.004
## 11     0.003
## 12     0.004
## 13     0.002
## 14     0.000
## 15     0.006
## 16     0.009
## 17     0.011
## 18     0.006
## 19     0.008
## 20     0.014
## 21     0.017
## 22     0.006
## 23     0.000
## 24     0.003
## 25     0.002
## 26     0.004
## 27     0.002
## 28     0.000
## 
## Distribution of Moderator: Density and Effective Sample Size
## 
## M=0.062 | SD=0.851
## 
##   moderator   wgt    Neff
## 1    -0.246 0.100 419.601
## 2     0.070 0.167 679.165
## 3     0.264 0.233 789.229
## 4     0.453 0.259 800.087
## 5     0.619 0.241 713.065
## 
##    variable       M      SD     min     max
## 1 moderator   0.062   0.851  -4.930   1.448
## 2       wgt   0.200   0.066   0.100   0.259
## 3      Neff 680.229 154.315 419.601 800.087

b_config <- lsem.bootstrap(lsem_config, R = 50, n.core = 14)

## Compute LSEM for bootstrap samples:

## Warning in sqrt(w): NaNs produced

# ── Metric (weak) ────────────────────────────────────────────────────────

lsem_weak <- lsem.estimate(
  data = as.data.frame(d_apa_cog),
  moderator = "PC1A_z",
  moderator.grid = grid_points,
  lavmodel = lsem_model,
  h = 1.5,
  estimator = "ML",
  meanstructure = TRUE,
  standardized = TRUE,
  par_invariant = weak_constraints
)

## Use joint estimation
## ** Residualize Data
## ** Fit lavaan model
## |*****|
## |lavoptions         ... done.
## lavdata            ... done.
##   Number of observations per group:                   
##     Group 1                                        415
##     Group 2                                        673
##     Group 3                                        782
##     Group 4                                        793
##     Group 5                                        707
## lavsamplestats ... done.
## lavh1              ... start:
## lavh1              ... done.
## lavpartable bounds ... done.
## lavstart           ... done.
## lavmodel           ... done.
## lavoptim           ... start:
## attempt 1 -- default options
##   objective function  = 0.0738287418959240
##   quasi-Newton steps using NLMINB:
##   objective function  = 0.0738287418959240
##   objective function  = 0.0599013208206976
##   objective function  = 0.0269691804103185
##   objective function  = 0.0212689827215762
##   objective function  = 0.0167864214592230
##   objective function  = 0.0146621571547316
##   objective function  = 0.0135316330651613
##   objective function  = 0.0121010533431593
##   objective function  = 0.0112181943917189
##   objective function  = 0.0105390063987082
##   objective function  = 0.0101586882120909
##   objective function  = 0.0098461889534032
##   objective function  = 0.0095837421883159
##   objective function  = 0.0093705181903796
##   objective function  = 0.0092380702345748
##   objective function  = 0.0091564956161002
##   objective function  = 0.0091164433604852
##   objective function  = 0.0090997662457236
##   objective function  = 0.0090926561198851
##   objective function  = 0.0090893382565386
##   objective function  = 0.0090875587474185
##   objective function  = 0.0090865396129202
##   objective function  = 0.0090859427304812
##   objective function  = 0.0090855765766150
##   objective function  = 0.0090853643524534
##   objective function  = 0.0090852640158022
##   objective function  = 0.0090852234829650
##   objective function  = 0.0090852072199611
##   objective function  = 0.0090852008363753
##   objective function  = 0.0090851982579872
##   objective function  = 0.0090851967559427
##   objective function  = 0.0090851956246522
##   objective function  = 0.0090851949722973
##   objective function  = 0.0090851947207863
##   objective function  = 0.0090851946320781
##   objective function  = 0.0090851945868091
##   objective function  = 0.0090851945615698
##   objective function  = 0.0090851945501115
##   objective function  = 0.0090851945449507
##   objective function  = 0.0090851945421712
##   objective function  = 0.0090851945406734
##   objective function  = 0.0090851945400383
##   objective function  = 0.0090851945400383
##   convergence status (0=ok):  0 
##   nlminb message says:  relative convergence (4) 
##   number of iterations:  41 
##   number of function evaluations [objective, gradient]:  42 42 
## lavoptim    ... done.
## lavimplied  ... done.
## lavloglik   ... done.
## computing TEST for test(s) = standard ... done.
## lavbaseline ... done.
## post check  ... done.
## 
## Use lsem.bootstrap() for computing standard errors!
## 
## -|
## ** Parameter summary

summary(lsem_weak)

## -----------------------------------------------------------------
## Local Structural Equation Model 
## 
## sirt 4.2-133 (2025-09-27 12:57:51) 
## lavaan 0.6-19 (2024-09-26 15:50:14 UTC) 
## 
## R version 4.5.1 (2025-06-13 ucrt) x86_64, mingw32 | nodename=HUANG | login=mh198 
## 
## Function 'sirt::lsem.estimate', type='LSEM' 
## 
## 
## Call:
## lsem.estimate(data = as.data.frame(d_apa_cog), moderator = "PC1A_z", 
##     moderator.grid = grid_points, lavmodel = lsem_model, h = 1.5, 
##     standardized = TRUE, par_invariant = weak_constraints, estimator = "ML", 
##     meanstructure = TRUE)
## 
## Date of Analysis: 2026-06-06 23:59:32.040555 
## Time difference of 5.47321 secs
## Computation Time: 5.47321 
## 
## Number of observations in datasets = 1538 
## Used observations in analysis = 1524 
## Used sampling weights: FALSE 
## Bandwidth factor = 1.5 
## Bandwidth = 0.294 
## Number of focal points for moderator = 5 
## 
## Used joint estimation: TRUE 
## Used sufficient statistics: TRUE 
## Used local linear smoothing: TRUE 
## Used pseudo weights: FALSE 
## Used lavaan package: TRUE 
## Used lavaan.survey package: FALSE 
## 
## Mean structure modelled: TRUE 
## 
## lavaan Model
## g =~ PPVT_z + WJ9_z + WJ10_z + DS_z
## 
## Global Fit Statistics for Joint Estimation
## 
##    stat value
## 1 rmsea 0.051
## 2   cfi 0.990
## 3   tli 0.987
## 4   gfi 0.991
## 5  srmr 0.024
## 
## 
## Parameter Estimate Summary
## 
##                    par parindex      M    SD   MAD    Min   Max lin_int lin_slo
## 1            g=~PPVT_z        1  1.000 0.000 0.000  1.000 1.000   1.000   0.000
## 2             g=~WJ9_z        2  1.309 0.000 0.000  1.309 1.309   1.309   0.000
## 3            g=~WJ10_z        3  1.253 0.000 0.000  1.253 1.253   1.253   0.000
## 4              g=~DS_z        4  0.872 0.000 0.000  0.872 0.872   0.872   0.000
## 5       PPVT_z~~PPVT_z        5  0.569 0.079 0.068  0.486 0.723   0.663  -0.298
## 6         WJ9_z~~WJ9_z        6  0.322 0.032 0.028  0.283 0.383   0.360  -0.119
## 7       WJ10_z~~WJ10_z        7  0.403 0.014 0.011  0.367 0.415   0.395   0.023
## 8           DS_z~~DS_z        8  0.718 0.024 0.020  0.671 0.742   0.691   0.086
## 9                 g~~g        9  0.397 0.004 0.003  0.388 0.400   0.395   0.007
## 10            PPVT_z~1       10 -0.001 0.004 0.003 -0.008 0.003  -0.001  -0.001
## 11             WJ9_z~1       11  0.001 0.003 0.002 -0.002 0.008   0.004  -0.007
## 12            WJ10_z~1       12 -0.002 0.005 0.004 -0.007 0.004   0.001  -0.009
## 13              DS_z~1       13 -0.001 0.002 0.001 -0.004 0.001  -0.001   0.000
## 14                 g~1       14  0.000 0.000 0.000  0.000 0.000   0.000   0.000
## 15      std__g=~PPVT_z       15  0.643 0.026 0.023  0.591 0.670   0.612   0.099
## 16       std__g=~WJ9_z       16  0.824 0.014 0.012  0.796 0.840   0.808   0.051
## 17      std__g=~WJ10_z       17  0.780 0.004 0.003  0.776 0.790   0.782  -0.006
## 18        std__g=~DS_z       18  0.544 0.006 0.005  0.539 0.553   0.551  -0.020
## 19 std__PPVT_z~~PPVT_z       19  0.586 0.033 0.029  0.551 0.651   0.626  -0.126
## 20   std__WJ9_z~~WJ9_z       20  0.321 0.022 0.019  0.294 0.366   0.347  -0.084
## 21 std__WJ10_z~~WJ10_z       21  0.392 0.006 0.005  0.376 0.397   0.389   0.010
## 22     std__DS_z~~DS_z       22  0.704 0.006 0.006  0.694 0.709   0.697   0.022
## 23           std__g~~g       23  1.000 0.000 0.000  1.000 1.000   1.000   0.000
## 24       std__PPVT_z~1       24 -0.001 0.003 0.003 -0.008 0.003  -0.001  -0.001
## 25        std__WJ9_z~1       25  0.001 0.003 0.002 -0.002 0.008   0.004  -0.007
## 26       std__WJ10_z~1       26 -0.002 0.005 0.004 -0.007 0.004   0.001  -0.008
## 27         std__DS_z~1       27 -0.001 0.002 0.001 -0.004 0.001  -0.001   0.000
## 28            std__g~1       28  0.000 0.000 0.000  0.000 0.000   0.000   0.000
##    SD_nonlin
## 1      0.000
## 2      0.000
## 3      0.000
## 4      0.000
## 5      0.012
## 6      0.004
## 7      0.013
## 8      0.009
## 9      0.003
## 10     0.004
## 11     0.003
## 12     0.004
## 13     0.002
## 14     0.000
## 15     0.004
## 16     0.001
## 17     0.004
## 18     0.002
## 19     0.005
## 20     0.002
## 21     0.006
## 22     0.002
## 23     0.000
## 24     0.003
## 25     0.003
## 26     0.004
## 27     0.002
## 28     0.000
## 
## Distribution of Moderator: Density and Effective Sample Size
## 
## M=0.062 | SD=0.851
## 
##   moderator   wgt    Neff
## 1    -0.246 0.100 419.601
## 2     0.070 0.167 679.165
## 3     0.264 0.233 789.229
## 4     0.453 0.259 800.087
## 5     0.619 0.241 713.065
## 
##    variable       M      SD     min     max
## 1 moderator   0.062   0.851  -4.930   1.448
## 2       wgt   0.200   0.066   0.100   0.259
## 3      Neff 680.229 154.315 419.601 800.087

b_weak <- lsem.bootstrap(lsem_weak, R = 50, n.core = 14)

## Compute LSEM for bootstrap samples:

## Warning in sqrt(w): NaNs produced

# ── Scalar (strong) ──────────────────────────────────────────────────────

lsem_strong <- lsem.estimate(
  data = as.data.frame(d_apa_cog),
  moderator = "PC1A_z",
  moderator.grid = grid_points,
  lavmodel = lsem_model,
  h = 1.5,
  estimator = "ML",
  meanstructure = TRUE,
  standardized = TRUE,
  par_invariant = strong_constraints
)

## Use joint estimation
## ** Residualize Data
## ** Fit lavaan model
## |*****|
## |lavoptions         ... done.
## lavdata            ... done.
##   Number of observations per group:                   
##     Group 1                                        415
##     Group 2                                        673
##     Group 3                                        782
##     Group 4                                        793
##     Group 5                                        707
## lavsamplestats ... done.
## lavh1              ... start:
## lavh1              ... done.
## lavpartable bounds ... done.
## lavstart           ... done.
## lavmodel           ... done.
## lavoptim           ... start:
## attempt 1 -- default options
##   objective function  = 0.0738287418959241
##   quasi-Newton steps using NLMINB:
##   objective function  = 0.0738287418959241
##   objective function  = 0.0576625777807168
##   objective function  = 0.0278663165629487
##   objective function  = 0.0219230937999248
##   objective function  = 0.0157915131715109
##   objective function  = 0.0145048683825754
##   objective function  = 0.0133541406980544
##   objective function  = 0.0116893750308125
##   objective function  = 0.0106173013333035
##   objective function  = 0.0097660150077022
##   objective function  = 0.0095128522174340
##   objective function  = 0.0093379163130080
##   objective function  = 0.0092289992042474
##   objective function  = 0.0091717272777997
##   objective function  = 0.0091427236539329
##   objective function  = 0.0091250148332232
##   objective function  = 0.0091141639064793
##   objective function  = 0.0091090729643519
##   objective function  = 0.0091069585815648
##   objective function  = 0.0091059798140988
##   objective function  = 0.0091055145710012
##   objective function  = 0.0091053252494740
##   objective function  = 0.0091052503567130
##   objective function  = 0.0091052125224359
##   objective function  = 0.0091051920715713
##   objective function  = 0.0091051830298506
##   objective function  = 0.0091051800968047
##   objective function  = 0.0091051793733580
##   objective function  = 0.0091051791858574
##   objective function  = 0.0091051791149114
##   objective function  = 0.0091051790858061
##   objective function  = 0.0091051790756123
##   objective function  = 0.0091051790713540
##   objective function  = 0.0091051790691325
##   objective function  = 0.0091051790681833
##   objective function  = 0.0091051790681833
##   convergence status (0=ok):  0 
##   nlminb message says:  relative convergence (4) 
##   number of iterations:  34 
##   number of function evaluations [objective, gradient]:  35 35 
## lavoptim    ... done.
## lavimplied  ... done.
## lavloglik   ... done.
## computing TEST for test(s) = standard ... done.
## lavbaseline ... done.
## post check  ... done.
## 
## Use lsem.bootstrap() for computing standard errors!
## 
## -|
## ** Parameter summary

summary(lsem_strong)

## -----------------------------------------------------------------
## Local Structural Equation Model 
## 
## sirt 4.2-133 (2025-09-27 12:57:51) 
## lavaan 0.6-19 (2024-09-26 15:50:14 UTC) 
## 
## R version 4.5.1 (2025-06-13 ucrt) x86_64, mingw32 | nodename=HUANG | login=mh198 
## 
## Function 'sirt::lsem.estimate', type='LSEM' 
## 
## 
## Call:
## lsem.estimate(data = as.data.frame(d_apa_cog), moderator = "PC1A_z", 
##     moderator.grid = grid_points, lavmodel = lsem_model, h = 1.5, 
##     standardized = TRUE, par_invariant = strong_constraints, 
##     estimator = "ML", meanstructure = TRUE)
## 
## Date of Analysis: 2026-06-07 00:14:00.661305 
## Time difference of 4.61526 secs
## Computation Time: 4.61526 
## 
## Number of observations in datasets = 1538 
## Used observations in analysis = 1524 
## Used sampling weights: FALSE 
## Bandwidth factor = 1.5 
## Bandwidth = 0.294 
## Number of focal points for moderator = 5 
## 
## Used joint estimation: TRUE 
## Used sufficient statistics: TRUE 
## Used local linear smoothing: TRUE 
## Used pseudo weights: FALSE 
## Used lavaan package: TRUE 
## Used lavaan.survey package: FALSE 
## 
## Mean structure modelled: TRUE 
## 
## lavaan Model
## g =~ PPVT_z + WJ9_z + WJ10_z + DS_z
## 
## Global Fit Statistics for Joint Estimation
## 
##    stat value
## 1 rmsea 0.030
## 2   cfi 0.994
## 3   tli 0.995
## 4   gfi 0.991
## 5  srmr 0.024
## 
## 
## Parameter Estimate Summary
## 
##                    par parindex      M    SD   MAD    Min    Max lin_int
## 1            g=~PPVT_z        1  1.000 0.000 0.000  1.000  1.000   1.000
## 2             g=~WJ9_z        2  1.309 0.000 0.000  1.309  1.309   1.309
## 3            g=~WJ10_z        3  1.253 0.000 0.000  1.253  1.253   1.253
## 4              g=~DS_z        4  0.872 0.000 0.000  0.872  0.872   0.872
## 5       PPVT_z~~PPVT_z        5  0.569 0.079 0.068  0.486  0.723   0.663
## 6         WJ9_z~~WJ9_z        6  0.322 0.032 0.028  0.283  0.383   0.360
## 7       WJ10_z~~WJ10_z        7  0.403 0.014 0.011  0.367  0.415   0.395
## 8           DS_z~~DS_z        8  0.718 0.024 0.020  0.671  0.742   0.691
## 9                 g~~g        9  0.397 0.004 0.003  0.388  0.400   0.395
## 10            PPVT_z~1       10 -0.001 0.000 0.000 -0.001 -0.001  -0.001
## 11             WJ9_z~1       11  0.002 0.000 0.000  0.002  0.002   0.002
## 12            WJ10_z~1       12 -0.002 0.000 0.000 -0.002 -0.002  -0.002
## 13              DS_z~1       13 -0.001 0.000 0.000 -0.001 -0.001  -0.001
## 14                 g~1       14  0.000 0.000 0.000  0.000  0.000   0.000
## 15      std__g=~PPVT_z       15  0.643 0.026 0.023  0.591  0.670   0.612
## 16       std__g=~WJ9_z       16  0.824 0.014 0.012  0.796  0.840   0.808
## 17      std__g=~WJ10_z       17  0.780 0.004 0.003  0.776  0.790   0.782
## 18        std__g=~DS_z       18  0.544 0.006 0.005  0.539  0.553   0.551
## 19 std__PPVT_z~~PPVT_z       19  0.586 0.033 0.029  0.551  0.651   0.626
## 20   std__WJ9_z~~WJ9_z       20  0.321 0.022 0.019  0.294  0.366   0.347
## 21 std__WJ10_z~~WJ10_z       21  0.392 0.006 0.005  0.376  0.397   0.389
## 22     std__DS_z~~DS_z       22  0.704 0.006 0.006  0.694  0.709   0.697
## 23           std__g~~g       23  1.000 0.000 0.000  1.000  1.000   1.000
## 24       std__PPVT_z~1       24 -0.001 0.000 0.000 -0.001 -0.001  -0.001
## 25        std__WJ9_z~1       25  0.002 0.000 0.000  0.002  0.002   0.002
## 26       std__WJ10_z~1       26 -0.002 0.000 0.000 -0.002 -0.002  -0.002
## 27         std__DS_z~1       27 -0.001 0.000 0.000 -0.001 -0.001  -0.001
## 28            std__g~1       28  0.000 0.000 0.000  0.000  0.000   0.000
##    lin_slo SD_nonlin
## 1    0.000     0.000
## 2    0.000     0.000
## 3    0.000     0.000
## 4    0.000     0.000
## 5   -0.298     0.012
## 6   -0.119     0.004
## 7    0.023     0.013
## 8    0.086     0.009
## 9    0.007     0.003
## 10   0.000     0.000
## 11   0.000     0.000
## 12   0.000     0.000
## 13   0.000     0.000
## 14   0.000     0.000
## 15   0.099     0.004
## 16   0.051     0.001
## 17  -0.006     0.004
## 18  -0.020     0.002
## 19  -0.126     0.005
## 20  -0.084     0.002
## 21   0.010     0.006
## 22   0.022     0.002
## 23   0.000     0.000
## 24   0.000     0.000
## 25   0.000     0.000
## 26   0.000     0.000
## 27   0.000     0.000
## 28   0.000     0.000
## 
## Distribution of Moderator: Density and Effective Sample Size
## 
## M=0.062 | SD=0.851
## 
##   moderator   wgt    Neff
## 1    -0.246 0.100 419.601
## 2     0.070 0.167 679.165
## 3     0.264 0.233 789.229
## 4     0.453 0.259 800.087
## 5     0.619 0.241 713.065
## 
##    variable       M      SD     min     max
## 1 moderator   0.062   0.851  -4.930   1.448
## 2       wgt   0.200   0.066   0.100   0.259
## 3      Neff 680.229 154.315 419.601 800.087

b_strong <- lsem.bootstrap(lsem_strong, R = 50, n.core = 14)

## Compute LSEM for bootstrap samples:

## Warning in sqrt(w): NaNs produced

# ── Strict ───────────────────────────────────────────────────────────────

lsem_strict <- lsem.estimate(
  data = as.data.frame(d_apa_cog),
  moderator = "PC1A_z",
  moderator.grid = grid_points,
  lavmodel = lsem_model,
  h = 1.5,
  estimator = "ML",
  meanstructure = TRUE,
  standardized = TRUE,
  par_invariant = strict_constraints
)

## Use joint estimation
## ** Residualize Data
## ** Fit lavaan model
## |*****|
## |lavoptions         ... done.
## lavdata            ... done.
##   Number of observations per group:                   
##     Group 1                                        415
##     Group 2                                        673
##     Group 3                                        782
##     Group 4                                        793
##     Group 5                                        707
## lavsamplestats ... done.
## lavh1              ... start:
## lavh1              ... done.
## lavpartable bounds ... done.
## lavstart           ... done.
## lavmodel           ... done.
## lavoptim           ... start:
## attempt 1 -- default options
##   objective function  = 0.0738287418959241
##   quasi-Newton steps using NLMINB:
##   objective function  = 0.0738287418959241
##   objective function  = 0.0607208849850779
##   objective function  = 0.0460368580072337
##   objective function  = 0.0282922463973151
##   objective function  = 0.0235982225693757
##   objective function  = 0.0204433819236764
##   objective function  = 0.0173405990967450
##   objective function  = 0.0166501691167717
##   objective function  = 0.0151989219212067
##   objective function  = 0.0149124921074795
##   objective function  = 0.0146599525541600
##   objective function  = 0.0144754567668852
##   objective function  = 0.0143742758813437
##   objective function  = 0.0143158134569081
##   objective function  = 0.0142990744494216
##   objective function  = 0.0142927067858834
##   objective function  = 0.0142889041360845
##   objective function  = 0.0142874893023398
##   objective function  = 0.0142871004911391
##   objective function  = 0.0142868849123034
##   objective function  = 0.0142867022819103
##   objective function  = 0.0142865674403357
##   objective function  = 0.0142865429770247
##   objective function  = 0.0142865385057427
##   objective function  = 0.0142865381148845
##   objective function  = 0.0142865380301765
##   objective function  = 0.0142865380114903
##   objective function  = 0.0142865379976737
##   objective function  = 0.0142865379870640
##   objective function  = 0.0142865379808151
##   objective function  = 0.0142865379788037
##   objective function  = 0.0142865379781723
##   objective function  = 0.0142865379781723
##   convergence status (0=ok):  0 
##   nlminb message says:  relative convergence (4) 
##   number of iterations:  31 
##   number of function evaluations [objective, gradient]:  32 32 
## lavoptim    ... done.
## lavimplied  ... done.
## lavloglik   ... done.
## computing TEST for test(s) = standard ... done.
## lavbaseline ... done.
## post check  ... done.
## 
## Use lsem.bootstrap() for computing standard errors!
## 
## -|
## ** Parameter summary

summary(lsem_strict)

## -----------------------------------------------------------------
## Local Structural Equation Model 
## 
## sirt 4.2-133 (2025-09-27 12:57:51) 
## lavaan 0.6-19 (2024-09-26 15:50:14 UTC) 
## 
## R version 4.5.1 (2025-06-13 ucrt) x86_64, mingw32 | nodename=HUANG | login=mh198 
## 
## Function 'sirt::lsem.estimate', type='LSEM' 
## 
## 
## Call:
## lsem.estimate(data = as.data.frame(d_apa_cog), moderator = "PC1A_z", 
##     moderator.grid = grid_points, lavmodel = lsem_model, h = 1.5, 
##     standardized = TRUE, par_invariant = strict_constraints, 
##     estimator = "ML", meanstructure = TRUE)
## 
## Date of Analysis: 2026-06-07 00:27:23.443818 
## Time difference of 10.26681 secs
## Computation Time: 10.26681 
## 
## Number of observations in datasets = 1538 
## Used observations in analysis = 1524 
## Used sampling weights: FALSE 
## Bandwidth factor = 1.5 
## Bandwidth = 0.294 
## Number of focal points for moderator = 5 
## 
## Used joint estimation: TRUE 
## Used sufficient statistics: TRUE 
## Used local linear smoothing: TRUE 
## Used pseudo weights: FALSE 
## Used lavaan package: TRUE 
## Used lavaan.survey package: FALSE 
## 
## Mean structure modelled: TRUE 
## 
## lavaan Model
## g =~ PPVT_z + WJ9_z + WJ10_z + DS_z
## 
## Global Fit Statistics for Joint Estimation
## 
##    stat value
## 1 rmsea 0.034
## 2   cfi 0.990
## 3   tli 0.994
## 4   gfi 0.986
## 5  srmr 0.030
## 
## 
## Parameter Estimate Summary
## 
##                    par parindex      M    SD   MAD    Min    Max lin_int
## 1            g=~PPVT_z        1  1.000 0.000 0.000  1.000  1.000   1.000
## 2             g=~WJ9_z        2  1.319 0.000 0.000  1.319  1.319   1.319
## 3            g=~WJ10_z        3  1.259 0.000 0.000  1.259  1.259   1.259
## 4              g=~DS_z        4  0.877 0.000 0.000  0.877  0.877   0.877
## 5       PPVT_z~~PPVT_z        5  0.580 0.000 0.000  0.580  0.580   0.580
## 6         WJ9_z~~WJ9_z        6  0.324 0.000 0.000  0.324  0.324   0.324
## 7       WJ10_z~~WJ10_z        7  0.403 0.000 0.000  0.403  0.403   0.403
## 8           DS_z~~DS_z        8  0.715 0.000 0.000  0.715  0.715   0.715
## 9                 g~~g        9  0.393 0.005 0.004  0.385  0.397   0.395
## 10            PPVT_z~1       10 -0.001 0.000 0.000 -0.001 -0.001  -0.001
## 11             WJ9_z~1       11  0.002 0.000 0.000  0.002  0.002   0.002
## 12            WJ10_z~1       12 -0.002 0.000 0.000 -0.002 -0.002  -0.002
## 13              DS_z~1       13 -0.001 0.000 0.000 -0.001 -0.001  -0.001
## 14                 g~1       14  0.000 0.000 0.000  0.000  0.000   0.000
## 15      std__g=~PPVT_z       15  0.635 0.002 0.002  0.631  0.638   0.637
## 16       std__g=~WJ9_z       16  0.824 0.002 0.001  0.821  0.825   0.825
## 17      std__g=~WJ10_z       17  0.779 0.002 0.002  0.776  0.781   0.780
## 18        std__g=~DS_z       18  0.545 0.002 0.002  0.541  0.547   0.546
## 19 std__PPVT_z~~PPVT_z       19  0.596 0.003 0.003  0.593  0.601   0.595
## 20   std__WJ9_z~~WJ9_z       20  0.322 0.003 0.002  0.319  0.326   0.320
## 21 std__WJ10_z~~WJ10_z       21  0.393 0.003 0.003  0.390  0.398   0.392
## 22     std__DS_z~~DS_z       22  0.703 0.003 0.002  0.701  0.707   0.702
## 23           std__g~~g       23  1.000 0.000 0.000  1.000  1.000   1.000
## 24       std__PPVT_z~1       24 -0.001 0.000 0.000 -0.001 -0.001  -0.001
## 25        std__WJ9_z~1       25  0.002 0.000 0.000  0.002  0.002   0.002
## 26       std__WJ10_z~1       26 -0.002 0.000 0.000 -0.002 -0.002  -0.002
## 27         std__DS_z~1       27 -0.001 0.000 0.000 -0.001 -0.001  -0.001
## 28            std__g~1       28  0.000 0.000 0.000  0.000  0.000   0.000
##    lin_slo SD_nonlin
## 1    0.000     0.000
## 2    0.000     0.000
## 3    0.000     0.000
## 4    0.000     0.000
## 5    0.000     0.000
## 6    0.000     0.000
## 7    0.000     0.000
## 8    0.000     0.000
## 9   -0.009     0.004
## 10   0.000     0.000
## 11   0.000     0.000
## 12   0.000     0.000
## 13   0.000     0.000
## 14   0.000     0.000
## 15  -0.004     0.002
## 16  -0.003     0.002
## 17  -0.003     0.002
## 18  -0.004     0.002
## 19   0.005     0.003
## 20   0.005     0.003
## 21   0.005     0.003
## 22   0.005     0.002
## 23   0.000     0.000
## 24   0.000     0.000
## 25   0.000     0.000
## 26   0.000     0.000
## 27   0.000     0.000
## 28   0.000     0.000
## 
## Distribution of Moderator: Density and Effective Sample Size
## 
## M=0.062 | SD=0.851
## 
##   moderator   wgt    Neff
## 1    -0.246 0.100 419.601
## 2     0.070 0.167 679.165
## 3     0.264 0.233 789.229
## 4     0.453 0.259 800.087
## 5     0.619 0.241 713.065
## 
##    variable       M      SD     min     max
## 1 moderator   0.062   0.851  -4.930   1.448
## 2       wgt   0.200   0.066   0.100   0.259
## 3      Neff 680.229 154.315 419.601 800.087

b_strict <- lsem.bootstrap(lsem_strict, R = 50, n.core = 14)

## Compute LSEM for bootstrap samples:

## Warning in sqrt(w): NaNs produced

# ── Compare fit across invariance levels ─────────────────────────────────

invar_test <- rbind(b_config$fitstats_joint,
                    b_weak$fitstats_joint,
                    b_strong$fitstats_joint,
                    b_strict$fitstats_joint) %>%
  mutate(ci_l = value - 1.96 * se,
         ci_u = value + 1.96 * se)

invar_test$level <- c(rep("configural", nrow(b_config$fitstats_joint)),
                      rep("metric", nrow(b_weak$fitstats_joint)),
                      rep("scalar", nrow(b_strong$fitstats_joint)),
                      rep("strict", nrow(b_strict$fitstats_joint)))
invar_test$level <- factor(invar_test$level, levels = unique(invar_test$level))

print(invar_test)

##         stat      value   value_bc          se        ci_l       ci_u
## rmsea  rmsea 0.08151605 0.07609162 0.019801303 0.042705495 0.12032660
## cfi      cfi 0.98896606 0.99096480 0.006026830 0.977153475 1.00077865
## tli      tli 0.96689819 0.97289441 0.018080490 0.931460426 1.00233595
## gfi      gfi 0.99213423 0.99342406 0.003362655 0.985543427 0.99872503
## srmr    srmr 0.01980587 0.01879424 0.004132555 0.011706066 0.02790568
## rmsea1 rmsea 0.05143884 0.04505043 0.015915960 0.020243560 0.08263412
## cfi1     cfi 0.99033394 0.99366876 0.006413647 0.977763196 1.00290469
## tli1     tli 0.98681901 0.99134373 0.008794299 0.969582188 1.00405584
## gfi1     gfi 0.99125614 0.99319466 0.003482912 0.984429631 0.99808265
## srmr1   srmr 0.02412126 0.01768349 0.005301589 0.013730150 0.03451238
## rmsea2 rmsea 0.03020629 0.02412655 0.014607970 0.001574674 0.05883792
## cfi2     cfi 0.99424265 0.99818689 0.006006430 0.982470044 1.00601525
## tli2     tli 0.99545472 0.99849641 0.004868532 0.985912400 1.00499704
## gfi2     gfi 0.99123627 0.99342120 0.003325152 0.984718973 0.99775357
## srmr2   srmr 0.02419120 0.01956473 0.005007583 0.014376338 0.03400606
## rmsea3 rmsea 0.03408776 0.01994021 0.012213909 0.010148502 0.05802703
## cfi3     cfi 0.98958078 1.00134079 0.010595896 0.968812828 1.01034874
## tli3     tli 0.99421155 1.00074489 0.005886609 0.982673793 1.00574930
## gfi3     gfi 0.98632823 0.99290633 0.005517029 0.975514857 0.99714161
## srmr3   srmr 0.03025903 0.01798922 0.006240058 0.018028517 0.04248954
##             level
## rmsea  configural
## cfi    configural
## tli    configural
## gfi    configural
## srmr   configural
## rmsea1     metric
## cfi1       metric
## tli1       metric
## gfi1       metric
## srmr1      metric
## rmsea2     scalar
## cfi2       scalar
## tli2       scalar
## gfi2       scalar
## srmr2      scalar
## rmsea3     strict
## cfi3       strict
## tli3       strict
## gfi3       strict
## srmr3      strict

# ── Plots ────────────────────────────────────────────────────────────────

p_cfi <- invar_test %>%
  filter(stat == "cfi") %>%
  ggplot(aes(x = level, y = value)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u), width = .2) +
  scale_y_continuous(limits = c(.9, 1)) +
  ylab("CFI") + xlab("Invariance Level") +
  theme_minimal()

p_rmsea <- invar_test %>%
  filter(stat == "rmsea") %>%
  ggplot(aes(x = level, y = value)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u), width = .2) +
  scale_y_continuous(limits = c(0, .15)) +
  ylab("RMSEA") + xlab("Invariance Level") +
  theme_minimal()

p_srmr <- invar_test %>%
  filter(stat == "srmr") %>%
  ggplot(aes(x = level, y = value)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u), width = .2) +
  scale_y_continuous(limits = c(0, .15)) +
  ylab("SRMR") + xlab("Invariance Level") +
  theme_minimal()

print(p_cfi)

print(p_rmsea)

print(p_srmr)

#LevelRMSEACFITLISRMRConfigural.082 (.031–.131).989 (.973–1.005).967 (.919–1.015).020 (.010–.030)
                        #Metric.051 (.021–.082).990 (.976–1.005).987 (.967–1.006).024 (.013–.035)
                        #Scalar.030 (.001–.059).994 (.982–1.006).995 (.986–1.005).024 (.014–.034)
                        #Strict.034 (.010–.058).990 (.973–1.006).994 (.985–1.004).030 (.020–.041)



# ==============================================================
# 11b. Multi-Group CFA: Measurement Invariance (PC1A tertiles)
# ==============================================================
# Direct test of configural -> metric -> scalar -> strict.
# Complements LSEM: LSEM tests continuous parameter moderation
# but doesn't explicitly test intercepts. This confirms scalar
# invariance directly.

cfa_model <- 'g =~ PPVT_z + WJ9_z + WJ10_z + DS_z'

run_mi_cfa <- function(data, pc_var, label) {

  dd <- data[complete.cases(data[, c("PPVT_z", "WJ9_z", "WJ10_z", "DS_z", pc_var)]), ]
  dd$tert <- cut(dd[[pc_var]],
                 breaks = quantile(dd[[pc_var]], probs = c(0, 1/3, 2/3, 1)),
                 include.lowest = TRUE, labels = c("Low", "Mid", "High"))

  cat("\n", paste(rep("=", 80), collapse = ""), "\n")
  cat(label, "\n")
  cat("N =", nrow(dd), " | Tertile Ns:", table(dd$tert), "\n")
  cat(paste(rep("=", 80), collapse = ""), "\n")

  fit_config <- cfa(cfa_model, data = dd, group = "tert", estimator = "ML",
                    meanstructure = TRUE)
  fit_metric <- cfa(cfa_model, data = dd, group = "tert", estimator = "ML",
                    meanstructure = TRUE, group.equal = "loadings")
  fit_scalar <- cfa(cfa_model, data = dd, group = "tert", estimator = "ML",
                    meanstructure = TRUE, group.equal = c("loadings", "intercepts"))
  fit_strict <- cfa(cfa_model, data = dd, group = "tert", estimator = "ML",
                    meanstructure = TRUE, group.equal = c("loadings", "intercepts", "residuals"))  
  extract_fit <- function(fit, lbl) {
    fm <- fitmeasures(fit, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))
    data.frame(Model = lbl,
               chisq = round(fm["chisq"], 3),
               df    = fm["df"],
               p     = round(fm["pvalue"], 4),
               CFI   = round(fm["cfi"], 4),
               TLI   = round(fm["tli"], 4),
               RMSEA = round(fm["rmsea"], 4),
               SRMR  = round(fm["srmr"], 4),
               row.names = NULL)
  }

  fit_table <- rbind(
    extract_fit(fit_config, "1. Configural"),
    extract_fit(fit_metric, "2. Metric"),
    extract_fit(fit_scalar, "3. Scalar"),
    extract_fit(fit_strict, "4. Strict")
  )
  print(fit_table, row.names = FALSE)

  cfis <- sapply(list(fit_config, fit_metric, fit_scalar, fit_strict), fitmeasures, "cfi")
  steps <- c("Config->Metric", "Metric->Scalar", "Scalar->Strict")
  cat("\n")
  for (i in 1:3) {
    d_cfi <- cfis[i + 1] - cfis[i]
    cat(sprintf("  %-20s  ΔCFI = %+.4f  %s\n", steps[i], d_cfi,
                ifelse(abs(d_cfi) < 0.01, "[PASS]", "[FAIL]")))
  }
  cat("\n")
}

run_mi_cfa(d_apa, "K5PC1A", "APA x African Ancestry PC (PC1A) tertiles")

## 
##  ================================================================================ 
## APA x African Ancestry PC (PC1A) tertiles 
## N = 1507  | Tertile Ns: 503 502 502 
## ================================================================================ 
##          Model  chisq df      p    CFI    TLI  RMSEA   SRMR
##  1. Configural 27.064  6 0.0001 0.9882 0.9647 0.0836 0.0202
##      2. Metric 30.133 12 0.0027 0.9899 0.9848 0.0548 0.0246
##      3. Scalar 44.392 18 0.0005 0.9853 0.9853 0.0540 0.0316
##      4. Strict 64.077 26 0.0000 0.9788 0.9853 0.0540 0.0423
## 
##   Config->Metric        ΔCFI = +0.0016  [PASS]
##   Metric->Scalar        ΔCFI = -0.0046  [PASS]
##   Scalar->Strict        ΔCFI = -0.0065  [PASS]

run_mi_cfa(d_haa, "K5PC1A", "HAA x African Ancestry PC (PC1A) tertiles")

## 
##  ================================================================================ 
## HAA x African Ancestry PC (PC1A) tertiles 
## N = 1261  | Tertile Ns: 421 420 420 
## ================================================================================ 
##          Model  chisq df      p    CFI    TLI  RMSEA   SRMR
##  1. Configural 24.788  6 0.0004 0.9876 0.9629 0.0863 0.0209
##      2. Metric 27.855 12 0.0058 0.9896 0.9843 0.0561 0.0257
##      3. Scalar 35.979 18 0.0071 0.9882 0.9882 0.0487 0.0302
##      4. Strict 67.081 26 0.0000 0.9729 0.9813 0.0613 0.0438
## 
##   Config->Metric        ΔCFI = +0.0019  [PASS]
##   Metric->Scalar        ΔCFI = -0.0014  [PASS]
##   Scalar->Strict        ΔCFI = -0.0152  [FAIL]

# ==============================================================
# 11c. LSEM Bandwidth Sensitivity
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("ROBUSTNESS: LSEM bandwidth sensitivity (configural only)\n")

## ROBUSTNESS: LSEM bandwidth sensitivity (configural only)

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

for (h_val in c(1.0, 2.0)) {
  lsem_h <- lsem.estimate(
    data = as.data.frame(d_apa_cog),
    moderator = "PC1A_z",
    moderator.grid = grid_points,
    lavmodel = lsem_model,
    h = h_val,
    estimator = "ML",
    meanstructure = TRUE,
    standardized = TRUE,
    est_joint = TRUE
  )
  cat("\nh =", h_val, "\n")
  print(summary(lsem_h)$fitstat_joint)
}

## ** Residualize Data
## ** Fit lavaan model
## |*****|
## |lavoptions         ... done.
## lavdata            ... done.
##   Number of observations per group:                   
##     Group 1                                        260
##     Group 2                                        469
##     Group 3                                        568
##     Group 4                                        592
##     Group 5                                        526
## lavsamplestats ... done.
## lavh1              ... start:
## lavh1              ... done.
## lavpartable bounds ... done.
## lavstart           ... done.
## lavmodel           ... done.
## lavoptim           ... start:
## attempt 1 -- default options
##   objective function  = 0.4646802919463014
##   quasi-Newton steps using NLMINB:
##   objective function  = 0.4646802919463014
##   objective function  = 0.4225779864181642
##   objective function  = 0.3030533682262448
##   objective function  = 0.1497680920336554
##   objective function  = 0.1279494179505798
##   objective function  = 0.0777625558671662
##   objective function  = 0.0798683093680342
##   objective function  = 0.0599892344685638
##   objective function  = 0.0444734341401703
##   objective function  = 0.0340176569516158
##   objective function  = 0.0270121526829740
##   objective function  = 0.0213671496784743
##   objective function  = 0.0174365298548621
##   objective function  = 0.0151715145285565
##   objective function  = 0.0138179123770358
##   objective function  = 0.0128967974362694
##   objective function  = 0.0121196769333752
##   objective function  = 0.0112136582771468
##   objective function  = 0.0107934502881314
##   objective function  = 0.0104370158380902
##   objective function  = 0.0101081358456193
##   objective function  = 0.0099255480230321
##   objective function  = 0.0097135264327441
##   objective function  = 0.0095350280138221
##   objective function  = 0.0094812292277207
##   objective function  = 0.0093933602798418
##   objective function  = 0.0093071341004641
##   objective function  = 0.0092492658402065
##   objective function  = 0.0091913130662916
##   objective function  = 0.0091521573609960
##   objective function  = 0.0091236696274345
##   objective function  = 0.0091040328546773
##   objective function  = 0.0090891759272326
##   objective function  = 0.0090794229625522
##   objective function  = 0.0090737565202157
##   objective function  = 0.0090704309157353
##   objective function  = 0.0090678149770005
##   objective function  = 0.0090655767130282
##   objective function  = 0.0090631642283111
##   objective function  = 0.0090613466648644
##   objective function  = 0.0090595663705665
##   objective function  = 0.0090578618798858
##   objective function  = 0.0090559849108183
##   objective function  = 0.0090545824008221
##   objective function  = 0.0090535150648220
##   objective function  = 0.0090529481224803
##   objective function  = 0.0090525439401810
##   objective function  = 0.0090522443068635
##   objective function  = 0.0090519155764478
##   objective function  = 0.0090517124764770
##   objective function  = 0.0090515210609698
##   objective function  = 0.0090513909308049
##   objective function  = 0.0090512890855913
##   objective function  = 0.0090512166962937
##   objective function  = 0.0090511618843839
##   objective function  = 0.0090511161610214
##   objective function  = 0.0090510805827558
##   objective function  = 0.0090510530777419
##   objective function  = 0.0090510375414917
##   objective function  = 0.0090510276174877
##   objective function  = 0.0090510216355476
##   objective function  = 0.0090510172627789
##   objective function  = 0.0090510140514977
##   objective function  = 0.0090510116985100
##   objective function  = 0.0090510101536696
##   objective function  = 0.0090510093647467
##   objective function  = 0.0090510090080444
##   objective function  = 0.0090510088512363
##   objective function  = 0.0090510087716353
##   objective function  = 0.0090510087316453
##   objective function  = 0.0090510087102711
##   objective function  = 0.0090510086953283
##   objective function  = 0.0090510086825069
##   objective function  = 0.0090510086730037
##   objective function  = 0.0090510086676137
##   objective function  = 0.0090510086646993
##   objective function  = 0.0090510086625184
##   objective function  = 0.0090510086606419
##   objective function  = 0.0090510086593741
##   objective function  = 0.0090510086584562
##   objective function  = 0.0090510086584562
##   convergence status (0=ok):  0 
##   nlminb message says:  relative convergence (4) 
##   number of iterations:  78 
##   number of function evaluations [objective, gradient]:  80 79 
## lavoptim    ... done.
## lavimplied  ... done.
## lavloglik   ... done.
## computing TEST for test(s) = standard ... done.
## lavbaseline ... done.
## post check  ... done.
## 
## Use lsem.bootstrap() for computing standard errors!
## 
## -|
## ** Parameter summary 
## 
## 
## h = 1 
## -----------------------------------------------------------------
## Local Structural Equation Model 
## 
## sirt 4.2-133 (2025-09-27 12:57:51) 
## lavaan 0.6-19 (2024-09-26 15:50:14 UTC) 
## 
## R version 4.5.1 (2025-06-13 ucrt) x86_64, mingw32 | nodename=HUANG | login=mh198 
## 
## Function 'sirt::lsem.estimate', type='LSEM' 
## 
## 
## Call:
## lsem.estimate(data = as.data.frame(d_apa_cog), moderator = "PC1A_z", 
##     moderator.grid = grid_points, lavmodel = lsem_model, h = h_val, 
##     standardized = TRUE, est_joint = TRUE, estimator = "ML", 
##     meanstructure = TRUE)
## 
## Date of Analysis: 2026-06-07 00:39:47.125757 
## Time difference of 4.79413 secs
## Computation Time: 4.79413 
## 
## Number of observations in datasets = 1538 
## Used observations in analysis = 1524 
## Used sampling weights: FALSE 
## Bandwidth factor = 1 
## Bandwidth = 0.196 
## Number of focal points for moderator = 5 
## 
## Used joint estimation: TRUE 
## Used sufficient statistics: TRUE 
## Used local linear smoothing: TRUE 
## Used pseudo weights: FALSE 
## Used lavaan package: TRUE 
## Used lavaan.survey package: FALSE 
## 
## Mean structure modelled: TRUE 
## 
## lavaan Model
## g =~ PPVT_z + WJ9_z + WJ10_z + DS_z
## 
## Global Fit Statistics for Joint Estimation
## 
##    stat value
## 1 rmsea 0.084
## 2   cfi 0.988
## 3   tli 0.965
## 4   gfi 0.991
## 5  srmr 0.021
## 
## 
## Parameter Estimate Summary
## 
##                    par parindex      M    SD   MAD    Min   Max lin_int lin_slo
## 1            g=~PPVT_z        1  1.000 0.000 0.000  1.000 1.000   1.000   0.000
## 2             g=~WJ9_z        2  1.314 0.099 0.089  1.213 1.481   1.417  -0.325
## 3            g=~WJ10_z        3  1.255 0.058 0.046  1.212 1.403   1.319  -0.204
## 4              g=~DS_z        4  0.877 0.037 0.027  0.845 0.974   0.909  -0.102
## 5       PPVT_z~~PPVT_z        5  0.563 0.096 0.082  0.468 0.733   0.674  -0.354
## 6         WJ9_z~~WJ9_z        6  0.316 0.014 0.011  0.294 0.340   0.330  -0.047
## 7       WJ10_z~~WJ10_z        7  0.409 0.018 0.016  0.371 0.422   0.406   0.009
## 8           DS_z~~DS_z        8  0.723 0.034 0.030  0.664 0.760   0.686   0.118
## 9                 g~~g        9  0.398 0.038 0.032  0.327 0.431   0.355   0.139
## 10            PPVT_z~1       10  0.001 0.006 0.004 -0.008 0.013   0.002  -0.001
## 11             WJ9_z~1       11  0.002 0.005 0.004 -0.002 0.013   0.003  -0.004
## 12            WJ10_z~1       12  0.000 0.004 0.003 -0.005 0.005   0.001  -0.005
## 13              DS_z~1       13 -0.001 0.002 0.002 -0.003 0.003   0.000  -0.002
## 14                 g~1       14  0.000 0.000 0.000  0.000 0.000   0.000   0.000
## 15      std__g=~PPVT_z       15  0.645 0.050 0.043  0.556 0.693   0.587   0.183
## 16       std__g=~WJ9_z       16  0.827 0.008 0.007  0.809 0.838   0.826   0.003
## 17      std__g=~WJ10_z       17  0.777 0.010 0.009  0.764 0.797   0.776   0.004
## 18        std__g=~DS_z       18  0.545 0.009 0.008  0.534 0.564   0.546  -0.005
## 19 std__PPVT_z~~PPVT_z       19  0.582 0.063 0.054  0.520 0.691   0.654  -0.230
## 20   std__WJ9_z~~WJ9_z       20  0.317 0.014 0.011  0.298 0.345   0.318  -0.005
## 21 std__WJ10_z~~WJ10_z       21  0.396 0.016 0.014  0.366 0.416   0.398  -0.006
## 22     std__DS_z~~DS_z       22  0.703 0.010 0.008  0.681 0.715   0.702   0.006
## 23           std__g~~g       23  1.000 0.000 0.000  1.000 1.000   1.000   0.000
## 24       std__PPVT_z~1       24  0.001 0.005 0.004 -0.007 0.012   0.002  -0.001
## 25        std__WJ9_z~1       25  0.002 0.005 0.004 -0.002 0.012   0.003  -0.003
## 26       std__WJ10_z~1       26  0.000 0.004 0.003 -0.005 0.005   0.001  -0.005
## 27         std__DS_z~1       27 -0.001 0.002 0.002 -0.003 0.003   0.000  -0.002
## 28            std__g~1       28  0.000 0.000 0.000  0.000 0.000   0.000   0.000
##    SD_nonlin
## 1      0.000
## 2      0.049
## 3      0.023
## 4      0.025
## 5      0.024
## 6      0.006
## 7      0.018
## 8      0.014
## 9      0.011
## 10     0.006
## 11     0.005
## 12     0.004
## 13     0.002
## 14     0.000
## 15     0.013
## 16     0.008
## 17     0.010
## 18     0.009
## 19     0.016
## 20     0.014
## 21     0.016
## 22     0.010
## 23     0.000
## 24     0.005
## 25     0.005
## 26     0.004
## 27     0.002
## 28     0.000
## 
## Distribution of Moderator: Density and Effective Sample Size
## 
## M=0.062 | SD=0.851
## 
##   moderator   wgt    Neff
## 1    -0.246 0.100 262.694
## 2     0.070 0.167 473.933
## 3     0.264 0.233 572.817
## 4     0.453 0.259 597.446
## 5     0.619 0.241 531.044
## 
##    variable       M      SD     min     max
## 1 moderator   0.062   0.851  -4.930   1.448
## 2       wgt   0.200   0.066   0.100   0.259
## 3      Neff 487.587 134.150 262.694 597.446
## NULL
## ** Residualize Data
## ** Fit lavaan model
## |*****|
## |lavoptions         ... done.
## lavdata            ... done.
##   Number of observations per group:                   
##     Group 1                                        575
##     Group 2                                        840
##     Group 3                                        936
##     Group 4                                        934
##     Group 5                                        846
## lavsamplestats ... done.
## lavh1              ... start:
## lavh1              ... done.
## lavpartable bounds ... done.
## lavstart           ... done.
## lavmodel           ... done.
## lavoptim           ... start:
## attempt 1 -- default options
##   objective function  = 0.0377520215371714
##   quasi-Newton steps using NLMINB:
##   objective function  = 0.0377520215371714
##   objective function  = 0.0290327677763251
##   objective function  = 0.0157769580623049
##   objective function  = 0.0135850350477670
##   objective function  = 0.0111661433071985
##   objective function  = 0.0102969869167592
##   objective function  = 0.0096173778122135
##   objective function  = 0.0090299144523986
##   objective function  = 0.0086500877636569
##   objective function  = 0.0084386507079767
##   objective function  = 0.0081821980604341
##   objective function  = 0.0079682594800023
##   objective function  = 0.0077874145204968
##   objective function  = 0.0076473417731989
##   objective function  = 0.0075681840081152
##   objective function  = 0.0075008818479938
##   objective function  = 0.0074547260793011
##   objective function  = 0.0074204004740911
##   objective function  = 0.0073964609457680
##   objective function  = 0.0073790549148058
##   objective function  = 0.0073654892095357
##   objective function  = 0.0073542628474625
##   objective function  = 0.0073455818825337
##   objective function  = 0.0073385722018415
##   objective function  = 0.0073321982163790
##   objective function  = 0.0073262705208896
##   objective function  = 0.0073195883918944
##   objective function  = 0.0073150190070719
##   objective function  = 0.0073116937038556
##   objective function  = 0.0073098025573191
##   objective function  = 0.0073082610994214
##   objective function  = 0.0073068798413530
##   objective function  = 0.0073056859398833
##   objective function  = 0.0073046218316870
##   objective function  = 0.0073041389052434
##   objective function  = 0.0073037983440756
##   objective function  = 0.0073035995723406
##   objective function  = 0.0073034270148445
##   objective function  = 0.0073032399053524
##   objective function  = 0.0073030943362218
##   objective function  = 0.0073029564541183
##   objective function  = 0.0073028368078026
##   objective function  = 0.0073027547537983
##   objective function  = 0.0073026915439050
##   objective function  = 0.0073026415664659
##   objective function  = 0.0073026079342034
##   objective function  = 0.0073025889947602
##   objective function  = 0.0073025795905114
##   objective function  = 0.0073025739129707
##   objective function  = 0.0073025695569176
##   objective function  = 0.0073025661982127
##   objective function  = 0.0073025637752580
##   objective function  = 0.0073025619608914
##   objective function  = 0.0073025605012492
##   objective function  = 0.0073025593749970
##   objective function  = 0.0073025585971924
##   objective function  = 0.0073025580842460
##   objective function  = 0.0073025577256517
##   objective function  = 0.0073025574563816
##   objective function  = 0.0073025572532064
##   objective function  = 0.0073025571120364
##   objective function  = 0.0073025570269348
##   objective function  = 0.0073025569784277
##   objective function  = 0.0073025569451126
##   objective function  = 0.0073025569165328
##   objective function  = 0.0073025568923179
##   objective function  = 0.0073025568736596
##   objective function  = 0.0073025568648071
##   objective function  = 0.0073025568614206
##   objective function  = 0.0073025568601680
##   objective function  = 0.0073025568596323
##   objective function  = 0.0073025568596323
##   convergence status (0=ok):  0 
##   nlminb message says:  relative convergence (4) 
##   number of iterations:  70 
##   number of function evaluations [objective, gradient]:  71 71 
## lavoptim    ... done.
## lavimplied  ... done.
## lavloglik   ... done.
## computing TEST for test(s) = standard ... done.
## lavbaseline ... done.
## post check  ... done.
## 
## Use lsem.bootstrap() for computing standard errors!
## 
## -|
## ** Parameter summary 
## 
## 
## h = 2 
## -----------------------------------------------------------------
## Local Structural Equation Model 
## 
## sirt 4.2-133 (2025-09-27 12:57:51) 
## lavaan 0.6-19 (2024-09-26 15:50:14 UTC) 
## 
## R version 4.5.1 (2025-06-13 ucrt) x86_64, mingw32 | nodename=HUANG | login=mh198 
## 
## Function 'sirt::lsem.estimate', type='LSEM' 
## 
## 
## Call:
## lsem.estimate(data = as.data.frame(d_apa_cog), moderator = "PC1A_z", 
##     moderator.grid = grid_points, lavmodel = lsem_model, h = h_val, 
##     standardized = TRUE, est_joint = TRUE, estimator = "ML", 
##     meanstructure = TRUE)
## 
## Date of Analysis: 2026-06-07 00:39:51.992176 
## Time difference of 4.613003 secs
## Computation Time: 4.613003 
## 
## Number of observations in datasets = 1538 
## Used observations in analysis = 1524 
## Used sampling weights: FALSE 
## Bandwidth factor = 2 
## Bandwidth = 0.392 
## Number of focal points for moderator = 5 
## 
## Used joint estimation: TRUE 
## Used sufficient statistics: TRUE 
## Used local linear smoothing: TRUE 
## Used pseudo weights: FALSE 
## Used lavaan package: TRUE 
## Used lavaan.survey package: FALSE 
## 
## Mean structure modelled: TRUE 
## 
## lavaan Model
## g =~ PPVT_z + WJ9_z + WJ10_z + DS_z
## 
## Global Fit Statistics for Joint Estimation
## 
##    stat value
## 1 rmsea 0.078
## 2   cfi 0.990
## 3   tli 0.970
## 4   gfi 0.993
## 5  srmr 0.019
## 
## 
## Parameter Estimate Summary
## 
##                    par parindex      M    SD   MAD    Min   Max lin_int lin_slo
## 1            g=~PPVT_z        1  1.000 0.000 0.000  1.000 1.000   1.000   0.000
## 2             g=~WJ9_z        2  1.306 0.078 0.070  1.206 1.431   1.398  -0.290
## 3            g=~WJ10_z        3  1.253 0.052 0.040  1.202 1.375   1.314  -0.194
## 4              g=~DS_z        4  0.871 0.027 0.021  0.845 0.938   0.904  -0.102
## 5       PPVT_z~~PPVT_z        5  0.569 0.078 0.067  0.481 0.720   0.661  -0.293
## 6         WJ9_z~~WJ9_z        6  0.326 0.028 0.019  0.304 0.403   0.357  -0.096
## 7       WJ10_z~~WJ10_z        7  0.398 0.018 0.016  0.355 0.416   0.394   0.013
## 8           DS_z~~DS_z        8  0.715 0.017 0.014  0.676 0.731   0.696   0.060
## 9                 g~~g        9  0.399 0.034 0.029  0.331 0.438   0.359   0.129
## 10            PPVT_z~1       10 -0.002 0.002 0.002 -0.004 0.000  -0.001  -0.003
## 11             WJ9_z~1       11  0.000 0.003 0.003 -0.002 0.005   0.004  -0.011
## 12            WJ10_z~1       12 -0.002 0.004 0.004 -0.007 0.004   0.002  -0.012
## 13              DS_z~1       13 -0.002 0.001 0.001 -0.005 0.000  -0.002   0.001
## 14                 g~1       14  0.000 0.000 0.000  0.000 0.000   0.000   0.000
## 15      std__g=~PPVT_z       15  0.643 0.042 0.036  0.561 0.690   0.593   0.157
## 16       std__g=~WJ9_z       16  0.822 0.010 0.006  0.792 0.827   0.814   0.024
## 17      std__g=~WJ10_z       17  0.781 0.008 0.007  0.773 0.798   0.782  -0.001
## 18        std__g=~DS_z       18  0.545 0.003 0.003  0.541 0.549   0.544   0.003
## 19 std__PPVT_z~~PPVT_z       19  0.585 0.053 0.046  0.523 0.685   0.648  -0.198
## 20   std__WJ9_z~~WJ9_z       20  0.325 0.016 0.010  0.315 0.373   0.337  -0.039
## 21 std__WJ10_z~~WJ10_z       21  0.389 0.013 0.011  0.362 0.402   0.389   0.002
## 22     std__DS_z~~DS_z       22  0.703 0.003 0.003  0.698 0.707   0.704  -0.004
## 23           std__g~~g       23  1.000 0.000 0.000  1.000 1.000   1.000   0.000
## 24       std__PPVT_z~1       24 -0.002 0.002 0.002 -0.005 0.000  -0.001  -0.004
## 25        std__WJ9_z~1       25  0.000 0.003 0.003 -0.003 0.005   0.004  -0.011
## 26       std__WJ10_z~1       26 -0.002 0.004 0.004 -0.007 0.004   0.002  -0.012
## 27         std__DS_z~1       27 -0.002 0.001 0.001 -0.005 0.000  -0.002   0.001
## 28            std__g~1       28  0.000 0.000 0.000  0.000 0.000   0.000   0.000
##    SD_nonlin
## 1      0.000
## 2      0.018
## 3      0.007
## 4      0.005
## 5      0.008
## 6      0.011
## 7      0.018
## 8      0.007
## 9      0.002
## 10     0.002
## 11     0.001
## 12     0.002
## 13     0.001
## 14     0.000
## 15     0.004
## 16     0.008
## 17     0.008
## 18     0.003
## 19     0.006
## 20     0.013
## 21     0.013
## 22     0.003
## 23     0.000
## 24     0.002
## 25     0.001
## 26     0.002
## 27     0.001
## 28     0.000
## 
## Distribution of Moderator: Density and Effective Sample Size
## 
## M=0.062 | SD=0.851
## 
##   moderator   wgt    Neff
## 1    -0.246 0.100 580.209
## 2     0.070 0.167 848.175
## 3     0.264 0.233 945.139
## 4     0.453 0.259 942.705
## 5     0.619 0.241 853.379
## 
##    variable       M      SD     min     max
## 1 moderator   0.062   0.851  -4.930   1.448
## 2       wgt   0.200   0.066   0.100   0.259
## 3      Neff 833.921 149.294 580.209 945.139
## NULL

# run invariance models for comparison

cat("\nScalar invariance at h = 1.0:\n")

## 
## Scalar invariance at h = 1.0:

lsem_strong_h1 <- lsem.estimate(
  data = as.data.frame(d_apa_cog),
  moderator = "PC1A_z",
  moderator.grid = grid_points,
  lavmodel = lsem_model,
  h = 1.0,
  estimator = "ML",
  meanstructure = TRUE,
  standardized = TRUE,
  par_invariant = strong_constraints
)

## Use joint estimation
## ** Residualize Data
## ** Fit lavaan model
## |*****|
## |lavoptions         ... done.
## lavdata            ... done.
##   Number of observations per group:                   
##     Group 1                                        260
##     Group 2                                        469
##     Group 3                                        568
##     Group 4                                        592
##     Group 5                                        526
## lavsamplestats ... done.
## lavh1              ... start:
## lavh1              ... done.
## lavpartable bounds ... done.
## lavstart           ... done.
## lavmodel           ... done.
## lavoptim           ... start:
## attempt 1 -- default options
##   objective function  = 0.4646802919463016
##   quasi-Newton steps using NLMINB:
##   objective function  = 0.4646802919463016
##   objective function  = 0.4232976035920123
##   objective function  = 0.3807467900417316
##   objective function  = 0.3409017491427810
##   objective function  = 0.2736828981674165
##   objective function  =                Inf
##   objective function  = 0.2303475896993389
##   objective function  = 0.1645452349024202
##   objective function  = 0.1588435629690765
##   objective function  = 0.1492275036007128
##   objective function  = 0.1192368379048396
##   objective function  = 0.1889196912016715
##   objective function  = 0.1041446110785343
##   objective function  = 0.0944296225667279
##   objective function  = 0.0953728643739491
##   objective function  = 0.0846064049069207
##   objective function  = 0.0770207738115011
##   objective function  = 0.0596797522324349
##   objective function  = 0.0478058953560617
##   objective function  = 0.0396395832465671
##   objective function  = 0.0289207964272656
##   objective function  = 0.0230442958463596
##   objective function  = 0.0202570296086500
##   objective function  = 0.0176319461358133
##   objective function  = 0.0146775887716812
##   objective function  = 0.0132438560073690
##   objective function  = 0.0121594940702959
##   objective function  = 0.0111020306042772
##   objective function  = 0.0108591293427332
##   objective function  = 0.0106621612127983
##   objective function  = 0.0105922809652880
##   objective function  = 0.0105604957911247
##   objective function  = 0.0105468800176812
##   objective function  = 0.0105390378361020
##   objective function  = 0.0105350290674740
##   objective function  = 0.0105319129799973
##   objective function  = 0.0105309934565356
##   objective function  = 0.0105304841267851
##   objective function  = 0.0105303794991556
##   objective function  = 0.0105303258784127
##   objective function  = 0.0105302889909400
##   objective function  = 0.0105302658944006
##   objective function  = 0.0105302532648457
##   objective function  = 0.0105302486054307
##   objective function  = 0.0105302468587104
##   objective function  = 0.0105302460135668
##   objective function  = 0.0105302456257251
##   objective function  = 0.0105302454742300
##   objective function  = 0.0105302454077906
##   objective function  = 0.0105302453779024
##   objective function  = 0.0105302453662230
##   objective function  = 0.0105302453611104
##   objective function  = 0.0105302453585576
##   objective function  = 0.0105302453576604
##   objective function  = 0.0105302453576604
##   convergence status (0=ok):  0 
##   nlminb message says:  relative convergence (4) 
##   number of iterations:  47 
##   number of function evaluations [objective, gradient]:  54 48 
## lavoptim    ... done.
## lavimplied  ... done.
## lavloglik   ... done.
## computing TEST for test(s) = standard ... done.
## lavbaseline ... done.
## post check  ... done.
## 
## Use lsem.bootstrap() for computing standard errors!
## 
## -|
## ** Parameter summary

print(summary(lsem_strong_h1)$fitstat_joint)

## -----------------------------------------------------------------
## Local Structural Equation Model 
## 
## sirt 4.2-133 (2025-09-27 12:57:51) 
## lavaan 0.6-19 (2024-09-26 15:50:14 UTC) 
## 
## R version 4.5.1 (2025-06-13 ucrt) x86_64, mingw32 | nodename=HUANG | login=mh198 
## 
## Function 'sirt::lsem.estimate', type='LSEM' 
## 
## 
## Call:
## lsem.estimate(data = as.data.frame(d_apa_cog), moderator = "PC1A_z", 
##     moderator.grid = grid_points, lavmodel = lsem_model, h = 1, 
##     standardized = TRUE, par_invariant = strong_constraints, 
##     estimator = "ML", meanstructure = TRUE)
## 
## Date of Analysis: 2026-06-07 00:39:56.700718 
## Time difference of 4.537394 secs
## Computation Time: 4.537394 
## 
## Number of observations in datasets = 1538 
## Used observations in analysis = 1524 
## Used sampling weights: FALSE 
## Bandwidth factor = 1 
## Bandwidth = 0.196 
## Number of focal points for moderator = 5 
## 
## Used joint estimation: TRUE 
## Used sufficient statistics: TRUE 
## Used local linear smoothing: TRUE 
## Used pseudo weights: FALSE 
## Used lavaan package: TRUE 
## Used lavaan.survey package: FALSE 
## 
## Mean structure modelled: TRUE 
## 
## lavaan Model
## g =~ PPVT_z + WJ9_z + WJ10_z + DS_z
## 
## Global Fit Statistics for Joint Estimation
## 
##    stat value
## 1 rmsea 0.026
## 2   cfi 0.996
## 3   tli 0.997
## 4   gfi 0.990
## 5  srmr 0.026
## 
## 
## Parameter Estimate Summary
## 
##                    par parindex      M    SD   MAD    Min    Max lin_int
## 1            g=~PPVT_z        1  1.000 0.000 0.000  1.000  1.000   1.000
## 2             g=~WJ9_z        2  1.305 0.000 0.000  1.305  1.305   1.305
## 3            g=~WJ10_z        3  1.249 0.000 0.000  1.249  1.249   1.249
## 4              g=~DS_z        4  0.871 0.000 0.000  0.871  0.871   0.871
## 5       PPVT_z~~PPVT_z        5  0.564 0.088 0.075  0.476  0.719   0.666
## 6         WJ9_z~~WJ9_z        6  0.317 0.031 0.028  0.275  0.365   0.346
## 7       WJ10_z~~WJ10_z        7  0.409 0.014 0.012  0.392  0.432   0.401
## 8           DS_z~~DS_z        8  0.724 0.036 0.033  0.671  0.762   0.685
## 9                 g~~g        9  0.400 0.005 0.004  0.387  0.405   0.396
## 10            PPVT_z~1       10  0.002 0.000 0.000  0.002  0.002   0.002
## 11             WJ9_z~1       11  0.002 0.000 0.000  0.002  0.002   0.002
## 12            WJ10_z~1       12  0.000 0.000 0.000  0.000  0.000   0.000
## 13              DS_z~1       13 -0.001 0.000 0.000 -0.001 -0.001  -0.001
## 14                 g~1       14  0.000 0.000 0.000  0.000  0.000   0.000
## 15      std__g=~PPVT_z       15  0.646 0.030 0.026  0.592  0.674   0.611
## 16       std__g=~WJ9_z       16  0.826 0.013 0.012  0.807  0.843   0.813
## 17      std__g=~WJ10_z       17  0.777 0.004 0.003  0.771  0.781   0.779
## 18        std__g=~DS_z       18  0.544 0.008 0.008  0.535  0.556   0.552
## 19 std__PPVT_z~~PPVT_z       19  0.582 0.038 0.033  0.545  0.650   0.626
## 20   std__WJ9_z~~WJ9_z       20  0.317 0.022 0.020  0.289  0.349   0.339
## 21 std__WJ10_z~~WJ10_z       21  0.396 0.006 0.005  0.390  0.406   0.393
## 22     std__DS_z~~DS_z       22  0.704 0.009 0.009  0.691  0.714   0.695
## 23           std__g~~g       23  1.000 0.000 0.000  1.000  1.000   1.000
## 24       std__PPVT_z~1       24  0.002 0.000 0.000  0.001  0.002   0.001
## 25        std__WJ9_z~1       25  0.002 0.000 0.000  0.002  0.002   0.002
## 26       std__WJ10_z~1       26  0.000 0.000 0.000  0.000  0.000   0.000
## 27         std__DS_z~1       27 -0.001 0.000 0.000 -0.001 -0.001  -0.001
## 28            std__g~1       28  0.000 0.000 0.000  0.000  0.000   0.000
##    lin_slo SD_nonlin
## 1    0.000     0.000
## 2    0.000     0.000
## 3    0.000     0.000
## 4    0.000     0.000
## 5   -0.325     0.023
## 6   -0.092     0.020
## 7    0.027     0.013
## 8    0.122     0.015
## 9    0.011     0.004
## 10   0.000     0.000
## 11   0.000     0.000
## 12   0.000     0.000
## 13   0.000     0.000
## 14   0.000     0.000
## 15   0.110     0.007
## 16   0.042     0.007
## 17  -0.006     0.004
## 18  -0.028     0.004
## 19  -0.140     0.010
## 20  -0.070     0.012
## 21   0.009     0.006
## 22   0.030     0.004
## 23   0.000     0.000
## 24   0.000     0.000
## 25   0.000     0.000
## 26   0.000     0.000
## 27   0.000     0.000
## 28   0.000     0.000
## 
## Distribution of Moderator: Density and Effective Sample Size
## 
## M=0.062 | SD=0.851
## 
##   moderator   wgt    Neff
## 1    -0.246 0.100 262.694
## 2     0.070 0.167 473.933
## 3     0.264 0.233 572.817
## 4     0.453 0.259 597.446
## 5     0.619 0.241 531.044
## 
##    variable       M      SD     min     max
## 1 moderator   0.062   0.851  -4.930   1.448
## 2       wgt   0.200   0.066   0.100   0.259
## 3      Neff 487.587 134.150 262.694 597.446
## NULL

cat("\nScalar invariance at h = 2.0:\n")

## 
## Scalar invariance at h = 2.0:

lsem_strong_h2 <- lsem.estimate(
  data = as.data.frame(d_apa_cog),
  moderator = "PC1A_z",
  moderator.grid = grid_points,
  lavmodel = lsem_model,
  h = 2.0,
  estimator = "ML",
  meanstructure = TRUE,
  standardized = TRUE,
  par_invariant = strong_constraints
)

## Use joint estimation
## ** Residualize Data
## ** Fit lavaan model
## |*****|
## |lavoptions         ... done.
## lavdata            ... done.
##   Number of observations per group:                   
##     Group 1                                        575
##     Group 2                                        840
##     Group 3                                        936
##     Group 4                                        934
##     Group 5                                        846
## lavsamplestats ... done.
## lavh1              ... start:
## lavh1              ... done.
## lavpartable bounds ... done.
## lavstart           ... done.
## lavmodel           ... done.
## lavoptim           ... start:
## attempt 1 -- default options
##   objective function  = 0.0377520215371713
##   quasi-Newton steps using NLMINB:
##   objective function  = 0.0377520215371713
##   objective function  = 0.0288499951086053
##   objective function  = 0.0168866408402162
##   objective function  = 0.0145662261323792
##   objective function  = 0.0130494283019911
##   objective function  = 0.0120714478582574
##   objective function  = 0.0111501024676180
##   objective function  = 0.0104613484182083
##   objective function  = 0.0098762110854294
##   objective function  = 0.0089458576972912
##   objective function  = 0.0086886259909628
##   objective function  = 0.0084626929895919
##   objective function  = 0.0082883301279253
##   objective function  = 0.0081799915134803
##   objective function  = 0.0081177924549689
##   objective function  = 0.0080909252144403
##   objective function  = 0.0080776480630838
##   objective function  = 0.0080715362430142
##   objective function  = 0.0080688996390857
##   objective function  = 0.0080677099780516
##   objective function  = 0.0080670851390732
##   objective function  = 0.0080667115829195
##   objective function  = 0.0080665020209305
##   objective function  = 0.0080663863678959
##   objective function  = 0.0080663297610787
##   objective function  = 0.0080663101079686
##   objective function  = 0.0080663054139723
##   objective function  = 0.0080663043063940
##   objective function  = 0.0080663039483497
##   objective function  = 0.0080663038368748
##   objective function  = 0.0080663038080177
##   objective function  = 0.0080663037994854
##   objective function  = 0.0080663037962883
##   objective function  = 0.0080663037951706
##   objective function  = 0.0080663037948385
##   objective function  = 0.0080663037948385
##   convergence status (0=ok):  0 
##   nlminb message says:  relative convergence (4) 
##   number of iterations:  33 
##   number of function evaluations [objective, gradient]:  35 34 
## lavoptim    ... done.
## lavimplied  ... done.
## lavloglik   ... done.
## computing TEST for test(s) = standard ... done.
## lavbaseline ... done.
## post check  ... done.
## 
## Use lsem.bootstrap() for computing standard errors!
## 
## -|
## ** Parameter summary

print(summary(lsem_strong_h2)$fitstat_joint)

## -----------------------------------------------------------------
## Local Structural Equation Model 
## 
## sirt 4.2-133 (2025-09-27 12:57:51) 
## lavaan 0.6-19 (2024-09-26 15:50:14 UTC) 
## 
## R version 4.5.1 (2025-06-13 ucrt) x86_64, mingw32 | nodename=HUANG | login=mh198 
## 
## Function 'sirt::lsem.estimate', type='LSEM' 
## 
## 
## Call:
## lsem.estimate(data = as.data.frame(d_apa_cog), moderator = "PC1A_z", 
##     moderator.grid = grid_points, lavmodel = lsem_model, h = 2, 
##     standardized = TRUE, par_invariant = strong_constraints, 
##     estimator = "ML", meanstructure = TRUE)
## 
## Date of Analysis: 2026-06-07 00:40:02.241883 
## Time difference of 4.450271 secs
## Computation Time: 4.450271 
## 
## Number of observations in datasets = 1538 
## Used observations in analysis = 1524 
## Used sampling weights: FALSE 
## Bandwidth factor = 2 
## Bandwidth = 0.392 
## Number of focal points for moderator = 5 
## 
## Used joint estimation: TRUE 
## Used sufficient statistics: TRUE 
## Used local linear smoothing: TRUE 
## Used pseudo weights: FALSE 
## Used lavaan package: TRUE 
## Used lavaan.survey package: FALSE 
## 
## Mean structure modelled: TRUE 
## 
## lavaan Model
## g =~ PPVT_z + WJ9_z + WJ10_z + DS_z
## 
## Global Fit Statistics for Joint Estimation
## 
##    stat value
## 1 rmsea 0.030
## 2   cfi 0.994
## 3   tli 0.995
## 4   gfi 0.992
## 5  srmr 0.023
## 
## 
## Parameter Estimate Summary
## 
##                    par parindex      M    SD   MAD    Min    Max lin_int
## 1            g=~PPVT_z        1  1.000 0.000 0.000  1.000  1.000   1.000
## 2             g=~WJ9_z        2  1.306 0.000 0.000  1.306  1.306   1.306
## 3            g=~WJ10_z        3  1.254 0.000 0.000  1.254  1.254   1.254
## 4              g=~DS_z        4  0.871 0.000 0.000  0.871  0.871   0.871
## 5       PPVT_z~~PPVT_z        5  0.570 0.070 0.060  0.492  0.708   0.654
## 6         WJ9_z~~WJ9_z        6  0.326 0.035 0.028  0.288  0.407   0.368
## 7       WJ10_z~~WJ10_z        7  0.398 0.014 0.010  0.362  0.409   0.391
## 8           DS_z~~DS_z        8  0.715 0.018 0.015  0.676  0.731   0.695
## 9                 g~~g        9  0.397 0.004 0.002  0.386  0.400   0.394
## 10            PPVT_z~1       10 -0.002 0.000 0.000 -0.002 -0.002  -0.002
## 11             WJ9_z~1       11  0.001 0.000 0.000  0.001  0.001   0.001
## 12            WJ10_z~1       12 -0.002 0.000 0.000 -0.002 -0.002  -0.002
## 13              DS_z~1       13 -0.002 0.000 0.000 -0.002 -0.002  -0.002
## 14                 g~1       14  0.000 0.000 0.000  0.000  0.000   0.000
## 15      std__g=~PPVT_z       15  0.642 0.024 0.020  0.594  0.668   0.614
## 16       std__g=~WJ9_z       16  0.822 0.015 0.012  0.786  0.838   0.804
## 17      std__g=~WJ10_z       17  0.782 0.004 0.003  0.778  0.792   0.783
## 18        std__g=~DS_z       18  0.544 0.004 0.003  0.541  0.550   0.548
## 19 std__PPVT_z~~PPVT_z       19  0.587 0.030 0.026  0.554  0.647   0.623
## 20   std__WJ9_z~~WJ9_z       20  0.324 0.025 0.019  0.298  0.382   0.353
## 21 std__WJ10_z~~WJ10_z       21  0.389 0.006 0.005  0.373  0.394   0.386
## 22     std__DS_z~~DS_z       22  0.704 0.004 0.003  0.698  0.707   0.699
## 23           std__g~~g       23  1.000 0.000 0.000  1.000  1.000   1.000
## 24       std__PPVT_z~1       24 -0.002 0.000 0.000 -0.002 -0.002  -0.002
## 25        std__WJ9_z~1       25  0.001 0.000 0.000  0.001  0.001   0.001
## 26       std__WJ10_z~1       26 -0.002 0.000 0.000 -0.002 -0.002  -0.002
## 27         std__DS_z~1       27 -0.002 0.000 0.000 -0.002 -0.002  -0.002
## 28            std__g~1       28  0.000 0.000 0.000  0.000  0.000   0.000
##    lin_slo SD_nonlin
## 1    0.000     0.000
## 2    0.000     0.000
## 3    0.000     0.000
## 4    0.000     0.000
## 5   -0.266     0.008
## 6   -0.133     0.003
## 7    0.024     0.012
## 8    0.064     0.006
## 9    0.009     0.003
## 10   0.000     0.000
## 11   0.000     0.000
## 12   0.000     0.000
## 13   0.000     0.000
## 14   0.000     0.000
## 15   0.090     0.002
## 16   0.057     0.002
## 17  -0.006     0.004
## 18  -0.013     0.001
## 19  -0.114     0.003
## 20  -0.093     0.003
## 21   0.009     0.006
## 22   0.014     0.001
## 23   0.000     0.000
## 24   0.000     0.000
## 25   0.000     0.000
## 26   0.000     0.000
## 27   0.000     0.000
## 28   0.000     0.000
## 
## Distribution of Moderator: Density and Effective Sample Size
## 
## M=0.062 | SD=0.851
## 
##   moderator   wgt    Neff
## 1    -0.246 0.100 580.209
## 2     0.070 0.167 848.175
## 3     0.264 0.233 945.139
## 4     0.453 0.259 942.705
## 5     0.619 0.241 853.379
## 
##    variable       M      SD     min     max
## 1 moderator   0.062   0.851  -4.930   1.448
## 2       wgt   0.200   0.066   0.100   0.259
## 3      Neff 833.921 149.294 580.209 945.139
## NULL

# ==============================================================
# 12. Helper: WLS with Within-Model Standardization
# ==============================================================

run_model <- function(formula, data, label) {
  all_vars <- all.vars(formula)

  var_map <- c(
    "g"              = "g_raw",
    "PC1A_z"         = "K5PC1A",
    "age_c"          = "age",
    "ave_WAIS_res_z" = "ave_WAIS_raw",
    "M_WAIS_z"       = "M_WAIS_res",
    "F_WAIS_z"       = "F_WAIS_res",
    "SES"            = "SES_raw"
  )

  raw_vars <- all_vars
  for (i in seq_along(raw_vars)) {
    if (raw_vars[i] %in% names(var_map)) {
      raw_vars[i] <- var_map[raw_vars[i]]
    }
  }

  d_complete <- data[complete.cases(data[, raw_vars]) & !is.na(data$wt), ]

  if ("PC1A_z" %in% all_vars)         d_complete$PC1A_z         <- as.vector(scale(d_complete$K5PC1A))
  if ("g" %in% all_vars)              d_complete$g              <- as.vector(scale(d_complete$g_raw))
  if ("age_c" %in% all_vars)          d_complete$age_c          <- as.vector(scale(d_complete$age))
  if ("ave_WAIS_res_z" %in% all_vars) d_complete$ave_WAIS_res_z <- as.vector(scale(d_complete$ave_WAIS_raw))
  if ("M_WAIS_z" %in% all_vars)       d_complete$M_WAIS_z       <- as.vector(scale(d_complete$M_WAIS_res))
  if ("F_WAIS_z" %in% all_vars)       d_complete$F_WAIS_z       <- as.vector(scale(d_complete$F_WAIS_res))
  if ("SES" %in% all_vars)            d_complete$SES            <- as.vector(scale(d_complete$SES_raw))

  for (v in c("PPVT", "WJ9", "WJ10")) {
    if (v %in% all_vars && v %in% names(d_complete)) {
      d_complete[[v]] <- as.vector(scale(d_complete[[v]]))
    }
  }

  model <- lm(formula, data = d_complete, weights = wt)
  robust <- coeftest(model, vcov = vcovHC(model, type = "HC1"))

  cat("\n", paste(rep("=", 80), collapse = ""), "\n")
  cat(label, "\n")
  cat("R2 =", round(summary(model)$r.squared, 4),
      " Adj-R2 =", round(summary(model)$adj.r.squared, 4), "\n")
  cat(paste(rep("=", 80), collapse = ""), "\n")
  cat(sprintf("%20s  %9s  %8s  %8s  %10s\n", "Variable", "B", "SE", "t", "p"))
  cat(paste(rep("-", 60), collapse = ""), "\n")

  for (i in 1:nrow(robust)) {
    var_name <- rownames(robust)[i]
    b  <- robust[i, 1]; se <- robust[i, 2]; t  <- robust[i, 3]; p  <- robust[i, 4]
    p_str <- ifelse(p >= 0.0001, sprintf("%.4f", p), sprintf("%.2e", p))
    cat(sprintf("%20s  %+9.4f  %8.4f  %+8.3f  %10s\n", var_name, b, se, t, p_str))
  }
  cat("N =", nrow(d_complete), "\n")
  invisible(model)
}

# ==============================================================
# 13. APA: Child g Models
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("AFRO-PAN-AMERICAN: Child g as DV\n")

## AFRO-PAN-AMERICAN: Child g as DV

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

run_model(g ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c, d_apa,
          "APA M1: PC1A + SIRE + Generation + Demographics")

## 
##  ================================================================================ 
## APA M1: PC1A + SIRE + Generation + Demographics 
## R2 = 0.0192  Adj-R2 = 0.014 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    +0.0047    0.0553    +0.085      0.9321
##               PC1A_z    -0.1285    0.0475    -2.704      0.0069
##               sireBW    -0.3033    0.2118    -1.432      0.1524
##               sireBH    +0.1106    0.1858    +0.595      0.5518
##               sireBO    +0.0045    0.1437    +0.031      0.9751
##          gen_2ndTRUE    +0.4228    0.2187    +1.933      0.0534
##          gen_3rdTRUE    +0.2489    0.2398    +1.038      0.2994
##                  sex    -0.0099    0.0678    -0.146      0.8838
##                age_c    -0.0179    0.0307    -0.583      0.5598
## N = 1538

run_model(g ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c + ave_WAIS_res_z, d_apa,
          "APA M2: + Parental WAIS")

## 
##  ================================================================================ 
## APA M2: + Parental WAIS 
## R2 = 0.0394  Adj-R2 = 0.0334 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    +0.0101    0.0575    +0.175      0.8609
##               PC1A_z    -0.1026    0.0481    -2.132      0.0332
##               sireBW    -0.3063    0.2211    -1.385      0.1661
##               sireBH    +0.1611    0.2112    +0.763      0.4458
##               sireBO    +0.0937    0.1385    +0.676      0.4990
##          gen_2ndTRUE    +0.4597    0.3040    +1.512      0.1307
##          gen_3rdTRUE    +0.3203    0.2457    +1.304      0.1926
##                  sex    -0.0164    0.0712    -0.230      0.8181
##                age_c    -0.0205    0.0314    -0.654      0.5132
##       ave_WAIS_res_z    +0.1515    0.0345    +4.393    1.20e-05
## N = 1450

run_model(g ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c + ave_WAIS_res_z + single + SES, d_apa,
          "APA M3: + SES + Single Parent")

## 
##  ================================================================================ 
## APA M3: + SES + Single Parent 
## R2 = 0.1358  Adj-R2 = 0.1292 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0054    0.0698    -0.078      0.9379
##               PC1A_z    -0.0798    0.0471    -1.694      0.0904
##               sireBW    -0.1868    0.2134    -0.875      0.3816
##               sireBH    +0.3399    0.1940    +1.753      0.0799
##               sireBO    +0.1100    0.1284    +0.856      0.3920
##          gen_2ndTRUE    +0.2873    0.2515    +1.143      0.2534
##          gen_3rdTRUE    +0.2694    0.2695    +1.000      0.3176
##                  sex    +0.0062    0.0671    +0.093      0.9260
##                age_c    -0.0051    0.0299    -0.169      0.8655
##       ave_WAIS_res_z    +0.0698    0.0345    +2.021      0.0434
##               single    -0.0442    0.0642    -0.688      0.4916
##                  SES    +0.3178    0.0312   +10.201    1.23e-23
## N = 1450

# ==============================================================
# 13b. Unweighted Model Sensitivity
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("ROBUSTNESS: Unweighted models\n")

## ROBUSTNESS: Unweighted models

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

d_uw <- d_apa[complete.cases(d_apa[, c("g_raw", "K5PC1A", "sire",
               "gen_2nd", "gen_3rd", "sex", "age")]), ]
d_uw$PC1A_z <- as.vector(scale(d_uw$K5PC1A))
d_uw$g <- as.vector(scale(d_uw$g_raw))
d_uw$age_c <- as.vector(scale(d_uw$age))

m_uw <- lm(g ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c, data = d_uw)
robust_uw <- coeftest(m_uw, vcov = vcovHC(m_uw, type = "HC1"))
cat("APA unweighted M1: PC1A coefficient:\n")

## APA unweighted M1: PC1A coefficient:

print(robust_uw["PC1A_z", ])

##     Estimate   Std. Error      t value     Pr(>|t|) 
## -0.118535111  0.037024865 -3.201500168  0.001395249

cat("N =", nrow(d_uw), "\n")

## N = 1538

# ==============================================================
# 14. APA: Average Parental WAIS as DV (excl BW)
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("APA (excl BW): Parental WAIS as DV\n")

## APA (excl BW): Parental WAIS as DV

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

run_model(ave_WAIS_res_z ~ PC1A_z + sire + gen_2nd + gen_3rd, d_apa_noBW,
          "APA WAIS M1: PC1A + SIRE + Generation")

## 
##  ================================================================================ 
## APA WAIS M1: PC1A + SIRE + Generation 
## R2 = 0.0151  Adj-R2 = 0.0116 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    +0.0558    0.0392    +1.423      0.1549
##               PC1A_z    -0.1165    0.0358    -3.259      0.0011
##               sireBH    -0.2848    0.1385    -2.057      0.0399
##               sireBO    -0.2225    0.1519    -1.465      0.1433
##          gen_2ndTRUE    +0.0986    0.1309    +0.754      0.4512
##          gen_3rdTRUE    -0.4637    0.2154    -2.153      0.0315
## N = 1393

# ==============================================================
# 15. APA: Mother & Father WAIS separately (full APA, parent self-ID)
# ==============================================================
# Use parent's own racial self-identification, not couple SIRE.
# Full APA sample (including BW) — child ancestry is the predictor,
# so both parents' WAIS scores are informative in their own models.

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("APA: Mother & Father WAIS separately (full sample, parent self-ID)\n")

## APA: Mother & Father WAIS separately (full sample, parent self-ID)

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

run_model(M_WAIS_z ~ PC1A_z + mother_black_sire + gen_2nd + gen_3rd, d_apa,
          "APA Mother WAIS M1 (full sample, parent self-ID)")

## 
##  ================================================================================ 
## APA Mother WAIS M1 (full sample, parent self-ID) 
## R2 = 0.0102  Adj-R2 = 0.0074 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    +0.0564    0.1570    +0.359      0.7194
##               PC1A_z    -0.1002    0.0494    -2.028      0.0428
##    mother_black_sire    -0.0273    0.1663    -0.164      0.8697
##          gen_2ndTRUE    +0.0956    0.2313    +0.413      0.6794
##          gen_3rdTRUE    -0.0389    0.3212    -0.121      0.9035
## N = 1410

run_model(F_WAIS_z ~ PC1A_z + father_black_sire + gen_2nd + gen_3rd, d_apa,
          "APA Father WAIS M1 (full sample, parent self-ID)")

## 
##  ================================================================================ 
## APA Father WAIS M1 (full sample, parent self-ID) 
## R2 = 0.0116  Adj-R2 = 0.0079 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    +0.0502    0.1120    +0.448      0.6541
##               PC1A_z    -0.0816    0.0315    -2.590      0.0097
##    father_black_sire    -0.0259    0.1179    -0.220      0.8260
##          gen_2ndTRUE    -0.0114    0.1315    -0.086      0.9311
##          gen_3rdTRUE    -0.7400    0.3857    -1.919      0.0553
## N = 1076

# ==============================================================
# 16. APA: SES as DV (excl BW)
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("APA (excl BW): SES as DV\n")

## APA (excl BW): SES as DV

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

run_model(SES ~ PC1A_z + sire + gen_2nd + gen_3rd, d_apa_noBW,
          "APA SES M1")

## 
##  ================================================================================ 
## APA SES M1 
## R2 = 0.0228  Adj-R2 = 0.0195 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    +0.0701    0.0359    +1.953      0.0510
##               PC1A_z    -0.0980    0.0354    -2.768      0.0057
##               sireBH    -0.5652    0.1762    -3.208      0.0014
##               sireBO    -0.1239    0.1089    -1.138      0.2554
##          gen_2ndTRUE    +0.5087    0.1679    +3.030      0.0025
##          gen_3rdTRUE    +0.0188    0.1853    +0.101      0.9194
## N = 1477

# ==============================================================
# 17. APA: Individual Tests (Model 1 spec)
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("APA: Individual Tests (Model 1 spec)\n")

## APA: Individual Tests (Model 1 spec)

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

run_model(PPVT ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c, d_apa, "APA: PPVT")

## 
##  ================================================================================ 
## APA: PPVT 
## R2 = 0.0243  Adj-R2 = 0.0192 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0165    0.0526    -0.314      0.7537
##               PC1A_z    -0.1618    0.0508    -3.188      0.0015
##               sireBW    -0.3483    0.2267    -1.537      0.1246
##               sireBH    +0.1403    0.2376    +0.590      0.5551
##               sireBO    -0.0264    0.1595    -0.166      0.8684
##          gen_2ndTRUE    +0.3827    0.2128    +1.799      0.0723
##          gen_3rdTRUE    +0.0645    0.3968    +0.163      0.8709
##                  sex    +0.0210    0.0661    +0.319      0.7501
##                age_c    -0.0286    0.0316    -0.905      0.3658
## N = 1531

run_model(WJ9 ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c, d_apa, "APA: WJ Applied Problems")

## 
##  ================================================================================ 
## APA: WJ Applied Problems 
## R2 = 0.0315  Adj-R2 = 0.0264 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.1509    0.0538    -2.803      0.0051
##               PC1A_z    -0.0791    0.0452    -1.750      0.0803
##               sireBW    -0.2980    0.2103    -1.417      0.1567
##               sireBH    -0.0106    0.1556    -0.068      0.9456
##               sireBO    +0.0998    0.1559    +0.640      0.5221
##          gen_2ndTRUE    +0.2954    0.1849    +1.597      0.1104
##          gen_3rdTRUE    +0.1111    0.1836    +0.605      0.5453
##                  sex    +0.3031    0.0641    +4.731    2.44e-06
##                age_c    -0.0496    0.0306    -1.618      0.1059
## N = 1523

run_model(WJ10 ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c, d_apa, "APA: WJ Letter-Word")

## 
##  ================================================================================ 
## APA: WJ Letter-Word 
## R2 = 0.0222  Adj-R2 = 0.0171 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0372    0.0569    -0.655      0.5128
##               PC1A_z    -0.1014    0.0438    -2.315      0.0208
##               sireBW    -0.1976    0.1991    -0.992      0.3211
##               sireBH    +0.0681    0.1506    +0.452      0.6511
##               sireBO    -0.0490    0.1343    -0.365      0.7151
##          gen_2ndTRUE    +0.3742    0.2112    +1.772      0.0766
##          gen_3rdTRUE    +0.4718    0.1862    +2.533      0.0114
##                  sex    +0.1105    0.0686    +1.612      0.1072
##                age_c    -0.0881    0.0293    -3.004      0.0027
## N = 1524

# ==============================================================
# 18. HAA: Child g Models
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("HISTORICAL AFRICAN AMERICAN: Child g as DV\n")

## HISTORICAL AFRICAN AMERICAN: Child g as DV

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

run_model(g ~ PC1A_z + sex + age_c, d_haa, "HAA M1: PC1A + Demographics")

## 
##  ================================================================================ 
## HAA M1: PC1A + Demographics 
## R2 = 0.009  Adj-R2 = 0.0067 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    +0.0025    0.0588    +0.042      0.9664
##               PC1A_z    -0.1014    0.0317    -3.199      0.0014
##                  sex    +0.0268    0.0726    +0.369      0.7121
##                age_c    +0.0067    0.0334    +0.199      0.8423
## N = 1287

run_model(g ~ PC1A_z + sex + age_c + ave_WAIS_res_z, d_haa, "HAA M2: + Parental WAIS")

## 
##  ================================================================================ 
## HAA M2: + Parental WAIS 
## R2 = 0.036  Adj-R2 = 0.0329 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0143    0.0613    -0.234      0.8152
##               PC1A_z    -0.0936    0.0327    -2.862      0.0043
##                  sex    +0.0559    0.0757    +0.739      0.4599
##                age_c    +0.0043    0.0336    +0.128      0.8985
##       ave_WAIS_res_z    +0.1675    0.0368    +4.547    5.99e-06
## N = 1234

run_model(g ~ PC1A_z + sex + age_c + ave_WAIS_res_z + single + SES, d_haa, "HAA M3: + SES + Single")

## 
##  ================================================================================ 
## HAA M3: + SES + Single 
## R2 = 0.1212  Adj-R2 = 0.1169 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0080    0.0774    -0.104      0.9173
##               PC1A_z    -0.0788    0.0323    -2.437      0.0150
##                  sex    +0.0606    0.0731    +0.829      0.4074
##                age_c    +0.0196    0.0329    +0.596      0.5510
##       ave_WAIS_res_z    +0.0880    0.0365    +2.412      0.0160
##               single    -0.0620    0.0710    -0.873      0.3826
##                  SES    +0.2917    0.0300    +9.731    1.32e-21
## N = 1234

# ==============================================================
# 19. HAA: Parental WAIS as DV
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("HAA: Parental WAIS as DV\n")

## HAA: Parental WAIS as DV

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

run_model(ave_WAIS_res_z ~ PC1A_z, d_haa, "HAA WAIS M1")

## 
##  ================================================================================ 
## HAA WAIS M1 
## R2 = 0.0091  Adj-R2 = 0.0083 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    +0.0456    0.0399    +1.144      0.2530
##               PC1A_z    -0.0979    0.0389    -2.514      0.0121
## N = 1238

# ==============================================================
# 20. HAA: SES as DV
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("HAA: SES as DV\n")

## HAA: SES as DV

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

run_model(SES ~ PC1A_z, d_haa, "HAA SES M1")

## 
##  ================================================================================ 
## HAA SES M1 
## R2 = 0.0062  Adj-R2 = 0.0055 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    +0.0787    0.0371    +2.122      0.0340
##               PC1A_z    -0.0836    0.0400    -2.090      0.0368
## N = 1292

# ==============================================================
# 21. HAA: Individual Tests (Model 1 spec)
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("HAA: Individual Tests (Model 1 spec)\n")

## HAA: Individual Tests (Model 1 spec)

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

run_model(PPVT ~ PC1A_z + sex + age_c, d_haa, "HAA: PPVT")

## 
##  ================================================================================ 
## HAA: PPVT 
## R2 = 0.0158  Adj-R2 = 0.0135 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0442    0.0558    -0.791      0.4290
##               PC1A_z    -0.1229    0.0317    -3.873      0.0001
##                  sex    +0.1079    0.0719    +1.501      0.1337
##                age_c    -0.0139    0.0347    -0.400      0.6893
## N = 1281

run_model(WJ9 ~ PC1A_z + sex + age_c, d_haa, "HAA: WJ Applied Problems")

## 
##  ================================================================================ 
## HAA: WJ Applied Problems 
## R2 = 0.0233  Adj-R2 = 0.021 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.1436    0.0565    -2.544      0.0111
##               PC1A_z    -0.0578    0.0321    -1.803      0.0717
##                  sex    +0.3011    0.0689    +4.372    1.33e-05
##                age_c    -0.0193    0.0332    -0.579      0.5625
## N = 1273

run_model(WJ10 ~ PC1A_z + sex + age_c, d_haa, "HAA: WJ Letter-Word")

## 
##  ================================================================================ 
## HAA: WJ Letter-Word 
## R2 = 0.013  Adj-R2 = 0.0107 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0305    0.0605    -0.504      0.6145
##               PC1A_z    -0.0788    0.0311    -2.534      0.0114
##                  sex    +0.1241    0.0740    +1.677      0.0937
##                age_c    -0.0770    0.0317    -2.426      0.0154
## N = 1277

# ==============================================================
# 21b. Formal Mediation: PC1A -> Child g -> SES
# ==============================================================

# APA (excl BW): PC1A -> g -> SES 

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("MEDIATION: PC1A -> Child g -> SES\n")

## MEDIATION: PC1A -> Child g -> SES

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

# Prepare APA_noBW data (SES models exclude BW)

d_med_apa <- d_apa_noBW[complete.cases(d_apa_noBW[, c(
  "g_raw", "K5PC1A", "SES_raw", "sire", "gen_2nd", "gen_3rd", "sex", "age"
)]) & !is.na(d_apa_noBW$wt), ]
d_med_apa$PC1A_z <- as.vector(scale(d_med_apa$K5PC1A))
d_med_apa$g <- as.vector(scale(d_med_apa$g_raw))
d_med_apa$SES <- as.vector(scale(d_med_apa$SES_raw))
d_med_apa$age_c <- as.vector(scale(d_med_apa$age))

# Model M: PC1A -> g (mediator model)

med_M_apa <- lm(g ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c,
                 data = d_med_apa, weights = wt)

# Model Y: g -> SES, controlling for PC1A (outcome model)

out_Y_apa <- lm(SES ~ g + PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c,
                 data = d_med_apa, weights = wt)

# Mediation

set.seed(99999)
med_apa <- mediate(med_M_apa, out_Y_apa, treat = "PC1A_z", mediator = "g",
                   boot = TRUE, sims = 1000)

## Running nonparametric bootstrap

cat("\nAPA (excl BW): PC1A -> g -> SES\n")

## 
## APA (excl BW): PC1A -> g -> SES

summary(med_apa)

## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                  Estimate 95% CI Lower 95% CI Upper p-value   
## ACME           -0.0294316   -0.0522345   -0.0081046   0.006 **
## ADE            -0.0647590   -0.1340491    0.0076539   0.078 . 
## Total Effect   -0.0941907   -0.1643797   -0.0158924   0.018 * 
## Prop. Mediated  0.3124687    0.0761676    1.1046894   0.020 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 1472 
## 
## 
## Simulations: 1000

# HAA: PC1A -> g -> SES 

d_med_haa <- d_haa[complete.cases(d_haa[, c(
  "g_raw", "K5PC1A", "SES_raw", "sex", "age"
)]) & !is.na(d_haa$wt), ]
d_med_haa$PC1A_z <- as.vector(scale(d_med_haa$K5PC1A))
d_med_haa$g <- as.vector(scale(d_med_haa$g_raw))
d_med_haa$SES <- as.vector(scale(d_med_haa$SES_raw))
d_med_haa$age_c <- as.vector(scale(d_med_haa$age))

med_M_haa <- lm(g ~ PC1A_z + sex + age_c, data = d_med_haa, weights = wt)
out_Y_haa <- lm(SES ~ g + PC1A_z + sex + age_c, data = d_med_haa, weights = wt)

set.seed(99999)
med_haa <- mediate(med_M_haa, out_Y_haa, treat = "PC1A_z", mediator = "g",
                   boot = TRUE, sims = 1000)

## Running nonparametric bootstrap

cat("\nHAA: PC1A -> g -> SES\n")

## 
## HAA: PC1A -> g -> SES

summary(med_haa)

## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                  Estimate 95% CI Lower 95% CI Upper p-value   
## ACME           -0.0321070   -0.0533454   -0.0111253   0.002 **
## ADE            -0.0460194   -0.1230032    0.0376638   0.254   
## Total Effect   -0.0781264   -0.1559398    0.0051217   0.060 . 
## Prop. Mediated  0.4109626   -0.4618822    2.5562452   0.062 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 1287 
## 
## 
## Simulations: 1000

# Also test with PVT (adolescent verbal ability analogue) 

# For temporal ordering: PVT at age 9 predicts SES at age 9 

d_med_apa2 <- d_apa_noBW[complete.cases(d_apa_noBW[, c(
  "PPVT", "K5PC1A", "SES_raw", "sire", "gen_2nd", "gen_3rd", "sex", "age"
)]) & !is.na(d_apa_noBW$wt), ]
d_med_apa2$PC1A_z <- as.vector(scale(d_med_apa2$K5PC1A))
d_med_apa2$PPVT_z <- as.vector(scale(d_med_apa2$PPVT))
d_med_apa2$SES <- as.vector(scale(d_med_apa2$SES_raw))
d_med_apa2$age_c <- as.vector(scale(d_med_apa2$age))

med_M_apa2 <- lm(PPVT_z ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c,
                  data = d_med_apa2, weights = wt)
out_Y_apa2 <- lm(SES ~ PPVT_z + PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c,
                  data = d_med_apa2, weights = wt)

set.seed(99999)
med_apa2 <- mediate(med_M_apa2, out_Y_apa2, treat = "PC1A_z",
                    mediator = "PPVT_z", boot = TRUE, sims = 1000)

## Running nonparametric bootstrap

cat("\nAPA (excl BW): PC1A -> PPVT -> SES\n")

## 
## APA (excl BW): PC1A -> PPVT -> SES

summary(med_apa2)

## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                 Estimate 95% CI Lower 95% CI Upper p-value    
## ACME           -0.036926    -0.064245    -0.012995  <2e-16 ***
## ADE            -0.057619    -0.125074     0.011993   0.110    
## Total Effect   -0.094545    -0.162477    -0.022694   0.012 *  
## Prop. Mediated  0.390567     0.122464     1.175160   0.012 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 1466 
## 
## 
## Simulations: 1000

# WJ9 mediation (check whether the mediation is driven by verbal ability or is general across cognitive domains)

d_med_wj9 <- d_apa_noBW[complete.cases(d_apa_noBW[, c(
  "WJ9", "K5PC1A", "SES_raw", "sire", "gen_2nd", "gen_3rd", "sex", "age"
)]) & !is.na(d_apa_noBW$wt), ]
d_med_wj9$PC1A_z <- as.vector(scale(d_med_wj9$K5PC1A))
d_med_wj9$WJ9_z <- as.vector(scale(d_med_wj9$WJ9))
d_med_wj9$SES <- as.vector(scale(d_med_wj9$SES_raw))
d_med_wj9$age_c <- as.vector(scale(d_med_wj9$age))

med_M_wj9 <- lm(WJ9_z ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c,
                 data = d_med_wj9, weights = wt)
out_Y_wj9 <- lm(SES ~ WJ9_z + PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c,
                 data = d_med_wj9, weights = wt)
set.seed(99999)
med_wj9 <- mediate(med_M_wj9, out_Y_wj9, treat = "PC1A_z",
                   mediator = "WJ9_z", boot = TRUE, sims = 1000)

## Running nonparametric bootstrap

cat("\nAPA (excl BW): PC1A -> WJ9 -> SES\n")

## 
## APA (excl BW): PC1A -> WJ9 -> SES

summary(med_wj9)

## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                   Estimate 95% CI Lower 95% CI Upper p-value   
## ACME           -0.01515051  -0.03143462   0.00015771   0.056 . 
## ADE            -0.07939725  -0.15599497  -0.01320386   0.024 * 
## Total Effect   -0.09454775  -0.17160220  -0.02401644   0.006 **
## Prop. Mediated  0.16024184  -0.01010715   0.55842552   0.062 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 1457 
## 
## 
## Simulations: 1000

# WJ10 mediation
d_med_wj10 <- d_apa_noBW[complete.cases(d_apa_noBW[, c(
  "WJ10", "K5PC1A", "SES_raw", "sire", "gen_2nd", "gen_3rd", "sex", "age"
)]) & !is.na(d_apa_noBW$wt), ]
d_med_wj10$PC1A_z <- as.vector(scale(d_med_wj10$K5PC1A))
d_med_wj10$WJ10_z <- as.vector(scale(d_med_wj10$WJ10))
d_med_wj10$SES <- as.vector(scale(d_med_wj10$SES_raw))
d_med_wj10$age_c <- as.vector(scale(d_med_wj10$age))

med_M_wj10 <- lm(WJ10_z ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c,
                  data = d_med_wj10, weights = wt)
out_Y_wj10 <- lm(SES ~ WJ10_z + PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c,
                  data = d_med_wj10, weights = wt)
set.seed(99999)
med_wj10 <- mediate(med_M_wj10, out_Y_wj10, treat = "PC1A_z",
                    mediator = "WJ10_z", boot = TRUE, sims = 1000)

## Running nonparametric bootstrap

cat("\nAPA (excl BW): PC1A -> WJ10 -> SES\n")

## 
## APA (excl BW): PC1A -> WJ10 -> SES

summary(med_wj10)

## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                  Estimate 95% CI Lower 95% CI Upper p-value  
## ACME           -0.0206058   -0.0389887   -0.0040922   0.014 *
## ADE            -0.0720674   -0.1385549   -0.0073242   0.036 *
## Total Effect   -0.0926732   -0.1624166   -0.0248395   0.012 *
## Prop. Mediated  0.2223486    0.0387608    0.6955832   0.022 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 1458 
## 
## 
## Simulations: 1000

# ==============================================================
# 21c. Mediation: PC1A -> Parental WAIS -> Child g
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("MEDIATION: PC1A -> Parental WAIS -> Child g\n")

## MEDIATION: PC1A -> Parental WAIS -> Child g

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

# APA (exclude BW)

d_med_pw_apa <- d_apa_noBW[complete.cases(d_apa_noBW[, c(
  "g_raw", "K5PC1A", "ave_WAIS_raw", "sire", "gen_2nd", "gen_3rd", "sex", "age"
)]) & !is.na(d_apa_noBW$wt), ]
d_med_pw_apa$PC1A_z <- as.vector(scale(d_med_pw_apa$K5PC1A))
d_med_pw_apa$g <- as.vector(scale(d_med_pw_apa$g_raw))
d_med_pw_apa$W <- as.vector(scale(d_med_pw_apa$ave_WAIS_raw))
d_med_pw_apa$age_c <- as.vector(scale(d_med_pw_apa$age))

# M: PC1A -> parental WAIS

med_M_pw_apa <- lm(W ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c,
                    data = d_med_pw_apa, weights = wt)

# Y: parental WAIS -> g, controlling for PC1A

out_Y_pw_apa <- lm(g ~ W + PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c,
                    data = d_med_pw_apa, weights = wt)

set.seed(88888)
med_pw_apa <- mediate(med_M_pw_apa, out_Y_pw_apa, treat = "PC1A_z",
                      mediator = "W", boot = TRUE, sims = 1000)

## Running nonparametric bootstrap

cat("\nAPA (excl BW): PC1A -> Parental WAIS -> Child g\n")

## 
## APA (excl BW): PC1A -> Parental WAIS -> Child g

summary(med_pw_apa)

## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                  Estimate 95% CI Lower 95% CI Upper p-value   
## ACME           -0.0168859   -0.0319882   -0.0049938   0.002 **
## ADE            -0.0716505   -0.1363979   -0.0028324   0.038 * 
## Total Effect   -0.0885363   -0.1568614   -0.0176233   0.018 * 
## Prop. Mediated  0.1907226    0.0522423    0.7133941   0.020 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 1389 
## 
## 
## Simulations: 1000

# HAA

d_med_pw_haa <- d_haa[complete.cases(d_haa[, c(
  "g_raw", "K5PC1A", "ave_WAIS_raw", "sex", "age"
)]) & !is.na(d_haa$wt), ]
d_med_pw_haa$PC1A_z <- as.vector(scale(d_med_pw_haa$K5PC1A))
d_med_pw_haa$g <- as.vector(scale(d_med_pw_haa$g_raw))
d_med_pw_haa$W <- as.vector(scale(d_med_pw_haa$ave_WAIS_raw))
d_med_pw_haa$age_c <- as.vector(scale(d_med_pw_haa$age))

med_M_pw_haa <- lm(W ~ PC1A_z + sex + age_c, data = d_med_pw_haa, weights = wt)
out_Y_pw_haa <- lm(g ~ W + PC1A_z + sex + age_c, data = d_med_pw_haa, weights = wt)

set.seed(88888)
med_pw_haa <- mediate(med_M_pw_haa, out_Y_pw_haa, treat = "PC1A_z",
                      mediator = "W", boot = TRUE, sims = 1000)

## Running nonparametric bootstrap

cat("\nHAA: PC1A -> Parental WAIS -> Child g\n")

## 
## HAA: PC1A -> Parental WAIS -> Child g

summary(med_pw_haa)

## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                  Estimate 95% CI Lower 95% CI Upper p-value    
## ACME           -0.0143167   -0.0284416   -0.0019552   0.028 *  
## ADE            -0.0935652   -0.1586100   -0.0265247   0.008 ** 
## Total Effect   -0.1078818   -0.1758379   -0.0428944  <2e-16 ***
## Prop. Mediated  0.1327068    0.0168790    0.3966624   0.028 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 1234 
## 
## 
## Simulations: 1000

# ==============================================================
# 21d. Non-Linearity Test: Quadratic Ancestry Term
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("ROBUSTNESS: Quadratic ancestry term\n")

## ROBUSTNESS: Quadratic ancestry term

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

run_model(g ~ PC1A_z + I(PC1A_z^2) + sire + gen_2nd + gen_3rd + sex + age_c,
          d_apa, "APA: g ~ PC1A + PC1A² + controls")

## 
##  ================================================================================ 
## APA: g ~ PC1A + PC1A² + controls 
## R2 = 0.02  Adj-R2 = 0.0142 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0086    0.0578    -0.149      0.8816
##               PC1A_z    -0.1035    0.0566    -1.828      0.0678
##          I(PC1A_z^2)    +0.0230    0.0268    +0.856      0.3922
##               sireBW    -0.4721    0.2760    -1.710      0.0874
##               sireBH    +0.1028    0.1905    +0.539      0.5896
##               sireBO    -0.0091    0.1398    -0.065      0.9480
##          gen_2ndTRUE    +0.4123    0.2185    +1.887      0.0594
##          gen_3rdTRUE    +0.2570    0.2391    +1.075      0.2826
##                  sex    -0.0121    0.0679    -0.179      0.8582
##                age_c    -0.0195    0.0309    -0.632      0.5275
## N = 1538

run_model(g ~ PC1A_z + I(PC1A_z^2) + sex + age_c,
          d_haa, "HAA: g ~ PC1A + PC1A² + controls")

## 
##  ================================================================================ 
## HAA: g ~ PC1A + PC1A² + controls 
## R2 = 0.009  Adj-R2 = 0.0059 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0000    0.0632    -0.000      0.9999
##               PC1A_z    -0.0978    0.0433    -2.260      0.0240
##          I(PC1A_z^2)    +0.0028    0.0159    +0.175      0.8611
##                  sex    +0.0262    0.0721    +0.364      0.7161
##                age_c    +0.0061    0.0335    +0.182      0.8557
## N = 1287

# Also for PPVT (strongest individual test effect)

run_model(PPVT ~ PC1A_z + I(PC1A_z^2) + sire + gen_2nd + gen_3rd + sex + age_c,
          d_apa, "APA: PPVT ~ PC1A + PC1A² + controls")

## 
##  ================================================================================ 
## APA: PPVT ~ PC1A + PC1A² + controls 
## R2 = 0.0262  Adj-R2 = 0.0205 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0360    0.0559    -0.644      0.5194
##               PC1A_z    -0.1250    0.0551    -2.270      0.0233
##          I(PC1A_z^2)    +0.0335    0.0332    +1.011      0.3122
##               sireBW    -0.5960    0.3582    -1.664      0.0963
##               sireBH    +0.1288    0.2472    +0.521      0.6023
##               sireBO    -0.0463    0.1560    -0.297      0.7666
##          gen_2ndTRUE    +0.3674    0.2137    +1.720      0.0857
##          gen_3rdTRUE    +0.0764    0.3956    +0.193      0.8470
##                  sex    +0.0178    0.0660    +0.269      0.7877
##                age_c    -0.0310    0.0316    -0.980      0.3271
## N = 1531

# ==============================================================
# 21e. Sex-stratified models
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("ROBUSTNESS: Sex-stratified ancestry models\n")

## ROBUSTNESS: Sex-stratified ancestry models

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

# APA: Interaction

run_model(g ~ PC1A_z * sex + sire + gen_2nd + gen_3rd + age_c,
          d_apa, "APA: g ~ PC1A × Sex interaction")

## 
##  ================================================================================ 
## APA: g ~ PC1A × Sex interaction 
## R2 = 0.0214  Adj-R2 = 0.0156 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    +0.0028    0.0553    +0.051      0.9595
##               PC1A_z    -0.1767    0.0591    -2.989      0.0028
##                  sex    -0.0130    0.0676    -0.192      0.8477
##               sireBW    -0.2899    0.2108    -1.375      0.1692
##               sireBH    +0.1080    0.1788    +0.604      0.5459
##               sireBO    +0.0014    0.1417    +0.010      0.9919
##          gen_2ndTRUE    +0.4424    0.2176    +2.033      0.0422
##          gen_3rdTRUE    +0.2447    0.2368    +1.033      0.3016
##                age_c    -0.0183    0.0306    -0.598      0.5498
##           PC1A_z:sex    +0.1021    0.0598    +1.707      0.0880
## N = 1538

# APA: Males only

run_model(g ~ PC1A_z + sire + gen_2nd + gen_3rd + age_c,
          d_apa[d_apa$sex == 0, ], "APA Males: g ~ PC1A + controls")

## 
##  ================================================================================ 
## APA Males: g ~ PC1A + controls 
## R2 = 0.0359  Adj-R2 = 0.0272 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    +0.0265    0.0549    +0.483      0.6291
##               PC1A_z    -0.2103    0.0648    -3.244      0.0012
##               sireBW    -0.5919    0.2825    -2.095      0.0365
##               sireBH    +0.2727    0.1843    +1.480      0.1394
##               sireBO    -0.0898    0.2348    -0.383      0.7021
##          gen_2ndTRUE    +0.4819    0.2601    +1.853      0.0643
##          gen_3rdTRUE    -0.0487    0.0691    -0.705      0.4813
##                age_c    +0.0294    0.0447    +0.659      0.5103
## N = 782

# APA: Females only

run_model(g ~ PC1A_z + sire + gen_2nd + gen_3rd + age_c,
          d_apa[d_apa$sex == 1, ], "APA Females: g ~ PC1A + controls")

## 
##  ================================================================================ 
## APA Females: g ~ PC1A + controls 
## R2 = 0.017  Adj-R2 = 0.0078 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0354    0.0445    -0.795      0.4268
##               PC1A_z    -0.0335    0.0586    -0.571      0.5679
##               sireBW    +0.0577    0.2752    +0.210      0.8340
##               sireBH    -0.2370    0.2470    -0.960      0.3376
##               sireBO    +0.0977    0.1831    +0.534      0.5937
##          gen_2ndTRUE    +0.3725    0.3721    +1.001      0.3171
##          gen_3rdTRUE    +0.4018    0.3497    +1.149      0.2510
##                age_c    -0.0768    0.0414    -1.853      0.0643
## N = 756

# HAA: Interaction

run_model(g ~ PC1A_z * sex + age_c,
          d_haa, "HAA: g ~ PC1A × Sex interaction")

## 
##  ================================================================================ 
## HAA: g ~ PC1A × Sex interaction 
## R2 = 0.0102  Adj-R2 = 0.0071 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0001    0.0588    -0.001      0.9990
##               PC1A_z    -0.1358    0.0463    -2.934      0.0034
##                  sex    +0.0239    0.0725    +0.330      0.7417
##                age_c    +0.0053    0.0333    +0.159      0.8736
##           PC1A_z:sex    +0.0753    0.0621    +1.213      0.2255
## N = 1287

# HAA: Males only

run_model(g ~ PC1A_z + age_c,
          d_haa[d_haa$sex == 0, ], "HAA Males: g ~ PC1A + controls")

## 
##  ================================================================================ 
## HAA Males: g ~ PC1A + controls 
## R2 = 0.0157  Adj-R2 = 0.0127 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    +0.0273    0.0547    +0.498      0.6183
##               PC1A_z    -0.1263    0.0429    -2.946      0.0033
##                age_c    +0.0491    0.0500    +0.981      0.3270
## N = 654

# HAA: Females only

run_model(g ~ PC1A_z + age_c,
          d_haa[d_haa$sex == 1, ], "HAA Females: g ~ PC1A + controls")

## 
##  ================================================================================ 
## HAA Females: g ~ PC1A + controls 
## R2 = 0.0057  Adj-R2 = 0.0026 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0026    0.0454    -0.057      0.9543
##               PC1A_z    -0.0628    0.0448    -1.401      0.1617
##                age_c    -0.0463    0.0427    -1.084      0.2787
## N = 633

# ==============================================================
# 21f. Complete-case sensitivity check
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("ROBUSTNESS: Complete-case analysis (no imputation)\n")

## ROBUSTNESS: Complete-case analysis (no imputation)

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

# Identify cases with complete raw (non-imputed) data

d_apa_cc <- d_apa[!is.na(d_apa$PPVT) & !is.na(d_apa$WJ9) & !is.na(d_apa$WJ10) &
                  !is.na(d_apa$K5PC1A) & !is.na(d_apa$sex) & !is.na(d_apa$age) &
                  !is.na(d_apa$wt) & !is.na(d_apa$M_edu) & !is.na(d_apa$ave_log_inc), ]

# Build g score without imputation (PCA on complete cases)

cog_raw <- as.matrix(d_apa_cc[, c("PPVT_r", "WJ9_r", "WJ10_r")])
cog_raw_z <- apply(cog_raw, 2, scale)
pca_cc <- prcomp(cog_raw_z, center = FALSE, scale. = FALSE)
d_apa_cc$g_cc <- as.vector(pca_cc$x[, 1])

d_apa_cc$PC1A_z <- as.vector(scale(d_apa_cc$K5PC1A))
d_apa_cc$g <- as.vector(scale(d_apa_cc$g_cc))
d_apa_cc$age_c <- as.vector(scale(d_apa_cc$age))

m_cc <- lm(g ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c,
            data = d_apa_cc, weights = wt)
robust_cc <- coeftest(m_cc, vcov = vcovHC(m_cc, type = "HC1"))
cat("APA complete-case: PC1A coefficient:\n")

## APA complete-case: PC1A coefficient:

print(robust_cc["PC1A_z", ])

##     Estimate   Std. Error      t value     Pr(>|t|) 
## -0.136713314  0.048642753 -2.810558734  0.005009909

cat("N =", nrow(d_apa_cc), "\n")

## N = 1507

# ==============================================================
# 22. Descriptive Table
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("TABLE: Sample Composition\n")

## TABLE: Sample Composition

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

wt_mean <- function(x, w) {
  keep <- !is.na(x) & !is.na(w) & w > 0
  if (sum(keep) == 0) return(list(m = NA, n = 0))
  list(m = weighted.mean(x[keep], w[keep]), n = sum(keep))
}

print_row <- function(label, subset, apa_status, haa_status) {
  n <- nrow(subset)
  pct <- 100 * n / nrow(d)
  pc1a_m <- mean(subset$K5PC1A, na.rm = TRUE)
  pc1a_sd <- sd(subset$K5PC1A, na.rm = TRUE)
  g_res   <- wt_mean(subset$g_raw, subset$wt)
  w_res   <- wt_mean(subset$ave_WAIS_raw, subset$wt)
  g_str  <- ifelse(is.na(g_res$m), "    --", sprintf("%+.3f", g_res$m))
  w_str  <- ifelse(is.na(w_res$m), "    --", sprintf("%.2f", w_res$m))
  sd_str <- ifelse(is.na(pc1a_sd) | n < 2, "    --", sprintf("%.4f", pc1a_sd))
  cat(sprintf("  %-25s %5d %5.1f%% %+.4f %7s  %4s  %4s  %5d %7s  %5d %7s\n",
              label, n, pct, pc1a_m, sd_str, apa_status, haa_status,
              g_res$n, g_str, w_res$n, w_str))
}

cat(sprintf("\n%-27s %5s %6s %7s %7s  %4s  %4s  %5s %7s  %5s %7s\n",
            "Category", "N", "%", "PC1A", "SD", "APA", "HAA", "N(g)", "Wt.g", "N(W)", "Wt.W"))

## 
## Category                        N      %    PC1A      SD   APA   HAA   N(g)    Wt.g   N(W)    Wt.W

cat(paste(rep("-", 105), collapse = ""), "\n")

## ---------------------------------------------------------------------------------------------------------

cat("Panel A: Analytic Sample\n")

## Panel A: Analytic Sample

bb_apa <- d[d$any_black_parent & !d$african_origin & d$sire == "BB", ]
print_row("Black x Black", bb_apa, "Yes", "Yes*")

##   Black x Black              1418  86.5% +0.0058  0.0128   Yes  Yes*   1376  -0.019   1303    0.04

bw_apa <- d[d$any_black_parent & !d$african_origin & d$sire == "BW", ]
print_row("Black x White", bw_apa, "Yes", "Excl")

##   Black x White                72   4.4% -0.0671  0.0178   Yes  Excl     66  +0.187     62    0.62

bh_apa <- d[d$any_black_parent & !d$african_origin & d$sire == "BH", ]
print_row("Black x Hispanic", bh_apa, "Yes", "Excl")

##   Black x Hispanic             32   2.0% -0.0137  0.0304   Yes  Excl     32  +0.489     29   -0.26

bo_apa <- d[d$any_black_parent & !d$african_origin & d$sire == "BO", ]
print_row("Black x Other/Missing", bo_apa, "Yes", "Excl")

##   Black x Other/Missing        66   4.0% -0.0064  0.0266   Yes  Excl     64  +0.174     61   -0.14

cat("\nPanel B: Continental African Origin\n")

## 
## Panel B: Continental African Origin

afr <- d[d$any_black_parent & d$african_origin, ]
if (nrow(afr) > 0) print_row("African origin (excl)", afr, "Excl", "Excl")

##   African origin (excl)        21   1.3% +0.0031  0.0414  Excl  Excl     18  +0.071     11    0.71

cat("\nPanel C: No Black Parent\n")

## 
## Panel C: No Black Parent

no_black <- d[!d$any_black_parent, ]
print_row("No Black parent", no_black, "Excl", "Excl")

##   No Black parent              30   1.8% -0.0818  0.0418  Excl  Excl     28  +0.551     22    0.14

cat(sprintf("\n*HAA = BB 3rd+ gen only: N=%d\n", nrow(d_haa)))

## 
## *HAA = BB 3rd+ gen only: N=1323

# ==============================================================
# 23. Regression Plots
# ==============================================================

partial_resid <- function(model, data, xvar) {
  r <- residuals(model)
  b <- coef(model)[xvar]
  r + b * data[[xvar]]
}

plot_ancestry <- function(model, data, xvar, color, ylab, main_title) {
  pr <- partial_resid(model, data, xvar)
  x  <- data[[xvar]]
  plot(x, pr, pch = 16, cex = 0.4,
       col = adjustcolor("gray30", alpha.f = 0.2),
       xlab = "African Ancestry PC (std.)", ylab = ylab, main = main_title)
  lo <- loess(pr ~ x, span = 0.75)
  x_seq <- seq(min(x), max(x), length.out = 200)
  lo_pred <- predict(lo, newdata = data.frame(x = x_seq), se = TRUE)
  polygon(c(x_seq, rev(x_seq)),
          c(lo_pred$fit + 1.96 * lo_pred$se.fit, rev(lo_pred$fit - 1.96 * lo_pred$se.fit)),
          col = adjustcolor(color, alpha.f = 0.15), border = NA)
  lines(x_seq, lo_pred$fit, col = color, lwd = 2.5)
  abline(lm(pr ~ x), col = color, lwd = 1.5, lty = 2)
  b <- coef(model)[xvar]
  p <- summary(model)$coefficients[xvar, "Pr(>|t|)"]
  p_str <- ifelse(p < 0.001, "p < .001", sprintf("p = %.3f", p))
  legend("topright", legend = bquote(beta == .(sprintf("%.3f", b)) ~ "," ~ .(p_str)),
         bty = "n", cex = 0.9)
}

d_apa_g <- d_apa[complete.cases(d_apa[, c("g_raw", "K5PC1A", "sire", "gen_2nd", "gen_3rd", "sex", "age")]) & !is.na(d_apa$wt), ]
d_apa_g$g <- as.vector(scale(d_apa_g$g_raw)); d_apa_g$PC1A_z <- as.vector(scale(d_apa_g$K5PC1A)); d_apa_g$age_c <- as.vector(scale(d_apa_g$age))
m_apa_g <- lm(g ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c, data = d_apa_g, weights = d_apa_g$wt)

d_haa_g <- d_haa[complete.cases(d_haa[, c("g_raw", "K5PC1A", "sex", "age")]) & !is.na(d_haa$wt), ]
d_haa_g$g <- as.vector(scale(d_haa_g$g_raw)); d_haa_g$PC1A_z <- as.vector(scale(d_haa_g$K5PC1A)); d_haa_g$age_c <- as.vector(scale(d_haa_g$age))
m_haa_g <- lm(g ~ PC1A_z + sex + age_c, data = d_haa_g, weights = d_haa_g$wt)

d_apa_w <- d_apa_noBW[complete.cases(d_apa_noBW[, c("ave_WAIS_raw", "K5PC1A", "sire", "gen_2nd", "gen_3rd")]) & !is.na(d_apa_noBW$wt), ]
d_apa_w$ave_WAIS_res_z <- as.vector(scale(d_apa_w$ave_WAIS_raw)); d_apa_w$PC1A_z <- as.vector(scale(d_apa_w$K5PC1A))
m_apa_w <- lm(ave_WAIS_res_z ~ PC1A_z + sire + gen_2nd + gen_3rd, data = d_apa_w, weights = d_apa_w$wt)

d_haa_w <- d_haa[complete.cases(d_haa[, c("ave_WAIS_raw", "K5PC1A")]) & !is.na(d_haa$wt), ]
d_haa_w$ave_WAIS_res_z <- as.vector(scale(d_haa_w$ave_WAIS_raw)); d_haa_w$PC1A_z <- as.vector(scale(d_haa_w$K5PC1A))
m_haa_w <- lm(ave_WAIS_res_z ~ PC1A_z, data = d_haa_w, weights = d_haa_w$wt)

pdf("admixture_regression_plots.pdf", width = 10, height = 8)
par(mfrow = c(2, 2), mar = c(4.5, 4.5, 3, 1.5))
plot_ancestry(m_apa_g, d_apa_g, "PC1A_z", "blue", "Child g (z)", "A. Pan-African American: Ancestry -> Child g")
plot_ancestry(m_haa_g, d_haa_g, "PC1A_z", "blue", "Child g (z)", "B. Historical AA: Ancestry -> Child g")
plot_ancestry(m_apa_w, d_apa_w, "PC1A_z", "darkred", "Parental WAIS (z)", "C. Pan-African American: Ancestry -> Parental WAIS")
plot_ancestry(m_haa_w, d_haa_w, "PC1A_z", "darkred", "Parental WAIS (z)", "D. Historical AA: Ancestry -> Parental WAIS")
dev.off()

## png 
##   2

cat("\nPlots saved to: admixture_regression_plots.pdf\n")

## 
## Plots saved to: admixture_regression_plots.pdf

#_______________________________________________________________________________________
#Exploratory analysis 



# ==============================================================
# Epigenetic Clock Analysis
# ==============================================================


epi <- read_sav("C:/Users/mh198/Documents/Data/FFCWS/DS0008/31622-0008-Data.sav")

# Merge with main dataset
d <- merge(d, epi[, c("IDNUM", "K5MK_POAM45", "K5MK_POAM38",
                      "K5MK_PHENOAGE", "K5MK_GRIM", "K5MK_HORVATH",
                      "K5MK_SKINBLOOD", "K5MK_PEDBE", "K5MK_HANNUM",
                      "K5MK_AGE", "K5MK_BATCH")],
           by = "IDNUM", all.x = TRUE)

# Clean: set negative codes to NA
epi_vars <- c("K5MK_POAM45", "K5MK_POAM38", "K5MK_PHENOAGE", "K5MK_GRIM",
              "K5MK_HORVATH", "K5MK_SKINBLOOD", "K5MK_PEDBE", "K5MK_HANNUM",
              "K5MK_AGE", "K5MK_BATCH")
for (v in epi_vars) {
  d[[v]] <- ifelse(d[[v]] < -5, NA, d[[v]])
}

cat("Epigenetic data merged. DunedinPACE available: N =", sum(!is.na(d$K5MK_POAM45)), "\n")

## Epigenetic data merged. DunedinPACE available: N = 485

# Rebuild subsets with epi data
d_apa <- d[d$any_black_parent & !d$african_origin, ]
d_apa$sire <- droplevels(d_apa$sire)
d_apa$mother_black_sire <- as.numeric(d_apa$CM1ETHRACE == 2)
d_apa$father_black_sire <- as.numeric(d_apa$CF1ETHRACE == 2)

d_apa_noBW <- d_apa[d_apa$sire != "BW", ]
d_apa_noBW$sire <- droplevels(d_apa_noBW$sire)

d_haa <- d[d$CF1ETHRACE == 2 & d$CM1ETHRACE == 2 &
             !d$african_origin & !d$parent_fb & !d$gp_fb, ]

cat("\nDunedinPACE by sample:\n")

## 
## DunedinPACE by sample:

cat(sprintf("  APA: N = %d\n", sum(!is.na(d_apa$K5MK_POAM45))))

##   APA: N = 469

cat(sprintf("  APA excl BW: N = %d\n", sum(!is.na(d_apa_noBW$K5MK_POAM45))))

##   APA excl BW: N = 457

cat(sprintf("  HAA: N = %d\n", sum(!is.na(d_haa$K5MK_POAM45))))

##   HAA: N = 400

# ==============================================================
# Full Diagnostics: All Clocks
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("Epigenetic Clocks: Full Diagnostics\n")

## Epigenetic Clocks: Full Diagnostics

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

clock_vars <- c("K5MK_POAM45", "K5MK_POAM38", "K5MK_PHENOAGE", "K5MK_GRIM",
                "K5MK_HORVATH", "K5MK_SKINBLOOD", "K5MK_PEDBE", "K5MK_HANNUM")
clock_labels <- c("DunedinPACE", "DunedinPoAm38", "PhenoAge", "GrimAge",
                  "Horvath", "SkinBlood", "PedBE", "Hannum")

# APA
cat("\nCorrelations: Clocks with Ancestry and Outcomes (APA)\n")

## 
## Correlations: Clocks with Ancestry and Outcomes (APA)

cat(sprintf("%-15s %5s %9s %9s %9s %9s %9s %9s\n",
            "Clock", "N", "r(PC1A)", "r(g)", "r(PPVT)", "r(WAIS)", "r(SES)", "r(WJ9)"))

## Clock               N   r(PC1A)      r(g)   r(PPVT)   r(WAIS)    r(SES)    r(WJ9)

cat(paste(rep("-", 80), collapse = ""), "\n")

## --------------------------------------------------------------------------------

for (i in seq_along(clock_vars)) {
  v <- clock_vars[i]
  mask_pc   <- !is.na(d_apa[[v]]) & !is.na(d_apa$K5PC1A)
  mask_g    <- !is.na(d_apa[[v]]) & !is.na(d_apa$g_raw)
  mask_ppvt <- !is.na(d_apa[[v]]) & !is.na(d_apa$PPVT)
  mask_wais <- !is.na(d_apa[[v]]) & !is.na(d_apa$ave_WAIS_raw)
  mask_ses  <- !is.na(d_apa[[v]]) & !is.na(d_apa$SES_raw)
  mask_wj9  <- !is.na(d_apa[[v]]) & !is.na(d_apa$WJ9)
  
  n <- sum(mask_pc)
  if (n < 10) next
  
  r_pc   <- cor(d_apa[[v]][mask_pc],   d_apa$K5PC1A[mask_pc])
  r_g    <- cor(d_apa[[v]][mask_g],    d_apa$g_raw[mask_g])
  r_ppvt <- cor(d_apa[[v]][mask_ppvt], d_apa$PPVT[mask_ppvt])
  r_wais <- cor(d_apa[[v]][mask_wais], d_apa$ave_WAIS_raw[mask_wais])
  r_ses  <- cor(d_apa[[v]][mask_ses],  d_apa$SES_raw[mask_ses])
  r_wj9  <- cor(d_apa[[v]][mask_wj9],  d_apa$WJ9[mask_wj9])
  
  cat(sprintf("%-15s %5d %+9.4f %+9.4f %+9.4f %+9.4f %+9.4f %+9.4f\n",
              clock_labels[i], n, r_pc, r_g, r_ppvt, r_wais, r_ses, r_wj9))
}

## DunedinPACE       469   +0.1137   +0.0247   +0.0240   -0.0152   -0.0032   -0.0216
## DunedinPoAm38     469   +0.1263   -0.0312   -0.0835   -0.0516   -0.0690   -0.0087
## PhenoAge          146   +0.1252   -0.0190   +0.0486   -0.0096   -0.0241   -0.0142
## GrimAge           469   +0.0509   +0.0321   +0.0539   -0.0018   +0.0066   -0.0270
## Horvath           469   +0.0023   -0.0398   -0.0148   -0.0163   +0.0216   -0.0532
## SkinBlood         469   +0.0584   +0.0075   +0.0161   +0.0119   +0.0255   +0.0338
## PedBE             469   +0.0210   +0.0116   +0.0481   -0.0053   +0.0576   +0.0214
## Hannum            469   +0.0282   +0.0552   +0.0539   -0.0226   +0.0146   -0.0015

# HAA
cat("\nCorrelations: Clocks with Ancestry and Outcomes (HAA)\n")

## 
## Correlations: Clocks with Ancestry and Outcomes (HAA)

cat(sprintf("%-15s %5s %9s %9s %9s %9s %9s %9s\n",
            "Clock", "N", "r(PC1A)", "r(g)", "r(PPVT)", "r(WAIS)", "r(SES)", "r(WJ9)"))

## Clock               N   r(PC1A)      r(g)   r(PPVT)   r(WAIS)    r(SES)    r(WJ9)

cat(paste(rep("-", 80), collapse = ""), "\n")

## --------------------------------------------------------------------------------

for (i in seq_along(clock_vars)) {
  v <- clock_vars[i]
  mask_pc   <- !is.na(d_haa[[v]]) & !is.na(d_haa$K5PC1A)
  mask_g    <- !is.na(d_haa[[v]]) & !is.na(d_haa$g_raw)
  mask_ppvt <- !is.na(d_haa[[v]]) & !is.na(d_haa$PPVT)
  mask_wais <- !is.na(d_haa[[v]]) & !is.na(d_haa$ave_WAIS_raw)
  mask_ses  <- !is.na(d_haa[[v]]) & !is.na(d_haa$SES_raw)
  mask_wj9  <- !is.na(d_haa[[v]]) & !is.na(d_haa$WJ9)
  
  n <- sum(mask_pc)
  if (n < 10) next
  
  r_pc   <- cor(d_haa[[v]][mask_pc],   d_haa$K5PC1A[mask_pc])
  r_g    <- cor(d_haa[[v]][mask_g],    d_haa$g_raw[mask_g])
  r_ppvt <- cor(d_haa[[v]][mask_ppvt], d_haa$PPVT[mask_ppvt])
  r_wais <- cor(d_haa[[v]][mask_wais], d_haa$ave_WAIS_raw[mask_wais])
  r_ses  <- cor(d_haa[[v]][mask_ses],  d_haa$SES_raw[mask_ses])
  r_wj9  <- cor(d_haa[[v]][mask_wj9],  d_haa$WJ9[mask_wj9])
  
  cat(sprintf("%-15s %5d %+9.4f %+9.4f %+9.4f %+9.4f %+9.4f %+9.4f\n",
              clock_labels[i], n, r_pc, r_g, r_ppvt, r_wais, r_ses, r_wj9))
}

## DunedinPACE       400   +0.0907   +0.0333   +0.0410   -0.0125   +0.0004   -0.0218
## DunedinPoAm38     400   +0.0581   -0.0014   -0.0272   -0.0518   -0.0659   +0.0002
## PhenoAge          122   +0.0485   +0.0230   +0.0695   -0.0396   -0.0024   +0.0431
## GrimAge           400   +0.0511   +0.0264   +0.0424   +0.0085   +0.0278   -0.0216
## Horvath           400   -0.0658   -0.0333   -0.0261   -0.0012   +0.0246   -0.0343
## SkinBlood         400   +0.0498   +0.0161   +0.0219   +0.0184   +0.0457   +0.0518
## PedBE             400   +0.0022   -0.0037   +0.0482   -0.0017   +0.0373   +0.0048
## Hannum            400   +0.0337   +0.0574   +0.0568   -0.0110   +0.0207   +0.0067

# ==============================================================
# Mediation: DunedinPACE
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("Mediation: Does DunedinPACE attenuate ancestry-cognition?\n")

## Mediation: Does DunedinPACE attenuate ancestry-cognition?

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

d_apa$PACE_z <- as.vector(scale(d_apa$K5MK_POAM45))
d_haa$PACE_z <- as.vector(scale(d_haa$K5MK_POAM45))

# APA: without PACE (restricted to epi sample)
run_model(g ~ PC1A_z + sire + gen_2nd + gen_3rd + sex + age_c,
          d_apa[!is.na(d_apa$K5MK_POAM45), ],
          "APA M1 (no PACE, epi subsample)")

## 
##  ================================================================================ 
## APA M1 (no PACE, epi subsample) 
## R2 = 0.037  Adj-R2 = 0.0202 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0267    0.1162    -0.230      0.8185
##               PC1A_z    -0.1605    0.0831    -1.931      0.0541
##               sireBW    -0.1006    0.4037    -0.249      0.8033
##               sireBH    +0.3733    0.2190    +1.704      0.0890
##               sireBO    +0.1721    0.2403    +0.716      0.4741
##          gen_2ndTRUE    +0.6592    0.3450    +1.911      0.0567
##          gen_3rdTRUE    +0.0166    0.2649    +0.063      0.9501
##                  sex    +0.0964    0.1254    +0.769      0.4424
##                age_c    +0.0686    0.0544    +1.262      0.2077
## N = 467

# APA: with PACE
run_model(g ~ PC1A_z + PACE_z + sire + gen_2nd + gen_3rd + sex + age_c, d_apa,
          "APA M1 + DunedinPACE")

## 
##  ================================================================================ 
## APA M1 + DunedinPACE 
## R2 = 0.0375  Adj-R2 = 0.0186 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0301    0.1108    -0.272      0.7856
##               PC1A_z    -0.1558    0.0839    -1.857      0.0640
##               PACE_z    -0.0242    0.0844    -0.287      0.7745
##               sireBW    -0.0893    0.4013    -0.223      0.8240
##               sireBH    +0.3748    0.2172    +1.726      0.0851
##               sireBO    +0.1833    0.2331    +0.786      0.4321
##          gen_2ndTRUE    +0.6749    0.3480    +1.939      0.0531
##          gen_3rdTRUE    +0.0130    0.2735    +0.047      0.9622
##                  sex    +0.0992    0.1219    +0.814      0.4159
##                age_c    +0.0689    0.0548    +1.259      0.2086
## N = 467

# HAA: without PACE (restricted to epi sample)
run_model(g ~ PC1A_z + sex + age_c,
          d_haa[!is.na(d_haa$K5MK_POAM45), ],
          "HAA M1 (no PACE, epi subsample)")

## 
##  ================================================================================ 
## HAA M1 (no PACE, epi subsample) 
## R2 = 0.0162  Adj-R2 = 0.0087 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0246    0.1189    -0.207      0.8362
##               PC1A_z    -0.1087    0.0576    -1.888      0.0597
##                  sex    +0.1451    0.1347    +1.077      0.2821
##                age_c    +0.0830    0.0588    +1.411      0.1591
## N = 398

# HAA: with PACE
run_model(g ~ PC1A_z + PACE_z + sex + age_c, d_haa,
          "HAA M1 + DunedinPACE")

## 
##  ================================================================================ 
## HAA M1 + DunedinPACE 
## R2 = 0.0177  Adj-R2 = 0.0077 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0312    0.1118    -0.279      0.7806
##               PC1A_z    -0.1048    0.0581    -1.803      0.0722
##               PACE_z    -0.0442    0.0943    -0.468      0.6397
##                  sex    +0.1538    0.1274    +1.208      0.2279
##                age_c    +0.0833    0.0591    +1.410      0.1594
## N = 398

# ==============================================================
# Mediation: GrimAge
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("Mediation: Does GrimAge attenuate ancestry-cognition?\n")

## Mediation: Does GrimAge attenuate ancestry-cognition?

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

d_apa$GRIM_z <- as.vector(scale(d_apa$K5MK_GRIM))
d_haa$GRIM_z <- as.vector(scale(d_haa$K5MK_GRIM))

run_model(g ~ PC1A_z + GRIM_z + sire + gen_2nd + gen_3rd + sex + age_c, d_apa,
          "APA M1 + GrimAge")

## 
##  ================================================================================ 
## APA M1 + GrimAge 
## R2 = 0.0371  Adj-R2 = 0.0181 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0268    0.1177    -0.227      0.8202
##               PC1A_z    -0.1607    0.0834    -1.928      0.0545
##               GRIM_z    +0.0020    0.0674    +0.029      0.9769
##               sireBW    -0.1017    0.4031    -0.252      0.8010
##               sireBH    +0.3733    0.2196    +1.700      0.0898
##               sireBO    +0.1713    0.2382    +0.719      0.4724
##          gen_2ndTRUE    +0.6590    0.3450    +1.910      0.0567
##          gen_3rdTRUE    +0.0161    0.2654    +0.061      0.9516
##                  sex    +0.0968    0.1309    +0.740      0.4597
##                age_c    +0.0685    0.0548    +1.250      0.2121
## N = 467

run_model(g ~ PC1A_z + GRIM_z + sex + age_c, d_haa,
          "HAA M1 + GrimAge")

## 
##  ================================================================================ 
## HAA M1 + GrimAge 
## R2 = 0.0162  Adj-R2 = 0.0062 
## ================================================================================ 
##             Variable          B        SE         t           p
## ------------------------------------------------------------ 
##          (Intercept)    -0.0247    0.1213    -0.204      0.8385
##               PC1A_z    -0.1088    0.0577    -1.885      0.0601
##               GRIM_z    +0.0019    0.0738    +0.025      0.9798
##                  sex    +0.1454    0.1395    +1.042      0.2981
##                age_c    +0.0828    0.0593    +1.397      0.1631
## N = 398

# ==============================================================
# Mediation: All clocks (loop)
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("Mediation Summary: All Clocks (HAA)\n")

## Mediation Summary: All Clocks (HAA)

cat("PC1A_z coefficient with and without each clock\n")

## PC1A_z coefficient with and without each clock

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

cat(sprintf("\n%-15s %5s %9s %9s %9s\n",
            "Clock", "N", "PC1A(no)", "PC1A(+clk)", "Change"))

## 
## Clock               N  PC1A(no) PC1A(+clk)    Change

cat(paste(rep("-", 50), collapse = ""), "\n")

## --------------------------------------------------

for (i in seq_along(clock_vars)) {
  v <- clock_vars[i]
  dd <- d_haa[!is.na(d_haa[[v]]) & !is.na(d_haa$g_raw) &
                !is.na(d_haa$K5PC1A) & !is.na(d_haa$age) &
                !is.na(d_haa$sex) & !is.na(d_haa$wt), ]
  if (nrow(dd) < 30) next
  
  dd$PC1A_z <- as.vector(scale(dd$K5PC1A))
  dd$g      <- as.vector(scale(dd$g_raw))
  dd$age_c  <- as.vector(scale(dd$age))
  dd$clock_z <- as.vector(scale(dd[[v]]))
  
  m0 <- lm(g ~ PC1A_z + sex + age_c, data = dd, weights = wt)
  m1 <- lm(g ~ PC1A_z + clock_z + sex + age_c, data = dd, weights = wt)
  
  b0 <- coef(m0)["PC1A_z"]
  b1 <- coef(m1)["PC1A_z"]
  change_pct <- 100 * (b1 - b0) / b0
  
  cat(sprintf("%-15s %5d %+9.4f %+9.4f %+8.1f%%\n",
              clock_labels[i], nrow(dd), b0, b1, change_pct))
}

## DunedinPACE       398   -0.1087   -0.1048     -3.6%
## DunedinPoAm38     398   -0.1087   -0.1076     -1.1%
## PhenoAge          121   -0.1026   -0.1031     +0.6%
## GrimAge           398   -0.1087   -0.1088     +0.1%
## Horvath           398   -0.1087   -0.1145     +5.3%
## SkinBlood         398   -0.1087   -0.1085     -0.2%
## PedBE             398   -0.1087   -0.1092     +0.4%
## Hannum            398   -0.1087   -0.1088     +0.0%

# ==============================================================
# Epigenetic Formal Mediation: PC1A -> DunedinPACE -> g
# ==============================================================

cat("\n", paste(rep("#", 80), collapse = ""), "\n")

## 
##  ################################################################################

cat("FORMAL MEDIATION: PC1A -> DunedinPACE -> g (HAA)\n")

## FORMAL MEDIATION: PC1A -> DunedinPACE -> g (HAA)

cat(paste(rep("#", 80), collapse = ""), "\n")

## ################################################################################

d_epi_haa <- d_haa[!is.na(d_haa$K5MK_POAM45) & !is.na(d_haa$g_raw) &
                   !is.na(d_haa$K5PC1A) & !is.na(d_haa$age) &
                   !is.na(d_haa$sex) & !is.na(d_haa$wt), ]
d_epi_haa$PC1A_z <- as.vector(scale(d_epi_haa$K5PC1A))
d_epi_haa$g <- as.vector(scale(d_epi_haa$g_raw))
d_epi_haa$PACE_z <- as.vector(scale(d_epi_haa$K5MK_POAM45))
d_epi_haa$age_c <- as.vector(scale(d_epi_haa$age))

med_M_epi <- lm(PACE_z ~ PC1A_z + sex + age_c, data = d_epi_haa, weights = wt)
out_Y_epi <- lm(g ~ PACE_z + PC1A_z + sex + age_c, data = d_epi_haa, weights = wt)

set.seed(77777)
med_epi <- mediate(med_M_epi, out_Y_epi, treat = "PC1A_z", mediator = "PACE_z",
                   boot = TRUE, sims = 1000)

## Running nonparametric bootstrap

summary(med_epi)

## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                  Estimate 95% CI Lower 95% CI Upper p-value  
## ACME           -0.0039344   -0.0294527    0.0141085   0.694  
## ADE            -0.1047899   -0.2179852    0.0076368   0.070 .
## Total Effect   -0.1087243   -0.2193958    0.0061326   0.058 .
## Prop. Mediated  0.0361871   -0.2512920    0.4752066   0.696  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 398 
## 
## 
## Simulations: 1000

################

#"Epigenetic pace-of-aging measures (DunedinPACE, DunedinPoAm38, GrimAge, Horvath, and others) did not attenuate the ancestry-cognition association (maximum change: 3.6%), and showed near-zero correlations with cognitive outcomes (|r| < .05 for all clocks). 
#These results do not support epigenetic aging as a mediator of ancestry-related cognitive differences in this sample."

FFCWS update 3

MH

2026-06-06