Latent gaps in SEM

Purpose:

Simulate IQ-like data with 10 tests from a list of g-loadings
Simulate 2 groups that differ by 1 d on g
Recover the latent gap size using SEM to account for random error in the tests

#simulate IQ data with measurement and a group difference, then recover the true size
library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## Loading required package: lattice
## 
## Loading required package: survival
## 
## Loading required package: Formula
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric
## 
## 
## The following object is masked from 'package:base':
## 
##     +

library(lavaan)

## This is lavaan 0.6-12
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## 
## The following object is masked from 'package:psych':
## 
##     cor2cov

g_means = c(0, 1)
n_per_group = 1000

#error loading
error_loading = function(x) sqrt(1 - x^2)

#repeat simulation N times
set.seed(1)
sim_results = map_df(1:100, function(i) {
  #g loadings
  g_loadings = runif(10, min = .50, max = .90)
  
  #simulate data
  d = map_df(g_means, function(g_mean) {
    #g
    dd = tibble(
      group = g_mean,
      g = rnorm(n_per_group, mean = g_mean, sd = 1)
    )
    
    #make data for 10 tests
    for (v in 1:10) {
      dd[[str_glue("test_{v}")]] = dd$g * g_loadings[v] + rnorm(n_per_group, mean = 0, sd = error_loading(g_loadings[v]))
    }
    
    dd
  })
  
  #factor analysis
  (fa_g = fa(d[-c(1:2)]))
  g_efa_scores = fa_g$scores %>% as_tibble() %>% mutate(group = d$group)
  
  #gap
  cohen.d(g_efa_scores$MR1, group = g_efa_scores$group) ->
    g_efa_gap
  
  #recover factor loadings in EFA?
  cor(fa_g$loadings[, 1], g_loadings)
  
  #SEM
  model1 = str_glue("
g_sem =~ {str_c('test_' + 1:10, collapse = ' + ')}
")
  
  model1_fit = sem(model1, data = d, std.lv = T)
  
  #recover loadings in SEM?
  cor(
    parameterEstimates(model1_fit, standardized = T) %>% 
      filter(rhs %in% ("test_" + 1:10), lhs == "g_sem") %>% 
      pull(std.all),
    g_loadings
  )
  
  #g gap model
  model2 = str_glue(
    "
g_sem =~ {str_c('test_' + 1:10, collapse = ' + ')}
g_sem ~ group
")
  
  model2_fit = sem(model2, data = d, std.lv = T)
  parameterEstimates(model2_fit, standardized = T) %>% 
    filter(lhs == "g_sem", rhs == "group") %>% 
    pull(est) ->
    g_sem_gap
  
  tibble(
    g_sem_gap = g_sem_gap,
    g_efa_gap = g_efa_gap$cohen.d[2]
  )
})

#results
#gap is 1, so SEM should be close to 1, and EFA should be a little lower
sim_results %>% describe2()

#raw results
sim_results

Latent gaps in SEM

09 January, 2023