Init

options(
  digits = 3
)

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  lavaan
)

## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## 
## The following object is masked from 'package:psych':
## 
##     cor2cov

theme_set(theme_bw())

Functions

#beta of residual
beta_residual = function(pars) {
  #residual variance
  res_var = 1 - sum(pars^2)
  
  #beta of residual
  beta = sqrt(res_var)
  
  beta
}

simulate_data = function(pars, generation) {
  # browser()
  #simulate data
  d = tibble(
    cohort = generation,
    g = rnorm(n_cohort, mean = generation * -dysgenics_speed),
    f1 = rnorm(n_cohort),
    f2 = rnorm(n_cohort),
    f3 = rnorm(n_cohort)
  )
  
  #generation observed scores
  for (i in 1:nrow(test_pars)) {
    # browser()
    d[[str_glue("test_{i}")]] = 
      pars$g[i] * d$g + 
      pars$f1[i] * d$f1 + 
      pars$f2[i] * d$f2 + 
      pars$f3[i] * d$f3 + 
      pars$flynn[i] * generation +
      rnorm(n_cohort, mean = 0, sd = beta_residual(test_pars[i, 1:4] %>% as.numeric()))
  }
  
  d
}

#the actual model
true_model = "
  g =~ test_1 + test_2 + test_3 + test_4 + test_5 + test_6 + test_7 + test_8 + test_9
  f1 =~ test_1 + test_2 + test_3
  f2 =~ test_4 + test_5 + test_6
  f3 =~ test_7 + test_8 + test_9
  "

#do cfa fit
do_cfa = function(d, model = true_model) {
  #fit model
  fit = cfa(model, data = d %>% select(starts_with("test")), orthogonal = TRUE)
  
  fit
}

#do mgcfa
do_mgcfa = function(d, model = true_model, group = "cohort") {
  # browser()
  #move group var
  d$group = d[[group]]
  
  #data subset
  d_sub = d %>% select(starts_with("test"), group)
  
  #configural
  config_fit = cfa(
    model = model,
    data = d_sub,
    orthogonal = TRUE,
    group = "group"
  )
  
  #metric
  metric_fit = cfa(
    model = model,
    data = d_sub,
    orthogonal = TRUE,
    group = "group",
    group.equal = c("loadings")
  )
  
  #scalar
  scalar_fit = cfa(
    model = model,
    data = d_sub,
    orthogonal = TRUE,
    group = "group",
    group.equal = c("loadings", "intercepts")
  )
  
  #strict
  strict_fit = cfa(
    model = model,
    data = d_sub,
    orthogonal = TRUE,
    group = "group",
    group.equal = c("loadings", "intercepts", "residuals")
  )
  
  #collect results
  list(
    config_fit = config_fit,
    metric_fit = metric_fit,
    scalar_fit = scalar_fit,
    strict_fit = strict_fit,
    model_fit = rbind(
      config_fit %>% fitmeasures() %>% as.list() %>% as_tibble() %>% mutate(model = "configural"),
      metric_fit %>% fitmeasures() %>% as.list() %>% as_tibble() %>% mutate(model = "metric"),
      scalar_fit %>% fitmeasures() %>% as.list() %>% as_tibble() %>% mutate(model = "scalar"),
      strict_fit %>% fitmeasures() %>% as.list() %>% as_tibble() %>% mutate(model = "strict")
    ) %>% select(model, everything())
  )
}

Simulations

#parameters
test_pars = tibble(
  g = c(0.2, 0.5, 0.3, 
        0.5, 0.7, 0.4, 
        0.5, 0.3, 0.7),
  f1 = c(0.5, 0.4, 0.4,
         0, 0, 0,
         0, 0, 0),
  f2 = c(0, 0, 0,
         0.4, 0.5, 0.6,
         0, 0, 0),
  f3 = c(0, 0, 0,
         0, 0, 0,
         0.4, 0.4, 0.5),
  flynn = c(0.3, 0.4, 0.2,
            0.3, 0.2, 0.3,
            0, 0.1, 0.2)
)

cor(test_pars)

##             g     f1     f2     f3   flynn
## g      1.0000 -0.566  0.301  0.256 -0.0664
## f1    -0.5660  1.000 -0.486 -0.491  0.4811
## f2     0.3006 -0.486  1.000 -0.486  0.2720
## f3     0.2556 -0.491 -0.486  1.000 -0.7084
## flynn -0.0664  0.481  0.272 -0.708  1.0000

#simulate generations
#1 IQ per 10 years, 3 IQ per generation, 3/15 = d = 0.2
dysgenics_speed = 0.2

#sample size per cohort
n_cohort = 1000

#number of generations
#generation is 30 years, 1870 to 2020 is 5 generations
generations = 5

#simulate data
set.seed(1)
d = map_dfr(0:(generations-1), ~simulate_data(test_pars, .x))

#verify factor structure
d %>% filter(cohort == 0) %>% select(starts_with("test_")) %>% cor()

##        test_1 test_2 test_3 test_4 test_5 test_6 test_7 test_8 test_9
## test_1 1.0000  0.293 0.2796 0.0495 0.0843 0.0601 0.0914 0.0646  0.156
## test_2 0.2930  1.000 0.3140 0.2522 0.3612 0.2517 0.2468 0.1295  0.324
## test_3 0.2796  0.314 1.0000 0.1301 0.1950 0.1356 0.1893 0.0745  0.217
## test_4 0.0495  0.252 0.1301 1.0000 0.5892 0.4901 0.2878 0.1952  0.410
## test_5 0.0843  0.361 0.1950 0.5892 1.0000 0.5980 0.4114 0.2203  0.530
## test_6 0.0601  0.252 0.1356 0.4901 0.5980 1.0000 0.2408 0.1600  0.345
## test_7 0.0914  0.247 0.1893 0.2878 0.4114 0.2408 1.0000 0.3308  0.574
## test_8 0.0646  0.130 0.0745 0.1952 0.2203 0.1600 0.3308 1.0000  0.421
## test_9 0.1562  0.324 0.2174 0.4100 0.5296 0.3452 0.5745 0.4214  1.000

d %>% filter(cohort == 0) %>% select(starts_with("test_")) %>% describe2()

gen_0_fit = d %>% filter(cohort == 0) %>% do_cfa()
gen_0_fit %>% parameterEstimates(standardized = T)

#do estimates match expectations?
test_pars$g_obs = gen_0_fit %>% parameterEstimates(standardized = T) %>% filter(lhs == "g", op == "=~") %>% pull(std.all)
test_pars$f1_obs = 0
test_pars$f1_obs[1:3] = gen_0_fit %>% parameterEstimates(standardized = T) %>% filter(lhs == "f1", op == "=~") %>% pull(std.all)
test_pars$f2_obs = 0
test_pars$f2_obs[4:6] = gen_0_fit %>% parameterEstimates(standardized = T) %>% filter(lhs == "f2", op == "=~") %>% pull(std.all)
test_pars$f3_obs = 0
test_pars$f3_obs[7:9] = gen_0_fit %>% parameterEstimates(standardized = T) %>% filter(lhs == "f3", op == "=~") %>% pull(std.all)

#close enough
test_pars

#trajectory of test scores over time
d %>% 
  group_by(cohort) %>% 
  summarise(across(starts_with("test_"), mean)) %>% 
  pivot_longer(cols = -cohort, names_to = "test", values_to = "mean") %>% 
  ggplot(aes(x = cohort, y = mean, color = test)) +
  geom_line()

GG_save("figs/test_scores_over_time.png")

#overall IQ changes over time
#use 1 factor model
fa_0 = fa(
  d %>% filter(cohort == 0) %>% select(starts_with("test_"))
)

fa_0

## Factor Analysis using method =  minres
## Call: fa(r = d %>% filter(cohort == 0) %>% select(starts_with("test_")))
## Standardized loadings (pattern matrix) based upon correlation matrix
##         MR1    h2   u2 com
## test_1 0.20 0.041 0.96   1
## test_2 0.47 0.218 0.78   1
## test_3 0.31 0.095 0.90   1
## test_4 0.62 0.389 0.61   1
## test_5 0.79 0.631 0.37   1
## test_6 0.59 0.345 0.66   1
## test_7 0.58 0.333 0.67   1
## test_8 0.38 0.145 0.85   1
## test_9 0.74 0.544 0.46   1
## 
##                 MR1
## SS loadings    2.74
## Proportion Var 0.30
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  36  with the objective function =  2.34 with Chi Square =  2332
## df of  the model are 27  and the objective function was  0.51 
## 
## The root mean square of the residuals (RMSR) is  0.09 
## The df corrected root mean square of the residuals is  0.1 
## 
## The harmonic n.obs is  1000 with the empirical chi square  572  with prob <  1.8e-103 
## The total n.obs was  1000  with Likelihood Chi Square =  510  with prob <  1.2e-90 
## 
## Tucker Lewis Index of factoring reliability =  0.719
## RMSEA index =  0.134  and the 90 % confidence intervals are  0.124 0.144
## BIC =  324
## Fit based upon off diagonal values = 0.92
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.92
## Multiple R square of scores with factors          0.84
## Minimum correlation of possible factor scores     0.68

#save scores as IQ
d$IQ = psych::factor.scores(
  d %>% select(starts_with("test")) %>% as.matrix(), 
  fa_0) %>% 
  extract2("scores") %>% 
  .[, 1]

#rescale
d$IQ = d$IQ %>% standardize(focal_group = (d$cohort == 0))
d$IQ = d$IQ * 15 + 100

#ensure it is right
describe2(
  d %>% select(IQ, g),
  d$cohort
)

describe2(
  d %>% select(starts_with("test_")),
  d$cohort
) %>% filter(group == 4)

GG_denhist(d, "IQ", "cohort")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#IQ increase
d %>% 
  group_by(cohort) %>% 
  summarise(mean_IQ = mean(IQ)) %>% 
  ggplot(aes(x = cohort, y = mean_IQ)) +
  geom_line()

GG_save("figs/IQ_over_time.png")

#g decrease
d %>% 
  group_by(cohort) %>% 
  summarise(mean_g = mean(g)) %>% 
  ggplot(aes(x = cohort, y = mean_g)) +
  geom_line() +
  theme_minimal()

#Jensen method
test_pars$flynn_effect = d %>% 
  filter(cohort == max(d$cohort)) %>%
  summarise(across(starts_with("test_"), mean)) %>% 
  unlist()

test_pars$test = str_glue("test_{1:nrow(test_pars)}")

#empirical loadings
test_pars %>% 
  GG_scatter("g_obs", "flynn_effect", case_names = "test", check_overlap = F) +
  labs(
    x = "Observed g loading",
    y = "Flynn effect"
  )

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/Jensen_method_empirical_loadings.png")

## `geom_smooth()` using formula = 'y ~ x'

#model loadings
test_pars %>% 
  GG_scatter("g", "flynn", case_names = "test", check_overlap = F) +
  labs(
    x = "True g loading",
    y = "Flynn effect"
  )

## `geom_smooth()` using formula = 'y ~ x'

#flynn effect speed
test_pars %>% 
  GG_scatter("flynn", "flynn_effect", case_names = "test") +
  labs(
    x = "Model flynn effect (per generation)",
    y = "Observed Flynn effect (first vs. last generation)"
  )

## `geom_smooth()` using formula = 'y ~ x'

#mgcfa
mgcfa_0vs4 = do_mgcfa(
  d %>% filter(cohort %in% c(0, max(d$cohort))), 
  group = "cohort"
  )

mgcfa_0vs4$model_fit %>% select(model, pvalue, cfi, tli, rmsea)

anova(
  mgcfa_0vs4$config_fit, 
  mgcfa_0vs4$metric_fit, 
  mgcfa_0vs4$scalar_fit, 
  mgcfa_0vs4$strict_fit
)

Meta

write_sessioninfo()

## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_DK.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_DK.UTF-8        LC_COLLATE=en_DK.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_DK.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Copenhagen
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lavaan_0.6-19         kirkegaard_2024-09-27 psych_2.4.6.26       
##  [4] assertthat_0.2.1      weights_1.0.4         Hmisc_5.1-3          
##  [7] magrittr_2.0.3        lubridate_1.9.3       forcats_1.0.0        
## [10] stringr_1.5.1         dplyr_1.1.4           purrr_1.0.2          
## [13] readr_2.1.5           tidyr_1.3.1           tibble_3.2.1         
## [16] ggplot2_3.5.1         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1  farver_2.1.2      fastmap_1.2.0     digest_0.6.37    
##  [5] rpart_4.1.23      timechange_0.3.0  lifecycle_1.0.4   cluster_2.1.6    
##  [9] survival_3.7-0    gdata_3.0.1       compiler_4.4.1    rlang_1.1.4      
## [13] sass_0.4.9        tools_4.4.1       utf8_1.2.4        yaml_2.3.10      
## [17] data.table_1.16.2 knitr_1.48        labeling_0.4.3    htmlwidgets_1.6.4
## [21] mnormt_2.1.1      plyr_1.8.9        withr_3.0.1       foreign_0.8-86   
## [25] stats4_4.4.1      nnet_7.3-19       grid_4.4.1        fansi_1.0.6      
## [29] jomo_2.7-6        colorspace_2.1-1  mice_3.16.0       scales_1.3.0     
## [33] gtools_3.9.5      iterators_1.0.14  MASS_7.3-61       cli_3.6.3        
## [37] rmarkdown_2.28    ragg_1.3.3        generics_0.1.3    rstudioapi_0.17.1
## [41] tzdb_0.4.0        minqa_1.2.8       cachem_1.1.0      splines_4.4.1    
## [45] parallel_4.4.1    base64enc_0.1-3   vctrs_0.6.5       boot_1.3-30      
## [49] glmnet_4.1-8      Matrix_1.6-5      jsonlite_1.8.9    hms_1.1.3        
## [53] mitml_0.4-5       Formula_1.2-5     htmlTable_2.4.3   systemfonts_1.1.0
## [57] foreach_1.5.2     jquerylib_0.1.4   glue_1.8.0        nloptr_2.1.1     
## [61] pan_1.9           codetools_0.2-19  stringi_1.8.4     gtable_0.3.6     
## [65] shape_1.4.6.1     quadprog_1.5-8    lme4_1.1-35.5     munsell_0.5.1    
## [69] pillar_1.9.0      htmltools_0.5.8.1 R6_2.5.1          textshaping_0.4.0
## [73] pbivnorm_0.6.0    evaluate_1.0.1    lattice_0.22-5    highr_0.11       
## [77] backports_1.5.0   broom_1.0.7       bslib_0.8.0       Rcpp_1.0.13      
## [81] gridExtra_2.3     nlme_3.1-165      checkmate_2.3.2   mgcv_1.9-1       
## [85] xfun_0.48         pkgconfig_2.0.3

Flynn effect, co-occurrence model

Init

Functions

Simulations

Meta