Functions
#beta of residual
beta_residual = function(pars) {
#residual variance
res_var = 1 - sum(pars^2)
#beta of residual
beta = sqrt(res_var)
beta
}
simulate_data = function(pars, generation) {
# browser()
#simulate data
d = tibble(
cohort = generation,
g = rnorm(n_cohort, mean = generation * -dysgenics_speed),
f1 = rnorm(n_cohort),
f2 = rnorm(n_cohort),
f3 = rnorm(n_cohort)
)
#generation observed scores
for (i in 1:nrow(test_pars)) {
# browser()
d[[str_glue("test_{i}")]] =
pars$g[i] * d$g +
pars$f1[i] * d$f1 +
pars$f2[i] * d$f2 +
pars$f3[i] * d$f3 +
pars$flynn[i] * generation +
rnorm(n_cohort, mean = 0, sd = beta_residual(test_pars[i, 1:4] %>% as.numeric()))
}
d
}
#the actual model
true_model = "
g =~ test_1 + test_2 + test_3 + test_4 + test_5 + test_6 + test_7 + test_8 + test_9
f1 =~ test_1 + test_2 + test_3
f2 =~ test_4 + test_5 + test_6
f3 =~ test_7 + test_8 + test_9
"
#do cfa fit
do_cfa = function(d, model = true_model) {
#fit model
fit = cfa(model, data = d %>% select(starts_with("test")), orthogonal = TRUE)
fit
}
#do mgcfa
do_mgcfa = function(d, model = true_model, group = "cohort") {
# browser()
#move group var
d$group = d[[group]]
#data subset
d_sub = d %>% select(starts_with("test"), group)
#configural
config_fit = cfa(
model = model,
data = d_sub,
orthogonal = TRUE,
group = "group"
)
#metric
metric_fit = cfa(
model = model,
data = d_sub,
orthogonal = TRUE,
group = "group",
group.equal = c("loadings")
)
#scalar
scalar_fit = cfa(
model = model,
data = d_sub,
orthogonal = TRUE,
group = "group",
group.equal = c("loadings", "intercepts")
)
#strict
strict_fit = cfa(
model = model,
data = d_sub,
orthogonal = TRUE,
group = "group",
group.equal = c("loadings", "intercepts", "residuals")
)
#collect results
list(
config_fit = config_fit,
metric_fit = metric_fit,
scalar_fit = scalar_fit,
strict_fit = strict_fit,
model_fit = rbind(
config_fit %>% fitmeasures() %>% as.list() %>% as_tibble() %>% mutate(model = "configural"),
metric_fit %>% fitmeasures() %>% as.list() %>% as_tibble() %>% mutate(model = "metric"),
scalar_fit %>% fitmeasures() %>% as.list() %>% as_tibble() %>% mutate(model = "scalar"),
strict_fit %>% fitmeasures() %>% as.list() %>% as_tibble() %>% mutate(model = "strict")
) %>% select(model, everything())
)
}
Simulations
#parameters
test_pars = tibble(
g = c(0.2, 0.5, 0.3,
0.5, 0.7, 0.4,
0.5, 0.3, 0.7),
f1 = c(0.5, 0.4, 0.4,
0, 0, 0,
0, 0, 0),
f2 = c(0, 0, 0,
0.4, 0.5, 0.6,
0, 0, 0),
f3 = c(0, 0, 0,
0, 0, 0,
0.4, 0.4, 0.5),
flynn = c(0.3, 0.4, 0.2,
0.3, 0.2, 0.3,
0, 0.1, 0.2)
)
cor(test_pars)
## g f1 f2 f3 flynn
## g 1.0000 -0.566 0.301 0.256 -0.0664
## f1 -0.5660 1.000 -0.486 -0.491 0.4811
## f2 0.3006 -0.486 1.000 -0.486 0.2720
## f3 0.2556 -0.491 -0.486 1.000 -0.7084
## flynn -0.0664 0.481 0.272 -0.708 1.0000
#simulate generations
#1 IQ per 10 years, 3 IQ per generation, 3/15 = d = 0.2
dysgenics_speed = 0.2
#sample size per cohort
n_cohort = 1000
#number of generations
#generation is 30 years, 1870 to 2020 is 5 generations
generations = 5
#simulate data
set.seed(1)
d = map_dfr(0:(generations-1), ~simulate_data(test_pars, .x))
#verify factor structure
d %>% filter(cohort == 0) %>% select(starts_with("test_")) %>% cor()
## test_1 test_2 test_3 test_4 test_5 test_6 test_7 test_8 test_9
## test_1 1.0000 0.293 0.2796 0.0495 0.0843 0.0601 0.0914 0.0646 0.156
## test_2 0.2930 1.000 0.3140 0.2522 0.3612 0.2517 0.2468 0.1295 0.324
## test_3 0.2796 0.314 1.0000 0.1301 0.1950 0.1356 0.1893 0.0745 0.217
## test_4 0.0495 0.252 0.1301 1.0000 0.5892 0.4901 0.2878 0.1952 0.410
## test_5 0.0843 0.361 0.1950 0.5892 1.0000 0.5980 0.4114 0.2203 0.530
## test_6 0.0601 0.252 0.1356 0.4901 0.5980 1.0000 0.2408 0.1600 0.345
## test_7 0.0914 0.247 0.1893 0.2878 0.4114 0.2408 1.0000 0.3308 0.574
## test_8 0.0646 0.130 0.0745 0.1952 0.2203 0.1600 0.3308 1.0000 0.421
## test_9 0.1562 0.324 0.2174 0.4100 0.5296 0.3452 0.5745 0.4214 1.000
d %>% filter(cohort == 0) %>% select(starts_with("test_")) %>% describe2()
gen_0_fit = d %>% filter(cohort == 0) %>% do_cfa()
gen_0_fit %>% parameterEstimates(standardized = T)
#do estimates match expectations?
test_pars$g_obs = gen_0_fit %>% parameterEstimates(standardized = T) %>% filter(lhs == "g", op == "=~") %>% pull(std.all)
test_pars$f1_obs = 0
test_pars$f1_obs[1:3] = gen_0_fit %>% parameterEstimates(standardized = T) %>% filter(lhs == "f1", op == "=~") %>% pull(std.all)
test_pars$f2_obs = 0
test_pars$f2_obs[4:6] = gen_0_fit %>% parameterEstimates(standardized = T) %>% filter(lhs == "f2", op == "=~") %>% pull(std.all)
test_pars$f3_obs = 0
test_pars$f3_obs[7:9] = gen_0_fit %>% parameterEstimates(standardized = T) %>% filter(lhs == "f3", op == "=~") %>% pull(std.all)
#close enough
test_pars
#trajectory of test scores over time
d %>%
group_by(cohort) %>%
summarise(across(starts_with("test_"), mean)) %>%
pivot_longer(cols = -cohort, names_to = "test", values_to = "mean") %>%
ggplot(aes(x = cohort, y = mean, color = test)) +
geom_line()

GG_save("figs/test_scores_over_time.png")
#overall IQ changes over time
#use 1 factor model
fa_0 = fa(
d %>% filter(cohort == 0) %>% select(starts_with("test_"))
)
fa_0
## Factor Analysis using method = minres
## Call: fa(r = d %>% filter(cohort == 0) %>% select(starts_with("test_")))
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## test_1 0.20 0.041 0.96 1
## test_2 0.47 0.218 0.78 1
## test_3 0.31 0.095 0.90 1
## test_4 0.62 0.389 0.61 1
## test_5 0.79 0.631 0.37 1
## test_6 0.59 0.345 0.66 1
## test_7 0.58 0.333 0.67 1
## test_8 0.38 0.145 0.85 1
## test_9 0.74 0.544 0.46 1
##
## MR1
## SS loadings 2.74
## Proportion Var 0.30
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 36 with the objective function = 2.34 with Chi Square = 2332
## df of the model are 27 and the objective function was 0.51
##
## The root mean square of the residuals (RMSR) is 0.09
## The df corrected root mean square of the residuals is 0.1
##
## The harmonic n.obs is 1000 with the empirical chi square 572 with prob < 1.8e-103
## The total n.obs was 1000 with Likelihood Chi Square = 510 with prob < 1.2e-90
##
## Tucker Lewis Index of factoring reliability = 0.719
## RMSEA index = 0.134 and the 90 % confidence intervals are 0.124 0.144
## BIC = 324
## Fit based upon off diagonal values = 0.92
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.92
## Multiple R square of scores with factors 0.84
## Minimum correlation of possible factor scores 0.68
#save scores as IQ
d$IQ = psych::factor.scores(
d %>% select(starts_with("test")) %>% as.matrix(),
fa_0) %>%
extract2("scores") %>%
.[, 1]
#rescale
d$IQ = d$IQ %>% standardize(focal_group = (d$cohort == 0))
d$IQ = d$IQ * 15 + 100
#ensure it is right
describe2(
d %>% select(IQ, g),
d$cohort
)
describe2(
d %>% select(starts_with("test_")),
d$cohort
) %>% filter(group == 4)
GG_denhist(d, "IQ", "cohort")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#IQ increase
d %>%
group_by(cohort) %>%
summarise(mean_IQ = mean(IQ)) %>%
ggplot(aes(x = cohort, y = mean_IQ)) +
geom_line()

GG_save("figs/IQ_over_time.png")
#g decrease
d %>%
group_by(cohort) %>%
summarise(mean_g = mean(g)) %>%
ggplot(aes(x = cohort, y = mean_g)) +
geom_line() +
theme_minimal()

#Jensen method
test_pars$flynn_effect = d %>%
filter(cohort == max(d$cohort)) %>%
summarise(across(starts_with("test_"), mean)) %>%
unlist()
test_pars$test = str_glue("test_{1:nrow(test_pars)}")
#empirical loadings
test_pars %>%
GG_scatter("g_obs", "flynn_effect", case_names = "test", check_overlap = F) +
labs(
x = "Observed g loading",
y = "Flynn effect"
)
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/Jensen_method_empirical_loadings.png")
## `geom_smooth()` using formula = 'y ~ x'
#model loadings
test_pars %>%
GG_scatter("g", "flynn", case_names = "test", check_overlap = F) +
labs(
x = "True g loading",
y = "Flynn effect"
)
## `geom_smooth()` using formula = 'y ~ x'

#flynn effect speed
test_pars %>%
GG_scatter("flynn", "flynn_effect", case_names = "test") +
labs(
x = "Model flynn effect (per generation)",
y = "Observed Flynn effect (first vs. last generation)"
)
## `geom_smooth()` using formula = 'y ~ x'

#mgcfa
mgcfa_0vs4 = do_mgcfa(
d %>% filter(cohort %in% c(0, max(d$cohort))),
group = "cohort"
)
mgcfa_0vs4$model_fit %>% select(model, pvalue, cfi, tli, rmsea)
anova(
mgcfa_0vs4$config_fit,
mgcfa_0vs4$metric_fit,
mgcfa_0vs4$scalar_fit,
mgcfa_0vs4$strict_fit
)