LEVANTE EF Tasks - Interim Report

Author

Mike Frank

1 Data loading

Code
library(tidyverse)
library(glue)
library(here)
here() starts at /Users/mcfrank/Projects/levante-pilots
Code
library(mirt)
Loading required package: stats4
Loading required package: lattice
Code
library(ggrepel)
# require(arm)
library(lavaan)
This is lavaan 0.6-19
lavaan is FREE software! Please report any bugs.
Code
library(tidySEM)
Loading required package: OpenMx
OpenMx may run faster if it is compiled to take advantage of multiple cores.
Registered S3 method overwritten by 'tidySEM':
  method          from  
  predict.MxModel OpenMx
Code
source(here("02_score_data","irt_helpers.R"))
source(here("03_summaries", "plotting_helper.R"))

sites <- c("ca_pilot", "co_pilot", "de_pilot")

task_data_nested <- sites |>
  set_names() |>
  map(\(s) read_rds(here(glue("01_processed_data/{s}/task_data_nested.rds")))) |>
  list_rbind(names_to = "site")

task_data_combined <- task_data_nested |>
  select(-task_id) |>
  unnest(data) 

ef <- filter(task_data_combined, 
             task_id %in% c("hearts-and-flowers","same-different-selection","memory-game"))

Get ages.

Code
participants <- sites |>
  set_names() |>
  map(\(s) read_rds(here(glue("00_prepped_data/{s}/participants.rds")))) |>
  list_rbind(names_to = "site")

run_ages <- participants |>
  select(user_id, ages) |>
  unnest(ages)

# this is useful below for various simplifications
ages <- run_ages |>
  group_by(user_id) |>
  summarise(age = mean(age))

ef <- left_join(ef, run_ages)
Joining with `by = join_by(user_id, run_id)`

1.1 Load multigroup models

Code
best_multigroup <- readRDS(here("02_scored_data", "irt_outputs", "multigroup_best_outputs.rds")) 
multigroup_scores <- readRDS(here("02_scored_data", "scores", "scores_multigroup.rds"))

1.2 Overall sumscores

First plot sumscores.

Code
ef_runs <- ef |>
  group_by(site, user_id, run_id, task_id) |>
  summarise(correct = mean(correct), 
            age = mean(age))
`summarise()` has grouped output by 'site', 'user_id', 'run_id'. You can
override using the `.groups` argument.
Code
ggplot(ef_runs, aes(x = age, y = correct)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "loess", lambda = 1) +
  facet_grid(site ~ task_id) + 
  ylim(0,1) 
Warning in geom_smooth(method = "loess", lambda = 1): Ignoring unknown
parameters: `lambda`
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 214 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 214 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_smooth()`).

Now we turn to task-by-task analyses.

2 Hearts and Flowers

Zoom in on HF.

Code
hf <- ef |>
  filter(task_id == "hearts-and-flowers") 

Look at all phases.

2.1 Sumscore

Code
hf_blocks <- hf |>
  mutate(corpus_trial_type = fct_relevel(corpus_trial_type, "hearts", "flowers", "mixed")) |>
  group_by(site, user_id, run_id, task_id, corpus_trial_type) |>
  summarise(correct = mean(correct), 
            age = mean(age), 
            n = n())
`summarise()` has grouped output by 'site', 'user_id', 'run_id', 'task_id'. You
can override using the `.groups` argument.
Code
ggplot(hf_blocks,
       aes(x = age, y = correct)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "glm", method.args = list(family = "binomial")) +
  geom_hline(yintercept = .5, lty = 2) + 
  ylim(0,1) + 
  facet_grid(site ~ corpus_trial_type)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 226 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Warning: Removed 226 rows containing missing values or values outside the scale range
(`geom_point()`).

Note that there are a lot of younger kids BELOW chance. Let’s see whether this reflects missing data.

Code
ggplot(hf_blocks, aes(x = n, y = correct)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
  facet_grid(site ~ corpus_trial_type)
`geom_smooth()` using formula = 'y ~ x'

NO - this is not because of missing data. Something else is happening.
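One way to quantify this (a sketch, not part of the original pipeline): flag runs whose accuracy is reliably below chance using an exact binomial test, assuming two-alternative chance of .5, then tabulate the flags by age group. The .05 cutoff and the age bins are arbitrary illustration choices.

Code
# Sketch: count runs reliably below chance (p = .5) in each block.
# Assumes trial-level `correct` has no NAs (as in the sumscore code above).
hf_below_chance <- hf |>
  group_by(site, user_id, run_id, corpus_trial_type) |>
  summarise(k = sum(correct), n = n(), age = mean(age), .groups = "drop") |>
  mutate(p_below = map2_dbl(k, n, \(k, n) binom.test(k, n, p = .5,
                                                     alternative = "less")$p.value),
         below_chance = p_below < .05)

hf_below_chance |>
  mutate(age_group = cut(age, c(5, 7, 9, 11, 13), include.lowest = TRUE)) |>
  count(site, corpus_trial_type, age_group, below_chance)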

Code
hf_runs <- hf |>
  group_by(site, user_id, run_id, task_id) |>
  summarise(logit = coef(glm(correct ~ 1, family = "binomial"))[1],
            bayeslogit = coef(arm::bayesglm(correct ~ 1, family = "binomial",
                                            prior.scale.for.intercept = .1))[1],
            correct_rt = mean(as.numeric(rt[correct]), na.rm=TRUE),
            correct = mean(correct),
            age = mean(age))
`summarise()` has grouped output by 'site', 'user_id', 'run_id'. You can
override using the `.groups` argument.

Histograms.

Code
ggplot(filter(ef_runs, task_id == "hearts-and-flowers") |>
         mutate(age_group = cut(age, c(5, 7, 9, 11, 13), include.lowest = TRUE)) |>
         filter(!is.na(age_group)),
       aes(x = correct)) + 
  geom_histogram(binwidth = .25) +
  geom_vline(xintercept = .5, lty = 2) +
  scale_x_continuous(breaks = c(0,.25, .5, .75, 1)) + 
  facet_grid(site ~ age_group)

Reaction time.

Code
ggplot(hf_runs,
       aes(x = age, y = correct_rt)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "lm") +
  geom_hline(yintercept = 0, lty = 2) + 
  # ylim(0,1) + 
  facet_grid(site ~ task_id)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 78 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 78 rows containing missing values or values outside the scale range
(`geom_point()`).

Code
ggplot(hf, aes(x = as.numeric(rt))) + 
  geom_histogram() + 
  scale_x_log10(breaks = c(100,200, 500,1000,2000,3000,5000,10000)) + coord_flip()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 15 rows containing non-finite outside the scale range
(`stat_bin()`).

2.2 IRT

We now use the multigroup IRT models.

Code
multigroup_scores_hf <- multigroup_scores |>
  filter(task_id == "hearts-and-flowers") |>
  select(site, task_id, user_id, run_id, metric_type, metric_value ) |>
  left_join(run_ages)
Joining with `by = join_by(user_id, run_id)`
Code
ggplot(multigroup_scores_hf, aes(x = age, y = metric_value, col = site)) + 
  geom_point() + 
  geom_smooth()
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 78 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 78 rows containing missing values or values outside the scale range
(`geom_point()`).

Look at coefficients.

Code
hf_coefs <- filter(best_multigroup, task_id == "hearts-and-flowers")$coefs[[1]] |>
  separate(item, sep = "_", into = c("task","block","trial", "number")) |>
  select(-number) |>
  distinct()

ggplot(hf_coefs, aes(x = a1, y = -d, col = block, pch = trial)) + 
  geom_point() + 
  geom_text_repel(aes(label = trial)) + 
  xlab("Slope") + 
  ylab("Difficulty") + 
  facet_wrap(~site) 
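For reference: if these coefficients come from mirt’s slope-intercept parameterization (an assumption here), the y-axis above is the negated intercept, and the conventional 2PL difficulty is b = -d / a1. A quick sketch for converting and ranking items:

Code
# Sketch: convert slope-intercept coefficients (a1, d) to conventional
# difficulty b = -d / a1, assuming P(correct) = logistic(a1 * theta + d).
hf_coefs |>
  mutate(difficulty_b = -d / a1) |>
  arrange(desc(difficulty_b)) |>
  head()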

3 Memory game

Merge in item ids.

Code
id_map <- read_csv(here("02_score_data/item_metadata/pilot-item-ID mapping.csv"))
Rows: 691 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): item_uid, trials

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Code
trial_id_map <- id_map |>
  mutate(trials = trials |> str_split(",")) |>
  unnest(trials) |>
  rename(trial_id = trials) |>
  mutate(trial_id = str_trim(trial_id))
Code
mg <- ef |>
  filter(task_id == "memory-game") |>
  left_join(trial_id_map) |>
  filter(!is.na(item_uid)) |>
  separate(item_uid , into = c("task", "direction", "size", "span"), sep = "_") |>
  mutate(span = as.numeric(str_replace(span, "len",""))) |>
  group_by(run_id, direction) |>
  mutate(max_span = max(span))
Joining with `by = join_by(trial_id)`

3.1 Sumscore

Code
mg_spans <- mg |>
  group_by(site, span, size, direction) |>
  summarise(n = n(), 
            correct = mean(correct))
`summarise()` has grouped output by 'site', 'span', 'size'. You can override
using the `.groups` argument.
Code
ggplot(mg_spans, aes(x = span, y = correct, col = direction, pch = size)) + 
  geom_point(aes(size = n)) + 
  geom_line(aes(lty = size)) + 
  facet_wrap(~site)

Code
mg |>
  filter(direction == "forward", span > 1) |>
  mutate(age_rounded = as_factor(round(age))) |>
  group_by(age_rounded, span) |>
  summarise(correct = mean(correct), 
            n = n()) |>
  ggplot(aes(x = span, y = correct, col = age_rounded)) + 
  geom_point(aes(size = n)) + 
  geom_smooth(method = "lm", se =FALSE)
`summarise()` has grouped output by 'age_rounded'. You can override using the
`.groups` argument.
`geom_smooth()` using formula = 'y ~ x'

Code
mg |>
  filter(direction == "forward", span > 1) |>
  mutate(max_span = as_factor(max_span)) |>
  group_by(max_span, span) |>
  summarise(correct = mean(correct), 
            n = n()) |>
  ggplot(aes(x = span, y = correct, col = max_span)) + 
  geom_point(aes(size = n)) + 
  geom_line()
`summarise()` has grouped output by 'max_span'. You can override using the
`.groups` argument.

Code
mg_runs <- mg |>
  filter(correct) |>
  group_by(site, user_id, run_id, direction) |>
  summarise(max_span = max(span), 
            age = mean(age))
`summarise()` has grouped output by 'site', 'user_id', 'run_id'. You can
override using the `.groups` argument.
Code
ggplot(mg_runs, aes(x = age, y = max_span, col = direction)) + 
  geom_jitter(alpha = .5, height = .1, width = 0) + 
  geom_smooth(method = "lm")+ 
  facet_grid(direction~site, scales = "free_y")
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 39 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning in qt((1 - level)/2, df): NaNs produced
Warning: Removed 39 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
-Inf

3.2 IRT

We now use the multigroup IRT models.

Code
multigroup_scores_mg <- multigroup_scores |>
  filter(task_id == "memory-game") |>
  select(site, task_id, user_id, run_id, metric_type, metric_value ) |>
  left_join(run_ages)
Joining with `by = join_by(user_id, run_id)`
Code
ggplot(multigroup_scores_mg, aes(x = age, y = metric_value, col = site)) + 
  geom_point() + 
  geom_smooth()
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 41 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 41 rows containing missing values or values outside the scale range
(`geom_point()`).

Look at coefficients.

Code
mg_coefs <- filter(best_multigroup, task_id == "memory-game")$coefs[[1]] |>
  separate(item, sep = "_", into = c("task", "direction", "size", "span", "number")) |>
  mutate(span = as.numeric(str_replace(span, "len",""))) |>
  select(-task, -number) |>
  distinct()

ggplot(mg_coefs, aes(x = span, y = -d, col = direction, pch = size)) + 
  geom_point() + 
  geom_line() + 
  # geom_text_repel(aes(label = direction)) + 
  xlab("Slope") + 
  ylab("Difficulty") + 
  facet_wrap(~site) 

Note that we have used grid3 very little, so it is dropped from most analyses.
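As a quick check on that sparsity (assuming the `size` field parsed from `item_uid` above encodes the grid, e.g. grid3), count trials per grid size:

Code
# Sketch: trial counts by grid size and direction; sparse grid3 cells are why
# it drops out of most analyses.
mg |>
  ungroup() |>
  count(site, size, direction)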

4 SDS

Code
sds <- filter(ef, task_id == "same-different-selection") |>
  left_join(trial_id_map) |>
  filter(!is.na(item_uid)) |>
  separate(item_uid , into = c("task", "block", "trial"), sep = "_") 
Joining with `by = join_by(trial_id)`

4.1 Sumscore

Code
sds_runs <- sds |>
  group_by(site, user_id, run_id, task_id) |>
  summarise(correct = mean(correct), 
            age = mean(age))
`summarise()` has grouped output by 'site', 'user_id', 'run_id'. You can
override using the `.groups` argument.
Code
sds_runs_block <- sds |>
  filter(block != "3unique") |>
  mutate(block = fct_relevel(block, "dimensions","same")) |>
  group_by(site, user_id, run_id, task_id, block) |>
  summarise(correct = mean(correct), 
            age = mean(age))
`summarise()` has grouped output by 'site', 'user_id', 'run_id', 'task_id'. You
can override using the `.groups` argument.
Code
ggplot(sds_runs_block, aes(x = age, y = correct)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "lm", se=FALSE) + 
  facet_grid(site ~ block)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 291 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 291 rows containing missing values or values outside the scale range
(`geom_point()`).

4.2 IRT

Code
multigroup_scores_sds <- multigroup_scores |>
  filter(task_id == "same-different-selection") |>
  select(site, task_id, user_id, run_id, metric_type, metric_value ) |>
  left_join(run_ages)
Joining with `by = join_by(user_id, run_id)`
Code
ggplot(multigroup_scores_sds, aes(x = age, y = metric_value, col = site)) + 
  geom_point() + 
  geom_smooth()
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 76 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 76 rows containing missing values or values outside the scale range
(`geom_point()`).

Look at coefficients.

Code
sds_coefs <- filter(best_multigroup, task_id == "same-different-selection")$coefs[[1]] |>
  separate(item, sep = "_", into = c("task", "block","trial")) |>
  filter(block != "3unique") |>
  mutate(block = fct_relevel(block, "dimensions","same")) |>
  distinct()
Warning: Expected 3 pieces. Additional pieces discarded in 372 rows [1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
Code
ggplot(sds_coefs, aes(x = block, y = -d, col = trial)) + 
  geom_point() + 
  geom_line(aes(group = trial)) + 
  coord_flip() +
  # geom_text_repel(aes(label = direction)) + 
  xlab("block") + 
  ylab("Difficulty") 

5 EF factor analysis

5.1 Only LEVANTE tasks

Code
ef_scores <- filter(multigroup_scores, 
                    task_id %in% c("hearts-and-flowers", "memory-game", "same-different-selection"))

ef_scores_irt <- ef_scores |>
  select(site, task_id, metric_value, user_id) |>
  pivot_wider(names_from = "task_id", values_from = "metric_value", 
              id_cols = c("user_id", "site")) |>
  janitor::clean_names() |>
  mutate(across(hearts_and_flowers:same_different_selection, ~ scale(.x)[,1]), 
         ca_site = site == "ca_pilot",
         de_site = site == "de_pilot") |>
  left_join(ages)
Joining with `by = join_by(user_id)`
Code
cfa_model <-  "
ef =~ hearts_and_flowers + memory_game + same_different_selection
ef ~ age
"

fit <- cfa(cfa_model, ef_scores_irt, std.lv=TRUE, missing='fiml')
Warning: lavaan->lav_data_full():  
   118 cases were deleted due to missing values in exogenous variable(s), 
   while fixed.x = TRUE.
Code
summary(fit, fit.measures=TRUE, standardize=TRUE)
lavaan 0.6-19 ended normally after 43 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        10

                                                  Used       Total
  Number of observations                           499         617
  Number of missing patterns                         7            

Model Test User Model:
                                                      
  Test statistic                                59.708
  Degrees of freedom                                 2
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                               582.932
  Degrees of freedom                                 6
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.900
  Tucker-Lewis Index (TLI)                       0.700
                                                      
  Robust Comparative Fit Index (CFI)             0.887
  Robust Tucker-Lewis Index (TLI)                0.660

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -1626.547
  Loglikelihood unrestricted model (H1)      -1596.693
                                                      
  Akaike (AIC)                                3273.093
  Bayesian (BIC)                              3315.219
  Sample-size adjusted Bayesian (SABIC)       3283.479

Root Mean Square Error of Approximation:

  RMSEA                                          0.240
  90 Percent confidence interval - lower         0.190
  90 Percent confidence interval - upper         0.295
  P-value H_0: RMSEA <= 0.050                    0.000
  P-value H_0: RMSEA >= 0.080                    1.000
                                                      
  Robust RMSEA                                   0.278
  90 Percent confidence interval - lower         0.223
  90 Percent confidence interval - upper         0.338
  P-value H_0: Robust RMSEA <= 0.050             0.000
  P-value H_0: Robust RMSEA >= 0.080             1.000

Standardized Root Mean Square Residual:

  SRMR                                           0.062

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ef =~                                                                 
    herts_nd_flwrs    0.415    0.035   11.972    0.000    0.722    0.727
    memory_game       0.397    0.041    9.651    0.000    0.691    0.723
    sm_dffrnt_slct    0.308    0.038    8.038    0.000    0.536    0.527

Regressions:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ef ~                                                                  
    age               0.645    0.063   10.225    0.000    0.371    0.818

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .herts_nd_flwrs   -2.347    0.167  -14.087    0.000   -2.347   -2.363
   .memory_game      -2.240    0.153  -14.631    0.000   -2.240   -2.344
   .sm_dffrnt_slct   -1.654    0.166   -9.964    0.000   -1.654   -1.629

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .herts_nd_flwrs    0.466    0.042   11.151    0.000    0.466    0.472
   .memory_game       0.435    0.045    9.570    0.000    0.435    0.477
   .sm_dffrnt_slct    0.744    0.057   13.130    0.000    0.744    0.722
   .ef                1.000                               0.331    0.331
Code
graph_sem(model = fit, text_size = 3) + 
  theme(panel.background = element_rect(fill = "white"))

Multi-group models: testing measurement invariance across sites (configural, weak, strong).

Code
# configural invariance
fit1 <- cfa(cfa_model, ef_scores_irt, std.lv=TRUE, missing='fiml', 
           group = "site")
Warning: lavaan->lav_data_full():  
   118 cases were deleted in group co_pilot due to missing values in 
   exogenous variable(s), while fixed.x = TRUE.
Code
# weak invariance
fit2 <- cfa(cfa_model, ef_scores_irt, std.lv=TRUE, missing='fiml', 
           group = "site", group.equal = "loadings")
Warning: lavaan->lav_data_full():  
   118 cases were deleted in group co_pilot due to missing values in 
   exogenous variable(s), while fixed.x = TRUE.
Code
# strong invariance
fit3 <- cfa(cfa_model, ef_scores_irt, std.lv=TRUE, missing='fiml', 
           group = "site", group.equal = c("intercepts","loadings"))
Warning: lavaan->lav_data_full():  
   118 cases were deleted in group co_pilot due to missing values in 
   exogenous variable(s), while fixed.x = TRUE.
Code
# model comparison tests
lavTestLRT(fit1, fit2, fit3)

Chi-Squared Difference Test

     Df    AIC    BIC   Chisq Chisq diff   RMSEA Df diff Pr(>Chisq)    
fit1  6 2802.8 2929.2  13.204                                          
fit2 10 2815.7 2925.2  34.098     20.894 0.15935       4  0.0003323 ***
fit3 14 3011.9 3104.6 238.286    204.188 0.54853       4  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
graph_sem(model = fit1, text_size = 3) + 
  theme(panel.background = element_rect(fill = "white"))

5.2 Add MEFS

Code
mefs_scores <- read_csv(here("02_scored_data","mefs_data","LEVANTE_20250403_1132.csv")) |>
  janitor::clean_names() |>
  select(child_id, a1_total_score, a1_standard_score) |>
  rename(user_id = child_id, 
         mefs_total = a1_total_score,
         mefs_standard = a1_standard_score
         ) 
Rows: 416 Columns: 436
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (153): Org Name, Child ID, Birth Month / Year, Gender, Special Educatio...
dbl  (270): RS ID, A1_Assessment ID, A1_Age (Months), A1_Basal Level, A1_Hig...
lgl   (12): Group Name, Group ID, NCES ID, First Name, Last Name, Ethnicity,...
dttm   (1): A1_Date of Test

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Code
ef_scores_irt <- ef_scores_irt |>
  left_join(mefs_scores) |>
  mutate(mefs_total = scale(mefs_total)[,1],
         mefs_standard = scale(mefs_standard)[,1])
Joining with `by = join_by(user_id)`
Code
cfa_model_mefs <-  "
ef =~ hearts_and_flowers + memory_game + same_different_selection + mefs_standard
ef ~ age
"

fit <- cfa(cfa_model_mefs, ef_scores_irt, std.lv=TRUE, missing='fiml')
Warning: lavaan->lav_data_full():  
   118 cases were deleted due to missing values in exogenous variable(s), 
   while fixed.x = TRUE.
Code
summary(fit, fit.measures=TRUE, standardize=TRUE)
lavaan 0.6-19 ended normally after 44 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        13

                                                  Used       Total
  Number of observations                           501         619
  Number of missing patterns                        12            

Model Test User Model:
                                                      
  Test statistic                                64.023
  Degrees of freedom                                 5
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                               729.995
  Degrees of freedom                                10
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.918
  Tucker-Lewis Index (TLI)                       0.836
                                                      
  Robust Comparative Fit Index (CFI)             0.921
  Robust Tucker-Lewis Index (TLI)                0.842

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -1953.761
  Loglikelihood unrestricted model (H1)      -1921.750
                                                      
  Akaike (AIC)                                3933.522
  Bayesian (BIC)                              3988.338
  Sample-size adjusted Bayesian (SABIC)       3947.075

Root Mean Square Error of Approximation:

  RMSEA                                          0.153
  90 Percent confidence interval - lower         0.121
  90 Percent confidence interval - upper         0.188
  P-value H_0: RMSEA <= 0.050                    0.000
  P-value H_0: RMSEA >= 0.080                    1.000
                                                      
  Robust RMSEA                                   0.173
  90 Percent confidence interval - lower         0.132
  90 Percent confidence interval - upper         0.217
  P-value H_0: Robust RMSEA <= 0.050             0.000
  P-value H_0: Robust RMSEA >= 0.080             1.000

Standardized Root Mean Square Residual:

  SRMR                                           0.051

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ef =~                                                                 
    herts_nd_flwrs    0.427    0.031   13.781    0.000    0.708    0.711
    memory_game       0.434    0.038   11.388    0.000    0.720    0.753
    sm_dffrnt_slct    0.339    0.037    9.194    0.000    0.562    0.554
    mefs_standard     0.418    0.040   10.448    0.000    0.692    0.696

Regressions:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ef ~                                                                  
    age               0.599    0.051   11.680    0.000    0.361    0.798

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .herts_nd_flwrs   -2.248    0.168  -13.346    0.000   -2.248   -2.258
   .memory_game      -2.276    0.148  -15.369    0.000   -2.276   -2.381
   .sm_dffrnt_slct   -1.695    0.160  -10.596    0.000   -1.695   -1.671
   .mefs_standard    -2.092    0.179  -11.658    0.000   -2.092   -2.102

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .herts_nd_flwrs    0.490    0.042   11.568    0.000    0.490    0.494
   .memory_game       0.396    0.041    9.570    0.000    0.396    0.433
   .sm_dffrnt_slct    0.713    0.054   13.128    0.000    0.713    0.693
   .mefs_standard     0.511    0.054    9.506    0.000    0.511    0.516
   .ef                1.000                               0.364    0.364
Code
graph_sem(model = fit, text_size = 3) + 
  theme(panel.background = element_rect(fill = "white"))

Code
# configural invariance
fit1 <- cfa(cfa_model_mefs, ef_scores_irt, std.lv=TRUE, missing='fiml', 
           group = "site")
Warning: lavaan->lav_data_full():  
   118 cases were deleted in group co_pilot due to missing values in 
   exogenous variable(s), while fixed.x = TRUE.
Code
# weak invariance
fit2 <- cfa(cfa_model_mefs, ef_scores_irt, std.lv=TRUE, missing='fiml', 
           group = "site", group.equal = "loadings")
Warning: lavaan->lav_data_full():  
   118 cases were deleted in group co_pilot due to missing values in 
   exogenous variable(s), while fixed.x = TRUE.
Code
# strong invariance
fit3 <- cfa(cfa_model_mefs, ef_scores_irt, std.lv=TRUE, missing='fiml', 
           group = "site", group.equal = c("intercepts","loadings"))
Warning: lavaan->lav_data_full():  
   118 cases were deleted in group co_pilot due to missing values in 
   exogenous variable(s), while fixed.x = TRUE.
Code
# model comparison tests
lavTestLRT(fit1, fit2, fit3)

Chi-Squared Difference Test

     Df    AIC    BIC   Chisq Chisq diff   RMSEA Df diff Pr(>Chisq)    
fit1 15 3459.4 3623.8  41.306                                          
fit2 21 3476.6 3615.7  70.457     29.151 0.15200       6  5.696e-05 ***
fit3 27 3690.5 3804.4 296.452    225.994 0.46857       6  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
graph_sem(model = fit1, text_size = 3) + 
  theme(panel.background = element_rect(fill = "white"))