LEVANTE EF Tasks - Interim Report

Author

Mike Frank

1 Data loading

Code
library(tidyverse)
library(glue)
library(here)
here() starts at /Users/mcfrank/Projects/levante-pilots
Code
library(mirt)
Loading required package: stats4
Loading required package: lattice
Code
library(ggrepel)
# require(arm)
library(lavaan)
This is lavaan 0.6-19
lavaan is FREE software! Please report any bugs.
Code
library(tidySEM)
Loading required package: OpenMx
OpenMx may run faster if it is compiled to take advantage of multiple cores.
Registered S3 method overwritten by 'tidySEM':
  method          from  
  predict.MxModel OpenMx
Code
source(here("02_score_data","irt_helpers.R"))
source(here("03_summaries", "plotting_helper.R"))

sites <- c("ca_pilot", "co_pilot", "de_pilot")

task_data_nested <- sites |>
  set_names() |>
  map(\(s) read_rds(here(glue("01_processed_data/{s}/task_data_nested.rds")))) |>
  list_rbind(names_to = "site")

task_data_combined <- task_data_nested |>
  select(-task_id) |>
  unnest(data) 

ef <- filter(task_data_combined, 
             task_id %in% c("hearts-and-flowers","same-different-selection","memory-game"))

Get ages.

Code
participants <- sites |>
  set_names() |>
  map(\(s) read_rds(here(glue("00_prepped_data/{s}/participants.rds")))) |>
  list_rbind(names_to = "site")

run_ages <- participants |>
  select(user_id, ages) |>
  unnest(ages)

# this is useful below for various simplifications
ages <- run_ages |>
  group_by(user_id) |>
  summarise(age = mean(age))

ef <- left_join(ef, run_ages)
Joining with `by = join_by(user_id, run_id)`

1.1 Load multigroup models

Code
best_multigroup <- readRDS(here("02_scored_data", "irt_outputs", "multigroup_best_outputs.rds")) 
multigroup_scores <- readRDS(here("02_scored_data", "scores", "scores_multigroup.rds"))

1.2 Overall sumscores

First plot sumscores.

Code
ef_runs <- ef |>
  group_by(site, user_id, run_id, task_id) |>
  summarise(correct = mean(correct), 
            age = mean(age))
`summarise()` has grouped output by 'site', 'user_id', 'run_id'. You can
override using the `.groups` argument.
Code
ggplot(ef_runs, aes(x = age, y = correct)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "loess", lambda = 1) +
  facet_grid(site ~ task_id) + 
  ylim(0,1) 
Warning in geom_smooth(method = "loess", lambda = 1): Ignoring unknown
parameters: `lambda`
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 214 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 214 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_smooth()`).

Now we turn to task-by-task analyses.

2 Hearts and Flowers

Zoom in on HF.

Code
hf <- ef |>
  filter(task_id == "hearts-and-flowers") 

Look at all phases.

2.1 Sumscore

Code
hf_blocks <- hf |>
  mutate(corpus_trial_type = fct_relevel(corpus_trial_type, "hearts", "flowers", "mixed")) |>
  group_by(site, user_id, run_id, task_id, corpus_trial_type) |>
  summarise(correct = mean(correct), 
            age = mean(age), 
            n = n())
`summarise()` has grouped output by 'site', 'user_id', 'run_id', 'task_id'. You
can override using the `.groups` argument.
Code
ggplot(hf_blocks,
       aes(x = age, y = correct)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "glm", method.args = list(family = "binomial")) +
  geom_hline(yintercept = .5, lty = 2) + 
  ylim(0,1) + 
  facet_grid(site ~ corpus_trial_type)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 226 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Warning: Removed 226 rows containing missing values or values outside the scale range
(`geom_point()`).

Note that there are a lot of younger kids BELOW chance. Let’s see whether this reflects missing data.

Code
ggplot(hf_blocks, aes(x = n, y = correct)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
  facet_grid(site ~ corpus_trial_type)
`geom_smooth()` using formula = 'y ~ x'

NO - this is not because of missing data. Something else is happening.
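One way to quantify this (a sketch, not part of the original pipeline): flag runs whose accuracy is reliably below chance using an exact binomial test, assuming two-alternative chance of .5, then tabulate the flags by age group. The .05 cutoff and the age bins are arbitrary illustration choices.

Code
# Sketch: count runs reliably below chance (p = .5) in each block.
# Assumes trial-level `correct` has no NAs (as in the sumscore code above).
hf_below_chance <- hf |>
  group_by(site, user_id, run_id, corpus_trial_type) |>
  summarise(k = sum(correct), n = n(), age = mean(age), .groups = "drop") |>
  mutate(p_below = map2_dbl(k, n, \(k, n) binom.test(k, n, p = .5,
                                                     alternative = "less")$p.value),
         below_chance = p_below < .05)

hf_below_chance |>
  mutate(age_group = cut(age, c(5, 7, 9, 11, 13), include.lowest = TRUE)) |>
  count(site, corpus_trial_type, age_group, below_chance)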

Code
hf_runs <- hf |>
  group_by(site, user_id, run_id, task_id) |>
  summarise(logit = coef(glm(correct ~ 1, family = "binomial"))[1],
            bayeslogit = coef(arm::bayesglm(correct ~ 1, family = "binomial",
                                            prior.scale.for.intercept = .1))[1],
            correct_rt = mean(as.numeric(rt[correct]), na.rm=TRUE),
            correct = mean(correct),
            age = mean(age))
`summarise()` has grouped output by 'site', 'user_id', 'run_id'. You can
override using the `.groups` argument.

Histograms.

Code
ggplot(filter(ef_runs, task_id == "hearts-and-flowers") |>
         mutate(age_group = cut(age, c(5, 7, 9, 11, 13), include.lowest = TRUE)) |>
         filter(!is.na(age_group)),
       aes(x = correct)) + 
  geom_histogram(binwidth = .25) +
  geom_vline(xintercept = .5, lty = 2) +
  scale_x_continuous(breaks = c(0,.25, .5, .75, 1)) + 
  facet_grid(site ~ age_group)

Reaction time.

Code
ggplot(hf_runs,
       aes(x = age, y = correct_rt)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "lm") +
  geom_hline(yintercept = 0, lty = 2) + 
  # ylim(0,1) + 
  facet_grid(site ~ task_id)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 78 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 78 rows containing missing values or values outside the scale range
(`geom_point()`).

Code
ggplot(hf, aes(x = as.numeric(rt))) + 
  geom_histogram() + 
  scale_x_log10(breaks = c(100,200, 500,1000,2000,3000,5000,10000)) + coord_flip()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 15 rows containing non-finite outside the scale range
(`stat_bin()`).

2.2 IRT

We now use the multigroup IRT models.

Code
multigroup_scores_hf <- multigroup_scores |>
  filter(task_id == "hearts-and-flowers") |>
  select(site, task_id, user_id, run_id, metric_type, metric_value ) |>
  left_join(run_ages)
Joining with `by = join_by(user_id, run_id)`
Code
ggplot(multigroup_scores_hf, aes(x = age, y = metric_value, col = site)) + 
  geom_point() + 
  geom_smooth()
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 78 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 78 rows containing missing values or values outside the scale range
(`geom_point()`).

Look at coefficients.

Code
hf_coefs <- filter(best_multigroup, task_id == "hearts-and-flowers")$coefs[[1]] |>
  separate(item, sep = "_", into = c("task","block","trial", "number")) |>
  select(-number) |>
  distinct()

ggplot(hf_coefs, aes(x = a1, y = -d, col = block, pch = trial)) + 
  geom_point() + 
  geom_text_repel(aes(label = trial)) + 
  xlab("Slope") + 
  ylab("Difficulty") + 
  facet_wrap(~site) 
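For reference: if these coefficients come from mirt’s slope-intercept parameterization (an assumption here), the y-axis above is the negated intercept, and the conventional 2PL difficulty is b = -d / a1. A quick sketch for converting and ranking items:

Code
# Sketch: convert slope-intercept coefficients (a1, d) to conventional
# difficulty b = -d / a1, assuming P(correct) = logistic(a1 * theta + d).
hf_coefs |>
  mutate(difficulty_b = -d / a1) |>
  arrange(desc(difficulty_b)) |>
  head()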

3 Memory game

Merge in item ids.

Code
id_map <- read_csv(here("02_score_data/item_metadata/pilot-item-ID mapping.csv"))
Rows: 691 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): item_uid, trials

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Code
trial_id_map <- id_map |>
  mutate(trials = trials |> str_split(",")) |>
  unnest(trials) |>
  rename(trial_id = trials) |>
  mutate(trial_id = str_trim(trial_id))
Code
mg <- ef |>
  filter(task_id == "memory-game") |>
  left_join(trial_id_map) |>
  filter(!is.na(item_uid)) |>
  separate(item_uid , into = c("task", "direction", "size", "span"), sep = "_") |>
  mutate(span = as.numeric(str_replace(span, "len",""))) |>
  group_by(run_id, direction) |>
  mutate(max_span = max(span))
Joining with `by = join_by(trial_id)`

3.1 Sumscore

Code
mg_spans <- mg |>
  group_by(site, span, size, direction) |>
  summarise(n = n(), 
            correct = mean(correct))
`summarise()` has grouped output by 'site', 'span', 'size'. You can override
using the `.groups` argument.
Code
ggplot(mg_spans, aes(x = span, y = correct, col = direction, pch = size)) + 
  geom_point(aes(size = n)) + 
  geom_line(aes(lty = size)) + 
  facet_wrap(~site)

Code
mg |>
  filter(direction == "forward", span > 1) |>
  mutate(age_rounded = as_factor(round(age))) |>
  group_by(age_rounded, span) |>
  summarise(correct = mean(correct), 
            n = n()) |>
  ggplot(aes(x = span, y = correct, col = age_rounded)) + 
  geom_point(aes(size = n)) + 
  geom_smooth(method = "lm", se =FALSE)
`summarise()` has grouped output by 'age_rounded'. You can override using the
`.groups` argument.
`geom_smooth()` using formula = 'y ~ x'

Code
mg |>
  filter(direction == "forward", span > 1) |>
  mutate(max_span = as_factor(max_span)) |>
  group_by(max_span, span) |>
  summarise(correct = mean(correct), 
            n = n()) |>
  ggplot(aes(x = span, y = correct, col = max_span)) + 
  geom_point(aes(size = n)) + 
  geom_line()
`summarise()` has grouped output by 'max_span'. You can override using the
`.groups` argument.

Code
mg_runs <- mg |>
  filter(correct) |>
  group_by(site, user_id, run_id, direction) |>
  summarise(max_span = max(span), 
            age = mean(age))
`summarise()` has grouped output by 'site', 'user_id', 'run_id'. You can
override using the `.groups` argument.
Code
ggplot(mg_runs, aes(x = age, y = max_span, col = direction)) + 
  geom_jitter(alpha = .5, height = .1, width = 0) + 
  geom_smooth(method = "lm")+ 
  facet_grid(direction~site, scales = "free_y")
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 39 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning in qt((1 - level)/2, df): NaNs produced
Warning: Removed 39 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
-Inf

3.2 IRT

We now use the multigroup IRT models.

Code
multigroup_scores_mg <- multigroup_scores |>
  filter(task_id == "memory-game") |>
  select(site, task_id, user_id, run_id, metric_type, metric_value ) |>
  left_join(run_ages)
Joining with `by = join_by(user_id, run_id)`
Code
ggplot(multigroup_scores_mg, aes(x = age, y = metric_value, col = site)) + 
  geom_point() + 
  geom_smooth()
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 41 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 41 rows containing missing values or values outside the scale range
(`geom_point()`).

Look at coefficients.

Code
mg_coefs <- filter(best_multigroup, task_id == "memory-game")$coefs[[1]] |>
  separate(item, sep = "_", into = c("task", "direction", "size", "span", "number")) |>
  mutate(span = as.numeric(str_replace(span, "len",""))) |>
  select(-task, -number) |>
  distinct()

ggplot(mg_coefs, aes(x = span, y = -d, col = direction, pch = size)) + 
  geom_point() + 
  geom_line() + 
  # geom_text_repel(aes(label = direction)) + 
  xlab("Slope") + 
  ylab("Difficulty") + 
  facet_wrap(~site) 

Note that we have used grid3 very little, so it is dropped from most analyses.
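As a quick check on that sparsity (assuming the `size` field parsed from `item_uid` above encodes the grid, e.g. grid3), count trials per grid size:

Code
# Sketch: trial counts by grid size and direction; sparse grid3 cells are why
# it drops out of most analyses.
mg |>
  ungroup() |>
  count(site, size, direction)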

4 SDS

Code
sds <- filter(ef, task_id == "same-different-selection") |>
  left_join(trial_id_map) |>
  filter(!is.na(item_uid)) |>
  separate(item_uid , into = c("task", "block", "trial"), sep = "_") 
Joining with `by = join_by(trial_id)`

4.1 Sumscore

Code
sds_runs <- sds |>
  group_by(site, user_id, run_id, task_id) |>
  summarise(correct = mean(correct), 
            age = mean(age))
`summarise()` has grouped output by 'site', 'user_id', 'run_id'. You can
override using the `.groups` argument.
Code
sds_runs_block <- sds |>
  filter(block != "3unique") |>
  mutate(block = fct_relevel(block, "dimensions","same")) |>
  group_by(site, user_id, run_id, task_id, block) |>
  summarise(correct = mean(correct), 
            age = mean(age))
`summarise()` has grouped output by 'site', 'user_id', 'run_id', 'task_id'. You
can override using the `.groups` argument.
Code
ggplot(sds_runs_block, aes(x = age, y = correct)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "lm", se=FALSE) + 
  facet_grid(site ~ block)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 291 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 291 rows containing missing values or values outside the scale range
(`geom_point()`).

4.2 IRT

Code
multigroup_scores_sds <- multigroup_scores |>
  filter(task_id == "same-different-selection") |>
  select(site, task_id, user_id, run_id, metric_type, metric_value ) |>
  left_join(run_ages)
Joining with `by = join_by(user_id, run_id)`
Code
ggplot(multigroup_scores_sds, aes(x = age, y = metric_value, col = site)) + 
  geom_point() + 
  geom_smooth()
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 76 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 76 rows containing missing values or values outside the scale range
(`geom_point()`).

Look at coefficients.

Code
sds_coefs <- filter(best_multigroup, task_id == "same-different-selection")$coefs[[1]] |>
  separate(item, sep = "_", into = c("task", "block","trial")) |>
  filter(block != "3unique") |>
  mutate(block = fct_relevel(block, "dimensions","same")) |>
  distinct()
Warning: Expected 3 pieces. Additional pieces discarded in 372 rows [1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
Code
ggplot(sds_coefs, aes(x = block, y = -d, col = trial)) + 
  geom_point() + 
  geom_line(aes(group = trial)) + 
  coord_flip() +
  # geom_text_repel(aes(label = direction)) + 
  xlab("block") + 
  ylab("Difficulty") 

5 EF factor analysis

5.1 Only LEVANTE tasks

Code
ef_scores <- filter(multigroup_scores, 
                    task_id %in% c("hearts-and-flowers", "memory-game", "same-different-selection"))

ef_scores_irt <- ef_scores |>
  select(site, task_id, metric_value, user_id) |>
  pivot_wider(names_from = "task_id", values_from = "metric_value", 
              id_cols = c("user_id", "site")) |>
  janitor::clean_names() |>
  mutate(across(hearts_and_flowers:same_different_selection, ~ scale(.x)[,1]), 
         ca_site = site == "ca_pilot",
         de_site = site == "de_pilot") |>
  left_join(ages)
Joining with `by = join_by(user_id)`
Code
cfa_model <-  "
ef =~ hearts_and_flowers + memory_game + same_different_selection
ef ~ age
"

fit <- cfa(cfa_model, ef_scores_irt, std.lv=TRUE, missing='fiml')
Warning: lavaan->lav_data_full():  
   118 cases were deleted due to missing values in exogenous variable(s), 
   while fixed.x = TRUE.
Code
summary(fit, fit.measures=TRUE, standardize=TRUE)
lavaan 0.6-19 ended normally after 43 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        10

                                                  Used       Total
  Number of observations                           499         617
  Number of missing patterns                         7            

Model Test User Model:
                                                      
  Test statistic                                59.708
  Degrees of freedom                                 2
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                               582.932
  Degrees of freedom                                 6
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.900
  Tucker-Lewis Index (TLI)                       0.700
                                                      
  Robust Comparative Fit Index (CFI)             0.887
  Robust Tucker-Lewis Index (TLI)                0.660

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -1626.547
  Loglikelihood unrestricted model (H1)      -1596.693
                                                      
  Akaike (AIC)                                3273.093
  Bayesian (BIC)                              3315.219
  Sample-size adjusted Bayesian (SABIC)       3283.479

Root Mean Square Error of Approximation:

  RMSEA                                          0.240
  90 Percent confidence interval - lower         0.190
  90 Percent confidence interval - upper         0.295
  P-value H_0: RMSEA <= 0.050                    0.000
  P-value H_0: RMSEA >= 0.080                    1.000
                                                      
  Robust RMSEA                                   0.278
  90 Percent confidence interval - lower         0.223
  90 Percent confidence interval - upper         0.338
  P-value H_0: Robust RMSEA <= 0.050             0.000
  P-value H_0: Robust RMSEA >= 0.080             1.000

Standardized Root Mean Square Residual:

  SRMR                                           0.062

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ef =~                                                                 
    herts_nd_flwrs    0.415    0.035   11.972    0.000    0.722    0.727
    memory_game       0.397    0.041    9.651    0.000    0.691    0.723
    sm_dffrnt_slct    0.308    0.038    8.038    0.000    0.536    0.527

Regressions:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ef ~                                                                  
    age               0.645    0.063   10.225    0.000    0.371    0.818

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .herts_nd_flwrs   -2.347    0.167  -14.087    0.000   -2.347   -2.363
   .memory_game      -2.240    0.153  -14.631    0.000   -2.240   -2.344
   .sm_dffrnt_slct   -1.654    0.166   -9.964    0.000   -1.654   -1.629

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .herts_nd_flwrs    0.466    0.042   11.151    0.000    0.466    0.472
   .memory_game       0.435    0.045    9.570    0.000    0.435    0.477
   .sm_dffrnt_slct    0.744    0.057   13.130    0.000    0.744    0.722
   .ef                1.000                               0.331    0.331
Code
graph_sem(model = fit, text_size = 3) + 
  theme(panel.background = element_rect(fill = "white"))

Multi-group models: testing measurement invariance across sites (configural, weak, strong).

Code
# configural invariance
fit1 <- cfa(cfa_model, ef_scores_irt, std.lv=TRUE, missing='fiml', 
           group = "site")
Warning: lavaan->lav_data_full():  
   118 cases were deleted in group co_pilot due to missing values in 
   exogenous variable(s), while fixed.x = TRUE.
Code
# weak invariance
fit2 <- cfa(cfa_model, ef_scores_irt, std.lv=TRUE, missing='fiml', 
           group = "site", group.equal = "loadings")
Warning: lavaan->lav_data_full():  
   118 cases were deleted in group co_pilot due to missing values in 
   exogenous variable(s), while fixed.x = TRUE.
Code
# strong invariance
fit3 <- cfa(cfa_model, ef_scores_irt, std.lv=TRUE, missing='fiml', 
           group = "site", group.equal = c("intercepts","loadings"))
Warning: lavaan->lav_data_full():  
   118 cases were deleted in group co_pilot due to missing values in 
   exogenous variable(s), while fixed.x = TRUE.
Code
# model comparison tests
lavTestLRT(fit1, fit2, fit3)

Chi-Squared Difference Test

     Df    AIC    BIC   Chisq Chisq diff   RMSEA Df diff Pr(>Chisq)    
fit1  6 2802.8 2929.2  13.204                                          
fit2 10 2815.7 2925.2  34.098     20.894 0.15935       4  0.0003323 ***
fit3 14 3011.9 3104.6 238.286    204.188 0.54853       4  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
graph_sem(model = fit1, text_size = 3) + 
  theme(panel.background = element_rect(fill = "white"))

5.2 Add MEFS

Code
mefs_scores <- read_csv(here("02_scored_data","mefs_data","LEVANTE_20250403_1132.csv")) |>
  janitor::clean_names() |>
  select(child_id, a1_total_score, a1_standard_score) |>
  rename(user_id = child_id, 
         mefs_total = a1_total_score,
         mefs_standard = a1_standard_score
         ) 
Rows: 416 Columns: 436
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (153): Org Name, Child ID, Birth Month / Year, Gender, Special Educatio...
dbl  (270): RS ID, A1_Assessment ID, A1_Age (Months), A1_Basal Level, A1_Hig...
lgl   (12): Group Name, Group ID, NCES ID, First Name, Last Name, Ethnicity,...
dttm   (1): A1_Date of Test

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Code
ef_scores_irt <- ef_scores_irt |>
  left_join(mefs_scores) |>
  mutate(mefs_total = scale(mefs_total)[,1],
         mefs_standard = scale(mefs_standard)[,1])
Joining with `by = join_by(user_id)`
Code
cfa_model_mefs <-  "
ef =~ hearts_and_flowers + memory_game + same_different_selection + mefs_standard
ef ~ age
"

fit <- cfa(cfa_model_mefs, ef_scores_irt, std.lv=TRUE, missing='fiml')
Warning: lavaan->lav_data_full():  
   118 cases were deleted due to missing values in exogenous variable(s), 
   while fixed.x = TRUE.
Code
summary(fit, fit.measures=TRUE, standardize=TRUE)
lavaan 0.6-19 ended normally after 44 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        13

                                                  Used       Total
  Number of observations                           501         619
  Number of missing patterns                        12            

Model Test User Model:
                                                      
  Test statistic                                64.023
  Degrees of freedom                                 5
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                               729.995
  Degrees of freedom                                10
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.918
  Tucker-Lewis Index (TLI)                       0.836
                                                      
  Robust Comparative Fit Index (CFI)             0.921
  Robust Tucker-Lewis Index (TLI)                0.842

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -1953.761
  Loglikelihood unrestricted model (H1)      -1921.750
                                                      
  Akaike (AIC)                                3933.522
  Bayesian (BIC)                              3988.338
  Sample-size adjusted Bayesian (SABIC)       3947.075

Root Mean Square Error of Approximation:

  RMSEA                                          0.153
  90 Percent confidence interval - lower         0.121
  90 Percent confidence interval - upper         0.188
  P-value H_0: RMSEA <= 0.050                    0.000
  P-value H_0: RMSEA >= 0.080                    1.000
                                                      
  Robust RMSEA                                   0.173
  90 Percent confidence interval - lower         0.132
  90 Percent confidence interval - upper         0.217
  P-value H_0: Robust RMSEA <= 0.050             0.000
  P-value H_0: Robust RMSEA >= 0.080             1.000

Standardized Root Mean Square Residual:

  SRMR                                           0.051

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ef =~                                                                 
    herts_nd_flwrs    0.427    0.031   13.781    0.000    0.708    0.711
    memory_game       0.434    0.038   11.388    0.000    0.720    0.753
    sm_dffrnt_slct    0.339    0.037    9.194    0.000    0.562    0.554
    mefs_standard     0.418    0.040   10.448    0.000    0.692    0.696

Regressions:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ef ~                                                                  
    age               0.599    0.051   11.680    0.000    0.361    0.798

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .herts_nd_flwrs   -2.248    0.168  -13.346    0.000   -2.248   -2.258
   .memory_game      -2.276    0.148  -15.369    0.000   -2.276   -2.381
   .sm_dffrnt_slct   -1.695    0.160  -10.596    0.000   -1.695   -1.671
   .mefs_standard    -2.092    0.179  -11.658    0.000   -2.092   -2.102

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .herts_nd_flwrs    0.490    0.042   11.568    0.000    0.490    0.494
   .memory_game       0.396    0.041    9.570    0.000    0.396    0.433
   .sm_dffrnt_slct    0.713    0.054   13.128    0.000    0.713    0.693
   .mefs_standard     0.511    0.054    9.506    0.000    0.511    0.516
   .ef                1.000                               0.364    0.364
Code
graph_sem(model = fit, text_size = 3) + 
  theme(panel.background = element_rect(fill = "white"))

Code
# configural invariance
fit1 <- cfa(cfa_model_mefs, ef_scores_irt, std.lv=TRUE, missing='fiml', 
           group = "site")
Warning: lavaan->lav_data_full():  
   118 cases were deleted in group co_pilot due to missing values in 
   exogenous variable(s), while fixed.x = TRUE.
Code
# weak invariance
fit2 <- cfa(cfa_model_mefs, ef_scores_irt, std.lv=TRUE, missing='fiml', 
           group = "site", group.equal = "loadings")
Warning: lavaan->lav_data_full():  
   118 cases were deleted in group co_pilot due to missing values in 
   exogenous variable(s), while fixed.x = TRUE.
Code
# strong invariance
fit3 <- cfa(cfa_model_mefs, ef_scores_irt, std.lv=TRUE, missing='fiml', 
           group = "site", group.equal = c("intercepts","loadings"))
Warning: lavaan->lav_data_full():  
   118 cases were deleted in group co_pilot due to missing values in 
   exogenous variable(s), while fixed.x = TRUE.
Code
# model comparison tests
lavTestLRT(fit1, fit2, fit3)

Chi-Squared Difference Test

     Df    AIC    BIC   Chisq Chisq diff   RMSEA Df diff Pr(>Chisq)    
fit1 15 3459.4 3623.8  41.306                                          
fit2 21 3476.6 3615.7  70.457     29.151 0.15200       6  5.696e-05 ***
fit3 27 3690.5 3804.4 296.452    225.994 0.46857       6  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
graph_sem(model = fit1, text_size = 3) + 
  theme(panel.background = element_rect(fill = "white"))