Data loading

library(tidyverse)
library(glue)
library(here)
here() starts at /Users/mcfrank/Projects/levante-pilots
library(mirt)
Loading required package: stats4
Loading required package: lattice
library(ggrepel)
# require(arm)
library(lavaan)
This is lavaan 0.6-19
lavaan is FREE software! Please report any bugs.
library(tidySEM)
Loading required package: OpenMx
OpenMx may run faster if it is compiled to take advantage of multiple cores.
Registered S3 method overwritten by 'tidySEM':
  method          from  
  predict.MxModel OpenMx
source(here("02_score_data","irt_helpers.R"))
source(here("03_summaries", "plotting_helper.R"))

sites <- c("co_pilot", "de_pilot")

task_data_nested <- sites |>
  set_names() |>
  map(\(s) read_rds(here(glue("01_processed_data/{s}/task_data_nested.rds")))) |>
  list_rbind(names_to = "site")

task_data_combined <- task_data_nested |>
  select(-task_id) |>
  unnest(data) 

ef <- filter(task_data_combined, 
              task_id %in% c("hearts-and-flowers","same-different-selection","memory-game"))

Get ages.

participants <- sites |>
  set_names() |>
  map(\(s) read_rds(here(glue("00_prepped_data/{s}/participants.rds")))) |>
  list_rbind(names_to = "site")

run_ages <- participants |>
  select(user_id, ages) |>
  unnest(ages)

ef <- left_join(ef, run_ages)
Joining with `by = join_by(user_id, run_id)`

Sumscore approach

First plot sumscores.

ef_runs <- ef |>
  group_by(site, user_id, run_id, task_id) |>
  summarise(correct = mean(correct), 
            age = mean(age))
`summarise()` has grouped output by 'site', 'user_id', 'run_id'. You can
override using the `.groups` argument.
ggplot(ef_runs, aes(x = age, y = correct)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "loess", lambda = 1) +
  facet_grid(site ~ task_id)
Warning in geom_smooth(method = "loess", lambda = 1): Ignoring unknown
parameters: `lambda`
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 142 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 142 rows containing missing values (`geom_point()`).

Hearts and Flowers

Zoom in on HF.

hf_runs <- ef |>
  filter(task_id == "hearts-and-flowers") |>
  group_by(site, user_id, run_id, task_id) |>
  summarise(logit = coef(glm(correct ~ 1, family = "binomial"))[1],
            bayeslogit = coef(arm::bayesglm(correct ~ 1, family = "binomial", 
                                       prior.scale.for.intercept = .1))[1],
            correct = mean(correct), 
            age = mean(age))
Warning: There was 1 warning in `summarise()`.
ℹ In argument: `bayeslogit = coef(arm::bayesglm(correct ~ 1, family =
  "binomial", prior.scale.for.intercept = 0.1))[1]`.
ℹ In group 1: `site = "co_pilot"`, `user_id = "01TonkOE2dWerkUlJZOIDwJkQNG3"`,
  `run_id = "arGLMZNt5N4prVhCGbFw"`, `task_id = "hearts-and-flowers"`.
Caused by warning in `check_dep_version()`:
! ABI version mismatch: 
lme4 was built with Matrix ABI version 1
Current Matrix ABI version is 0
Please re-install lme4 from source or restore original 'Matrix' package
`summarise()` has grouped output by 'site', 'user_id', 'run_id'. You can
override using the `.groups` argument.
ggplot(hf_runs,
       aes(x = age, y = correct)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "glm", method.args = list(family = "binomial")) +
  geom_hline(yintercept = .5, lty = 2) + 
  ylim(0,1) + 
  facet_grid(site ~ task_id)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 42 rows containing non-finite values (`stat_smooth()`).
Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Warning: Removed 42 rows containing missing values (`geom_point()`).

Logit scoring.

ggplot(hf_runs,
       aes(x = age, y = logit)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "glm", method.args = list(family = "binomial")) +
  geom_hline(yintercept = 0, lty = 2) + 
  # ylim(0,1) + 
  facet_grid(site ~ task_id)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 42 rows containing non-finite values (`stat_smooth()`).
Warning: Computation failed in `stat_smooth()`
Computation failed in `stat_smooth()`
Caused by error:
! y values must be 0 <= y <= 1
Warning: Removed 42 rows containing missing values (`geom_point()`).

Bayesian logit.

ggplot(hf_runs,
       aes(x = age, y = bayeslogit)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "lm") +
  geom_hline(yintercept = 0, lty = 2) + 
  # ylim(0,1) + 
  facet_grid(site ~ task_id)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 42 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 42 rows containing missing values (`geom_point()`).

Histograms.

ggplot(filter(ef_runs, task_id == "hearts-and-flowers") |>
         mutate(age_group = cut(age, c(5, 7, 9, 11, 13, include.lowest=TRUE))) |>
         filter(!is.na(age_group)),
       aes(x = correct)) + 
  geom_histogram(binwidth = .25) +
  geom_vline(xintercept = .5, lty = 2) +
  scale_x_continuous(breaks = c(0,.25, .5, .75, 1)) + 
  facet_grid(site ~ age_group)

Memory game

mg_runs <- ef |>
  filter(task_id == "memory-game", 
         corpus_trial_type != "") |>
  mutate(span = str_count(response, ":")) |>
  filter(correct) |>
  group_by(site, user_id, run_id, corpus_trial_type) |>
  summarise(span = max(span), 
            age = mean(age))
`summarise()` has grouped output by 'site', 'user_id', 'run_id'. You can
override using the `.groups` argument.
ggplot(mg_runs, aes(x = age, y = span, col = corpus_trial_type)) + 
  geom_jitter(alpha = .5, height = .1, width = 0) + 
  geom_smooth(method = "lm")+ 
  facet_grid(corpus_trial_type~site, scales = "free_y")
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 4 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).

EF factor analysis

sds_scores <- read_rds(here("02_scored_data","scores_irt.rds")) |>
  filter(task_id == "same-different-selection")

ef_scores <- bind_rows(sds_scores |> 
                         filter(site %in% c("co_pilot","de_pilot")) |>
                      dplyr::select(site, task_id, user_id, run_id, metric_type, metric_value), 
                    mg_runs |> 
                      filter(corpus_trial_type == "forward") |>
                      mutate(metric_value = span, 
                             metric_type = "longest span",
                             task_id = "memory-game") |>
                      dplyr::select(site, task_id, user_id, run_id, metric_type,  metric_value)) |>
  bind_rows(hf_runs |>
              pivot_longer(c(correct, logit, bayeslogit), names_to = "metric_type", 
                           values_to = "metric_value") |>
              dplyr::select(site, task_id, user_id, run_id, metric_type, metric_value))
ages <- run_ages |>
  group_by(user_id) |>
  summarise(age = mean(age))

ef_scores_sumscore <- ef_scores |>
  filter(task_id != "hearts-and-flowers" | 
           (task_id == "hearts-and-flowers" & metric_type == "correct")) |>
  dplyr::select(task_id, metric_value, user_id, site) |>
  pivot_wider(names_from = "task_id", values_from = "metric_value", 
              id_cols = c("user_id","site"), 
              values_fn = mean) |>
  janitor:::clean_names() |>
  mutate(across(same_different_selection:hearts_and_flowers, ~ scale(.x)[,1])) |>
  left_join(ages)
Joining with `by = join_by(user_id)`
ef_scores_bayeslogit <- ef_scores |>
  filter(task_id != "hearts-and-flowers" | 
           (task_id == "hearts-and-flowers" & metric_type == "bayeslogit")) |>
  dplyr::select(task_id, metric_value, user_id, site) |>
  pivot_wider(names_from = "task_id", values_from = "metric_value", 
              id_cols = c("user_id","site"), 
              values_fn = mean) |>
  janitor:::clean_names() |>
  mutate(across(same_different_selection:hearts_and_flowers, ~ scale(.x)[,1])) |>
  left_join(ages)
Joining with `by = join_by(user_id)`
cfa_model <-  "
ef =~ hearts_and_flowers + memory_game + same_different_selection
ef ~ age
"

fit <- cfa(cfa_model, ef_scores_sumscore, std.lv=TRUE, missing='fiml')
Warning: lavaan->lav_data_full():  
   85 cases were deleted due to missing values in exogenous variable(s), 
   while fixed.x = TRUE.
summary(fit, fit.measures=TRUE, standardize=TRUE)
lavaan 0.6-19 ended normally after 45 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        10

                                                  Used       Total
  Number of observations                           333         418
  Number of missing patterns                         5            

Model Test User Model:
                                                      
  Test statistic                                 0.870
  Degrees of freedom                                 2
  P-value (Chi-square)                           0.647

Model Test Baseline Model:

  Test statistic                               268.550
  Degrees of freedom                                 6
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    1.000
  Tucker-Lewis Index (TLI)                       1.013
                                                      
  Robust Comparative Fit Index (CFI)             1.000
  Robust Tucker-Lewis Index (TLI)                1.012

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -1068.238
  Loglikelihood unrestricted model (H1)      -1067.803
                                                      
  Akaike (AIC)                                2156.475
  Bayesian (BIC)                              2194.557
  Sample-size adjusted Bayesian (SABIC)       2162.836

Root Mean Square Error of Approximation:

  RMSEA                                          0.000
  90 Percent confidence interval - lower         0.000
  90 Percent confidence interval - upper         0.085
  P-value H_0: RMSEA <= 0.050                    0.819
  P-value H_0: RMSEA >= 0.080                    0.063
                                                      
  Robust RMSEA                                   0.000
  90 Percent confidence interval - lower         0.000
  90 Percent confidence interval - upper         0.098
  P-value H_0: Robust RMSEA <= 0.050             0.787
  P-value H_0: Robust RMSEA >= 0.080             0.096

Standardized Root Mean Square Residual:

  SRMR                                           0.010

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ef =~                                                                 
    herts_nd_flwrs    0.349    0.054    6.427    0.000    0.653    0.677
    memory_game       0.391    0.060    6.466    0.000    0.730    0.707
    sm_dffrnt_slct    0.245    0.044    5.591    0.000    0.458    0.451

Regressions:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ef ~                                                                  
    age               0.718    0.111    6.481    0.000    0.384    0.845

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .herts_nd_flwrs   -2.195    0.188  -11.652    0.000   -2.195   -2.277
   .memory_game      -2.598    0.259  -10.032    0.000   -2.598   -2.514
   .sm_dffrnt_slct   -1.547    0.204   -7.586    0.000   -1.547   -1.522

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .herts_nd_flwrs    0.503    0.055    9.069    0.000    0.503    0.541
   .memory_game       0.535    0.069    7.769    0.000    0.535    0.501
   .sm_dffrnt_slct    0.822    0.069   11.975    0.000    0.822    0.797
   .ef                1.000                               0.286    0.286
graph_sem(model = fit, text_size = 3) + 
  theme(panel.background = element_rect(fill = "white"))

cfa_model <-  "
ef =~ hearts_and_flowers + memory_game + same_different_selection
ef ~ age
"

fit_bayeslogit <- cfa(cfa_model, ef_scores_bayeslogit, std.lv=TRUE, missing='fiml')
Warning: lavaan->lav_data_full():  
   85 cases were deleted due to missing values in exogenous variable(s), 
   while fixed.x = TRUE.
summary(fit_bayeslogit, fit.measures=TRUE, standardize=TRUE)
lavaan 0.6-19 ended normally after 46 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        10

                                                  Used       Total
  Number of observations                           333         418
  Number of missing patterns                         5            

Model Test User Model:
                                                      
  Test statistic                                 0.752
  Degrees of freedom                                 2
  P-value (Chi-square)                           0.687

Model Test Baseline Model:

  Test statistic                               275.883
  Degrees of freedom                                 6
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    1.000
  Tucker-Lewis Index (TLI)                       1.014
                                                      
  Robust Comparative Fit Index (CFI)             1.000
  Robust Tucker-Lewis Index (TLI)                1.013

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -1077.903
  Loglikelihood unrestricted model (H1)      -1077.528
                                                      
  Akaike (AIC)                                2175.807
  Bayesian (BIC)                              2213.888
  Sample-size adjusted Bayesian (SABIC)       2182.168

Root Mean Square Error of Approximation:

  RMSEA                                          0.000
  90 Percent confidence interval - lower         0.000
  90 Percent confidence interval - upper         0.081
  P-value H_0: RMSEA <= 0.050                    0.843
  P-value H_0: RMSEA >= 0.080                    0.053
                                                      
  Robust RMSEA                                   0.000
  90 Percent confidence interval - lower         0.000
  90 Percent confidence interval - upper         0.092
  P-value H_0: Robust RMSEA <= 0.050             0.817
  P-value H_0: Robust RMSEA >= 0.080             0.079

Standardized Root Mean Square Residual:

  SRMR                                           0.009

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ef =~                                                                 
    herts_nd_flwrs    0.348    0.056    6.216    0.000    0.687    0.682
    memory_game       0.362    0.058    6.252    0.000    0.715    0.694
    sm_dffrnt_slct    0.228    0.043    5.352    0.000    0.451    0.444

Regressions:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  ef ~                                                                  
    age               0.774    0.124    6.256    0.000    0.392    0.862

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .herts_nd_flwrs   -2.371    0.195  -12.165    0.000   -2.371   -2.353
   .memory_game      -2.595    0.259  -10.030    0.000   -2.595   -2.519
   .sm_dffrnt_slct   -1.552    0.205   -7.588    0.000   -1.552   -1.528

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .herts_nd_flwrs    0.543    0.059    9.249    0.000    0.543    0.535
   .memory_game       0.551    0.068    8.109    0.000    0.551    0.519
   .sm_dffrnt_slct    0.829    0.069   12.064    0.000    0.829    0.803
   .ef                1.000                               0.257    0.257
graph_sem(model = fit_bayeslogit, text_size = 3) + 
  theme(panel.background = element_rect(fill = "white"))

anova(fit, fit_bayeslogit)
Warning: lavaan->lavTestLRT():  
   Some restricted models fit better than less restricted models; either 
   these models are not nested, or the less restricted model failed to reach 
   a global optimum.Smallest difference = -0.11801326394215.
Warning: lavaan->lavTestLRT():  
   some models have the same degrees of freedom

Chi-Squared Difference Test

               Df    AIC    BIC  Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
fit             2 2156.5 2194.6 0.8699                                    
fit_bayeslogit  2 2175.8 2213.9 0.7519   -0.11801     0       0