Longitudinal models

This markdown documents attempts to do longitudinal models for the Peekbank data.

library(here)

here() starts at /Users/mcfrank/Projects/peekbank/peekbank-development

source(here("helper","common.R"))

Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2

library(lavaan)

Warning: package 'lavaan' was built under R version 4.3.3

This is lavaan 0.6-18
lavaan is FREE software! Please report any bugs.

library(tidySEM)

Warning: package 'tidySEM' was built under R version 4.3.3

Loading required package: OpenMx

OpenMx may run faster if it is compiled to take advantage of multiple cores.

Registered S3 method overwritten by 'tidySEM':
  method          from  
  predict.MxModel OpenMx

library(nlme)

Warning: package 'nlme' was built under R version 4.3.3


Attaching package: 'nlme'

The following object is masked from 'package:dplyr':

    collapse

library(emmeans)

Warning: package 'emmeans' was built under R version 4.3.3

Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'


Attaching package: 'emmeans'

The following object is masked from 'package:GGally':

    pigs

library(lme4)

Loading required package: Matrix


Attaching package: 'Matrix'

The following objects are masked from 'package:OpenMx':

    %&%, expm

The following objects are masked from 'package:tidyr':

    expand, pack, unpack


Attaching package: 'lme4'

The following object is masked from 'package:nlme':

    lmList

library(lmerTest)


Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step

d_sub <- readRDS(here("cached_intermediates","1_d_sub.Rds")) |>
  group_by(subject_id) |>
  arrange(age) |>
  mutate(admin_num = 1:n(), 
         time_since_t0 = age - age[1],
         delta_t = c(0, diff(age)))

longitudinal <- d_sub |>
  group_by(dataset_name, subject_id) |>
  count() |>
  filter(n > 1)

d_sub_long <- d_sub |>
  filter(subject_id %in% longitudinal$subject_id) 

ggplot(d_sub_long, 
       aes(x = time_since_t0, fill = dataset_name)) +
  geom_histogram(binwidth = 1) + 
  # facet_wrap(~dataset_name) + 
  xlab("Time since first test")

Draft 1: Replicate growth models from Fernald & Marchman 2012

First, we try replicating the original growth models.

Just FM2012

fm2012 <- filter(d_sub, dataset_name == "fernald_marchman_2012") |>
  group_by(subject_id) |>
  mutate(age_group = case_when(age < 21 ~ 18, 
                               age > 21 & age < 27 ~ 24, 
                               .default = 30)) |>
  arrange(age) |>
  mutate(rt_t0 = rt[1]) |>
  ungroup() |>
  mutate(rt_t0_group = ifelse(rt_t0 < median(rt_t0), "low", "high"))

ggplot(fm2012, 
       aes(x = age_group, y= prod)) + 
  geom_line(alpha = .2, aes(group = subject_id)) + 
  stat_summary(aes(col = rt_t0_group)) + 
  geom_smooth(aes(col = rt_t0_group), method = "lm", formula = y ~ poly(x, 2))

Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_summary()`).

No summary function supplied, defaulting to `mean_se()`

Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_smooth()`).

This looks similar (but maybe a bit worse with log rt.

fm_mod <- lmer(prod ~ age * rt_t0 + (age | subject_id), 
     data = fm2012, 
     control = lmerControl(optimizer = "bobyqa"))

Warning: Some predictor variables are on very different scales: consider
rescaling

Warning: Some predictor variables are on very different scales: consider
rescaling

summary(fm_mod)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: prod ~ age * rt_t0 + (age | subject_id)
   Data: fm2012
Control: lmerControl(optimizer = "bobyqa")

REML criterion at convergence: -1221.3

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.48247 -0.52017  0.01563  0.46886  2.90259 

Random effects:
 Groups     Name        Variance  Std.Dev. Corr 
 subject_id (Intercept) 0.0309042 0.17580       
            age         0.0001324 0.01151  -0.92
 Residual               0.0043541 0.06599       
Number of obs: 675, groups:  subject_id, 122

Fixed effects:
              Estimate Std. Error         df t value Pr(>|t|)    
(Intercept) -9.223e-01  6.566e-02  1.070e+02 -14.046   <2e-16 ***
age          5.894e-02  3.749e-03  1.098e+02  15.724   <2e-16 ***
rt_t0        6.732e-05  5.284e-05  1.068e+02   1.274   0.2054    
age:rt_t0   -5.810e-06  3.019e-06  1.098e+02  -1.925   0.0568 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
          (Intr) age    rt_t0 
age       -0.921              
rt_t0     -0.949  0.875       
age:rt_t0  0.874 -0.949 -0.923
fit warnings:
Some predictor variables are on very different scales: consider rescaling

Note that this model looks a lot better with quadratic growth. (They used quadratics in the original).

fm_mod <- lmer(prod ~ age * rt_t0 + I(age^2) * rt_t0 + (age | subject_id), 
     data = fm2012, 
     control = lmerControl(optimizer = "bobyqa"))

Warning: Some predictor variables are on very different scales: consider
rescaling

Warning: Some predictor variables are on very different scales: consider
rescaling

summary(fm_mod)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: prod ~ age * rt_t0 + I(age^2) * rt_t0 + (age | subject_id)
   Data: fm2012
Control: lmerControl(optimizer = "bobyqa")

REML criterion at convergence: -1185

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.7045 -0.4795  0.0069  0.4828  2.8385 

Random effects:
 Groups     Name        Variance  Std.Dev. Corr 
 subject_id (Intercept) 0.0312247 0.1767        
            age         0.0001324 0.0115   -0.92
 Residual               0.0043039 0.0656        
Number of obs: 675, groups:  subject_id, 122

Fixed effects:
                 Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)    -1.639e+00  2.791e-01  4.997e+02  -5.872 7.87e-09 ***
age             1.214e-01  2.393e-02  4.764e+02   5.072 5.64e-07 ***
rt_t0           6.224e-04  2.239e-04  5.025e+02   2.779  0.00565 ** 
I(age^2)       -1.302e-03  4.927e-04  4.577e+02  -2.642  0.00852 ** 
age:rt_t0      -5.421e-05  1.922e-05  4.788e+02  -2.820  0.00500 ** 
rt_t0:I(age^2)  1.009e-06  3.958e-07  4.597e+02   2.549  0.01112 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) age    rt_t0  I(g^2) ag:r_0
age         -0.994                            
rt_t0       -0.948  0.942                     
I(age^2)     0.972 -0.988 -0.922              
age:rt_t0    0.942 -0.948 -0.994  0.936       
rt_t0:I(^2) -0.921  0.936  0.972 -0.948 -0.988
fit warnings:
Some predictor variables are on very different scales: consider rescaling

All data

Let’s try the same model with all the data.

d_growth <- d_sub |>
  filter(!is.na(prod), subject_id %in% longitudinal$subject_id) |>
  group_by(subject_id) |>
  arrange(age) |>
  mutate(rt_t0 = log_rt[1]) |>
  ungroup() |>
  mutate(rt_t0_group = ifelse(rt_t0 < median(rt_t0, na.rm=TRUE), "low", "high"))

ggplot(filter(d_growth, !is.na(rt_t0)), 
              aes(x = age, y= prod)) +
  geom_line(alpha = .2, aes(group = subject_id)) +
  geom_smooth(aes(col = rt_t0_group), method = "lm", formula = y ~ x + I(x^2))

ggplot(filter(d_growth, !is.na(rt_t0)), 
              aes(x = age, y= prod)) +
  geom_line(alpha = .2, aes(group = subject_id)) +
  geom_smooth(aes(col = rt_t0_group), method = "lm", formula = y ~ x + I(x^2))

Linear.

d_growth$age_15 <- d_growth$age - 15
all_mod <- lmer(prod ~ age_15 *rt_t0 +  (age  | subject_id) + (age | dataset_name), 
     data = d_growth, 
     control = lmerControl(optimizer = "bobyqa"))

summary(all_mod)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: prod ~ age_15 * rt_t0 + (age | subject_id) + (age | dataset_name)
   Data: d_growth
Control: lmerControl(optimizer = "bobyqa")

REML criterion at convergence: -2421.1

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.9626 -0.4593 -0.0035  0.4460  3.6891 

Random effects:
 Groups       Name        Variance  Std.Dev. Corr 
 subject_id   (Intercept) 4.843e-02 0.220067      
              age         1.564e-04 0.012506 -0.90
 dataset_name (Intercept) 6.725e-03 0.082008      
              age         4.190e-06 0.002047 -0.60
 Residual                 4.742e-03 0.068860      
Number of obs: 1416, groups:  subject_id, 315; dataset_name, 6

Fixed effects:
               Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)    0.402460   0.159986 215.908587   2.516  0.01261 *  
age_15         0.088431   0.020260 153.326554   4.365 2.33e-05 ***
rt_t0         -0.059748   0.022638 224.097681  -2.639  0.00889 ** 
age_15:rt_t0  -0.005447   0.002909 160.021373  -1.872  0.06299 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) age_15 rt_t0 
age_15      -0.369              
rt_t0       -0.981  0.368       
ag_15:rt_t0  0.365 -0.998 -0.368

Again better with quadratics. (Won’t converge with nested quadratics in random effects).

d_growth$age_15 <- d_growth$age - 15
all_mod <- lmer(prod ~ age_15 *rt_t0  + I(age_15^2) * rt_t0 + (age  | subject_id) + (age | dataset_name), 
     data = d_growth, 
     control = lmerControl(optimizer = "bobyqa"))

Warning: Some predictor variables are on very different scales: consider
rescaling

Warning: Some predictor variables are on very different scales: consider
rescaling

summary(all_mod)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: prod ~ age_15 * rt_t0 + I(age_15^2) * rt_t0 + (age | subject_id) +  
    (age | dataset_name)
   Data: d_growth
Control: lmerControl(optimizer = "bobyqa")

REML criterion at convergence: -2426.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.0773 -0.4521 -0.0244  0.4119  3.7370 

Random effects:
 Groups       Name        Variance  Std.Dev. Corr 
 subject_id   (Intercept) 4.800e-02 0.219098      
              age         1.568e-04 0.012522 -0.90
 dataset_name (Intercept) 4.855e-03 0.069679      
              age         1.518e-05 0.003896 -0.62
 Residual                 4.583e-03 0.067699      
Number of obs: 1416, groups:  subject_id, 315; dataset_name, 6

Fixed effects:
                    Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)       -2.718e-01  2.012e-01  4.282e+02  -1.351    0.178    
age_15             3.184e-01  4.616e-02  9.716e+02   6.898 9.51e-12 ***
rt_t0              3.883e-02  2.854e-02  4.309e+02   1.361    0.174    
I(age_15^2)       -1.404e-02  2.543e-03  1.035e+03  -5.520 4.28e-08 ***
age_15:rt_t0      -3.917e-02  6.596e-03  9.624e+02  -5.938 4.02e-09 ***
rt_t0:I(age_15^2)  2.065e-03  3.656e-04  1.032e+03   5.648 2.09e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) age_15 rt_t0  I(_15^ a_15:_
age_15      -0.681                            
rt_t0       -0.991  0.682                     
I(age_15^2)  0.623 -0.894 -0.623              
ag_15:rt_t0  0.680 -0.998 -0.681  0.893       
r_0:I(_15^2 -0.620  0.891  0.621 -0.999 -0.893
fit warnings:
Some predictor variables are on very different scales: consider rescaling

So what we see here is that RT at t0 is very predictive of something about growth.

On the other hand, these models don’t take into account RT at t1 or t2. So they don’t tell us if RT is a stable trait that’s associated with growth…

Draft 2: Two-time points

So we try fitting two factor analyses and looking at coupling over time.

d_sub_s <- d_sub |>
  ungroup() |>
  select(dataset_name, subject_id, administration_id, age, 
         time_since_t0, delta_t, admin_num,
         log_rt, log_rt_var, 
         long_window_accuracy, long_window_acc_var, prod, comp, ) |>
  rename(acc = long_window_accuracy, 
         acc_sd = long_window_acc_var, 
         log_rt_sd = log_rt_var) |>
  ungroup() |>
  mutate(across(all_of(c("log_rt", "log_rt_sd", "acc", "acc_sd", 
                         "prod", "comp")), 
                       ~ age_scale(.x, age)))

Check on amount of data.

d_sub_wide <- d_sub_s |> 
  mutate(t = cut(time_since_t0, breaks = c(0, 3, 6, 30), 
                 include.lowest = TRUE)) |>
  filter(t != "(6,30]") |>
  mutate(t = as.numeric(t)) |>
  group_by(subject_id, dataset_name, t) |>
  summarise(across(all_of(c("log_rt", "log_rt_sd", "acc",
                            "acc_sd", "prod", "comp")),
                   ~ mean(.x, na.rm=TRUE))) |> 
  mutate(across(all_of(c("log_rt", "log_rt_sd", "acc",
                         "acc_sd", "prod", "comp")),
                ~ ifelse(is.nan(.x), NA, .x))) |>
  pivot_wider(id_cols = c("subject_id","dataset_name"), 
              names_from = "t",
              values_from = c("log_rt", "log_rt_sd", "acc", 
                              "acc_sd", "prod", "comp"), 
              names_prefix = "t")

`summarise()` has grouped output by 'subject_id', 'dataset_name'. You can
override using the `.groups` argument.

d_sub_wide |>
  ungroup() |>
  summarise(across(log_rt_t1:comp_t2, 
                   ~sum(!is.na(.x))))

# A tibble: 1 × 12
  log_rt_t1 log_rt_t2 log_rt_sd_t1 log_rt_sd_t2 acc_t1 acc_t2 acc_sd_t1
      <int>     <int>        <int>        <int>  <int>  <int>     <int>
1      1447       250         1447          250   1751    255      1751
# ℹ 5 more variables: acc_sd_t2 <int>, prod_t1 <int>, prod_t2 <int>,
#   comp_t1 <int>, comp_t2 <int>

fa3_model_long <- "
# measurement model
vocab_t1 =~ 1*prod_t1 + s1*comp_t1
accuracy_t1 =~ 1*acc_t1 + s2*acc_sd_t1
speed_t1 =~ 1*log_rt_t1 + s3*log_rt_sd_t1

vocab_t2 =~ 1*prod_t2 + s1*comp_t1
accuracy_t2 =~ 1*acc_t2 + s2*acc_sd_t2
speed_t2 =~ 1*log_rt_t2 + s3*log_rt_sd_t2

# longitudinal relationships
vocab_t2 ~ vocab_t1 + speed_t1 + accuracy_t1
speed_t2 ~ vocab_t1 + speed_t1 + accuracy_t1
accuracy_t2 ~ vocab_t1 + speed_t1 + accuracy_t1

# factor covariances
vocab_t1 ~~ accuracy_t1
vocab_t1 ~~ speed_t1
accuracy_t1 ~~ speed_t1

vocab_t2 ~~ accuracy_t2
vocab_t2 ~~ speed_t2
accuracy_t2 ~~ speed_t2

# means for the latents
vocab_t1 ~ 1
vocab_t2 ~ 1
accuracy_t1 ~ 1
accuracy_t2 ~ 1
speed_t1 ~ 1
speed_t2 ~ 1

# residual variance for the latents
vocab_t1 ~~ NA*vocab_t1
vocab_t2 ~~ NA*vocab_t2
accuracy_t1 ~~ NA*accuracy_t1
accuracy_t2 ~~ NA*accuracy_t2
speed_t1 ~~ NA*speed_t1
speed_t2 ~~ NA*speed_t2
"

fit3_long <- sem(fa3_model_long, d_sub_wide, std.lv=TRUE, missing='fiml')

Warning: lavaan->lav_data_full():  
   some cases are empty and will be ignored: 370 548 551 559 568 700 711 715 
   717 731 740 742 744 746 750 755 756 757 759 780 784 786 796 797 817 824 
   834 842 847 851 854 865 873 879 893 895 899 910 917 925 927 943 963 964 
   965 967 974 1045 1053 1062 1063 1064 1069 1074 1077 1080 1081 1084 1086 
   1093 1096 1101 1102 1107 1108 1113 1114 1122 1133 1135 1138 1139 1140 1142 
   1144 1145 1147 1153 1156 1162 1177 1178 1184 1185 1191 1194 1199 1207 1211 
   1215 1217 1218 1219 1225 1226 1229 1230 1231 1246 1250 1256 1259 1261 1267 
   1279 1283 1284 1303 1315 1317 1318 1319 1321 1336 1338 1341 1386 1413 1415 
   1416 1511 1514 1860 1862 1865 1867 1869 1877 1878 1880 1881 1889 1890 1893 
   1901 1906 1910 1911 1912 1914 1916 1920 1924 1929 1930 1933 1934 1942 1949 
   1952 1958 1960 1963.

Warning: lavaan->lav_data_full():  
   due to missing values, some pairwise combinations have less than 10% 
   coverage; use lavInspect(fit, "coverage") to investigate.

Warning: lavaan->lav_mvnorm_missing_h1_estimate_moments():  
   Maximum number of iterations reached when computing the sample moments 
   using EM; use the em.h1.iter.max= argument to increase the number of 
   iterations

Warning: lavaan->lav_model_vcov():  
   The variance-covariance matrix of the estimated parameters (vcov) does not 
   appear to be positive definite! The smallest eigenvalue (= -2.079687e-16) 
   is smaller than zero. This may be a symptom that the model is not 
   identified.

Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative

summary(fit3_long, fit.measures=TRUE, standardize=TRUE)

lavaan 0.6-18 ended normally after 151 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        55
  Number of equality constraints                     3

                                                  Used       Total
  Number of observations                          1810        1963
  Number of missing patterns                        20            

Model Test User Model:
                                                      
  Test statistic                               137.177
  Degrees of freedom                                25
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                              1892.286
  Degrees of freedom                                55
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.939
  Tucker-Lewis Index (TLI)                       0.866
                                                      
  Robust Comparative Fit Index (CFI)             0.913
  Robust Tucker-Lewis Index (TLI)                0.808

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)             -13157.842
  Loglikelihood unrestricted model (H1)     -13089.253
                                                      
  Akaike (AIC)                               26419.684
  Bayesian (BIC)                             26705.740
  Sample-size adjusted Bayesian (SABIC)      26540.539

Root Mean Square Error of Approximation:

  RMSEA                                          0.050
  90 Percent confidence interval - lower         0.042
  90 Percent confidence interval - upper         0.058
  P-value H_0: RMSEA <= 0.050                    0.500
  P-value H_0: RMSEA >= 0.080                    0.000
                                                      
  Robust RMSEA                                   0.170
  90 Percent confidence interval - lower         0.133
  90 Percent confidence interval - upper         0.209
  P-value H_0: Robust RMSEA <= 0.050             0.000
  P-value H_0: Robust RMSEA >= 0.080             1.000

Standardized Root Mean Square Residual:

  SRMR                                           0.093

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  vocab_t1 =~                                                           
    prod_t1           1.000                               2.056    1.124
    comp_t1   (s1)    0.316    0.029   10.776    0.000    0.649    0.397
  accuracy_t1 =~                                                        
    acc_t1            1.000                               0.950    0.797
    acc_sd_t1 (s2)   -0.729    0.041  -17.682    0.000   -0.692   -0.583
  speed_t1 =~                                                           
    log_rt_t1         1.000                               0.798    0.732
    lg_rt_s_1 (s3)    0.635    0.054   11.765    0.000    0.507    0.427
  vocab_t2 =~                                                           
    prod_t2           1.000                               2.598    0.994
    comp_t1   (s1)    0.316    0.029   10.776    0.000    0.820    0.501
  accuracy_t2 =~                                                        
    acc_t2            1.000                               0.949    0.841
    acc_sd_t2 (s2)   -0.729    0.041  -17.682    0.000   -0.692   -0.737
  speed_t2 =~                                                           
    log_rt_t2         1.000                               0.745    0.843
    lg_rt_s_2 (s3)    0.635    0.054   11.765    0.000    0.473    0.505

Regressions:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  vocab_t2 ~                                                            
    vocab_t1          0.826    0.184    4.487    0.000    0.654    0.654
    speed_t1         -0.260    0.442   -0.589    0.556   -0.080   -0.080
    accuracy_t1       0.607    0.489    1.240    0.215    0.222    0.222
  speed_t2 ~                                                            
    vocab_t1          0.058    0.049    1.187    0.235    0.160    0.160
    speed_t1          0.853    0.334    2.550    0.011    0.914    0.914
    accuracy_t1       0.016    0.297    0.053    0.957    0.020    0.020
  accuracy_t2 ~                                                         
    vocab_t1          0.041    0.063    0.646    0.518    0.088    0.088
    speed_t1         -0.532    0.405   -1.314    0.189   -0.447   -0.447
    accuracy_t1       0.177    0.369    0.480    0.631    0.177    0.177

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  vocab_t1 ~~                                                           
    accuracy_t1       0.951    0.083   11.393    0.000    0.487    0.487
    speed_t1         -0.511    0.070   -7.289    0.000   -0.311   -0.311
  accuracy_t1 ~~                                                        
    speed_t1         -0.613    0.036  -16.948    0.000   -0.809   -0.809
 .vocab_t2 ~~                                                           
   .accuracy_t2       0.151    0.099    1.516    0.129    0.141    0.141
   .speed_t2         -0.008    0.089   -0.090    0.928   -0.015   -0.015
 .accuracy_t2 ~~                                                        
   .speed_t2         -0.196    0.068   -2.900    0.004   -0.711   -0.711

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
    vocab_t1          0.116    0.045    2.582    0.010    0.056    0.056
   .vocab_t2          0.017    0.047    0.368    0.713    0.007    0.007
    accuracy_t1       0.022    0.021    1.037    0.300    0.023    0.023
   .accuracy_t2       0.045    0.044    1.023    0.306    0.047    0.047
    speed_t1          0.028    0.023    1.210    0.226    0.036    0.036
   .speed_t2         -0.001    0.030   -0.029    0.977   -0.001   -0.001
   .prod_t1          -1.132    0.028  -39.820    0.000   -1.132   -0.619
   .comp_t1           0.554    0.062    8.869    0.000    0.554    0.338
   .acc_t1           -0.220    0.019  -11.847    0.000   -0.220   -0.184
   .acc_sd_t1        -0.050    0.022   -2.308    0.021   -0.050   -0.042
   .log_rt_t1         0.004    0.023    0.168    0.866    0.004    0.004
   .log_rt_sd_t1     -0.041    0.027   -1.516    0.130   -0.041   -0.035
   .prod_t2           1.268    0.053   24.031    0.000    1.268    0.485
   .acc_t2            0.626    0.037   17.098    0.000    0.626    0.555
   .acc_sd_t2        -0.009    0.039   -0.231    0.817   -0.009   -0.010
   .log_rt_t2        -0.215    0.029   -7.470    0.000   -0.215   -0.243
   .log_rt_sd_t2     -0.274    0.045   -6.139    0.000   -0.274   -0.293

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
    vocab_t1          4.227    0.565    7.483    0.000    1.000    1.000
   .vocab_t2          2.124    1.013    2.096    0.036    0.315    0.315
    accuracy_t1       0.902    0.065   13.785    0.000    1.000    1.000
   .accuracy_t2       0.534    0.098    5.422    0.000    0.593    0.593
    speed_t1          0.637    0.063   10.044    0.000    1.000    1.000
   .speed_t2          0.142    0.091    1.574    0.115    0.257    0.257
   .prod_t1          -0.879    0.543   -1.619    0.106   -0.879   -0.263
   .comp_t1           0.747    0.135    5.531    0.000    0.747    0.279
   .acc_t1            0.519    0.052   10.083    0.000    0.519    0.365
   .acc_sd_t1         0.931    0.040   23.132    0.000    0.931    0.660
   .log_rt_t1         0.552    0.056    9.827    0.000    0.552    0.464
   .log_rt_sd_t1      1.152    0.049   23.738    0.000    1.152    0.818
   .prod_t2           0.076    0.683    0.112    0.911    0.076    0.011
   .acc_t2            0.372    0.069    5.351    0.000    0.372    0.292
   .acc_sd_t2         0.402    0.049    8.172    0.000    0.402    0.456
   .log_rt_t2         0.225    0.062    3.652    0.000    0.225    0.289
   .log_rt_sd_t2      0.654    0.062   10.472    0.000    0.654    0.745

layout_long = matrix(nrow=7, ncol = 7,
                     data = c("log_rt_sd_t1","log_rt_t1", "acc_t1", "acc_sd_t1", "prod_t1","comp_t1", NA,
                               NA, NA, NA, NA, NA, NA,  NA,
                              "speed_t1",NA,"accuracy_t1",NA,"vocab_t1",NA,  NA,
                              NA, NA, NA, NA, NA, NA,  NA,
                               NA,"speed_t2",NA,"accuracy_t2",NA,NA,"vocab_t2",
                               NA, NA, NA, NA, NA, NA,  NA,
                               NA,"log_rt_sd_t2","log_rt_t2", "acc_t2", "acc_sd_t2", "prod_t2", "comp_t2"), 
                     byrow = TRUE)
graph_sem(model = fit3_long, text_size = 2, layout = t(layout_long))

Draft 3: Full longitudinal model

d_sub_wide <- d_sub_s |> 
  mutate(t = cut(time_since_t0, breaks = c(0, 3, 6, 9, 12, 30), 
                 include.lowest = TRUE)) |>
  filter(t != "(12,30]") |>
  mutate(t = as.numeric(t)) |>
  group_by(subject_id, dataset_name, t) |>
  summarise(across(all_of(c("log_rt", "log_rt_sd", "acc",
                            "acc_sd", "prod", "comp")),
                   ~ mean(.x, na.rm=TRUE))) |> 
  mutate(across(all_of(c("log_rt", "log_rt_sd", "acc",
                         "acc_sd", "prod", "comp")),
                ~ ifelse(is.nan(.x), NA, .x))) |>
  pivot_wider(id_cols = c("subject_id","dataset_name"), 
              names_from = "t",
              values_from = c("log_rt", "log_rt_sd", "acc", 
                              "acc_sd", "prod", "comp"), 
              names_prefix = "t")

`summarise()` has grouped output by 'subject_id', 'dataset_name'. You can
override using the `.groups` argument.

d_sub_wide |>
  ungroup() |>
  summarise(across(log_rt_t1:comp_t4, 
                   ~sum(!is.na(.x))))

# A tibble: 1 × 24
  log_rt_t1 log_rt_t2 log_rt_t3 log_rt_t4 log_rt_sd_t1 log_rt_sd_t2 log_rt_sd_t3
      <int>     <int>     <int>     <int>        <int>        <int>        <int>
1      1447       250        84       169         1447          250           84
# ℹ 17 more variables: log_rt_sd_t4 <int>, acc_t1 <int>, acc_t2 <int>,
#   acc_t3 <int>, acc_t4 <int>, acc_sd_t1 <int>, acc_sd_t2 <int>,
#   acc_sd_t3 <int>, acc_sd_t4 <int>, prod_t1 <int>, prod_t2 <int>,
#   prod_t3 <int>, prod_t4 <int>, comp_t1 <int>, comp_t2 <int>, comp_t3 <int>,
#   comp_t4 <int>

with just the observed variables

slopes_model_long <- "

# regressions
accuracy_intercept =~ 1*acc_t1 + 1*acc_t2 + 1*acc_t3 + 1*acc_t4 
accuracy_slope =~ 1*acc_t1 + 2*acc_t2 + 3*acc_t3 + 4*acc_t4
speed_intercept =~ 1*log_rt_t1 + 1*log_rt_t2 + 1*log_rt_t3 + 1*log_rt_t4 
speed_slope =~ 1*log_rt_t1 + 2*log_rt_t2 + 3*log_rt_t3 + 4*log_rt_t4
vocab_intercept =~ 1*prod_t1 + 1*prod_t2 + 1*prod_t3 + 1*prod_t4 
vocab_slope =~ 1*prod_t1 + 2*prod_t2 + 3*prod_t3 + 4*prod_t4
"

slopes_long <- growth(slopes_model_long, d_sub_wide, std.lv=TRUE, missing='fiml')

Warning: lavaan->lav_data_full():  
   some cases are empty and will be ignored: 370 548 551 559 568 700 711 715 
   717 731 740 742 744 746 750 755 756 757 759 780 784 786 796 797 817 824 
   834 842 847 851 854 865 873 879 893 895 899 910 917 925 927 943 963 964 
   965 967 974 1045 1053 1062 1063 1064 1069 1074 1077 1080 1081 1084 1086 
   1093 1096 1101 1102 1107 1108 1113 1114 1122 1133 1135 1138 1139 1140 1142 
   1144 1145 1147 1153 1156 1162 1177 1178 1184 1185 1191 1194 1199 1207 1211 
   1215 1217 1218 1219 1225 1226 1229 1230 1231 1246 1250 1256 1259 1261 1267 
   1279 1283 1284 1303 1315 1317 1318 1319 1321 1336 1338 1341 1386 1413 1415 
   1416 1511 1514 1860 1862 1865 1867 1869 1877 1878 1880 1881 1889 1890 1893 
   1901 1906 1910 1911 1912 1914 1916 1920 1924 1929 1930 1933 1934 1942 1949 
   1952 1958 1960 1963.

Warning: lavaan->lav_data_full():  
   due to missing values, some pairwise combinations have less than 10% 
   coverage; use lavInspect(fit, "coverage") to investigate.

Warning: lavaan->lav_mvnorm_missing_h1_estimate_moments():  
   Maximum number of iterations reached when computing the sample moments 
   using EM; use the em.h1.iter.max= argument to increase the number of 
   iterations

Warning: lavaan->lav_object_post_check():  
   covariance matrix of latent variables is not positive definite ; use 
   lavInspect(fit, "cov.lv") to investigate.

summary(slopes_long, fit.measures=TRUE, standardize=TRUE)

lavaan 0.6-18 ended normally after 119 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        33

                                                  Used       Total
  Number of observations                          1810        1963
  Number of missing patterns                        27            

Model Test User Model:
                                                      
  Test statistic                               595.139
  Degrees of freedom                                57
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                              1168.013
  Degrees of freedom                                66
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.512
  Tucker-Lewis Index (TLI)                       0.435
                                                      
  Robust Comparative Fit Index (CFI)             0.000
  Robust Tucker-Lewis Index (TLI)               -0.173

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -8508.790
  Loglikelihood unrestricted model (H1)      -8211.221
                                                      
  Akaike (AIC)                               17083.580
  Bayesian (BIC)                             17265.116
  Sample-size adjusted Bayesian (SABIC)      17160.277

Root Mean Square Error of Approximation:

  RMSEA                                          0.072
  90 Percent confidence interval - lower         0.067
  90 Percent confidence interval - upper         0.078
  P-value H_0: RMSEA <= 0.050                    0.000
  P-value H_0: RMSEA >= 0.080                    0.008
                                                      
  Robust RMSEA                                   0.390
  90 Percent confidence interval - lower         0.338
  90 Percent confidence interval - upper         0.443
  P-value H_0: Robust RMSEA <= 0.050             0.000
  P-value H_0: Robust RMSEA >= 0.080             1.000

Standardized Root Mean Square Residual:

  SRMR                                           4.588

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                        Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  accuracy_intercept =~                                                      
    acc_t1                 1.000                               1.000    0.836
    acc_t2                 1.000                               1.000    0.538
    acc_t3                 1.000                               1.000    0.384
    acc_t4                 1.000                               1.000    0.282
  accuracy_slope =~                                                          
    acc_t1                 1.000                               1.000    0.836
    acc_t2                 2.000                               2.000    1.076
    acc_t3                 3.000                               3.000    1.151
    acc_t4                 4.000                               4.000    1.127
  speed_intercept =~                                                         
    log_rt_t1              1.000                               1.000    0.928
    log_rt_t2              1.000                               1.000    0.659
    log_rt_t3              1.000                               1.000    0.421
    log_rt_t4              1.000                               1.000    0.300
  speed_slope =~                                                             
    log_rt_t1              1.000                               1.000    0.928
    log_rt_t2              2.000                               2.000    1.318
    log_rt_t3              3.000                               3.000    1.263
    log_rt_t4              4.000                               4.000    1.199
  vocab_intercept =~                                                         
    prod_t1                1.000                               1.000    0.489
    prod_t2                1.000                               1.000    0.316
    prod_t3                1.000                               1.000    0.243
    prod_t4                1.000                               1.000    0.201
  vocab_slope =~                                                             
    prod_t1                1.000                               1.000    0.489
    prod_t2                2.000                               2.000    0.632
    prod_t3                3.000                               3.000    0.730
    prod_t4                4.000                               4.000    0.805

Covariances:
                        Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  accuracy_intercept ~~                                                      
    accuracy_slope        -0.604    0.044  -13.858    0.000   -0.604   -0.604
    speed_intercpt        -0.900    0.094   -9.578    0.000   -0.900   -0.900
    speed_slope            0.542    0.070    7.777    0.000    0.542    0.542
    vocab_intercpt        -0.251    0.093   -2.704    0.007   -0.251   -0.251
    vocab_slope           -0.290    0.075   -3.882    0.000   -0.290   -0.290
  accuracy_slope ~~                                                          
    speed_intercpt         0.789    0.053   14.770    0.000    0.789    0.789
    speed_slope           -0.992    0.018  -56.360    0.000   -0.992   -0.992
    vocab_intercpt         0.854    0.061   14.107    0.000    0.854    0.854
    vocab_slope            0.903    0.025   35.792    0.000    0.903    0.903
  speed_intercept ~~                                                         
    speed_slope           -0.755    0.027  -27.590    0.000   -0.755   -0.755
    vocab_intercpt         0.715    0.071   10.048    0.000    0.715    0.715
    vocab_slope            0.524    0.054    9.735    0.000    0.524    0.524
  speed_slope ~~                                                             
    vocab_intercpt        -0.925    0.054  -17.201    0.000   -0.925   -0.925
    vocab_slope           -0.921    0.018  -50.305    0.000   -0.921   -0.921
  vocab_intercept ~~                                                         
    vocab_slope            0.930    0.088   10.615    0.000    0.930    0.930

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
    accurcy_ntrcpt   -0.945    0.057  -16.563    0.000   -0.945   -0.945
    accuracy_slope    0.771    0.050   15.306    0.000    0.771    0.771
    speed_intercpt    0.567    0.054   10.475    0.000    0.567    0.567
    speed_slope      -0.533    0.047  -11.417    0.000   -0.533   -0.533
    vocab_intercpt   -2.961    0.062  -47.989    0.000   -2.961   -2.961
    vocab_slope       1.901    0.051   37.513    0.000    1.901    1.901

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .acc_t1            0.638    0.078    8.195    0.000    0.638    0.446
   .acc_t2            0.871    0.090    9.626    0.000    0.871    0.252
   .acc_t3            0.417    0.108    3.866    0.000    0.417    0.061
   .acc_t4            0.430    0.171    2.514    0.012    0.430    0.034
   .log_rt_t1         0.672    0.056   12.096    0.000    0.672    0.578
   .log_rt_t2         0.323    0.042    7.716    0.000    0.323    0.140
   .log_rt_t3         0.169    0.058    2.908    0.004    0.169    0.030
   .log_rt_t4         0.173    0.088    1.960    0.050    0.173    0.016
   .prod_t1           0.319    0.061    5.220    0.000    0.319    0.076
   .prod_t2           1.285    0.136    9.465    0.000    1.285    0.128
   .prod_t3           1.287    0.310    4.147    0.000    1.287    0.076
   .prod_t4           0.253    0.195    1.294    0.196    0.253    0.010
    accurcy_ntrcpt    1.000                               1.000    1.000
    accuracy_slope    1.000                               1.000    1.000
    speed_intercpt    1.000                               1.000    1.000
    speed_slope       1.000                               1.000    1.000
    vocab_intercpt    1.000                               1.000    1.000
    vocab_slope       1.000                               1.000    1.000

layout <- read.csv(here("misc/layout_slopes.csv"), header = FALSE)


graph_sem(model = slopes_long, text_size = 3, layout=t(layout))

This is still linear. We can add quadratic?

d_sub_wide |>
  select(subject_id, prod_t1, prod_t2, prod_t3, prod_t4) |>
  pivot_longer(starts_with("prod"), names_to = "variable", values_to = "prod") |>
  mutate(t = as.numeric(str_sub(variable, -1))) |>
  ggplot(aes(x = t, y = prod)) + 
  geom_line(aes(group = subject_id), alpha = .1) + 
  geom_smooth(aes(group = 1), method = "lm") + 
  geom_smooth(aes(group = 1), method = "lm", formula = y ~ poly(x, 2), col = "green")

Adding missing grouping variables: `dataset_name`
`geom_smooth()` using formula = 'y ~ x'

Warning: Removed 6564 rows containing non-finite outside the scale range
(`stat_smooth()`).
Removed 6564 rows containing non-finite outside the scale range
(`stat_smooth()`).

Warning: Removed 6385 rows containing missing values or values outside the scale range
(`geom_line()`).

But it seems unnecessary for this view of the data. Arguably we’re losing a lot by doing it this way.

try finer-grained rebinning

Let’s try putting prod back to its natural units?

d_sub_wide_fine <- d_sub_s |>
  mutate(t = cut(time_since_t0, breaks = c(0, 2, 4, 6, 8, 10, 12, 14, 16, 30), 
                 include.lowest = TRUE)) |>
  filter(t != "(12,30]") |>
  mutate(t = as.numeric(t)) |>
  group_by(subject_id, dataset_name, t) |>
  summarise(across(all_of(c("log_rt", "log_rt_sd", "acc",
                            "acc_sd", "prod", "comp")),
                   ~ mean(.x, na.rm=TRUE))) |> 
  mutate(across(all_of(c("log_rt", "log_rt_sd", "acc",
                         "acc_sd", "prod", "comp")),
                ~ ifelse(is.nan(.x), NA, .x))) |>
  pivot_wider(id_cols = c("subject_id","dataset_name"), 
              names_from = "t",
              values_from = c("log_rt", "log_rt_sd", "acc", 
                              "acc_sd", "prod", "comp"), 
              names_prefix = "t")

`summarise()` has grouped output by 'subject_id', 'dataset_name'. You can
override using the `.groups` argument.

d_sub_wide_fine |>
  ungroup() |>
  summarise(across(log_rt_t1:comp_t4, 
                   ~sum(!is.na(.x))))

# A tibble: 1 × 48
  log_rt_t1 log_rt_t3 log_rt_t4 log_rt_t9 log_rt_t2 log_rt_t5 log_rt_t8
      <int>     <int>     <int>     <int>     <int>     <int>     <int>
1      1443       238        73        55        90        81        16
# ℹ 41 more variables: log_rt_t7 <int>, log_rt_t6 <int>, log_rt_sd_t1 <int>,
#   log_rt_sd_t3 <int>, log_rt_sd_t4 <int>, log_rt_sd_t9 <int>,
#   log_rt_sd_t2 <int>, log_rt_sd_t5 <int>, log_rt_sd_t8 <int>,
#   log_rt_sd_t7 <int>, log_rt_sd_t6 <int>, acc_t1 <int>, acc_t3 <int>,
#   acc_t4 <int>, acc_t9 <int>, acc_t2 <int>, acc_t5 <int>, acc_t8 <int>,
#   acc_t7 <int>, acc_t6 <int>, acc_sd_t1 <int>, acc_sd_t3 <int>,
#   acc_sd_t4 <int>, acc_sd_t9 <int>, acc_sd_t2 <int>, acc_sd_t5 <int>, …

d_sub_wide_fine |>
  select(subject_id, starts_with("prod")) |>
  pivot_longer(starts_with("prod"), names_to = "variable", values_to = "prod") |>
  mutate(t = as.numeric(str_sub(variable, -1))) |>
  ggplot(aes(x = t, y = prod)) + 
  geom_line(aes(group = subject_id), alpha = .1)

Adding missing grouping variables: `dataset_name`

Warning: Removed 15694 rows containing missing values or values outside the scale range
(`geom_line()`).

slopes_model_long <- "

# regressions
accuracy_intercept =~ 1*acc_t1 + 1*acc_t2 + 1*acc_t3 + 1*acc_t4 + 1*acc_t5 
accuracy_slope =~ 1*acc_t1 + 2*acc_t2 + 3*acc_t3 + 4*acc_t4 + 5*acc_t5 
speed_intercept =~ 1*log_rt_t1 + 1*log_rt_t2 + 1*log_rt_t3 + 1*log_rt_t4 + 1*log_rt_t5 
speed_slope =~ 1*log_rt_t1 + 2*log_rt_t2 + 3*log_rt_t3 + 4*log_rt_t4 + 5*log_rt_t5
vocab_intercept =~ 1*prod_t1 + 1*prod_t2 + 1*prod_t3 + 1*prod_t4 + 1*prod_t5 
vocab_slope =~ 1*prod_t1 + 2*prod_t2 + 3*prod_t3 + 4*prod_t4 + 5*prod_t5 
"

slopes_long <- growth(slopes_model_long, d_sub_wide_fine, std.lv=TRUE, missing='fiml')

Warning: lavaan->lav_data_full():  
   some cases are empty and will be ignored: 370 548 551 559 568 700 711 715 
   717 731 740 742 744 746 750 755 756 757 759 780 784 786 796 797 817 824 
   834 842 847 851 854 865 873 879 893 895 899 910 917 925 927 943 963 964 
   965 967 974 1045 1053 1062 1063 1064 1069 1074 1077 1080 1081 1084 1086 
   1093 1096 1101 1102 1107 1108 1113 1114 1122 1133 1135 1138 1139 1140 1142 
   1144 1145 1147 1153 1156 1162 1177 1178 1184 1185 1191 1194 1199 1207 1211 
   1215 1217 1218 1219 1225 1226 1229 1230 1231 1246 1250 1256 1259 1261 1267 
   1279 1283 1284 1303 1315 1317 1318 1319 1321 1336 1338 1341 1386 1413 1415 
   1416 1511 1514 1860 1862 1865 1867 1869 1877 1878 1880 1881 1889 1890 1893 
   1901 1906 1910 1911 1912 1914 1916 1920 1924 1929 1930 1933 1934 1942 1949 
   1952 1958 1960 1963.

Warning: lavaan->lav_data_full():  
   due to missing values, some pairwise combinations have less than 10% 
   coverage; use lavInspect(fit, "coverage") to investigate.

Warning: lavaan->lav_mvnorm_missing_h1_estimate_moments():  
   Maximum number of iterations reached when computing the sample moments 
   using EM; use the em.h1.iter.max= argument to increase the number of 
   iterations

Warning: lavaan->lav_object_post_check():  
   covariance matrix of latent variables is not positive definite ; use 
   lavInspect(fit, "cov.lv") to investigate.

summary(slopes_long, fit.measures=TRUE, standardize=TRUE)

lavaan 0.6-18 ended normally after 106 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        36

                                                  Used       Total
  Number of observations                          1810        1963
  Number of missing patterns                        44            

Model Test User Model:
                                                      
  Test statistic                               645.228
  Degrees of freedom                                99
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                              1233.601
  Degrees of freedom                               105
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.516
  Tucker-Lewis Index (TLI)                       0.487
                                                      
  Robust Comparative Fit Index (CFI)             0.000
  Robust Tucker-Lewis Index (TLI)               -0.138

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -8386.455
  Loglikelihood unrestricted model (H1)      -8063.841
                                                      
  Akaike (AIC)                               16844.911
  Bayesian (BIC)                             17042.950
  Sample-size adjusted Bayesian (SABIC)      16928.579

Root Mean Square Error of Approximation:

  RMSEA                                          0.055
  90 Percent confidence interval - lower         0.051
  90 Percent confidence interval - upper         0.059
  P-value H_0: RMSEA <= 0.050                    0.017
  P-value H_0: RMSEA >= 0.080                    0.000
                                                      
  Robust RMSEA                                   0.460
  90 Percent confidence interval - lower         0.426
  90 Percent confidence interval - upper         0.495
  P-value H_0: Robust RMSEA <= 0.050             0.000
  P-value H_0: Robust RMSEA >= 0.080             1.000

Standardized Root Mean Square Residual:

  SRMR                                           6.905

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                        Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  accuracy_intercept =~                                                      
    acc_t1                 1.000                               1.000    0.834
    acc_t2                 1.000                               1.000    0.513
    acc_t3                 1.000                               1.000    0.368
    acc_t4                 1.000                               1.000    0.278
    acc_t5                 1.000                               1.000    0.218
  accuracy_slope =~                                                          
    acc_t1                 1.000                               1.000    0.834
    acc_t2                 2.000                               2.000    1.026
    acc_t3                 3.000                               3.000    1.104
    acc_t4                 4.000                               4.000    1.112
    acc_t5                 5.000                               5.000    1.088
  speed_intercept =~                                                         
    log_rt_t1              1.000                               1.000    0.923
    log_rt_t2              1.000                               1.000    0.621
    log_rt_t3              1.000                               1.000    0.407
    log_rt_t4              1.000                               1.000    0.295
    log_rt_t5              1.000                               1.000    0.230
  speed_slope =~                                                             
    log_rt_t1              1.000                               1.000    0.923
    log_rt_t2              2.000                               2.000    1.243
    log_rt_t3              3.000                               3.000    1.222
    log_rt_t4              4.000                               4.000    1.181
    log_rt_t5              5.000                               5.000    1.148
  vocab_intercept =~                                                         
    prod_t1                1.000                               1.000    0.496
    prod_t2                1.000                               1.000    0.322
    prod_t3                1.000                               1.000    0.250
    prod_t4                1.000                               1.000    0.200
    prod_t5                1.000                               1.000    0.167
  vocab_slope =~                                                             
    prod_t1                1.000                               1.000    0.496
    prod_t2                2.000                               2.000    0.645
    prod_t3                3.000                               3.000    0.749
    prod_t4                4.000                               4.000    0.802
    prod_t5                5.000                               5.000    0.835

Covariances:
                        Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  accuracy_intercept ~~                                                      
    accuracy_slope        -0.549    0.043  -12.836    0.000   -0.549   -0.549
    speed_intercpt        -0.822    0.084   -9.839    0.000   -0.822   -0.822
    speed_slope            0.499    0.060    8.307    0.000    0.499    0.499
    vocab_intercpt        -0.308    0.077   -3.973    0.000   -0.308   -0.308
    vocab_slope           -0.262    0.067   -3.933    0.000   -0.262   -0.262
  accuracy_slope ~~                                                          
    speed_intercpt         0.747    0.050   15.056    0.000    0.747    0.747
    speed_slope           -0.999    0.009 -112.000    0.000   -0.999   -0.999
    vocab_intercpt         0.883    0.058   15.155    0.000    0.883    0.883
    vocab_slope            0.912    0.020   45.223    0.000    0.912    0.912
  speed_intercept ~~                                                         
    speed_slope           -0.720    0.030  -24.213    0.000   -0.720   -0.720
    vocab_intercpt         0.636    0.064   10.009    0.000    0.636    0.636
    vocab_slope            0.556    0.051   10.970    0.000    0.556    0.556
  speed_slope ~~                                                             
    vocab_intercpt        -0.929    0.053  -17.405    0.000   -0.929   -0.929
    vocab_slope           -0.934    0.016  -58.974    0.000   -0.934   -0.934
  vocab_intercept ~~                                                         
    vocab_slope            0.936    0.081   11.556    0.000    0.936    0.936

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
    accurcy_ntrcpt   -0.962    0.053  -18.004    0.000   -0.962   -0.962
    accuracy_slope    0.765    0.048   15.951    0.000    0.765    0.765
    speed_intercpt    0.631    0.052   12.087    0.000    0.631    0.631
    speed_slope      -0.574    0.045  -12.736    0.000   -0.574   -0.574
    vocab_intercpt   -2.606    0.055  -47.561    0.000   -2.606   -2.606
    vocab_slope       1.507    0.047   32.274    0.000    1.507    1.507

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .acc_t1            0.535    0.074    7.192    0.000    0.535    0.372
   .acc_t2            0.995    0.159    6.243    0.000    0.995    0.262
   .acc_t3            0.672    0.080    8.370    0.000    0.672    0.091
   .acc_t4            0.328    0.103    3.187    0.001    0.328    0.025
   .acc_t5            0.623    0.160    3.890    0.000    0.623    0.029
   .log_rt_t1         0.614    0.059   10.402    0.000    0.614    0.523
   .log_rt_t2         0.468    0.092    5.094    0.000    0.468    0.181
   .log_rt_t3         0.351    0.055    6.417    0.000    0.351    0.058
   .log_rt_t4         0.230    0.067    3.429    0.001    0.230    0.020
   .log_rt_t5         0.171    0.086    1.977    0.048    0.171    0.009
   .prod_t1           0.189    0.056    3.399    0.001    0.189    0.047
   .prod_t2           0.879    0.177    4.968    0.000    0.879    0.091
   .prod_t3           0.445    0.086    5.180    0.000    0.445    0.028
   .prod_t4           0.411    0.181    2.266    0.023    0.411    0.017
   .prod_t5           0.515    0.180    2.859    0.004    0.515    0.014
    accurcy_ntrcpt    1.000                               1.000    1.000
    accuracy_slope    1.000                               1.000    1.000
    speed_intercpt    1.000                               1.000    1.000
    speed_slope       1.000                               1.000    1.000
    vocab_intercpt    1.000                               1.000    1.000
    vocab_slope       1.000                               1.000    1.000

layout <- read.csv(here("misc/layout_slopes_5.csv"), header = FALSE)

Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
incomplete final line found by readTableHeader on
'/Users/mcfrank/Projects/peekbank/peekbank-development/misc/layout_slopes_5.csv'

graph_sem(model = slopes_long, text_size = 3, layout=t(layout))

with lots of latent factor structure

This one still seems not quite right.

fa3_model_long <- "
# measurement model
vocab_t1 =~ 1*prod_t1 + s1*comp_t1
accuracy_t1 =~ 1*acc_t1 + s2*acc_sd_t1
speed_t1 =~ 1*log_rt_t1 + s3*log_rt_sd_t1

vocab_t2 =~ 1*prod_t2 
accuracy_t2 =~ 1*acc_t2 + s2*acc_sd_t2
speed_t2 =~ 1*log_rt_t2 + s3*log_rt_sd_t2

vocab_t3 =~ 1*prod_t3 
accuracy_t3 =~ 1*acc_t3 + s2*acc_sd_t3
speed_t3 =~ 1*log_rt_t3 + s3*log_rt_sd_t3

vocab_t4 =~ 1*prod_t4 
accuracy_t4 =~ 1*acc_t4 + s2*acc_sd_t4
speed_t4 =~ 1*log_rt_t4 + s3*log_rt_sd_t4

# factor covariances
# vocab_t1 ~~ accuracy_t1
# vocab_t1 ~~ speed_t1
# accuracy_t1 ~~ speed_t1
# 
# vocab_t2 ~~ accuracy_t2
# vocab_t2 ~~ speed_t2
# accuracy_t2 ~~ speed_t2
# 
# vocab_t3 ~~ accuracy_t3
# vocab_t3 ~~ speed_t3
# accuracy_t3 ~~ speed_t3
# 
# vocab_t4 ~~ accuracy_t4
# vocab_t4 ~~ speed_t4
# accuracy_t4 ~~ speed_t4

# means for the latents
# vocab_t1 ~ 1
# vocab_t2 ~ 1
# vocab_t3 ~ 1
# vocab_t4 ~ 1
# accuracy_t1 ~ 1
# accuracy_t2 ~ 1
# accuracy_t3 ~ 1
# accuracy_t4 ~ 1
# speed_t1 ~ 1
# speed_t2 ~ 1
# speed_t3 ~ 1
# speed_t4 ~ 1

# autoregression for the latents
# vocab_t2 ~ 1 * vocab_t1
# vocab_t3 ~ 1 * vocab_t2
# vocab_t4 ~ 1 * vocab_t3
# accuracy_t2 ~ 1 * accuracy_t1
# accuracy_t3 ~ 1 * accuracy_t2
# accuracy_t4 ~ 1 * accuracy_t3
# speed_t2 ~ 1 * speed_t1
# speed_t3 ~ 1 * speed_t2
# speed_t4 ~ 1 * speed_t3

# regressions
accuracy_intercept =~ 1*accuracy_t1 + 1*accuracy_t2 + 1*accuracy_t3 + 1*accuracy_t4 
accuracy_slope =~ 1*accuracy_t1 + 2*accuracy_t2 + 3*accuracy_t3 + 4*accuracy_t4
speed_intercept =~ 1*speed_t1 + 1*speed_t2 + 1*speed_t3 + 1*speed_t4 
speed_slope =~ 1*speed_t1 + 2*speed_t2 + 3*speed_t3 + 4*speed_t4
vocab_intercept =~ 1*vocab_t1 + 1*vocab_t2 + 1*vocab_t3 + 1*vocab_t4 
vocab_slope =~ 1*vocab_t1 + 2*vocab_t2 + 3*vocab_t3 + 4*vocab_t4

# parameters for the regressions
vocab_intercept ~ 1
vocab_slope ~ 1
accuracy_intercept ~ 1
accuracy_slope ~ 1
speed_intercept ~ 1
speed_slope ~ 1

# residual variance for the latents
vocab_t1 ~~ NA*vocab_t1
vocab_t2 ~~ NA*vocab_t2
vocab_t3 ~~ NA*vocab_t3
vocab_t4 ~~ NA*vocab_t4
accuracy_t1 ~~ NA*accuracy_t1
accuracy_t2 ~~ NA*accuracy_t2
accuracy_t3 ~~ NA*accuracy_t3
accuracy_t4 ~~ NA*accuracy_t4
speed_t1 ~~ NA*speed_t1
speed_t2 ~~ NA*speed_t2
speed_t3 ~~ NA*speed_t3
speed_t4 ~~ NA*speed_t4

# residual variance for the other latents
vocab_intercept ~~ NA*vocab_intercept
vocab_slope ~~ NA*vocab_slope
accuracy_intercept ~~ NA*accuracy_intercept
accuracy_slope ~~ NA*accuracy_slope
speed_intercept ~~ NA*speed_intercept
speed_slope ~~ NA*speed_slope

# set observed intercepts to zero - these should go into the latents
prod_t1 ~ 0
prod_t2 ~ 0
prod_t3 ~ 0
prod_t4 ~ 0
comp_t1 ~ 0 
log_rt_t1 ~ 0
log_rt_t2 ~ 0
log_rt_t3 ~ 0
log_rt_t4 ~ 0
log_rt_sd_t1 ~ 0 
log_rt_sd_t2 ~ 0
log_rt_sd_t3 ~ 0 
log_rt_sd_t4 ~ 0 
acc_t1 ~ 0 
acc_t2 ~ 0 
acc_t3 ~ 0 
acc_t4 ~ 0 
acc_sd_t1 ~ 0
acc_sd_t2 ~ 0
acc_sd_t3 ~ 0
acc_sd_t4 ~ 0
"

fit3_long <- sem(fa3_model_long, d_sub_wide, std.lv=TRUE, missing='fiml')

Warning: lavaan->lav_data_full():  
   some cases are empty and will be ignored: 370 548 551 559 568 700 711 715 
   717 731 740 742 744 746 750 755 756 757 759 780 784 786 796 797 817 824 
   834 842 847 851 854 865 873 879 893 895 899 910 917 925 927 943 963 964 
   965 967 974 1045 1053 1062 1063 1064 1069 1074 1077 1080 1081 1084 1086 
   1093 1096 1101 1102 1107 1108 1113 1114 1122 1133 1135 1138 1139 1140 1142 
   1144 1145 1147 1153 1156 1162 1177 1178 1184 1185 1191 1194 1199 1207 1211 
   1215 1217 1218 1219 1225 1226 1229 1230 1231 1246 1250 1256 1259 1261 1267 
   1279 1283 1284 1303 1315 1317 1318 1319 1321 1336 1338 1341 1386 1413 1415 
   1416 1511 1514 1860 1862 1865 1867 1869 1877 1878 1880 1881 1889 1890 1893 
   1901 1906 1910 1911 1912 1914 1916 1920 1924 1929 1930 1933 1934 1942 1949 
   1952 1958 1960 1963.

Warning: lavaan->lav_data_full():  
   due to missing values, some pairwise combinations have less than 10% 
   coverage; use lavInspect(fit, "coverage") to investigate.

Warning: lavaan->lav_mvnorm_missing_h1_estimate_moments():  
   Maximum number of iterations reached when computing the sample moments 
   using EM; use the em.h1.iter.max= argument to increase the number of 
   iterations

Warning: lavaan->lav_object_post_check():  
   some estimated ov variances are negative

Warning: lavaan->lav_object_post_check():  
   some estimated lv variances are negative

summary(fit3_long, fit.measures=TRUE, standardize=TRUE)

lavaan 0.6-18 ended normally after 187 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        66
  Number of equality constraints                     6

                                                  Used       Total
  Number of observations                          1810        1963
  Number of missing patterns                        38            

Model Test User Model:
                                                      
  Test statistic                               881.440
  Degrees of freedom                               192
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                              2723.615
  Degrees of freedom                               210
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.726
  Tucker-Lewis Index (TLI)                       0.700
                                                      
  Robust Comparative Fit Index (CFI)             1.000
  Robust Tucker-Lewis Index (TLI)               -0.736

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)             -14894.272
  Loglikelihood unrestricted model (H1)     -14453.552
                                                      
  Akaike (AIC)                               29908.544
  Bayesian (BIC)                             30238.609
  Sample-size adjusted Bayesian (SABIC)      30047.992

Root Mean Square Error of Approximation:

  RMSEA                                          0.045
  90 Percent confidence interval - lower         0.042
  90 Percent confidence interval - upper         0.048
  P-value H_0: RMSEA <= 0.050                    0.999
  P-value H_0: RMSEA >= 0.080                    0.000
                                                      
  Robust RMSEA                                   0.000
  90 Percent confidence interval - lower         0.000
  90 Percent confidence interval - upper         0.000
  P-value H_0: Robust RMSEA <= 0.050             1.000
  P-value H_0: Robust RMSEA >= 0.080             0.000

Standardized Root Mean Square Residual:

  SRMR                                           0.186

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Observed
  Observed information based on                Hessian

Latent Variables:
                        Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  vocab_t1 =~                                                                
    prod_t1                1.000                               2.546    1.391
    comp_t1   (s1)         0.154    0.026    5.827    0.000    0.392    0.367
  accuracy_t1 =~                                                             
    acc_t1                 1.000                               1.079    0.901
    acc_sd_t1 (s2)        -0.502    0.037  -13.466    0.000   -0.541   -0.465
  speed_t1 =~                                                                
    log_rt_t1              1.000                               0.822    0.752
    lg_rt_s_1 (s3)         0.578    0.049   11.802    0.000    0.475    0.402
  vocab_t2 =~                                                                
    prod_t2                1.000                               1.823    1.000
  accuracy_t2 =~                                                             
    acc_t2                 1.000                               0.989    0.843
    acc_sd_t2 (s2)        -0.502    0.037  -13.466    0.000   -0.496   -0.547
  speed_t2 =~                                                                
    log_rt_t2              1.000                               0.778    0.892
    lg_rt_s_2 (s3)         0.578    0.049   11.802    0.000    0.450    0.477
  vocab_t3 =~                                                                
    prod_t3                1.000                               1.947    1.000
  accuracy_t3 =~                                                             
    acc_t3                 1.000                               1.010    1.132
    acc_sd_t3 (s2)        -0.502    0.037  -13.466    0.000   -0.507   -0.525
  speed_t3 =~                                                                
    log_rt_t3              1.000                               0.762    0.890
    lg_rt_s_3 (s3)         0.578    0.049   11.802    0.000    0.440    0.467
  vocab_t4 =~                                                                
    prod_t4                1.000                               2.118    1.000
  accuracy_t4 =~                                                             
    acc_t4                 1.000                               1.201    1.197
    acc_sd_t4 (s2)        -0.502    0.037  -13.466    0.000   -0.602   -0.573
  speed_t4 =~                                                                
    log_rt_t4              1.000                               0.928    1.061
    lg_rt_s_4 (s3)         0.578    0.049   11.802    0.000    0.536    0.538
  accuracy_intercept =~                                                      
    accrcy_t1              1.000                               0.856    0.856
    accrcy_t2              1.000                               0.934    0.934
    accrcy_t3              1.000                               0.914    0.914
    accrcy_t4              1.000                               0.769    0.769
  accuracy_slope =~                                                          
    accrcy_t1              1.000                               0.178    0.178
    accrcy_t2              2.000                               0.389    0.389
    accrcy_t3              3.000                               0.571    0.571
    accrcy_t4              4.000                               0.641    0.641
  speed_intercept =~                                                         
    speed_t1               1.000                               0.958    0.958
    speed_t2               1.000                               1.012    1.012
    speed_t3               1.000                               1.033    1.033
    speed_t4               1.000                               0.849    0.849
  speed_slope =~                                                             
    speed_t1               1.000                               0.198    0.198
    speed_t2               2.000                               0.419    0.419
    speed_t3               3.000                               0.642    0.642
    speed_t4               4.000                               0.703    0.703
  vocab_intercept =~                                                         
    vocab_t1               1.000                               0.057    0.057
    vocab_t2               1.000                               0.080    0.080
    vocab_t3               1.000                               0.075    0.075
    vocab_t4               1.000                               0.069    0.069
  vocab_slope =~                                                             
    vocab_t1               1.000                                  NA       NA
    vocab_t2               2.000                                  NA       NA
    vocab_t3               3.000                                  NA       NA
    vocab_t4               4.000                                  NA       NA

Covariances:
                        Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  accuracy_intercept ~~                                                      
    accuracy_slope        -0.130    0.092   -1.417    0.157   -0.733   -0.733
    speed_intercpt        -0.852    0.076  -11.143    0.000   -1.172   -1.172
    speed_slope            0.125    0.041    3.047    0.002    0.830    0.830
    vocab_intercpt         1.057    0.135    7.834    0.000    7.887    7.887
    vocab_slope            0.107    0.062    1.743    0.081    0.308    0.308
  accuracy_slope ~~                                                          
    speed_intercpt         0.156    0.033    4.722    0.000    1.027    1.027
    speed_slope           -0.042    0.012   -3.466    0.001   -1.341   -1.341
    vocab_intercpt        -0.221    0.058   -3.787    0.000   -7.916   -7.916
    vocab_slope           -0.004    0.015   -0.251    0.802   -0.054   -0.054
  speed_intercept ~~                                                         
    speed_slope           -0.062    0.048   -1.311    0.190   -0.485   -0.485
    vocab_intercpt        -0.373    0.098   -3.799    0.000   -3.259   -3.259
    vocab_slope           -0.127    0.038   -3.299    0.001   -0.426   -0.426
  speed_slope ~~                                                             
    vocab_intercpt         0.025    0.047    0.521    0.602    1.044    1.044
    vocab_slope            0.005    0.010    0.503    0.615    0.079    0.079
  vocab_intercept ~~                                                         
    vocab_slope            0.698    0.165    4.232    0.000   12.725   12.725

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
    vocab_intercpt   -2.488    0.076  -32.782    0.000  -17.141  -17.141
    vocab_slope       1.461    0.042   34.877    0.000       NA       NA
    accurcy_ntrcpt   -0.560    0.047  -11.808    0.000   -0.606   -0.606
    accuracy_slope    0.407    0.029   14.034    0.000    2.114    2.114
    speed_intercpt    0.179    0.040    4.519    0.000    0.227    0.227
    speed_slope      -0.167    0.023   -7.365    0.000   -1.023   -1.023
   .prod_t1           0.000                               0.000    0.000
   .prod_t2           0.000                               0.000    0.000
   .prod_t3           0.000                               0.000    0.000
   .prod_t4           0.000                               0.000    0.000
   .comp_t1           0.000                               0.000    0.000
   .log_rt_t1         0.000                               0.000    0.000
   .log_rt_t2         0.000                               0.000    0.000
   .log_rt_t3         0.000                               0.000    0.000
   .log_rt_t4         0.000                               0.000    0.000
   .log_rt_sd_t1      0.000                               0.000    0.000
   .log_rt_sd_t2      0.000                               0.000    0.000
   .log_rt_sd_t3      0.000                               0.000    0.000
   .log_rt_sd_t4      0.000                               0.000    0.000
   .acc_t1            0.000                               0.000    0.000
   .acc_t2            0.000                               0.000    0.000
   .acc_t3            0.000                               0.000    0.000
   .acc_t4            0.000                               0.000    0.000
   .acc_sd_t1         0.000                               0.000    0.000
   .acc_sd_t2         0.000                               0.000    0.000
   .acc_sd_t3         0.000                               0.000    0.000
   .acc_sd_t4         0.000                               0.000    0.000

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .vocab_t1          5.210    1.458    3.572    0.000    0.804    0.804
   .vocab_t2          1.081    0.148    7.331    0.000    0.325    0.325
   .vocab_t3          0.866    0.248    3.485    0.000    0.229    0.229
   .vocab_t4          1.166    0.366    3.185    0.001    0.260    0.260
   .accuracy_t1       0.535    0.129    4.143    0.000    0.460    0.460
   .accuracy_t2       0.498    0.121    4.106    0.000    0.510    0.510
   .accuracy_t3       0.617    0.188    3.275    0.001    0.604    0.604
   .accuracy_t4       1.039    0.262    3.966    0.000    0.721    0.721
   .speed_t1          0.154    0.079    1.939    0.052    0.228    0.228
   .speed_t2          0.128    0.068    1.879    0.060    0.212    0.212
   .speed_t3          0.095    0.117    0.811    0.417    0.164    0.164
   .speed_t4          0.314    0.138    2.273    0.023    0.365    0.365
    vocab_intercpt    0.021    0.521    0.040    0.968    1.000    1.000
    vocab_slope      -0.143    0.063   -2.268    0.023       NA       NA
    accurcy_ntrcpt    0.852    0.250    3.411    0.001    1.000    1.000
    accuracy_slope    0.037    0.038    0.976    0.329    1.000    1.000
    speed_intercpt    0.620    0.130    4.772    0.000    1.000    1.000
    speed_slope       0.027    0.021    1.257    0.209    1.000    1.000
   .prod_t1          -3.135    1.293   -2.425    0.015   -3.135   -0.936
   .comp_t1           0.982    0.086   11.478    0.000    0.982    0.865
   .acc_t1            0.270    0.086    3.147    0.002    0.270    0.189
   .acc_sd_t1         1.063    0.043   24.693    0.000    1.063    0.784
   .log_rt_t1         0.518    0.062    8.298    0.000    0.518    0.434
   .log_rt_sd_t1      1.170    0.049   23.933    0.000    1.170    0.838
   .prod_t2           0.000                               0.000    0.000
   .acc_t2            0.397    0.119    3.338    0.001    0.397    0.289
   .acc_sd_t2         0.576    0.060    9.613    0.000    0.576    0.701
   .log_rt_t2         0.155    0.068    2.287    0.022    0.155    0.204
   .log_rt_sd_t2      0.688    0.065   10.635    0.000    0.688    0.773
   .prod_t3           0.000                               0.000    0.000
   .acc_t3           -0.224    0.147   -1.523    0.128   -0.224   -0.281
   .acc_sd_t3         0.675    0.109    6.196    0.000    0.675    0.724
   .log_rt_t3         0.152    0.107    1.418    0.156    0.152    0.208
   .log_rt_sd_t3      0.694    0.112    6.173    0.000    0.694    0.782
   .prod_t4           0.000                               0.000    0.000
   .acc_t4           -0.436    0.164   -2.667    0.008   -0.436   -0.434
   .acc_sd_t4         0.741    0.086    8.633    0.000    0.741    0.671
   .log_rt_t4        -0.096    0.087   -1.099    0.272   -0.096   -0.125
   .log_rt_sd_t4      0.707    0.082    8.672    0.000    0.707    0.711

layout <- read.csv(here("misc/layout_dt3.csv"), header = FALSE)


graph_sem(model = fit3_long, text_size = 3, layout=t(layout))

There is something wrong with this model - we’re not getting good estimates of slopes, so it doesn’t seem interpretable. Seems like they are all constrained to zero?

Draft 4: Multi-level modeling of a single variable

Let’s try NLME.

d_sub_prod <- filter(d_sub, !is.na(prod)) |>
  group_by(subject_id) |>
  mutate(log_rt_0 = log_rt[1],
         acc_0 = long_window_accuracy[1])

library(nlme)

mod1 <- lme(prod ~ age, 
            random = ~ 1 | dataset_name / subject_id,
            na.action = na.omit, 
            data = d_sub_prod)

summary(mod1)

Linear mixed-effects model fit by REML
  Data: d_sub_prod 
        AIC      BIC   logLik
  -2647.902 -2619.99 1328.951

Random effects:
 Formula: ~1 | dataset_name
        (Intercept)
StdDev:   0.1159553

 Formula: ~1 | subject_id %in% dataset_name
        (Intercept)  Residual
StdDev:   0.1195384 0.0881392

Fixed effects:  prod ~ age 
                 Value  Std.Error   DF   t-value p-value
(Intercept) -0.7119279 0.03293471 1119 -21.61634       0
age          0.0498412 0.00054586 1119  91.30807       0
 Correlation: 
    (Intr)
age -0.33 

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-2.96893975 -0.48853102 -0.03319374  0.48838658  3.92681770 

Number of Observations: 1965
Number of Groups: 
                dataset_name subject_id %in% dataset_name 
                          15                          845

Plotting model.

ages <- seq(min(d_sub_prod$age), max(d_sub_prod$age), .1)
datasets <- unique(d_sub_prod$dataset_name)

# fixed effects with CIs
newdata_global <- tibble(age = ages) 
global_preds <- emmeans(mod1, ~ age, at = newdata_global, CIs = TRUE) |> 
  summary() |>
  tibble()

# random effects
dataset_preds <- d_sub |>
  filter(!is.na(prod)) |> # filter on predictor
  group_by(dataset_name) |>
  summarise(min_age = min(age), 
            max_age = max(age)) |>
  group_by(dataset_name) |>
  mutate(df = map2(min_age, max_age, ~expand_grid(age = seq(.x, .y, .1)))) |>
  select(-min_age, -max_age) |>
  unnest(col = "df") |>
  ungroup()
  
dataset_preds$pred <- predict(mod1, 
                              newdata = dataset_preds, 
                              level = c(0,1))$predict.dataset_name 

ggplot(d_sub_prod, aes(x = age, y = prod, col = dataset_name)) + 
  geom_point(alpha = .1) + 
  geom_line(data = global_preds, aes(y = emmean), col = "blue", size = 1.5) +
  geom_ribbon(data = global_preds, 
              aes(ymin = lower.CL, ymax = upper.CL, y = emmean), col = NA,
              fill = "blue",alpha = .1) + 
  geom_line(data = dataset_preds, aes(y = pred))

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Logistics

So I guess we need good parameters.

d_sub$logistic = SSlogis(d_sub$age, Asym = 1, xmid = 0, scal = 3)

ggplot(d_sub, aes(x = age, y = prod)) + 
  geom_point(alpha = 1.) + 
  geom_line(aes(x = age, y = logistic))

Warning: Removed 1589 rows containing missing values or values outside the scale range
(`geom_point()`).

Use a logistic growth model.

mod2 <- nlme(model = prod ~ SSlogis(age, Asym = 1, xmid, scale),
             data = d_sub,
             fixed = xmid + scale ~ 1,
             random = xmid + scale ~ 1,
             groups = ~ dataset_name,
             start = c(xmid = 20, scale = 3),
             na.action = na.exclude, 
             control = list(maxIter = 1000, tolerance = 10))

summary(mod2)

Nonlinear mixed-effects model fit by maximum likelihood
  Model: prod ~ SSlogis(age, Asym = 1, xmid, scale) 
  Data: d_sub 
        AIC       BIC   logLik
  -1916.777 -1883.277 964.3885

Random effects:
 Formula: list(xmid ~ 1, scale ~ 1)
 Level: dataset_name
 Structure: General positive-definite, Log-Cholesky parametrization
         StdDev    Corr 
xmid     2.6590112 xmid 
scale    0.6935844 0.637
Residual 0.1458039      

Fixed effects:  xmid + scale ~ 1 
          Value Std.Error   DF  t-value p-value
xmid  24.742109 0.7975295 1949 31.02344       0
scale  3.750977 0.2574107 1949 14.57195       0
 Correlation: 
      xmid
scale 0.64

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-3.6120915 -0.5769721 -0.1084153  0.5556000  4.4561570 

Number of Observations: 1965
Number of Groups: 15

# fixed effects (no CIs)
global_preds <- expand_grid(age = seq(min(d_sub_prod$age), 
                                      max(d_sub_prod$age), .1))
global_preds$pred <- predict(mod2, newdata = global_preds, level = 0)


# random effects
dataset_preds <- d_sub |>
  filter(!is.na(prod)) |> # filter on predictor
  group_by(dataset_name) |>
  summarise(min_age = min(age), 
            max_age = max(age)) |>
  group_by(dataset_name) |>
  mutate(df = map2(min_age, max_age, ~expand_grid(age = seq(.x, .y, .1)))) |>
  select(-min_age, -max_age) |>
  unnest(col = "df") |>
  ungroup()
  
dataset_preds$pred <- predict(mod2, 
                              newdata = dataset_preds, 
                              level = c(0,1))$predict.dataset_name 

ggplot(d_sub_prod, aes(x = age, y = prod, col = dataset_name)) + 
  geom_point(alpha = .1) + 
  geom_line(data = global_preds, aes(y = pred), col = "blue", size = 1.5) +
  geom_line(data = dataset_preds, aes(y = pred))

Interaction with t0 RT

Now let’s use t0 processing attributes in a regression.

d_sub_prod <- d_sub_prod |>
  group_by(subject_id) |>
  mutate(log_rt_0 = log_rt[1],
         acc_0 = long_window_accuracy[1])

mod3 <- nlme(model = prod ~ SSlogis(age, Asym = 1, xmid, scale),
             data = d_sub_prod,
             fixed = xmid + scale ~ log_rt_0,
             random = xmid + scale ~ 1,
             groups = ~ dataset_name,
             start = c(xmid = 20, scale = 3, `xmid:log_rt_0` = 0, `scale:log_rt_0` = 3),
             na.action = na.exclude, 
             control = list(maxIter = 1000, tolerance = 10))

summary(mod3)

Nonlinear mixed-effects model fit by maximum likelihood
  Model: prod ~ SSlogis(age, Asym = 1, xmid, scale) 
  Data: d_sub_prod 
        AIC       BIC   logLik
  -1750.258 -1706.228 883.1292

Random effects:
 Formula: list(xmid ~ 1, scale ~ 1)
 Level: dataset_name
 Structure: General positive-definite, Log-Cholesky parametrization
                  StdDev    Corr  
xmid.(Intercept)  2.6984947 xm.(I)
scale.(Intercept) 0.6655347 0.929 
Residual          0.1466956       

Fixed effects:  xmid + scale ~ log_rt_0 
                     Value Std.Error   DF  t-value p-value
xmid.(Intercept)  9.533440 2.0192767 1797 4.721215  0.0000
xmid.log_rt_0     2.252338 0.2730891 1797 8.247630  0.0000
scale.(Intercept) 2.783363 1.9537882 1797 1.424598  0.1544
scale.log_rt_0    0.179807 0.2806801 1797 0.640611  0.5219
 Correlation: 
                  xm.(I) xm.__0 sc.(I)
xmid.log_rt_0     -0.915              
scale.(Intercept)  0.334 -0.300       
scale.log_rt_0    -0.304  0.313 -0.992

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-3.4204414 -0.6163548 -0.1052849  0.6152033  4.4479932 

Number of Observations: 1815
Number of Groups: 15

# fixed effects (no CIs)
global_preds <- expand_grid(age = seq(min(d_sub_prod$age), 
                                      max(d_sub_prod$age), .1), 
                            log_rt_0 = c(quantile(d_sub_prod$log_rt, 
                                                  c(.1,.25,.5,.75,.9), 
                                                  na.rm=TRUE)))
global_preds$pred <- predict(mod3, newdata = global_preds, level = 0)


# random effects
# dataset_preds <- d_sub |>
#   filter(!is.na(prod)) |> # filter on predictor
#   group_by(dataset_name) |>
#   summarise(min_age = min(age), 
#             max_age = max(age)) |>
#   group_by(dataset_name) |>
#   mutate(df = map2(min_age, max_age, ~expand_grid(age = seq(.x, .y, .1)))) |>
#   select(-min_age, -max_age) |>
#   unnest(col = "df") |>
#   ungroup()
#   
# dataset_preds$pred <- predict(mod3, 
#                               newdata = dataset_preds, 
#                               level = c(0,1))$predict.dataset_name 

ggplot(d_sub_prod, aes(x = age, y = prod)) + 
  geom_point(alpha = .1) + 
  geom_line(data = global_preds, aes(y = pred, group = log_rt_0, col = as.factor(log_rt_0)))

# +
#   geom_line(data = dataset_preds, aes(y = pred))

Interaction with accuracy

mod4 <- nlme(model = prod ~ SSlogis(age, Asym = 1, xmid, scale),
             data = d_sub_prod,
             fixed = xmid + scale ~ log_rt_0 + acc_0,
             random = xmid + scale ~ 1,
             groups = ~ dataset_name,
             start = c(xmid = 20, scale = 3, `xmid:log_rt_0` = 0, `scale:log_rt_0` = 3, 
                       `xmid:acc_0` = 0, `scale:acc_0` = 3),
             na.action = na.exclude, 
             control = list(maxIter = 1000, tolerance = 10))

summary(mod4)

Nonlinear mixed-effects model fit by maximum likelihood
  Model: prod ~ SSlogis(age, Asym = 1, xmid, scale) 
  Data: d_sub_prod 
       AIC       BIC   logLik
  -1788.67 -1733.654 904.3349

Random effects:
 Formula: list(xmid ~ 1, scale ~ 1)
 Level: dataset_name
 Structure: General positive-definite, Log-Cholesky parametrization
                  StdDev    Corr  
xmid.(Intercept)  2.5811483 xm.(I)
scale.(Intercept) 0.6076861 0.903 
Residual          0.1448544       

Fixed effects:  xmid + scale ~ log_rt_0 + acc_0 
                      Value Std.Error   DF   t-value p-value
xmid.(Intercept)  18.842675 2.4513310 1791  7.686712  0.0000
xmid.log_rt_0      1.436191 0.2999384 1791  4.788286  0.0000
xmid.acc_0        -5.563681 0.8117984 1791 -6.853525  0.0000
scale.(Intercept)  3.767003 2.4779078 1791  1.520235  0.1286
scale.log_rt_0     0.092472 0.3120285 1791  0.296357  0.7670
scale.acc_0       -0.478005 0.9071683 1791 -0.526919  0.5983
 Correlation: 
                  xm.(I) xm.__0 xmd._0 sc.(I) sc.__0
xmid.log_rt_0     -0.923                            
xmid.acc_0        -0.570  0.404                     
scale.(Intercept)  0.351 -0.319 -0.207              
scale.log_rt_0    -0.326  0.332  0.150 -0.972       
scale.acc_0       -0.196  0.146  0.285 -0.612  0.430

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-3.34696278 -0.60411134 -0.09105721  0.60421865  4.47349898 

Number of Observations: 1811
Number of Groups: 15

power function with offset

So I guess we need good parameters.

power_growth <- function(X, a, b, c)
{
  return((a * (X-c)^b))
}

d_sub_prod$power = power_growth(d_sub_prod$age, a = 1, b = 2.3, c = 12)

ggplot(d_sub_prod, aes(x = age, y = prod*680)) + 
  geom_point(alpha = 1.) + 
  geom_line(aes(x = age, y = power))

Now model.

d_sub_prod$prod_680 <- d_sub_prod$prod * 680
mod5 <- nlme(model = prod_680 ~ power_growth(age, a, b, c),
             data = d_sub_prod,
             fixed = a + b + c ~ 1,
             random = a + b + c ~ 1,
             groups = ~ dataset_name,
             start = c(a = 1, b = 2.3, c = 12),
             na.action = na.exclude)

summary(mod5)

Nonlinear mixed-effects model fit by maximum likelihood
  Model: prod_680 ~ power_growth(age, a, b, c) 
  Data: d_sub_prod 
       AIC     BIC    logLik
  23704.47 23760.3 -11842.23

Random effects:
 Formula: list(a ~ 1, b ~ 1, c ~ 1)
 Level: dataset_name
 Structure: General positive-definite, Log-Cholesky parametrization
         StdDev       Corr         
a        2.983123e-05 a      b     
b        6.480106e-02 -0.388       
c        1.647055e+00  0.513 -0.703
Residual 9.875883e+01              

Fixed effects:  a + b + c ~ 1 
      Value Std.Error   DF   t-value p-value
a 26.322978  5.847750 1948  4.501385       0
b  1.114418  0.076345 1948 14.597215       0
c 14.713081  0.639604 1948 23.003426       0
 Correlation: 
  a      b     
b -0.953       
c  0.647 -0.698

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-3.57784982 -0.62561301 -0.05127355  0.63359321  4.36606516 

Number of Observations: 1965
Number of Groups: 15

# fixed effects (no CIs)
global_preds <- expand_grid(age = seq(min(d_sub_prod$age), 
                                      max(d_sub_prod$age), .1))
global_preds$pred <- predict(mod5, newdata = global_preds, level = 0)


# random effects
dataset_preds <- d_sub |>
  filter(!is.na(prod)) |> # filter on predictor
  group_by(dataset_name) |>
  summarise(min_age = min(age), 
            max_age = max(age)) |>
  group_by(dataset_name) |>
  mutate(df = map2(min_age, max_age, ~expand_grid(age = seq(.x, .y, .1)))) |>
  select(-min_age, -max_age) |>
  unnest(col = "df") |>
  ungroup()
  
dataset_preds$pred <- predict(mod5, 
                              newdata = dataset_preds, 
                              level = c(0,1))$predict.dataset_name 

ggplot(d_sub_prod, aes(x = age, y = prod_680, col = dataset_name)) + 
  geom_point(alpha = .1) + 
  geom_line(data = global_preds, aes(y = pred), col = "blue", size = 1.5) +
  geom_line(data = dataset_preds, aes(y = pred))

Warning: Removed 28 rows containing missing values or values outside the scale range
(`geom_line()`).

Warning: Removed 84 rows containing missing values or values outside the scale range
(`geom_line()`).

Having trouble fitting this model - throws an error sadly.

d_sub_prod$prod_681 <- d_sub_prod$prod_680 + 1
mod5 <- nlme(model = prod_681 ~ power_growth(age, a, b, c),
             data = d_sub_prod,
             fixed = a + b + c ~ log_rt_0,
             random = a + b + c ~ 1,
             groups = ~ dataset_name,
             start = c(a = 1, b = 2.3, c = 12, 
                       `a:log_rt_0` = 0.1, `b:log_rt_0` = 0.1, `c:log_rt_0` = 0.1),
             na.action = na.exclude)

summary(mod5)