Preliminaries

# Print every column of a tibble rather than truncating to the console width.
options(dplyr.width = Inf)

# Chunk defaults: hide messages and warnings, and cache chunk results.
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE,
  cache = TRUE
)

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lme4)
## Loading required package: Matrix
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:Matrix':
## 
##     expand
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(lsmeans)
## Warning: package 'lsmeans' was built under R version 3.2.4
## Loading required package: estimability
library(langcog)
## 
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
## 
##     scale
# Shared plot theme: black-and-white base with Arial text, no grid lines or
# panel border, explicit axis lines, and bold black facet labels on white.
theme_manylabs <- theme_bw() +
  theme(
    # text
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16, family = "Arial"),
    text = element_text(family = "Arial"),
    # legend
    legend.key = element_rect(fill = "navy"),
    legend.background = element_rect(fill = "white"),
    # panel and axes
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.line.y = element_line(),
    axis.line.x = element_line(),
    # facet strips
    strip.background = element_rect(fill = "white", colour = NA),
    strip.text.x = element_text(size = 14, face = "bold", colour = "black")
  )

Simulate Data

Create basics of data.

# Skeleton of the first batch: one row per Lab x Subject x Trial
# (20 labs, 30 subjects per lab, 16 trials per subject).
data <- expand.grid(
  Lab = factor(paste0("Lab", seq_len(20))),
  Subject = seq_len(30),
  Trial = seq_len(16)
)

Simulation sequence:

Removed this from the first batch and created a second batch varying those two factors, plus randomly generated method and participant variables (session, language, bilingual).

# Pools that the simulation samples from.
blocks <- factor(rep(c("IDS", "ADS"), each = 2))  # template for one 4-trial block
langs <- factor(c("American English", "French", "Spanish", "German",
                  "Japanese", "British English"))
methods <- factor(c("HPP", "SingleScreen", "AnotherMethod"))
sessions <- factor(c("First", "Second"))
bilingual <- c(TRUE, FALSE)

# Ceiling applied to simulated looking times (max LT in a trial).
lt_max <- 20

# Simulate the first batch in place. Every random draw is made at the level at
# which the quantity varies: condition order per subject-block, IDS boost,
# mean age and method per lab, looking-time offset, age and language per
# subject. Columns starting with "." are temporary and dropped at the end.
data %<>%
  mutate(
    Subject = factor(toupper(paste0(Lab, "-", Subject))),
    Block = ((Trial - 1) %/% 4) + 1
  ) %>%
  # shuffle the four-trial condition template within each block
  group_by(Subject, Block) %>%
  mutate(Condition = sample(blocks)) %>%
  # one lab-specific increase of the log-mean for IDS trials
  group_by(Lab) %>%
  mutate(.LabDiff = runif(1, 0, .5)) %>%
  ungroup() %>%
  mutate(LT = ifelse(Condition == "IDS",
                     rlnorm(n(), 1.5 + .LabDiff, .7),
                     rlnorm(n(), 1.5, .7))) %>%
  # one constant offset per subject, added to all of that subject's trials
  group_by(Subject) %>%
  mutate(LT = LT + runif(1, 0, .5)) %>%
  ungroup() %>%
  # cap looking times at lt_max
  mutate(LT = pmin(LT, lt_max)) %>%
  # whole-number mean age per lab; subject ages within +/- .5 of it
  group_by(Lab) %>%
  mutate(.LabMeanAge = round(runif(1, 3, 12))) %>%
  group_by(Subject) %>%
  mutate(Age = round(runif(1, .LabMeanAge - .5, .LabMeanAge + .5), 2)) %>%
  # a single method per lab
  group_by(Lab) %>%
  mutate(Method = sample(methods, 1)) %>%
  # first-batch subjects: first session, not bilingual, one random language
  group_by(Subject) %>%
  mutate(Session = "First",
         Language = sample(langs, 1),
         Bilingual = FALSE) %>%
  arrange(Lab, Subject, Trial) %>%
  select(-starts_with("."))

Additional data labs contribute after testing the “promised” children is complete.

# Skeleton of the second batch: 5 labs, 30 subjects per lab, 16 trials each.
additional_data <- expand.grid(
  Lab = factor(paste0("Lab", seq_len(5))),
  Subject = seq_len(30),
  Trial = seq_len(16)
)

# Simulate the second batch in place, mirroring the first-batch pipeline but
# with a smaller IDS boost, a lower and noisier looking-time distribution, and
# randomly drawn Session and Bilingual status.
#
# Subject IDs carry an "-ADD-" tag. Lab1..Lab5 also appear in the first batch
# with subject numbers 1..30, so untagged IDs ("LAB1-1", ...) would collide
# after the batches are combined, and the (1 | Subject) random effect would
# treat two different children as the same subject.
#
# NOTE(review): Method and the lab mean age are redrawn here per lab, so a lab
# can end up with a different method/age range than in the first batch.
# Confirm whether that is intended.
additional_data %<>%
  mutate(Subject = factor(toupper(paste0(Lab, "-ADD-", Subject))),
         Block = ((Trial - 1) %/% 4) + 1) %>%
  # shuffle the four-trial condition template within each block
  group_by(Subject, Block) %>%
  mutate(Condition = sample(blocks)) %>%
  # one lab-specific increase of the log-mean for IDS trials
  group_by(Lab) %>%
  mutate(.LabDiff = runif(1, .0, .25)) %>%
  ungroup() %>%
  mutate(LT = ifelse(Condition == "IDS",
                     rlnorm(n(), 1.1 + .LabDiff, .8),
                     rlnorm(n(), 1.1, .8))) %>%
  # one constant offset per subject
  group_by(Subject) %>%
  mutate(LT = LT + runif(1, 0, .5)) %>%
  ungroup() %>%
  # cap looking times at lt_max
  mutate(LT = ifelse(LT > lt_max, lt_max, LT)) %>%
  # whole-number mean age per lab; subject ages within +/- .5 of it
  group_by(Lab) %>%
  mutate(.LabMeanAge = round(runif(1, 3, 12))) %>%
  ungroup() %>%
  group_by(Subject) %>%
  mutate(Age = round(runif(1, .LabMeanAge - .5, .LabMeanAge + .5), 2)) %>%
  ungroup() %>%
  # a single method per lab
  group_by(Lab) %>%
  mutate(Method = sample(methods, 1)) %>%
  ungroup() %>%
  # participant-level moderators, one draw per subject
  group_by(Subject) %>%
  mutate(Session = sample(sessions, 1),
         Language = sample(langs, 1),
         Bilingual = sample(bilingual, 1)) %>%
  arrange(Lab, Subject, Trial) %>%
  # drop the temporary dot-prefixed columns
  select(-starts_with("."))

Data Cleaning

lt_min <- 2                # minimum LT for inclusion
z_threshold <- 3           # outlier threshold (sds)
min_trials_per_type <- 4   # min trials per type for inclusion

# Apply the exclusion rules to one batch of trial-level data.
#
# Arguments:
#   trials        data frame with Subject, Condition ("IDS"/"ADS") and LT.
#   min_lt        trials with LT below this value are dropped.
#   max_abs_z     trials whose within-subject scaled log LT is at or beyond
#                 this absolute value are dropped.
#   min_per_type  subjects with fewer remaining IDS or ADS trials than this
#                 are dropped entirely.
# Returns the surviving trials, still grouped by Subject, with `log_lt` added.
# Both batches go through this one function so the rules cannot drift apart.
clean_lt_data <- function(trials,
                          min_lt = lt_min,
                          max_abs_z = z_threshold,
                          min_per_type = min_trials_per_type) {
  trials %>%
    filter(LT >= min_lt) %>%
    group_by(Subject) %>%
    mutate(log_lt = log(LT),
           .scaled_log_lt = as.numeric(langcog::scale(log_lt))) %>%
    filter(abs(.scaled_log_lt) < max_abs_z) %>%
    # still grouped by Subject: count the trials left per condition
    mutate(.N_IDS = sum(Condition == "IDS"),
           .N_ADS = sum(Condition == "ADS")) %>%
    filter(.N_IDS >= min_per_type & .N_ADS >= min_per_type) %>%
    select(-starts_with("."))
}

data_clean <- clean_lt_data(data)

additional_data_clean <- clean_lt_data(additional_data)

Join two datasets for further analyses

# Stack the cleaned first batch on top of the cleaned second batch.
all_data_clean <- rbind(data_clean, additional_data_clean)

Distributions

“initial” data

# Raw looking times in the cleaned first batch, one panel per condition.
ggplot(data = data_clean, mapping = aes(x = LT)) +
  geom_histogram() +
  facet_grid(~ Condition)

all data

# Raw looking times in both cleaned batches combined, one panel per condition.
ggplot(data = all_data_clean, mapping = aes(x = LT)) +
  geom_histogram() +
  facet_grid(~ Condition)

Transformation

# Log-transformed looking times in the cleaned first batch, per condition.
ggplot(data = data_clean, mapping = aes(x = log_lt)) +
  geom_histogram() +
  facet_grid(~ Condition)

Create Aggregated Datasets

# First batch: mean log looking time per subject and condition, plus a
# +/- .5 condition code and a flag for American English learners.
agg_subjects <- data_clean %>%
  group_by(Lab, Method, Subject, Language, Condition, Age) %>%
  summarise(MeanLogLT = mean(log_lt)) %>%
  mutate(ConditionC = ifelse(Condition == "IDS", .5, -.5),
         Native = Language == "American English")

# First batch, one row per subject: IDS and ADS means side by side, their
# difference, and the IDS share of the two means.
agg_subjects_paired <- agg_subjects %>%
  select(-ConditionC) %>%
  spread(Condition, MeanLogLT) %>%
  mutate(Diff = IDS - ADS,
         Prop = IDS / (IDS + ADS))

# Both batches: same aggregation, additionally keyed by Session and Bilingual.
all_agg_subjects <- all_data_clean %>%
  group_by(Lab, Method, Session, Subject, Language, Bilingual, Condition,
           Age) %>%
  summarise(MeanLogLT = mean(log_lt)) %>%
  mutate(ConditionC = ifelse(Condition == "IDS", .5, -.5),
         Native = Language == "American English")

# Both batches, one row per subject with the paired difference scores.
all_agg_subjects_paired <- all_agg_subjects %>%
  select(-ConditionC) %>%
  spread(Condition, MeanLogLT) %>%
  mutate(Diff = IDS - ADS,
         Prop = IDS / (IDS + ADS))

Hypothesis Tests

Overall Preference for IDS v ADS

# Distribution of per-subject mean log looking times in each condition.
# There is deliberately no `group` aesthetic: grouping by Subject makes
# geom_boxplot() compute one box per child (each from its two condition means)
# instead of one box per condition.
ggplot(agg_subjects, aes(x = Condition, y = MeanLogLT)) +
  geom_boxplot() +
  geom_jitter(width = .25, size = .5) +
  theme_manylabs

# Per-subject IDS - ADS differences in a single box; dashed line marks zero.
ggplot(data = agg_subjects_paired, mapping = aes(x = "Overall", y = Diff)) +
  geom_boxplot() +
  geom_jitter(width = .25, size = .5) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  scale_x_discrete("") +
  theme_manylabs

# One-sample t-test of the per-subject difference scores against zero.
t.test(agg_subjects_paired$Diff, mu = 0)
## 
##  One Sample t-test
## 
## data:  agg_subjects_paired$Diff
## t = 10.071, df = 599, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1021410 0.1516268
## sample estimates:
## mean of x 
## 0.1268839
# Standardised effect size: mean difference divided by its standard deviation.
with(agg_subjects_paired, mean(Diff) / sd(Diff))
## [1] 0.411156
# Mixed model of the condition effect with by-lab random intercepts and
# condition slopes and by-subject random intercepts, fit by maximum likelihood.
model <- lmer(
  MeanLogLT ~ ConditionC + (ConditionC | Lab) + (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)

summary(model)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: MeanLogLT ~ ConditionC + (ConditionC | Lab) + (1 | Subject)
##    Data: agg_subjects
## 
##      AIC      BIC   logLik deviance df.resid 
##   -285.5   -249.9    149.8   -299.5     1193 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.91446 -0.67929 -0.03054  0.65325  3.12569 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  Subject  (Intercept) 0.001024 0.03200      
##  Lab      (Intercept) 0.001487 0.03856      
##           ConditionC  0.009001 0.09488  0.82
##  Residual             0.043038 0.20746      
## Number of obs: 1200, groups:  Subject, 600; Lab, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  1.74427    0.01058  164.89
## ConditionC   0.12688    0.02436    5.21
## 
## Correlation of Fixed Effects:
##            (Intr)
## ConditionC 0.582
# Likelihood-ratio test for dropping each fixed-effect term in turn.
drop1(model, scope = ~ ., test = "Chi")
## Single term deletions
## 
## Model:
## MeanLogLT ~ ConditionC + (ConditionC | Lab) + (1 | Subject)
##            Df     AIC    LRT  Pr(Chi)    
## <none>        -285.51                    
## ConditionC  1 -270.37 17.141 3.47e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Lab Variability

# Mean difference score per lab, with error bars from mean_cl_normal
# (mult = 2); labs run along the vertical axis after the flip.
ggplot(data = agg_subjects_paired, mapping = aes(x = Lab, y = Diff)) +
  stat_summary(fun.y = "mean", geom = "point") +
  stat_summary(fun.data = "mean_cl_normal", geom = "errorbar",
               width = .1, fun.args = list(mult = 2)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  scale_x_discrete("") +
  coord_flip() +
  theme_manylabs

# Refit the overall model and derive each lab's condition effect: the fixed
# effect plus that lab's random condition slope.
# REML is spelled FALSE because the shorthand `F` is an ordinary variable that
# can be reassigned.
model <- lmer(MeanLogLT ~ ConditionC + (1 + ConditionC | Lab) + (1 | Subject),
              data = agg_subjects, REML = FALSE)
fixed_effect <- fixef(model)[["ConditionC"]]

# Extract the by-lab random effects once and reuse them; the factor levels
# follow the row order of the random-effects table.
lab_effects <- ranef(model)$Lab
lab_ranefs <- data.frame(
  Lab = factor(rownames(lab_effects), levels = rownames(lab_effects)),
  ConditionRanef = lab_effects$ConditionC + fixed_effect
)

# Per-lab condition effect as a point, joined by a bar to the overall fixed
# effect (solid line); the dashed line marks zero.
ggplot(data = lab_ranefs,
       mapping = aes(x = Lab, y = ConditionRanef, group = Lab)) +
  geom_point() +
  geom_errorbar(aes(ymin = fixed_effect, ymax = ConditionRanef), width = .1) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  geom_hline(yintercept = fixed_effect, linetype = "solid", alpha = .5) +
  scale_x_discrete("") +
  scale_y_continuous("Random Effect") +
  coord_flip() +
  theme_manylabs

Does IDS preference change by age?

# Drop the grouping and centre age on its overall mean.
agg_subjects <- agg_subjects %>%
  ungroup() %>%
  mutate(AgeC = Age - mean(Age))

# Difference score against age with a smoothed trend; dashed line marks zero.
ggplot(data = agg_subjects_paired, mapping = aes(x = Age, y = Diff)) +
  geom_point() +
  stat_smooth() +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  theme_manylabs

# Condition x centred-age model with by-lab random slopes for both predictors
# and by-subject random intercepts, fit by maximum likelihood.
model <- lmer(
  MeanLogLT ~ ConditionC * AgeC +
    (1 + ConditionC + AgeC | Lab) +
    (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)
summary(model)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: MeanLogLT ~ ConditionC * AgeC + (1 + ConditionC + AgeC | Lab) +  
##     (1 | Subject)
##    Data: agg_subjects
## 
##      AIC      BIC   logLik deviance df.resid 
##   -276.7   -215.6    150.3   -300.7     1188 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.89873 -0.68124 -0.03555  0.65026  3.14672 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr     
##  Subject  (Intercept) 1.046e-03 0.032342          
##  Lab      (Intercept) 1.124e-03 0.033532          
##           ConditionC  8.672e-03 0.093124 0.79     
##           AgeC        1.494e-05 0.003865 0.86 0.99
##  Residual             4.305e-02 0.207483          
## Number of obs: 1200, groups:  Subject, 600; Lab, 20
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)     1.7435936  0.0099751  174.79
## ConditionC      0.1268839  0.0240230    5.28
## AgeC            0.0009819  0.0032051    0.31
## ConditionC:AgeC 0.0044125  0.0080797    0.55
## 
## Correlation of Fixed Effects:
##             (Intr) CndtnC AgeC 
## ConditionC  0.512              
## AgeC        0.305  0.232       
## CondtnC:AgC 0.203  0.000  0.424
# Likelihood-ratio test for dropping each fixed-effect term in turn.
drop1(model, scope = ~ ., test = "Chi")
## Single term deletions
## 
## Model:
## MeanLogLT ~ ConditionC * AgeC + (1 + ConditionC + AgeC | Lab) + 
##     (1 | Subject)
##                 Df     AIC     LRT   Pr(Chi)    
## <none>             -276.69                      
## ConditionC       1 -261.29 17.4094 3.013e-05 ***
## AgeC             1 -278.61  0.0873    0.7676    
## ConditionC:AgeC  1 -278.41  0.2825    0.5951    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Does quadratic age term improve the fit?

# Baseline: linear fixed effect of age only; the by-lab random effects already
# include the linear and quadratic age terms.
model <- lmer(
  MeanLogLT ~ ConditionC * poly(AgeC, 1) +
    (1 + ConditionC + poly(AgeC, 2) | Lab) +
    (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)

# Extension: linear and quadratic age in the fixed effects as well, with the
# same random-effects structure as the baseline.
model_2 <- lmer(
  MeanLogLT ~ ConditionC * poly(AgeC, 2) +
    (1 + ConditionC + poly(AgeC, 2) | Lab) +
    (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)

# Compare the two fits.
anova(model, model_2)
## Data: agg_subjects
## Models:
## model: MeanLogLT ~ ConditionC * poly(AgeC, 1) + (1 + ConditionC + poly(AgeC, 
## model:     2) | Lab) + (1 | Subject)
## model_2: MeanLogLT ~ ConditionC * poly(AgeC, 2) + (1 + ConditionC + poly(AgeC, 
## model_2:     2) | Lab) + (1 | Subject)
##         Df     AIC     BIC logLik deviance  Chisq Chi Df Pr(>Chisq)
## model   16 -271.83 -190.39 151.92  -303.83                         
## model_2 18 -268.21 -176.58 152.10  -304.21 0.3703      2      0.831

Are there trial order effects?

# Trial-level predictors: centred trial number, +/- .5 condition code, and
# centred age, all computed over the ungrouped data.
data_clean <- data_clean %>%
  ungroup() %>%
  mutate(
    TrialC = Trial - mean(Trial),
    ConditionC = ifelse(Condition == "IDS", .5, -.5),
    AgeC = Age - mean(Age)
  )

# Mean log looking time at each trial position, one coloured line per
# condition, with error bars from mean_cl_normal (mult = 2).
ggplot(data = data_clean,
       mapping = aes(x = Trial, y = log_lt, color = Condition)) +
  stat_summary(fun.y = "mean", geom = "point") +
  stat_summary(fun.y = "mean", geom = "line") +
  stat_summary(fun.data = "mean_cl_normal", geom = "errorbar",
               width = .1, fun.args = list(mult = 2)) +
  theme_manylabs

# model <- lmer(log_lt ~ ConditionC*AgeC*TrialC + 
#                 (1 + ConditionC + AgeC + TrialC | Lab) + 
#                 (1 + TrialC + ConditionC | Subject), 
#               data=data_clean, REML=FALSE)
# summary(model)
# drop1(model,~.,test="Chi")

Moderator Analyses

Method

Note: the model with a by-lab random slope for age (age | lab) doesn’t converge.

# NB: the document does not knit unless AgeC is recomputed here, although it
# was already added above. The reason is unknown (possibly chunk caching --
# TODO confirm).
agg_subjects <- agg_subjects %>%
  ungroup() %>%
  mutate(AgeC = Age - mean(Age))

# Difference scores by testing method; dashed line marks zero.
ggplot(data = agg_subjects_paired, mapping = aes(x = Method, y = Diff)) +
  geom_boxplot() +
  geom_jitter(width = .25, size = .5) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  scale_x_discrete("") +
  theme_manylabs

# Sum-to-zero coding for Method, sized by the number of methods present.
contrasts(agg_subjects$Method) <-
  contr.sum(length(unique(agg_subjects$Method)))

# Condition x Method x age model; the by-lab random effects contain only the
# intercept and the condition slope (no age slope).
model <- lmer(
  MeanLogLT ~ ConditionC * Method * AgeC +
    (1 + ConditionC | Lab) +
    (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)
summary(model)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: 
## MeanLogLT ~ ConditionC * Method * AgeC + (1 + ConditionC | Lab) +  
##     (1 | Subject)
##    Data: agg_subjects
## 
##      AIC      BIC   logLik deviance df.resid 
##   -288.1   -201.5    161.0   -322.1     1183 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0178 -0.6766 -0.0249  0.6659  3.1945 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr
##  Subject  (Intercept) 0.0007343 0.02710      
##  Lab      (Intercept) 0.0006466 0.02543      
##           ConditionC  0.0072901 0.08538  1.00
##  Residual             0.0429444 0.20723      
## Number of obs: 1200, groups:  Subject, 600; Lab, 20
## 
## Fixed effects:
##                           Estimate Std. Error t value
## (Intercept)              1.6721338  0.0280075   59.70
## ConditionC               0.1034075  0.0694399    1.49
## Method1                  0.1031838  0.0292002    3.53
## Method2                  0.0618840  0.0288342    2.15
## AgeC                     0.0287053  0.0106858    2.69
## ConditionC:Method1       0.0331589  0.0729471    0.45
## ConditionC:Method2       0.0064572  0.0718737    0.09
## ConditionC:AgeC          0.0164965  0.0262665    0.63
## Method1:AgeC            -0.0273000  0.0109887   -2.48
## Method2:AgeC            -0.0254217  0.0109723   -2.32
## ConditionC:Method1:AgeC -0.0008044  0.0271583   -0.03
## ConditionC:Method2:AgeC -0.0255790  0.0271076   -0.94
## 
## Correlation of Fixed Effects:
##             (Intr) CndtnC Methd1 Methd2 AgeC   CnC:M1 CnC:M2 CnC:AC Mt1:AC
## ConditionC   0.417                                                        
## Method1     -0.876 -0.347                                                 
## Method2     -0.913 -0.368  0.795                                          
## AgeC        -0.917 -0.355  0.891  0.897                                   
## CndtnC:Mth1 -0.345 -0.853  0.431  0.300  0.346                            
## CndtnC:Mth2 -0.366 -0.897  0.300  0.427  0.347  0.759                     
## CondtnC:AgC -0.358 -0.900  0.351  0.352  0.401  0.872  0.878              
## Method1:AgC  0.904  0.353 -0.843 -0.885 -0.917 -0.321 -0.345 -0.355       
## Method2:AgC  0.900  0.350 -0.875 -0.860 -0.921 -0.341 -0.330 -0.358  0.841
## CndtC:M1:AC  0.354  0.885 -0.325 -0.348 -0.353 -0.815 -0.864 -0.900  0.411
## CndtC:M2:AC  0.351  0.881 -0.344 -0.333 -0.356 -0.852 -0.835 -0.906  0.312
##             Mt2:AC CC:M1:
## ConditionC               
## Method1                  
## Method2                  
## AgeC                     
## CndtnC:Mth1              
## CndtnC:Mth2              
## CondtnC:AgC              
## Method1:AgC              
## Method2:AgC              
## CndtC:M1:AC  0.312       
## CndtC:M2:AC  0.410  0.811
# Null model for the Method likelihood-ratio test. It uses the same random-
# effects structure as `model` -- (1 + ConditionC | Lab), no AgeC slope -- so
# the two fits are nested and differ only in the fixed-effect terms involving
# Method. With an extra AgeC random slope in the null the models would not be
# nested and the chi-square comparison would not be a valid test of Method.
model_null <- lmer(MeanLogLT ~ ConditionC * AgeC +
                     (1 + ConditionC | Lab) +
                     (1 | Subject),
                   data = agg_subjects,
                   REML = FALSE)

anova(model, model_null)
## Data: agg_subjects
## Models:
## model_null: MeanLogLT ~ ConditionC * AgeC + (1 + ConditionC + AgeC | Lab) + 
## model_null:     (1 | Subject)
## model: MeanLogLT ~ ConditionC * Method * AgeC + (1 + ConditionC | Lab) + 
## model:     (1 | Subject)
##            Df     AIC     BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
## model_null 12 -276.69 -215.61 150.35  -300.69                             
## model      17 -288.07 -201.54 161.03  -322.07 21.373      5  0.0006887 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Post-hoc: estimated ConditionC slope within each Method, with no
# multiplicity adjustment.
lstrends(model, specs = ~ Method, var = "ConditionC", adjust = "none")
##  Method        ConditionC.trend         SE    df    lower.CL  upper.CL
##  AnotherMethod       0.13656641 0.04540421 26.85  0.04337977 0.2297531
##  HPP                 0.10986467 0.03768034 26.83  0.03252801 0.1872013
##  SingleScreen        0.06379142 0.22557736 83.57 -0.38482725 0.5124101
## 
## Confidence level used: 0.95

Session

Tested on all data, but very imbalanced. But: How to select subset?

# Drop the grouping and centre age on its overall mean (combined batches).
all_agg_subjects <- all_agg_subjects %>%
  ungroup() %>%
  mutate(AgeC = Age - mean(Age))

# Difference scores by session (combined batches); dashed line marks zero.
ggplot(data = all_agg_subjects_paired,
       mapping = aes(x = Session, y = Diff)) +
  geom_boxplot() +
  geom_jitter(width = .25, size = .5) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  scale_x_discrete("") +
  theme_manylabs

# Session must be a factor before sum-to-zero contrasts can be attached.
all_agg_subjects$Session <- as.factor(all_agg_subjects$Session)
contrasts(all_agg_subjects$Session) <-
  contr.sum(length(unique(all_agg_subjects$Session)))

# Condition x Session x age model on the combined batches, fit by maximum
# likelihood. REML is spelled FALSE because the shorthand `F` is an ordinary
# variable that can be reassigned.
model <- lmer(MeanLogLT ~ ConditionC * Session * AgeC +
                (1 + ConditionC + AgeC | Lab) +
                (1 | Subject),
              data = all_agg_subjects, REML = FALSE)
summary(model)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: 
## MeanLogLT ~ ConditionC * Session * AgeC + (1 + ConditionC + AgeC |  
##     Lab) + (1 | Subject)
##    Data: all_agg_subjects
## 
##      AIC      BIC   logLik deviance df.resid 
##   -233.8   -149.0    132.9   -265.8     1464 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9514 -0.6655 -0.0237  0.6497  3.5202 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr       
##  Subject  (Intercept) 0.0000000 0.00000             
##  Lab      (Intercept) 0.0057544 0.07586             
##           ConditionC  0.0056142 0.07493   0.07      
##           AgeC        0.0009693 0.03113  -0.56  0.45
##  Residual             0.0461829 0.21490             
## Number of obs: 1480, groups:  Subject, 600; Lab, 20
## 
## Fixed effects:
##                          Estimate Std. Error t value
## (Intercept)              1.677279   0.023485   71.42
## ConditionC               0.049797   0.027050    1.84
## Session1                 0.026245   0.012577    2.09
## AgeC                     0.003532   0.010224    0.35
## ConditionC:Session1      0.069150   0.020691    3.34
## ConditionC:AgeC          0.004796   0.009662    0.50
## Session1:AgeC            0.012048   0.005664    2.13
## ConditionC:Session1:AgeC 0.004325   0.009921    0.44
## 
## Correlation of Fixed Effects:
##             (Intr) CndtnC Sessn1 AgeC   CnC:S1 CnC:AC Ss1:AC
## ConditionC   0.042                                          
## Session1    -0.402 -0.003                                   
## AgeC        -0.340  0.218 -0.144                            
## CndtnC:Sss1 -0.015 -0.660  0.004 -0.029                     
## CondtnC:AgC  0.036  0.121 -0.016  0.015 -0.160              
## Sessin1:AgC -0.048 -0.012  0.139 -0.493  0.013 -0.009       
## CndtC:S1:AC  0.022 -0.092  0.034 -0.007  0.122 -0.821  0.025
# Null model without Session, with the same random-effects structure as
# `model`, for the likelihood-ratio test below. REML is spelled FALSE rather
# than the reassignable shorthand `F`.
model_null <- lmer(MeanLogLT ~ ConditionC * AgeC +
                     (1 + ConditionC + AgeC | Lab) +
                     (1 | Subject),
                   data = all_agg_subjects, REML = FALSE)

anova(model, model_null)
## Data: all_agg_subjects
## Models:
## model_null: MeanLogLT ~ ConditionC * AgeC + (1 + ConditionC + AgeC | Lab) + 
## model_null:     (1 | Subject)
## model: MeanLogLT ~ ConditionC * Session * AgeC + (1 + ConditionC + AgeC | 
## model:     Lab) + (1 | Subject)
##            Df     AIC     BIC logLik deviance Chisq Chi Df Pr(>Chisq)   
## model_null 12 -224.53 -160.93 124.26  -248.53                           
## model      16 -233.76 -148.97 132.88  -265.76 17.24      4   0.001736 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Post-hoc: estimated ConditionC slope within each Session, with no
# multiplicity adjustment.
lstrends(model, specs = ~ Session, var = "ConditionC", adjust = "none")
##  Session ConditionC.trend         SE     df    lower.CL   upper.CL
##  First         0.11894732 0.02155302  22.86  0.07434595 0.16354870
##  Second       -0.01935252 0.04505958 244.44 -0.10810712 0.06940208
## 
## Confidence level used: 0.95

Nativeness of the test (American English versus everyone else)

Tested only on the “initial” dataset, analysis conditional on meeting a minimum N for each language to be included

# Drop the grouping and centre age on its overall mean.
agg_subjects <- agg_subjects %>%
  ungroup() %>%
  mutate(AgeC = Age - mean(Age))


# Difference scores for American English learners (TRUE) versus all others
# (FALSE); dashed line marks zero.
ggplot(data = agg_subjects_paired, mapping = aes(x = Native, y = Diff)) +
  geom_boxplot() +
  geom_jitter(width = .25, size = .5) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  scale_x_discrete("") +
  theme_manylabs

# Sum-to-zero coding for the Native flag.
contrasts(agg_subjects$Native) <-
  contr.sum(length(unique(agg_subjects$Native)))

# Condition x Native x age model, fit by maximum likelihood. REML is spelled
# FALSE because the shorthand `F` is an ordinary variable that can be
# reassigned.
model <- lmer(MeanLogLT ~ ConditionC * Native * AgeC +
                (1 + ConditionC + AgeC | Lab) +
                (1 | Subject),
              data = agg_subjects, REML = FALSE)
summary(model)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: 
## MeanLogLT ~ ConditionC * Native * AgeC + (1 + ConditionC + AgeC |  
##     Lab) + (1 | Subject)
##    Data: agg_subjects
## 
##      AIC      BIC   logLik deviance df.resid 
##   -270.1   -188.7    151.1   -302.1     1184 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.89672 -0.66647 -0.03374  0.64422  3.13995 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr     
##  Subject  (Intercept) 1.079e-03 0.032854          
##  Lab      (Intercept) 1.145e-03 0.033840          
##           ConditionC  8.861e-03 0.094131 0.78     
##           AgeC        1.327e-05 0.003642 0.86 0.99
##  Residual             4.295e-02 0.207236          
## Number of obs: 1200, groups:  Subject, 600; Lab, 20
## 
## Fixed effects:
##                           Estimate Std. Error t value
## (Intercept)              1.7400607  0.0111155  156.54
## ConditionC               0.1216380  0.0260154    4.68
## Native1                  0.0057668  0.0078573    0.73
## AgeC                     0.0013457  0.0036832    0.37
## ConditionC:Native1       0.0085479  0.0153529    0.56
## ConditionC:AgeC          0.0018628  0.0088963    0.21
## Native1:AgeC            -0.0005989  0.0028030   -0.21
## ConditionC:Native1:AgeC  0.0039913  0.0054757    0.73
## 
## Correlation of Fixed Effects:
##             (Intr) CndtnC Nativ1 AgeC   CnC:N1 CnC:AC Nt1:AC
## ConditionC   0.433                                          
## Native1     -0.438 -0.002                                   
## AgeC         0.226  0.178  0.007                            
## CndtnC:Ntv1 -0.004 -0.366  0.006 -0.003                     
## CondtnC:AgC  0.157 -0.001  0.001  0.345 -0.002              
## Native1:AgC  0.005 -0.002 -0.001 -0.487  0.004  0.003       
## CndtC:N1:AC -0.003  0.003  0.004 -0.002 -0.001 -0.400  0.005
# Null model without Native, with the same random-effects structure as
# `model`, for the likelihood-ratio test below. REML is spelled FALSE rather
# than the reassignable shorthand `F`.
model_null <- lmer(MeanLogLT ~ ConditionC * AgeC +
                     (1 + ConditionC + AgeC | Lab) +
                     (1 | Subject),
                   data = agg_subjects, REML = FALSE)

anova(model, model_null)
## Data: agg_subjects
## Models:
## model_null: MeanLogLT ~ ConditionC * AgeC + (1 + ConditionC + AgeC | Lab) + 
## model_null:     (1 | Subject)
## model: MeanLogLT ~ ConditionC * Native * AgeC + (1 + ConditionC + AgeC | 
## model:     Lab) + (1 | Subject)
##            Df     AIC     BIC logLik deviance  Chisq Chi Df Pr(>Chisq)
## model_null 12 -276.69 -215.61 150.35  -300.69                         
## model      16 -270.11 -188.66 151.05  -302.11 1.4109      4     0.8423
# Post-hoc: estimated ConditionC slope within each level of Native, with no
# multiplicity adjustment.
lstrends(model, specs = ~ Native, var = "ConditionC", adjust = "none")
##  Native ConditionC.trend         SE    df   lower.CL  upper.CL
##  FALSE         0.1301858 0.02614705 24.54 0.07628317 0.1840885
##  TRUE          0.1130901 0.03567997 87.80 0.04218145 0.1839988
## 
## Confidence level used: 0.95

Native Language

Tested only on the “initial” dataset, analysis conditional on meeting a minimum N for each language to be included

# Difference scores by native language; dashed line marks zero.
ggplot(data = agg_subjects_paired, mapping = aes(x = Language, y = Diff)) +
  geom_boxplot() +
  geom_jitter(width = .25, size = .5) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  scale_x_discrete("") +
  theme_manylabs

# Sum-to-zero coding for Language, sized by the number of languages present.
contrasts(agg_subjects$Language) <-
  contr.sum(length(unique(agg_subjects$Language)))

# Condition x Language x age model, fit by maximum likelihood. REML is spelled
# FALSE because the shorthand `F` is an ordinary variable that can be
# reassigned.
model <- lmer(MeanLogLT ~ ConditionC * Language * AgeC +
                (1 + ConditionC + AgeC | Lab) +
                (1 | Subject),
              data = agg_subjects, REML = FALSE)
summary(model)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: MeanLogLT ~ ConditionC * Language * AgeC + (1 + ConditionC +  
##     AgeC | Lab) + (1 | Subject)
##    Data: agg_subjects
## 
##      AIC      BIC   logLik deviance df.resid 
##   -252.0    -89.1    158.0   -316.0     1168 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8947 -0.6715 -0.0304  0.6536  3.0588 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr     
##  Subject  (Intercept) 1.179e-03 0.034334          
##  Lab      (Intercept) 8.627e-04 0.029372          
##           ConditionC  8.881e-03 0.094239 0.98     
##           AgeC        6.475e-05 0.008046 0.66 0.49
##  Residual             4.233e-02 0.205749          
## Number of obs: 1200, groups:  Subject, 600; Lab, 20
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)                1.7437674  0.0101251  172.22
## ConditionC                 0.1272437  0.0242266    5.25
## Language1                 -0.0102703  0.0130506   -0.79
## Language2                  0.0098594  0.0136133    0.72
## Language3                  0.0110850  0.0138097    0.80
## Language4                  0.0075649  0.0145975    0.52
## Language5                 -0.0015339  0.0139931   -0.11
## AgeC                       0.0010895  0.0034523    0.32
## ConditionC:Language1      -0.0142435  0.0254293   -0.56
## ConditionC:Language2      -0.0223540  0.0265449   -0.84
## ConditionC:Language3       0.0691236  0.0269506    2.56
## ConditionC:Language4       0.0009791  0.0284860    0.03
## ConditionC:Language5      -0.0114283  0.0273439   -0.42
## ConditionC:AgeC            0.0050678  0.0081435    0.62
## Language1:AgeC             0.0014774  0.0046596    0.32
## Language2:AgeC            -0.0007674  0.0046821   -0.16
## Language3:AgeC            -0.0003546  0.0046187   -0.08
## Language4:AgeC            -0.0023421  0.0050166   -0.47
## Language5:AgeC            -0.0046067  0.0051137   -0.90
## ConditionC:Language1:AgeC -0.0070830  0.0090741   -0.78
## ConditionC:Language2:AgeC  0.0017861  0.0091328    0.20
## ConditionC:Language3:AgeC -0.0082213  0.0090034   -0.91
## ConditionC:Language4:AgeC  0.0106457  0.0097789    1.09
## ConditionC:Language5:AgeC  0.0007898  0.0099847    0.08
# Null model without Language, with the same random-effects structure as
# `model`, for the likelihood-ratio test below. REML is spelled FALSE rather
# than the reassignable shorthand `F`.
model_null <- lmer(MeanLogLT ~ ConditionC * AgeC +
                     (1 + ConditionC + AgeC | Lab) +
                     (1 | Subject),
                   data = agg_subjects, REML = FALSE)

anova(model, model_null)
## Data: agg_subjects
## Models:
## model_null: MeanLogLT ~ ConditionC * AgeC + (1 + ConditionC + AgeC | Lab) + 
## model_null:     (1 | Subject)
## model: MeanLogLT ~ ConditionC * Language * AgeC + (1 + ConditionC + 
## model:     AgeC | Lab) + (1 | Subject)
##            Df     AIC      BIC logLik deviance  Chisq Chi Df Pr(>Chisq)
## model_null 12 -276.69 -215.614 150.35  -300.69                         
## model      32 -251.98  -89.096 157.99  -315.98 15.284     20       0.76
# Post-hoc: estimated ConditionC slope within each Language, with no
# multiplicity adjustment.
lstrends(model, specs = ~ Language, var = "ConditionC", adjust = "none")
##  Language         ConditionC.trend         SE     df   lower.CL  upper.CL
##  American English        0.1130002 0.03568783  87.52 0.04207276 0.1839277
##  British English         0.1048897 0.03690440  99.12 0.03166447 0.1781149
##  French                  0.1963672 0.03734649 103.48 0.12230334 0.2704311
##  German                  0.1282227 0.03906431 122.30 0.05089293 0.2055525
##  Japanese                0.1158153 0.03777210 106.68 0.04093397 0.1906967
##  Spanish                 0.1051668 0.03728583 103.02 0.03121934 0.1791143
## 
## Confidence level used: 0.95

Bilingualism

Tested on all data, but very imbalanced. But: How to select subset?

# Difference scores by bilingual status (combined batches); dashed line marks
# zero.
ggplot(data = all_agg_subjects_paired,
       mapping = aes(x = Bilingual, y = Diff)) +
  geom_boxplot() +
  geom_jitter(width = .25, size = .5) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  scale_x_discrete("") +
  theme_manylabs

# Sum-to-zero coding for the Bilingual flag.
contrasts(all_agg_subjects$Bilingual) <-
  contr.sum(length(unique(all_agg_subjects$Bilingual)))

# Condition x Bilingual x age model on the combined batches, fit by maximum
# likelihood. REML is spelled FALSE because the shorthand `F` is an ordinary
# variable that can be reassigned.
model <- lmer(MeanLogLT ~ ConditionC * Bilingual * AgeC +
                (1 + ConditionC + AgeC | Lab) +
                (1 | Subject),
              data = all_agg_subjects, REML = FALSE)
summary(model)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: MeanLogLT ~ ConditionC * Bilingual * AgeC + (1 + ConditionC +  
##     AgeC | Lab) + (1 | Subject)
##    Data: all_agg_subjects
## 
##      AIC      BIC   logLik deviance df.resid 
##   -228.2   -143.4    130.1   -260.2     1464 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4108 -0.6649 -0.0200  0.6500  3.4557 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr       
##  Subject  (Intercept) 0.000000 0.00000             
##  Lab      (Intercept) 0.005266 0.07257             
##           ConditionC  0.005734 0.07572   0.24      
##           AgeC        0.000811 0.02848  -0.48  0.30
##  Residual             0.046393 0.21539             
## Number of obs: 1480, groups:  Subject, 600; Lab, 20
## 
## Fixed effects:
##                              Estimate Std. Error t value
## (Intercept)                 1.6647332  0.0226689   73.44
## ConditionC                  0.0810985  0.0270180    3.00
## Bilingual1                  0.0390717  0.0123599    3.16
## AgeC                        0.0148000  0.0096759    1.53
## ConditionC:Bilingual1       0.0335077  0.0205000    1.63
## ConditionC:AgeC             0.0018924  0.0094660    0.20
## Bilingual1:AgeC            -0.0007156  0.0056052   -0.13
## ConditionC:Bilingual1:AgeC  0.0100267  0.0097042    1.03
## 
## Correlation of Fixed Effects:
##             (Intr) CndtnC Blngl1 AgeC   CnC:B1 CnC:AC Bl1:AC
## ConditionC   0.125                                          
## Bilingual1  -0.416 -0.006                                   
## AgeC        -0.266  0.149 -0.160                            
## CndtnC:Bln1 -0.019 -0.652  0.007 -0.026                     
## CondtnC:AgC  0.030  0.175 -0.018  0.024 -0.229              
## Bilngl1:AgC -0.060 -0.013  0.141 -0.502  0.014 -0.007       
## CndtC:B1:AC  0.014 -0.147  0.028  0.005  0.190 -0.806  0.022
# Null model without Bilingual, with the same random-effects structure as
# `model`, for the likelihood-ratio test below. REML is spelled FALSE rather
# than the reassignable shorthand `F`.
model_null <- lmer(MeanLogLT ~ ConditionC * AgeC +
                     (1 + ConditionC + AgeC | Lab) +
                     (1 | Subject),
                   data = all_agg_subjects, REML = FALSE)

anova(model, model_null)
## Data: all_agg_subjects
## Models:
## model_null: MeanLogLT ~ ConditionC * AgeC + (1 + ConditionC + AgeC | Lab) + 
## model_null:     (1 | Subject)
## model: MeanLogLT ~ ConditionC * Bilingual * AgeC + (1 + ConditionC + 
## model:     AgeC | Lab) + (1 | Subject)
##            Df     AIC     BIC logLik deviance  Chisq Chi Df Pr(>Chisq)  
## model_null 12 -224.53 -160.93 124.26  -248.53                           
## model      16 -228.24 -143.44 130.12  -260.24 11.715      4     0.0196 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Post-hoc: estimated ConditionC slope within each level of Bilingual, with no
# multiplicity adjustment.
lstrends(model, specs = ~ Bilingual, var = "ConditionC", adjust = "none")
##  Bilingual ConditionC.trend         SE     df    lower.CL  upper.CL
##  FALSE           0.11460620 0.02178942  23.13  0.06954487 0.1596675
##  TRUE            0.04759074 0.04469517 239.75 -0.04045465 0.1356361
## 
## Confidence level used: 0.95