Preliminaries

# Knit quietly (no messages or warnings) and cache chunk results.
knitr::opts_chunk$set(cache = TRUE, warning = FALSE, message = FALSE)
# No width limit when dplyr prints tibbles.
options(dplyr.width = Inf)

library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(lme4)

## Loading required package: Matrix

library(tidyr)

## 
## Attaching package: 'tidyr'

## The following object is masked from 'package:Matrix':
## 
##     expand

library(magrittr)

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:tidyr':
## 
##     extract

library(lsmeans)

## Warning: package 'lsmeans' was built under R version 3.2.4

## Loading required package: estimability

library(langcog)

## 
## Attaching package: 'langcog'

## The following object is masked from 'package:base':
## 
##     scale

# Shared plot theme: theme_bw() with grid lines and the panel border removed,
# explicit x/y axis lines, Arial text, and white facet strips with bold labels.
theme_manylabs <- theme_bw() +
  theme(
    text = element_text(family = "Arial"),
    axis.title = element_text(family = "Arial", size = 16),
    axis.text = element_text(size = 14),
    axis.line.x = element_line(),
    axis.line.y = element_line(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.background = element_rect(fill = "white"),
    legend.key = element_rect(fill = "navy"),
    strip.background = element_rect(fill = "white", colour = NA),
    strip.text.x = element_text(colour = "black", face = "bold", size = 14)
  )

Simulate Data

Create basics of data.

# Full crossing of 20 labs x 30 subjects per lab x 16 trials, one row each.
data <- expand.grid(
  Lab = factor(sprintf("Lab%d", seq_len(20))),
  Subject = seq_len(30),
  Trial = seq_len(16)
)

Simulation sequence:

assign block
randomly assign conditions within blocks
randomly generate looking times within constraints
do it lab-by-lab and then add some subject attention diffs
for (1) realism and (2) convergence issues
max 20s looking per trial
randomly generate ages

removed this from first batch and created a second batch varying those two factors + randomly generate method and participant vars (session, language, bilingual)

# Candidate values that the simulation samples from.
blocks <- factor(rep(c("IDS", "ADS"), each = 2))  # two IDS + two ADS trials
langs <- factor(c(
  "American English", "French", "Spanish",
  "German", "Japanese", "British English"
))
methods <- factor(c("HPP", "SingleScreen", "AnotherMethod"))
sessions <- factor(c("First", "Second"))
bilingual <- c(TRUE, FALSE)

# Ceiling applied to the looking time of a single trial.
lt_max <- 20

# Simulate the initial batch, one row per Lab x Subject x Trial:
#   1. make Subject IDs unique across labs and cut the 16 trials into 4 blocks;
#   2. shuffle two IDS and two ADS trials within every subject-block;
#   3. draw log-normal looking times, with a lab-specific boost on IDS trials
#      and a subject-specific additive offset, then cap them at lt_max;
#   4. draw a rounded mean age per lab and an age per subject within +/- .5;
#   5. draw one method per lab and one language per subject.
# Session and Bilingual are constant in this batch; Session is stored as a
# factor with the levels of `sessions` so it has the same type as in the
# additional batch. Columns starting with "." are scratch variables and are
# dropped. The result is ungrouped so later steps inherit no grouping.
data %<>%
  mutate(Subject = factor(toupper(paste0(Lab, "-", Subject))),
         Block = ((Trial - 1) %/% 4) + 1) %>%
  group_by(Subject, Block) %>%
  mutate(Condition = sample(blocks)) %>%
  group_by(Lab) %>%
  mutate(.LabDiff = runif(1, 0, .5)) %>%
  ungroup() %>%
  mutate(LT = ifelse(Condition == "IDS",
                     rlnorm(n(), 1.5 + .LabDiff, .7),
                     rlnorm(n(), 1.5, .7))) %>%
  group_by(Subject) %>%
  mutate(LT = LT + runif(1, 0, .5)) %>%
  ungroup() %>%
  mutate(LT = pmin(LT, lt_max)) %>%
  group_by(Lab) %>%
  mutate(.LabMeanAge = round(runif(1, 3, 12))) %>%
  group_by(Subject) %>%
  mutate(Age = round(runif(1, .LabMeanAge - .5, .LabMeanAge + .5), 2)) %>%
  group_by(Lab) %>%
  mutate(Method = sample(methods, 1)) %>%
  group_by(Subject) %>%
  mutate(Session = factor("First", levels = levels(sessions)),
         Language = sample(langs, 1),
         Bilingual = FALSE) %>%
  ungroup() %>%
  arrange(Lab, Subject, Trial) %>%
  select(-starts_with("."))

Additional data that labs contribute after testing of the “promised” children is complete.

Number of labs: 5 (instead of 20)
Lab-specific difference decreased (from 0-.5 to 0-.25)
Looking times slightly decreased

# Grid for the additional batch: the first 5 labs, 30 further subjects each,
# 16 trials per subject. Subject numbers start at 31 so that the IDs built
# from Lab and Subject do not repeat those of the initial batch
# (Lab1..Lab5 x 1..30); with repeated IDs, (1 | Subject) would pool different
# children once the two batches are combined.
additional_data <- expand.grid(Lab = factor(paste0("Lab", 1:5)),
                               Subject = 31:60,
                               Trial = 1:16)

# Simulate the additional batch with the same steps as the initial one, but
# with a smaller lab-specific IDS boost (0-.25), a lower log-mean (1.1) and a
# larger log-sd (.8) for looking times, and with Session and Bilingual
# sampled per subject instead of held constant.
# NOTE(review): Method and the lab mean age are drawn afresh for Lab1..Lab5
# here, so a lab can differ from its initial-batch values - confirm that this
# is intended.
# Columns starting with "." are scratch variables and are dropped. The result
# is ungrouped so later steps inherit no grouping.
additional_data %<>%
  mutate(Subject = factor(toupper(paste0(Lab, "-", Subject))),
         Block = ((Trial - 1) %/% 4) + 1) %>%
  group_by(Subject, Block) %>%
  mutate(Condition = sample(blocks)) %>%
  group_by(Lab) %>%
  mutate(.LabDiff = runif(1, .0, .25)) %>%
  ungroup() %>%
  mutate(LT = ifelse(Condition == "IDS",
                     rlnorm(n(), 1.1 + .LabDiff, .8),
                     rlnorm(n(), 1.1, .8))) %>%
  group_by(Subject) %>%
  mutate(LT = LT + runif(1, 0, .5)) %>%
  ungroup() %>%
  mutate(LT = pmin(LT, lt_max)) %>%
  group_by(Lab) %>%
  mutate(.LabMeanAge = round(runif(1, 3, 12))) %>%
  group_by(Subject) %>%
  mutate(Age = round(runif(1, .LabMeanAge - .5, .LabMeanAge + .5), 2)) %>%
  group_by(Lab) %>%
  mutate(Method = sample(methods, 1)) %>%
  group_by(Subject) %>%
  mutate(Session = sample(sessions, 1),
         Language = sample(langs, 1),
         Bilingual = sample(bilingual, 1)) %>%
  ungroup() %>%
  arrange(Lab, Subject, Trial) %>%
  select(-starts_with("."))

Data Cleaning

exclude outliers based on log-transformed LT
filter based on minimum trials of each type
remove hidden fields

# Inclusion thresholds used by the cleaning pipelines.
min_trials_per_type <- 4   # trials required per condition to keep a subject
z_threshold <- 3           # outlier cut-off, in SDs of scaled log LT
lt_min <- 2                # minimum LT for a trial to be kept

# Clean the initial batch:
#   - drop trials with LT below lt_min;
#   - scale log LT within subject (langcog::scale) and drop trials whose
#     absolute scaled value reaches z_threshold;
#   - keep only subjects that retain at least min_trials_per_type trials of
#     each condition.
# Scratch columns (names starting with ".") are removed and the result is
# ungrouped so later steps do not inherit the Subject grouping.
data_clean <- data %>%
  filter(LT >= lt_min) %>%
  group_by(Subject) %>%
  mutate(log_lt = log(LT),
         .scaled_log_lt = as.numeric(langcog::scale(log_lt))) %>%
  filter(abs(.scaled_log_lt) < z_threshold) %>%
  mutate(.N_IDS = sum(Condition == "IDS"),
         .N_ADS = sum(Condition == "ADS")) %>%
  filter(.N_IDS >= min_trials_per_type,
         .N_ADS >= min_trials_per_type) %>%
  ungroup() %>%
  select(-starts_with("."))

# Clean the additional batch with the same rules as the initial one:
#   - drop trials with LT below lt_min;
#   - scale log LT within subject (langcog::scale) and drop trials whose
#     absolute scaled value reaches z_threshold;
#   - keep only subjects that retain at least min_trials_per_type trials of
#     each condition.
# Scratch columns (names starting with ".") are removed and the result is
# ungrouped so later steps do not inherit the Subject grouping.
additional_data_clean <- additional_data %>%
  filter(LT >= lt_min) %>%
  group_by(Subject) %>%
  mutate(log_lt = log(LT),
         .scaled_log_lt = as.numeric(langcog::scale(log_lt))) %>%
  filter(abs(.scaled_log_lt) < z_threshold) %>%
  mutate(.N_IDS = sum(Condition == "IDS"),
         .N_ADS = sum(Condition == "ADS")) %>%
  filter(.N_IDS >= min_trials_per_type,
         .N_ADS >= min_trials_per_type) %>%
  ungroup() %>%
  select(-starts_with("."))

Join two datasets for further analyses

# Stack the initial and additional batches. bind_rows() matches columns by
# name; both inputs are ungrouped first so no grouping is carried into the
# combined table.
all_data_clean <- bind_rows(ungroup(data_clean), ungroup(additional_data_clean))

Distributions

“initial” data

# Histogram of raw looking times in the initial batch, one panel per condition.
data_clean %>%
  ggplot(aes(x = LT)) +
  geom_histogram() +
  facet_grid(. ~ Condition)

all data

# Histogram of raw looking times in the combined data, one panel per condition.
all_data_clean %>%
  ggplot(aes(x = LT)) +
  geom_histogram() +
  facet_grid(. ~ Condition)

Transformation

# Histogram of log looking times in the initial batch, one panel per condition.
data_clean %>%
  ggplot(aes(x = log_lt)) +
  geom_histogram() +
  facet_grid(. ~ Condition)

Create Aggregated Datasets

# Per-subject mean log LT in each condition (initial batch), with a centered
# condition code (IDS = +.5, ADS = -.5) and a flag for American English.
agg_subjects <- data_clean %>%
  group_by(Lab, Method, Subject, Language, Condition, Age) %>%
  summarise(MeanLogLT = mean(log_lt)) %>%
  mutate(ConditionC = ifelse(Condition == "IDS", .5, -.5),
         Native = Language == "American English")

# One row per subject: IDS and ADS means side by side, their difference, and
# the IDS share of the two means.
agg_subjects_paired <- agg_subjects %>%
  select(-ConditionC) %>%
  spread(key = Condition, value = MeanLogLT) %>%
  mutate(Diff = IDS - ADS,
         Prop = IDS / (IDS + ADS))

# Same aggregation for the combined data, additionally keyed by Session and
# Bilingual.
all_agg_subjects <- all_data_clean %>%
  group_by(Lab, Method, Session, Subject, Language, Bilingual, Condition, Age) %>%
  summarise(MeanLogLT = mean(log_lt)) %>%
  mutate(ConditionC = ifelse(Condition == "IDS", .5, -.5),
         Native = Language == "American English")

# One row per subject: IDS and ADS means side by side, their difference, and
# the IDS share of the two means.
all_agg_subjects_paired <- all_agg_subjects %>%
  select(-ConditionC) %>%
  spread(key = Condition, value = MeanLogLT) %>%
  mutate(Diff = IDS - ADS,
         Prop = IDS / (IDS + ADS))

Hypothesis Tests

Overall Preference for IDS v ADS

# Subject-level mean log LT by condition. No per-subject `group` aesthetic is
# set: grouping by Subject would make geom_boxplot() summarise each subject
# separately instead of drawing one box per condition.
ggplot(agg_subjects, aes(x = Condition, y = MeanLogLT)) +
  geom_boxplot() +
  geom_jitter(width = .25, size = .5) +
  theme_manylabs

# Distribution of the per-subject IDS - ADS difference, with a dashed
# reference line at zero.
agg_subjects_paired %>%
  ggplot(aes(x = "Overall", y = Diff)) +
  geom_boxplot() +
  geom_jitter(width = .25, size = .5) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  scale_x_discrete("") +
  theme_manylabs

# One-sample t-test of the per-subject IDS - ADS difference against zero.
t.test(agg_subjects_paired$Diff, mu = 0)

## 
##  One Sample t-test
## 
## data:  agg_subjects_paired$Diff
## t = 10.071, df = 599, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1021410 0.1516268
## sample estimates:
## mean of x 
## 0.1268839

# Standardized effect: mean of the differences divided by their SD.
with(agg_subjects_paired, mean(Diff) / sd(Diff))

## [1] 0.411156

# Mixed model for the overall condition effect: by-lab random intercepts and
# condition slopes plus by-subject random intercepts, fit by ML (REML = FALSE).
model <- lmer(
  MeanLogLT ~ ConditionC + (ConditionC | Lab) + (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)

summary(model)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: MeanLogLT ~ ConditionC + (ConditionC | Lab) + (1 | Subject)
##    Data: agg_subjects
## 
##      AIC      BIC   logLik deviance df.resid 
##   -285.5   -249.9    149.8   -299.5     1193 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.91446 -0.67929 -0.03054  0.65325  3.12569 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  Subject  (Intercept) 0.001024 0.03200      
##  Lab      (Intercept) 0.001487 0.03856      
##           ConditionC  0.009001 0.09488  0.82
##  Residual             0.043038 0.20746      
## Number of obs: 1200, groups:  Subject, 600; Lab, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  1.74427    0.01058  164.89
## ConditionC   0.12688    0.02436    5.21
## 
## Correlation of Fixed Effects:
##            (Intr)
## ConditionC 0.582

# Single-term deletions with a chi-square test for each fixed-effect term.
drop1(model, scope = ~ ., test = "Chi")

## Single term deletions
## 
## Model:
## MeanLogLT ~ ConditionC + (ConditionC | Lab) + (1 | Subject)
##            Df     AIC    LRT  Pr(Chi)    
## <none>        -285.51                    
## ConditionC  1 -270.37 17.141 3.47e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Lab Variability

# Mean IDS - ADS difference per lab with mean_cl_normal error bars (mult = 2),
# labs on the vertical axis and a dashed reference line at zero.
agg_subjects_paired %>%
  ggplot(aes(x = Lab, y = Diff)) +
  stat_summary(fun.y = "mean", geom = "point") +
  stat_summary(fun.data = "mean_cl_normal", geom = "errorbar",
               width = .1, fun.args = list(mult = 2)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  scale_x_discrete("") +
  coord_flip() +
  theme_manylabs

# Refit the overall model and keep the fixed condition effect.
model <- lmer(
  MeanLogLT ~ ConditionC + (1 + ConditionC | Lab) + (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)
fixed_effect <- fixef(model)[["ConditionC"]]

# Lab-specific condition effect: the fixed effect plus each lab's random
# slope. Labs keep the row order of the random-effects table as factor levels.
lab_ranefs <- local({
  by_lab <- ranef(model)$Lab
  data.frame(
    Lab = factor(rownames(by_lab), levels = rownames(by_lab)),
    ConditionRanef = by_lab$ConditionC + fixed_effect
  )
})

# Each lab's condition effect as a point, joined by a bar to the fixed effect
# (solid line); the dashed line marks zero.
lab_ranefs %>%
  ggplot(aes(x = Lab, y = ConditionRanef, group = Lab)) +
  geom_point() +
  geom_errorbar(aes(ymin = fixed_effect, ymax = ConditionRanef), width = .1) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  geom_hline(yintercept = fixed_effect, linetype = "solid", alpha = .5) +
  scale_x_discrete("") +
  scale_y_continuous("Random Effect") +
  coord_flip() +
  theme_manylabs

Does IDS preference change by age?

# Center age on its mean over all subject-by-condition rows.
agg_subjects <- agg_subjects %>%
  ungroup() %>%
  mutate(AgeC = Age - mean(Age))

# IDS - ADS difference against age with the default smoother and a dashed
# reference line at zero.
agg_subjects_paired %>%
  ggplot(aes(x = Age, y = Diff)) +
  geom_point() +
  stat_smooth() +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  theme_manylabs

# Condition x centered age, with by-lab random intercepts and slopes for
# condition and age plus by-subject random intercepts, fit by ML.
model <- lmer(
  MeanLogLT ~ ConditionC * AgeC +
    (1 + ConditionC + AgeC | Lab) +
    (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)
summary(model)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: MeanLogLT ~ ConditionC * AgeC + (1 + ConditionC + AgeC | Lab) +  
##     (1 | Subject)
##    Data: agg_subjects
## 
##      AIC      BIC   logLik deviance df.resid 
##   -276.7   -215.6    150.3   -300.7     1188 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.89873 -0.68124 -0.03555  0.65026  3.14672 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr     
##  Subject  (Intercept) 1.046e-03 0.032342          
##  Lab      (Intercept) 1.124e-03 0.033532          
##           ConditionC  8.672e-03 0.093124 0.79     
##           AgeC        1.494e-05 0.003865 0.86 0.99
##  Residual             4.305e-02 0.207483          
## Number of obs: 1200, groups:  Subject, 600; Lab, 20
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)     1.7435936  0.0099751  174.79
## ConditionC      0.1268839  0.0240230    5.28
## AgeC            0.0009819  0.0032051    0.31
## ConditionC:AgeC 0.0044125  0.0080797    0.55
## 
## Correlation of Fixed Effects:
##             (Intr) CndtnC AgeC 
## ConditionC  0.512              
## AgeC        0.305  0.232       
## CondtnC:AgC 0.203  0.000  0.424

# Single-term deletions with a chi-square test for each fixed-effect term.
drop1(model, scope = ~ ., test = "Chi")

## Single term deletions
## 
## Model:
## MeanLogLT ~ ConditionC * AgeC + (1 + ConditionC + AgeC | Lab) + 
##     (1 | Subject)
##                 Df     AIC     LRT   Pr(Chi)    
## <none>             -276.69                      
## ConditionC       1 -261.29 17.4094 3.013e-05 ***
## AgeC             1 -278.61  0.0873    0.7676    
## ConditionC:AgeC  1 -278.41  0.2825    0.5951    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Does quadratic age term improve the fit?

# Reduced model: age is linear in the fixed effects, while the by-lab random
# effects include both linear and quadratic age terms.
model <- lmer(
  MeanLogLT ~ ConditionC * poly(AgeC, 1) +
    (1 + ConditionC + poly(AgeC, 2) | Lab) +
    (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)

# Full model: linear and quadratic age in both the fixed and the by-lab
# random effects.
model_2 <- lmer(
  MeanLogLT ~ ConditionC * poly(AgeC, 2) +
    (1 + ConditionC + poly(AgeC, 2) | Lab) +
    (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)

# Compare the two fits.
anova(model, model_2)

## Data: agg_subjects
## Models:
## model: MeanLogLT ~ ConditionC * poly(AgeC, 1) + (1 + ConditionC + poly(AgeC, 
## model:     2) | Lab) + (1 | Subject)
## model_2: MeanLogLT ~ ConditionC * poly(AgeC, 2) + (1 + ConditionC + poly(AgeC, 
## model_2:     2) | Lab) + (1 | Subject)
##         Df     AIC     BIC logLik deviance  Chisq Chi Df Pr(>Chisq)
## model   16 -271.83 -190.39 151.92  -303.83                         
## model_2 18 -268.21 -176.58 152.10  -304.21 0.3703      2      0.831

Are there trial order effects?

# Trial-level predictors: centered trial number, centered condition code
# (IDS = +.5, ADS = -.5) and centered age.
data_clean <- data_clean %>%
  ungroup() %>%
  mutate(TrialC = Trial - mean(Trial),
         ConditionC = ifelse(Condition == "IDS", .5, -.5),
         AgeC = Age - mean(Age))

# Mean log LT per trial and condition, as points joined by lines, with
# mean_cl_normal error bars (mult = 2).
data_clean %>%
  ggplot(aes(x = Trial, y = log_lt, color = Condition)) +
  stat_summary(fun.y = "mean", geom = "point") +
  stat_summary(fun.y = "mean", geom = "line") +
  stat_summary(fun.data = "mean_cl_normal", geom = "errorbar",
               width = .1, fun.args = list(mult = 2)) +
  theme_manylabs

# model <- lmer(log_lt ~ ConditionC*AgeC*TrialC + 
#                 (1 + ConditionC + AgeC + TrialC | Lab) + 
#                 (1 + TrialC + ConditionC | Subject), 
#               data=data_clean, REML=FALSE)
# summary(model)
# drop1(model,~.,test="Chi")

Moderator Analyses

Method

Note: a model with a by-lab random slope for age (AgeC | Lab) doesn’t converge, so that slope is omitted from the model below.

# NB: for some reason the document does not knit unless AgeC is computed
# again here.
agg_subjects <- agg_subjects %>%
  ungroup() %>%
  mutate(AgeC = Age - mean(Age))

# IDS - ADS difference by method, with a dashed reference line at zero.
agg_subjects_paired %>%
  ggplot(aes(x = Method, y = Diff)) +
  geom_boxplot() +
  geom_jitter(width = .25, size = .5) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  scale_x_discrete("") +
  theme_manylabs

# Sum-to-zero contrasts for Method, sized by the number of methods present.
contrasts(agg_subjects$Method) <- contr.sum(n_distinct(agg_subjects$Method))

# Condition x Method x centered age; the by-lab random effects carry an
# intercept and a condition slope only.
model <- lmer(
  MeanLogLT ~ ConditionC * Method * AgeC +
    (1 + ConditionC | Lab) +
    (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)
summary(model)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: 
## MeanLogLT ~ ConditionC * Method * AgeC + (1 + ConditionC | Lab) +  
##     (1 | Subject)
##    Data: agg_subjects
## 
##      AIC      BIC   logLik deviance df.resid 
##   -288.1   -201.5    161.0   -322.1     1183 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0178 -0.6766 -0.0249  0.6659  3.1945 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr
##  Subject  (Intercept) 0.0007343 0.02710      
##  Lab      (Intercept) 0.0006466 0.02543      
##           ConditionC  0.0072901 0.08538  1.00
##  Residual             0.0429444 0.20723      
## Number of obs: 1200, groups:  Subject, 600; Lab, 20
## 
## Fixed effects:
##                           Estimate Std. Error t value
## (Intercept)              1.6721338  0.0280075   59.70
## ConditionC               0.1034075  0.0694399    1.49
## Method1                  0.1031838  0.0292002    3.53
## Method2                  0.0618840  0.0288342    2.15
## AgeC                     0.0287053  0.0106858    2.69
## ConditionC:Method1       0.0331589  0.0729471    0.45
## ConditionC:Method2       0.0064572  0.0718737    0.09
## ConditionC:AgeC          0.0164965  0.0262665    0.63
## Method1:AgeC            -0.0273000  0.0109887   -2.48
## Method2:AgeC            -0.0254217  0.0109723   -2.32
## ConditionC:Method1:AgeC -0.0008044  0.0271583   -0.03
## ConditionC:Method2:AgeC -0.0255790  0.0271076   -0.94
## 
## Correlation of Fixed Effects:
##             (Intr) CndtnC Methd1 Methd2 AgeC   CnC:M1 CnC:M2 CnC:AC Mt1:AC
## ConditionC   0.417                                                        
## Method1     -0.876 -0.347                                                 
## Method2     -0.913 -0.368  0.795                                          
## AgeC        -0.917 -0.355  0.891  0.897                                   
## CndtnC:Mth1 -0.345 -0.853  0.431  0.300  0.346                            
## CndtnC:Mth2 -0.366 -0.897  0.300  0.427  0.347  0.759                     
## CondtnC:AgC -0.358 -0.900  0.351  0.352  0.401  0.872  0.878              
## Method1:AgC  0.904  0.353 -0.843 -0.885 -0.917 -0.321 -0.345 -0.355       
## Method2:AgC  0.900  0.350 -0.875 -0.860 -0.921 -0.341 -0.330 -0.358  0.841
## CndtC:M1:AC  0.354  0.885 -0.325 -0.348 -0.353 -0.815 -0.864 -0.900  0.411
## CndtC:M2:AC  0.351  0.881 -0.344 -0.333 -0.356 -0.852 -0.835 -0.906  0.312
##             Mt2:AC CC:M1:
## ConditionC               
## Method1                  
## Method2                  
## AgeC                     
## CndtnC:Mth1              
## CndtnC:Mth2              
## CondtnC:AgC              
## Method1:AgC              
## Method2:AgC              
## CndtC:M1:AC  0.312       
## CndtC:M2:AC  0.410  0.811

# Null model for the Method comparison. A likelihood-ratio test needs the
# null to be nested in `model`, so it uses the same random-effects structure
# (by-lab intercept and condition slope, no by-lab age slope) and differs
# only by leaving Method out of the fixed effects.
model_null <- lmer(
  MeanLogLT ~ ConditionC * AgeC +
    (1 + ConditionC | Lab) +
    (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)

anova(model, model_null)

## Data: agg_subjects
## Models:
## model_null: MeanLogLT ~ ConditionC * AgeC + (1 + ConditionC + AgeC | Lab) + 
## model_null:     (1 | Subject)
## model: MeanLogLT ~ ConditionC * Method * AgeC + (1 + ConditionC | Lab) + 
## model:     (1 | Subject)
##            Df     AIC     BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
## model_null 12 -276.69 -215.61 150.35  -300.69                             
## model      17 -288.07 -201.54 161.03  -322.07 21.373      5  0.0006887 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Post-hoc: ConditionC trend within each method, no multiplicity adjustment.
lstrends(model, specs = ~ Method, var = "ConditionC", adjust = "none")

##  Method        ConditionC.trend         SE    df    lower.CL  upper.CL
##  AnotherMethod       0.13656641 0.04540421 26.85  0.04337977 0.2297531
##  HPP                 0.10986467 0.03768034 26.83  0.03252801 0.1872013
##  SingleScreen        0.06379142 0.22557736 83.57 -0.38482725 0.5124101
## 
## Confidence level used: 0.95

Session

Tested on all data, but very imbalanced. But: How to select subset?

# Center age on its mean over all rows of the combined aggregate.
all_agg_subjects <- all_agg_subjects %>%
  ungroup() %>%
  mutate(AgeC = Age - mean(Age))

# IDS - ADS difference by session, with a dashed reference line at zero.
all_agg_subjects_paired %>%
  ggplot(aes(x = Session, y = Diff)) +
  geom_boxplot() +
  geom_jitter(width = .25, size = .5) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  scale_x_discrete("") +
  theme_manylabs

# Store Session as a factor and give it sum-to-zero contrasts.
all_agg_subjects$Session <- as.factor(all_agg_subjects$Session)
contrasts(all_agg_subjects$Session) <- contr.sum(n_distinct(all_agg_subjects$Session))

# Condition x Session x centered age on the combined data, with by-lab random
# intercepts and slopes for condition and age plus by-subject intercepts.
model <- lmer(
  MeanLogLT ~ ConditionC * Session * AgeC +
    (1 + ConditionC + AgeC | Lab) +
    (1 | Subject),
  data = all_agg_subjects,
  REML = FALSE
)
summary(model)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: 
## MeanLogLT ~ ConditionC * Session * AgeC + (1 + ConditionC + AgeC |  
##     Lab) + (1 | Subject)
##    Data: all_agg_subjects
## 
##      AIC      BIC   logLik deviance df.resid 
##   -233.8   -149.0    132.9   -265.8     1464 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9514 -0.6655 -0.0237  0.6497  3.5202 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr       
##  Subject  (Intercept) 0.0000000 0.00000             
##  Lab      (Intercept) 0.0057544 0.07586             
##           ConditionC  0.0056142 0.07493   0.07      
##           AgeC        0.0009693 0.03113  -0.56  0.45
##  Residual             0.0461829 0.21490             
## Number of obs: 1480, groups:  Subject, 600; Lab, 20
## 
## Fixed effects:
##                          Estimate Std. Error t value
## (Intercept)              1.677279   0.023485   71.42
## ConditionC               0.049797   0.027050    1.84
## Session1                 0.026245   0.012577    2.09
## AgeC                     0.003532   0.010224    0.35
## ConditionC:Session1      0.069150   0.020691    3.34
## ConditionC:AgeC          0.004796   0.009662    0.50
## Session1:AgeC            0.012048   0.005664    2.13
## ConditionC:Session1:AgeC 0.004325   0.009921    0.44
## 
## Correlation of Fixed Effects:
##             (Intr) CndtnC Sessn1 AgeC   CnC:S1 CnC:AC Ss1:AC
## ConditionC   0.042                                          
## Session1    -0.402 -0.003                                   
## AgeC        -0.340  0.218 -0.144                            
## CndtnC:Sss1 -0.015 -0.660  0.004 -0.029                     
## CondtnC:AgC  0.036  0.121 -0.016  0.015 -0.160              
## Sessin1:AgC -0.048 -0.012  0.139 -0.493  0.013 -0.009       
## CndtC:S1:AC  0.022 -0.092  0.034 -0.007  0.122 -0.821  0.025

# Null model without Session (same random effects as the Session model),
# compared against it.
model_null <- lmer(
  MeanLogLT ~ ConditionC * AgeC +
    (1 + ConditionC + AgeC | Lab) +
    (1 | Subject),
  data = all_agg_subjects,
  REML = FALSE
)

anova(model, model_null)

## Data: all_agg_subjects
## Models:
## model_null: MeanLogLT ~ ConditionC * AgeC + (1 + ConditionC + AgeC | Lab) + 
## model_null:     (1 | Subject)
## model: MeanLogLT ~ ConditionC * Session * AgeC + (1 + ConditionC + AgeC | 
## model:     Lab) + (1 | Subject)
##            Df     AIC     BIC logLik deviance Chisq Chi Df Pr(>Chisq)   
## model_null 12 -224.53 -160.93 124.26  -248.53                           
## model      16 -233.76 -148.97 132.88  -265.76 17.24      4   0.001736 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Post-hoc: ConditionC trend within each session, no multiplicity adjustment.
lstrends(model, specs = ~ Session, var = "ConditionC", adjust = "none")

##  Session ConditionC.trend         SE     df    lower.CL   upper.CL
##  First         0.11894732 0.02155302  22.86  0.07434595 0.16354870
##  Second       -0.01935252 0.04505958 244.44 -0.10810712 0.06940208
## 
## Confidence level used: 0.95

Nativeness of the test (American English versus everyone else)

Tested only on the “initial” dataset, analysis conditional on meeting a minimum N for each language to be included

# Recompute centered age on the initial-batch aggregate.
agg_subjects <- agg_subjects %>%
  ungroup() %>%
  mutate(AgeC = Age - mean(Age))


# IDS - ADS difference by nativeness, with a dashed reference line at zero.
agg_subjects_paired %>%
  ggplot(aes(x = Native, y = Diff)) +
  geom_boxplot() +
  geom_jitter(width = .25, size = .5) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .5) +
  scale_x_discrete("") +
  theme_manylabs

# Sum-to-zero contrasts for the Native flag.
contrasts(agg_subjects$Native) <- contr.sum(n_distinct(agg_subjects$Native))

# Condition x Native x centered age, with by-lab random intercepts and slopes
# for condition and age plus by-subject intercepts.
model <- lmer(
  MeanLogLT ~ ConditionC * Native * AgeC +
    (1 + ConditionC + AgeC | Lab) +
    (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)
summary(model)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: 
## MeanLogLT ~ ConditionC * Native * AgeC + (1 + ConditionC + AgeC |  
##     Lab) + (1 | Subject)
##    Data: agg_subjects
## 
##      AIC      BIC   logLik deviance df.resid 
##   -270.1   -188.7    151.1   -302.1     1184 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.89672 -0.66647 -0.03374  0.64422  3.13995 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr     
##  Subject  (Intercept) 1.079e-03 0.032854          
##  Lab      (Intercept) 1.145e-03 0.033840          
##           ConditionC  8.861e-03 0.094131 0.78     
##           AgeC        1.327e-05 0.003642 0.86 0.99
##  Residual             4.295e-02 0.207236          
## Number of obs: 1200, groups:  Subject, 600; Lab, 20
## 
## Fixed effects:
##                           Estimate Std. Error t value
## (Intercept)              1.7400607  0.0111155  156.54
## ConditionC               0.1216380  0.0260154    4.68
## Native1                  0.0057668  0.0078573    0.73
## AgeC                     0.0013457  0.0036832    0.37
## ConditionC:Native1       0.0085479  0.0153529    0.56
## ConditionC:AgeC          0.0018628  0.0088963    0.21
## Native1:AgeC            -0.0005989  0.0028030   -0.21
## ConditionC:Native1:AgeC  0.0039913  0.0054757    0.73
## 
## Correlation of Fixed Effects:
##             (Intr) CndtnC Nativ1 AgeC   CnC:N1 CnC:AC Nt1:AC
## ConditionC   0.433                                          
## Native1     -0.438 -0.002                                   
## AgeC         0.226  0.178  0.007                            
## CndtnC:Ntv1 -0.004 -0.366  0.006 -0.003                     
## CondtnC:AgC  0.157 -0.001  0.001  0.345 -0.002              
## Native1:AgC  0.005 -0.002 -0.001 -0.487  0.004  0.003       
## CndtC:N1:AC -0.003  0.003  0.004 -0.002 -0.001 -0.400  0.005

# Null model without Native (same random effects as the Native model),
# compared against it.
model_null <- lmer(
  MeanLogLT ~ ConditionC * AgeC +
    (1 + ConditionC + AgeC | Lab) +
    (1 | Subject),
  data = agg_subjects,
  REML = FALSE
)

anova(model, model_null)

## Data: agg_subjects
## Models:
## model_null: MeanLogLT ~ ConditionC * AgeC + (1 + ConditionC + AgeC | Lab) + 
## model_null:     (1 | Subject)
## model: MeanLogLT ~ ConditionC * Native * AgeC + (1 + ConditionC + AgeC | 
## model:     Lab) + (1 | Subject)
##            Df     AIC     BIC logLik deviance  Chisq Chi Df Pr(>Chisq)
## model_null 12 -276.69 -215.61 150.35  -300.69                         
## model      16 -270.11 -188.66 151.05  -302.11 1.4109      4     0.8423

# Post-hoc: ConditionC trend within each level of Native, no multiplicity
# adjustment.
lstrends(model, specs = ~ Native, var = "ConditionC", adjust = "none")

##  Native ConditionC.trend         SE    df   lower.CL  upper.CL
##  FALSE         0.1301858 0.02614705 24.54 0.07628317 0.1840885
##  TRUE          0.1130901 0.03567997 87.80 0.04218145 0.1839988
## 
## Confidence level used: 0.95

Native Language