1 Comparison between Experiments 2 and 3

This file presents the analyses reported in the manuscript titled “Lexical stress representation in Greek spoken word recognition” by Andrikopoulou et al. See the file “mega script.R” for pre-processing of raw data and additional analyses.

load("../Experiment 2/newcordat.rda") 
newcordat -> ncd2
load("../Experiment 3/newcordat.rda") 
newcordat -> ncd3
rm(newcordat)
ncd2$Exp <- "Exp2"
levels(ncd2$Subject) <- paste("Exp2",levels(ncd2$Subject),sep="")
ncd3$Exp <- "Exp3"
levels(ncd3$Subject) <- paste("Exp3",levels(ncd3$Subject),sep="")
ncd2 <- ncd2[,c("Exp","Subject","TRIAL_INDEX","str_type","Item","Time","TargetLooks")]
ncd3 <- ncd3[,c("Exp","Subject","TRIAL_INDEX","str_type","Item","Time","TargetLooks")]
ncd <- rbind(ncd2,ncd3)
rm(ncd2,ncd3)
ncd$Exp <- as.factor(ncd$Exp)

We first fit a model with just a fixed effect of experiment, i.e., a difference in the average relative proportions of looks to targets vs. distractors.

if (FALSE) {
  m2a <- bam(TargetLooks ~ str_type + s(TRIAL_INDEX) + Exp 
              +s(Time)
              +s(Time, by=str_type, m = 1)
              +s(Time, Subject, bs = 'fs', m = 1)
              +s(Time, Item, bs = 'fs', m = 1),
              data = ncd, family = "binomial",
              cluster = cl)
} else { load("model_m2a.rda") }
summary(m2a)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## TargetLooks ~ str_type + s(TRIAL_INDEX) + Exp + s(Time) + s(Time, 
##     by = str_type, m = 1) + s(Time, Subject, bs = "fs", m = 1) + 
##     s(Time, Item, bs = "fs", m = 1)
## 
## Parametric coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          1.0247     0.1918   5.343 9.14e-08 ***
## str_typefirstthird   0.3492     0.1997   1.749   0.0804 .  
## str_typesecfirst     0.1203     0.2025   0.594   0.5523    
## str_typesecthird     0.2396     0.2026   1.183   0.2369    
## str_typethirdfirst   0.1372     0.1947   0.705   0.4810    
## str_typethirdsec     0.1793     0.2169   0.827   0.4085    
## ExpExp3             -0.3929     0.1851  -2.122   0.0338 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                  edf  Ref.df    Chi.sq  p-value    
## s(TRIAL_INDEX)             8.992e+00   9.000 4.124e+03  < 2e-16 ***
## s(Time)                    6.133e+00   6.412 2.876e+02  < 2e-16 ***
## s(Time):str_typefirstsec   6.465e-03   8.000 4.000e-03  < 2e-16 ***
## s(Time):str_typefirstthird 7.949e-03   8.000 6.000e-03  < 2e-16 ***
## s(Time):str_typesecfirst   6.307e-03   8.000 4.000e-03  < 2e-16 ***
## s(Time):str_typesecthird   6.174e-03   8.000 4.000e-03  < 2e-16 ***
## s(Time):str_typethirdfirst 5.241e-03   8.000 2.000e-03 3.19e-07 ***
## s(Time):str_typethirdsec   5.072e+00   8.000 1.380e+01  < 2e-16 ***
## s(Time,Subject)            6.735e+02 700.000 1.228e+05  < 2e-16 ***
## s(Time,Item)               9.035e+02 947.000 1.225e+05  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.173   Deviance explained = 16.4%
## fREML = 1.6485e+06  Scale est. = 1         n = 204806

We then fit a model with the addition of a Time smooth by experiment, i.e., we allow the average within-trial temporal course to vary between experiments (equally for all stress patterns conditions).

if (FALSE) {
  m2b <- bam(TargetLooks ~ str_type + s(TRIAL_INDEX, k=12) + Exp 
           +s(Time, by=Exp)
           +s(Time, by=str_type, m = 1)
           +s(Time, Subject, bs = 'fs', m = 1)
           +s(Time, Item, bs = 'fs', m = 1),
           data = ncd, family = "binomial",
           cluster = cl)
} else { load("model_m2b.rda") }
summary(m2b)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## TargetLooks ~ str_type + s(TRIAL_INDEX, k = 12) + Exp + s(Time, 
##     by = Exp) + s(Time, by = str_type, m = 1) + s(Time, Subject, 
##     bs = "fs", m = 1) + s(Time, Item, bs = "fs", m = 1)
## 
## Parametric coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         0.95113    0.19599   4.853 1.22e-06 ***
## str_typefirstthird  0.29112    0.19687   1.479    0.139    
## str_typesecfirst    0.01269    0.19969   0.064    0.949    
## str_typesecthird    0.17501    0.19974   0.876    0.381    
## str_typethirdfirst  0.13679    0.19187   0.713    0.476    
## str_typethirdsec    0.17357    0.21431   0.810    0.418    
## ExpExp3            -0.15071    0.20572  -0.733    0.464    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                  edf  Ref.df    Chi.sq  p-value    
## s(TRIAL_INDEX)             1.098e+01  11.000 4.669e+03  < 2e-16 ***
## s(Time):ExpExp2            5.751e+00   6.061 1.960e+02  < 2e-16 ***
## s(Time):ExpExp3            4.923e+00   5.263 1.106e+02  < 2e-16 ***
## s(Time):str_typefirstsec   6.328e-03   8.000 4.000e-03  < 2e-16 ***
## s(Time):str_typefirstthird 9.603e-03   8.000 7.000e-03  < 2e-16 ***
## s(Time):str_typesecfirst   1.168e-02   8.000 8.000e-03  < 2e-16 ***
## s(Time):str_typesecthird   1.346e-02   8.000 9.000e-03  < 2e-16 ***
## s(Time):str_typethirdfirst 1.756e-03   8.000 1.000e-03 1.77e-07 ***
## s(Time):str_typethirdsec   5.140e+00   8.000 1.432e+01  < 2e-16 ***
## s(Time,Subject)            6.717e+02 700.000 1.228e+05  < 2e-16 ***
## s(Time,Item)               9.020e+02 947.000 1.222e+05  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.173   Deviance explained = 16.4%
## fREML = 1.6485e+06  Scale est. = 1         n = 204806

In direct comparison, the more complex model is not preferred (the difference is miniscule).

compareML(m2a,m2b)
## m2a: TargetLooks ~ str_type + s(TRIAL_INDEX) + Exp + s(Time) + s(Time, 
##     by = str_type, m = 1) + s(Time, Subject, bs = "fs", m = 1) + 
##     s(Time, Item, bs = "fs", m = 1)
## 
## m2b: TargetLooks ~ str_type + s(TRIAL_INDEX, k = 12) + Exp + s(Time, 
##     by = Exp) + s(Time, by = str_type, m = 1) + s(Time, Subject, 
##     bs = "fs", m = 1) + s(Time, Item, bs = "fs", m = 1)
## 
## Model m2a preferred: lower fREML score (3.063), and lower df (2.000).
## -----
##   Model   Score Edf Difference     Df
## 1   m2b 1648539  23                  
## 2   m2a 1648536  21      3.063 -2.000
## 
## AIC difference: 535.86, model m2b has lower AIC.
## Warning in compareML(m2a, m2b): Only small difference in fREML...

We then define an interaction factor, by expanding the experiment factor over stress conditions, so that we can fit a smooth for each combination. So we fit a more complex model by adding a smooth for each combination of experiment and stress condition over the smooths per experiment and the smooths per condition.

ncd$str_typeExp <- as.factor(paste(ncd$str_type,ncd$Exp,sep=""))
if (FALSE) {
  m2c <- bam(TargetLooks ~ str_type + s(TRIAL_INDEX, k=12) + Exp 
           +s(Time, by=Exp)
           +s(Time, by=str_type)
           +s(Time, by=str_typeExp, m = 1)
           +s(Time, Subject, bs = 'fs', m = 1)
           +s(Time, Item, bs = 'fs', m = 1),
           data = ncd, family = "binomial",
           cluster = cl)
} else { load("model_m2c.rda") }
summary(m2c)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## TargetLooks ~ str_type + s(TRIAL_INDEX, k = 12) + Exp + s(Time, 
##     by = Exp) + s(Time, by = str_type) + s(Time, by = str_typeExp, 
##     m = 1) + s(Time, Subject, bs = "fs", m = 1) + s(Time, Item, 
##     bs = "fs", m = 1)
## 
## Parametric coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         0.95511    0.20058   4.762 1.92e-06 ***
## str_typefirstthird  0.32372    0.20617   1.570    0.116    
## str_typesecfirst    0.01155    0.22147   0.052    0.958    
## str_typesecthird    0.08134    0.22112   0.368    0.713    
## str_typethirdfirst  0.19838    0.19700   1.007    0.314    
## str_typethirdsec    0.15987    0.22105   0.723    0.470    
## ExpExp3            -0.15869    0.20505  -0.774    0.439    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                         edf    Ref.df    Chi.sq  p-value    
## s(TRIAL_INDEX)                    1.098e+01  10.99985 4.669e+03  < 2e-16 ***
## s(Time):ExpExp2                   5.444e+00   5.76083 9.320e+01  < 2e-16 ***
## s(Time):ExpExp3                   4.392e+00   4.73413 3.627e+01 7.82e-07 ***
## s(Time):str_typefirstsec          1.615e-02   0.02065 2.000e-03   0.9663    
## s(Time):str_typefirstthird        1.407e+00   1.49799 6.880e-01   0.4711    
## s(Time):str_typesecfirst          5.356e+00   5.69562 1.721e+00   0.9462    
## s(Time):str_typesecthird          5.109e+00   5.46254 7.503e+00   0.3274    
## s(Time):str_typethirdfirst        1.003e+00   1.00432 1.620e+00   0.2041    
## s(Time):str_typethirdsec          4.784e+00   5.14179 1.272e+01   0.0303 *  
## s(Time):str_typeExpfirstsecExp2   5.926e-01   8.00000 6.470e-01  < 2e-16 ***
## s(Time):str_typeExpfirstsecExp3   3.519e+00   8.00000 6.463e+00  < 2e-16 ***
## s(Time):str_typeExpfirstthirdExp2 7.732e-03   8.00000 6.000e-03 1.16e-14 ***
## s(Time):str_typeExpfirstthirdExp3 6.432e-01   8.00000 6.930e-01  < 2e-16 ***
## s(Time):str_typeExpsecfirstExp2   5.283e-01   8.00000 5.660e-01 4.16e-13 ***
## s(Time):str_typeExpsecfirstExp3   1.599e-03   8.00000 1.000e-03 4.44e-05 ***
## s(Time):str_typeExpsecthirdExp2   2.793e-03   8.00000 0.000e+00   0.0166 *  
## s(Time):str_typeExpsecthirdExp3   9.887e-03   8.00000 7.000e-03 4.51e-10 ***
## s(Time):str_typeExpthirdfirstExp2 2.636e-03   8.00000 1.000e-03 1.70e-08 ***
## s(Time):str_typeExpthirdfirstExp3 2.096e-02   8.00000 1.700e-02  < 2e-16 ***
## s(Time):str_typeExpthirdsecExp2   1.621e-02   8.00000 1.500e-02 1.77e-11 ***
## s(Time):str_typeExpthirdsecExp3   1.471e+00   8.00000 1.821e+00  < 2e-16 ***
## s(Time,Subject)                   6.717e+02 700.00000 1.228e+05  < 2e-16 ***
## s(Time,Item)                      8.876e+02 947.00000 1.182e+05  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 1841/1842
## R-sq.(adj) =  0.173   Deviance explained = 16.4%
## fREML = 1.6486e+06  Scale est. = 1         n = 204806

This model accounts for the exact same proportion of deviance as the preceding one. The additional curves are essentially flat. These are the model smooths:

par(mgp=c(1.5,0.35,0))
par(tcl=-0.2)
par(mai=c(0.5,0.5,0.1,0.1))
par(mai=c(0.35,0.35,0.1,0.1))
plot.gam(m2c,page=2) -> c2

Are these smooths justified, consistent with different effects of (some) stress conditions between experiments? If not, then the stress effects are the same across experiments.

compareML(m2c,m2a)
## m2c: TargetLooks ~ str_type + s(TRIAL_INDEX, k = 12) + Exp + s(Time, 
##     by = Exp) + s(Time, by = str_type) + s(Time, by = str_typeExp, 
##     m = 1) + s(Time, Subject, bs = "fs", m = 1) + s(Time, Item, 
##     bs = "fs", m = 1)
## 
## m2a: TargetLooks ~ str_type + s(TRIAL_INDEX) + Exp + s(Time) + s(Time, 
##     by = str_type, m = 1) + s(Time, Subject, bs = "fs", m = 1) + 
##     s(Time, Item, bs = "fs", m = 1)
## 
## Model m2a preferred: lower fREML score (46.785), and lower df (20.000).
## -----
##   Model   Score Edf Difference     Df
## 1   m2c 1648583  41                  
## 2   m2a 1648536  21    -46.785 20.000
## 
## AIC difference: -575.90, model m2c has lower AIC.
compareML(m2c,m2b)
## m2c: TargetLooks ~ str_type + s(TRIAL_INDEX, k = 12) + Exp + s(Time, 
##     by = Exp) + s(Time, by = str_type) + s(Time, by = str_typeExp, 
##     m = 1) + s(Time, Subject, bs = "fs", m = 1) + s(Time, Item, 
##     bs = "fs", m = 1)
## 
## m2b: TargetLooks ~ str_type + s(TRIAL_INDEX, k = 12) + Exp + s(Time, 
##     by = Exp) + s(Time, by = str_type, m = 1) + s(Time, Subject, 
##     bs = "fs", m = 1) + s(Time, Item, bs = "fs", m = 1)
## 
## Model m2b preferred: lower fREML score (43.722), and lower df (18.000).
## -----
##   Model   Score Edf Difference     Df
## 1   m2c 1648583  41                  
## 2   m2b 1648539  23    -43.722 18.000
## 
## AIC difference: -40.04, model m2c has lower AIC.

The least complex model has lower fREML with fewer degrees of freedom and is therefore preferred.

1.1 Graphical comparison

The following graph plots the two experiment-specific smooths from the latest (most complex) model, adjusted for intercept and fixed effects of experiment, along with the corresponding confidence intervals.

par(mfrow=c(1,1))
plot (NA,NA,type="n",xlim=c(0,1000),ylim=c(0,3),xlab="Time (ms)",ylab="Target vs. competitor looks (log odds)",las=1)
polygon(c(c2[[2]]$x,rev(c2[[2]]$x)),c(c2[[2]]$fit[,1]+m2c$coefficients[1]-c2[[2]]$se,rev(c2[[2]]$fit[,1]+m2c$coefficients[1]+c2[[2]]$se)),col=rgb(0.5,0.5,1,0.5),border=NA)
polygon(c(c2[[3]]$x,rev(c2[[3]]$x)),c(c2[[3]]$fit[,1]+m2c$coefficients[1]+m2c$coefficients[7]-c2[[3]]$se,rev(c2[[3]]$fit[,1]+m2c$coefficients[1]+m2c$coefficients[7]+c2[[3]]$se)),col=rgb(0.5,1,0.5,0.5),border=NA)
lines(c2[[2]]$x,c2[[2]]$fit[,1]+m2c$coefficients[1],col="blue",lwd=3)
lines(c2[[3]]$x,c2[[3]]$fit[,1]+m2c$coefficients[1]+m2c$coefficients[7],col="darkgreen",lwd=3)
text( 970,3,"Experiment 2",col="blue",pos=2,cex=1.25)
text(1020,1.25,"Experiment 3",col="darkgreen",pos=2,cex=1.25)

The difference between these two smooths becomes statistically significant after 818 ms.

plot_diff(m2c,view="Time",comp=list(Exp=c("Exp2","Exp3")),rm.ranef=T,main="Experiment 2 minus Experiment 3",ylab="Difference in target looks (logits)",ylim=c(-1,1.5),hide.label=T,print.summary=F)

1.2 Session information

sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] itsadug_2.3       plotfunctions_1.3 mgcv_1.8-31       nlme_3.1-144     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.3      lattice_0.20-40 digest_0.6.25   grid_3.6.2     
##  [5] magrittr_1.5    evaluate_0.14   rlang_0.4.2     stringi_1.4.6  
##  [9] Matrix_1.2-18   rmarkdown_2.1   splines_3.6.2   tools_3.6.2    
## [13] stringr_1.4.0   xfun_0.12       yaml_2.2.1      compiler_3.6.2 
## [17] htmltools_0.4.0 knitr_1.28