This file presents the analyses reported in the manuscript titled “Lexical stress representation in Greek spoken word recognition” by Andrikopoulou et al. See the file “mega script.R” for pre-processing of raw data and additional analyses.
load("../Experiment 2/newcordat.rda")
newcordat -> ncd2
load("../Experiment 3/newcordat.rda")
newcordat -> ncd3
rm(newcordat)
ncd2$Exp <- "Exp2"
levels(ncd2$Subject) <- paste("Exp2",levels(ncd2$Subject),sep="")
ncd3$Exp <- "Exp3"
levels(ncd3$Subject) <- paste("Exp3",levels(ncd3$Subject),sep="")
ncd2 <- ncd2[,c("Exp","Subject","TRIAL_INDEX","str_type","Item","Time","TargetLooks")]
ncd3 <- ncd3[,c("Exp","Subject","TRIAL_INDEX","str_type","Item","Time","TargetLooks")]
ncd <- rbind(ncd2,ncd3)
rm(ncd2,ncd3)
ncd$Exp <- as.factor(ncd$Exp)
We first fit a model with just a fixed effect of experiment, i.e., a difference in the average relative proportions of looks to targets vs. distractors.
if (FALSE) {
m2a <- bam(TargetLooks ~ str_type + s(TRIAL_INDEX) + Exp
+s(Time)
+s(Time, by=str_type, m = 1)
+s(Time, Subject, bs = 'fs', m = 1)
+s(Time, Item, bs = 'fs', m = 1),
data = ncd, family = "binomial",
cluster = cl)
} else { load("model_m2a.rda") }
summary(m2a)
##
## Family: binomial
## Link function: logit
##
## Formula:
## TargetLooks ~ str_type + s(TRIAL_INDEX) + Exp + s(Time) + s(Time,
## by = str_type, m = 1) + s(Time, Subject, bs = "fs", m = 1) +
## s(Time, Item, bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0247 0.1918 5.343 9.14e-08 ***
## str_typefirstthird 0.3492 0.1997 1.749 0.0804 .
## str_typesecfirst 0.1203 0.2025 0.594 0.5523
## str_typesecthird 0.2396 0.2026 1.183 0.2369
## str_typethirdfirst 0.1372 0.1947 0.705 0.4810
## str_typethirdsec 0.1793 0.2169 0.827 0.4085
## ExpExp3 -0.3929 0.1851 -2.122 0.0338 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(TRIAL_INDEX) 8.992e+00 9.000 4.124e+03 < 2e-16 ***
## s(Time) 6.133e+00 6.412 2.876e+02 < 2e-16 ***
## s(Time):str_typefirstsec 6.465e-03 8.000 4.000e-03 < 2e-16 ***
## s(Time):str_typefirstthird 7.949e-03 8.000 6.000e-03 < 2e-16 ***
## s(Time):str_typesecfirst 6.307e-03 8.000 4.000e-03 < 2e-16 ***
## s(Time):str_typesecthird 6.174e-03 8.000 4.000e-03 < 2e-16 ***
## s(Time):str_typethirdfirst 5.241e-03 8.000 2.000e-03 3.19e-07 ***
## s(Time):str_typethirdsec 5.072e+00 8.000 1.380e+01 < 2e-16 ***
## s(Time,Subject) 6.735e+02 700.000 1.228e+05 < 2e-16 ***
## s(Time,Item) 9.035e+02 947.000 1.225e+05 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.173 Deviance explained = 16.4%
## fREML = 1.6485e+06 Scale est. = 1 n = 204806
We then fit a model with the addition of a Time smooth by experiment, i.e., we allow the average within-trial temporal course to vary between experiments (equally for all stress patterns conditions).
if (FALSE) {
m2b <- bam(TargetLooks ~ str_type + s(TRIAL_INDEX, k=12) + Exp
+s(Time, by=Exp)
+s(Time, by=str_type, m = 1)
+s(Time, Subject, bs = 'fs', m = 1)
+s(Time, Item, bs = 'fs', m = 1),
data = ncd, family = "binomial",
cluster = cl)
} else { load("model_m2b.rda") }
summary(m2b)
##
## Family: binomial
## Link function: logit
##
## Formula:
## TargetLooks ~ str_type + s(TRIAL_INDEX, k = 12) + Exp + s(Time,
## by = Exp) + s(Time, by = str_type, m = 1) + s(Time, Subject,
## bs = "fs", m = 1) + s(Time, Item, bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.95113 0.19599 4.853 1.22e-06 ***
## str_typefirstthird 0.29112 0.19687 1.479 0.139
## str_typesecfirst 0.01269 0.19969 0.064 0.949
## str_typesecthird 0.17501 0.19974 0.876 0.381
## str_typethirdfirst 0.13679 0.19187 0.713 0.476
## str_typethirdsec 0.17357 0.21431 0.810 0.418
## ExpExp3 -0.15071 0.20572 -0.733 0.464
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(TRIAL_INDEX) 1.098e+01 11.000 4.669e+03 < 2e-16 ***
## s(Time):ExpExp2 5.751e+00 6.061 1.960e+02 < 2e-16 ***
## s(Time):ExpExp3 4.923e+00 5.263 1.106e+02 < 2e-16 ***
## s(Time):str_typefirstsec 6.328e-03 8.000 4.000e-03 < 2e-16 ***
## s(Time):str_typefirstthird 9.603e-03 8.000 7.000e-03 < 2e-16 ***
## s(Time):str_typesecfirst 1.168e-02 8.000 8.000e-03 < 2e-16 ***
## s(Time):str_typesecthird 1.346e-02 8.000 9.000e-03 < 2e-16 ***
## s(Time):str_typethirdfirst 1.756e-03 8.000 1.000e-03 1.77e-07 ***
## s(Time):str_typethirdsec 5.140e+00 8.000 1.432e+01 < 2e-16 ***
## s(Time,Subject) 6.717e+02 700.000 1.228e+05 < 2e-16 ***
## s(Time,Item) 9.020e+02 947.000 1.222e+05 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.173 Deviance explained = 16.4%
## fREML = 1.6485e+06 Scale est. = 1 n = 204806
In direct comparison, the more complex model is not preferred (the difference is miniscule).
compareML(m2a,m2b)
## m2a: TargetLooks ~ str_type + s(TRIAL_INDEX) + Exp + s(Time) + s(Time,
## by = str_type, m = 1) + s(Time, Subject, bs = "fs", m = 1) +
## s(Time, Item, bs = "fs", m = 1)
##
## m2b: TargetLooks ~ str_type + s(TRIAL_INDEX, k = 12) + Exp + s(Time,
## by = Exp) + s(Time, by = str_type, m = 1) + s(Time, Subject,
## bs = "fs", m = 1) + s(Time, Item, bs = "fs", m = 1)
##
## Model m2a preferred: lower fREML score (3.063), and lower df (2.000).
## -----
## Model Score Edf Difference Df
## 1 m2b 1648539 23
## 2 m2a 1648536 21 3.063 -2.000
##
## AIC difference: 535.86, model m2b has lower AIC.
## Warning in compareML(m2a, m2b): Only small difference in fREML...
We then define an interaction factor, by expanding the experiment factor over stress conditions, so that we can fit a smooth for each combination. So we fit a more complex model by adding a smooth for each combination of experiment and stress condition over the smooths per experiment and the smooths per condition.
ncd$str_typeExp <- as.factor(paste(ncd$str_type,ncd$Exp,sep=""))
if (FALSE) {
m2c <- bam(TargetLooks ~ str_type + s(TRIAL_INDEX, k=12) + Exp
+s(Time, by=Exp)
+s(Time, by=str_type)
+s(Time, by=str_typeExp, m = 1)
+s(Time, Subject, bs = 'fs', m = 1)
+s(Time, Item, bs = 'fs', m = 1),
data = ncd, family = "binomial",
cluster = cl)
} else { load("model_m2c.rda") }
summary(m2c)
##
## Family: binomial
## Link function: logit
##
## Formula:
## TargetLooks ~ str_type + s(TRIAL_INDEX, k = 12) + Exp + s(Time,
## by = Exp) + s(Time, by = str_type) + s(Time, by = str_typeExp,
## m = 1) + s(Time, Subject, bs = "fs", m = 1) + s(Time, Item,
## bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.95511 0.20058 4.762 1.92e-06 ***
## str_typefirstthird 0.32372 0.20617 1.570 0.116
## str_typesecfirst 0.01155 0.22147 0.052 0.958
## str_typesecthird 0.08134 0.22112 0.368 0.713
## str_typethirdfirst 0.19838 0.19700 1.007 0.314
## str_typethirdsec 0.15987 0.22105 0.723 0.470
## ExpExp3 -0.15869 0.20505 -0.774 0.439
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(TRIAL_INDEX) 1.098e+01 10.99985 4.669e+03 < 2e-16 ***
## s(Time):ExpExp2 5.444e+00 5.76083 9.320e+01 < 2e-16 ***
## s(Time):ExpExp3 4.392e+00 4.73413 3.627e+01 7.82e-07 ***
## s(Time):str_typefirstsec 1.615e-02 0.02065 2.000e-03 0.9663
## s(Time):str_typefirstthird 1.407e+00 1.49799 6.880e-01 0.4711
## s(Time):str_typesecfirst 5.356e+00 5.69562 1.721e+00 0.9462
## s(Time):str_typesecthird 5.109e+00 5.46254 7.503e+00 0.3274
## s(Time):str_typethirdfirst 1.003e+00 1.00432 1.620e+00 0.2041
## s(Time):str_typethirdsec 4.784e+00 5.14179 1.272e+01 0.0303 *
## s(Time):str_typeExpfirstsecExp2 5.926e-01 8.00000 6.470e-01 < 2e-16 ***
## s(Time):str_typeExpfirstsecExp3 3.519e+00 8.00000 6.463e+00 < 2e-16 ***
## s(Time):str_typeExpfirstthirdExp2 7.732e-03 8.00000 6.000e-03 1.16e-14 ***
## s(Time):str_typeExpfirstthirdExp3 6.432e-01 8.00000 6.930e-01 < 2e-16 ***
## s(Time):str_typeExpsecfirstExp2 5.283e-01 8.00000 5.660e-01 4.16e-13 ***
## s(Time):str_typeExpsecfirstExp3 1.599e-03 8.00000 1.000e-03 4.44e-05 ***
## s(Time):str_typeExpsecthirdExp2 2.793e-03 8.00000 0.000e+00 0.0166 *
## s(Time):str_typeExpsecthirdExp3 9.887e-03 8.00000 7.000e-03 4.51e-10 ***
## s(Time):str_typeExpthirdfirstExp2 2.636e-03 8.00000 1.000e-03 1.70e-08 ***
## s(Time):str_typeExpthirdfirstExp3 2.096e-02 8.00000 1.700e-02 < 2e-16 ***
## s(Time):str_typeExpthirdsecExp2 1.621e-02 8.00000 1.500e-02 1.77e-11 ***
## s(Time):str_typeExpthirdsecExp3 1.471e+00 8.00000 1.821e+00 < 2e-16 ***
## s(Time,Subject) 6.717e+02 700.00000 1.228e+05 < 2e-16 ***
## s(Time,Item) 8.876e+02 947.00000 1.182e+05 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 1841/1842
## R-sq.(adj) = 0.173 Deviance explained = 16.4%
## fREML = 1.6486e+06 Scale est. = 1 n = 204806
This model accounts for the exact same proportion of deviance as the preceding one. The additional curves are essentially flat. These are the model smooths:
par(mgp=c(1.5,0.35,0))
par(tcl=-0.2)
par(mai=c(0.5,0.5,0.1,0.1))
par(mai=c(0.35,0.35,0.1,0.1))
plot.gam(m2c,page=2) -> c2
Are these smooths justified, consistent with different effects of (some) stress conditions between experiments? If not, then the stress effects are the same across experiments.
compareML(m2c,m2a)
## m2c: TargetLooks ~ str_type + s(TRIAL_INDEX, k = 12) + Exp + s(Time,
## by = Exp) + s(Time, by = str_type) + s(Time, by = str_typeExp,
## m = 1) + s(Time, Subject, bs = "fs", m = 1) + s(Time, Item,
## bs = "fs", m = 1)
##
## m2a: TargetLooks ~ str_type + s(TRIAL_INDEX) + Exp + s(Time) + s(Time,
## by = str_type, m = 1) + s(Time, Subject, bs = "fs", m = 1) +
## s(Time, Item, bs = "fs", m = 1)
##
## Model m2a preferred: lower fREML score (46.785), and lower df (20.000).
## -----
## Model Score Edf Difference Df
## 1 m2c 1648583 41
## 2 m2a 1648536 21 -46.785 20.000
##
## AIC difference: -575.90, model m2c has lower AIC.
compareML(m2c,m2b)
## m2c: TargetLooks ~ str_type + s(TRIAL_INDEX, k = 12) + Exp + s(Time,
## by = Exp) + s(Time, by = str_type) + s(Time, by = str_typeExp,
## m = 1) + s(Time, Subject, bs = "fs", m = 1) + s(Time, Item,
## bs = "fs", m = 1)
##
## m2b: TargetLooks ~ str_type + s(TRIAL_INDEX, k = 12) + Exp + s(Time,
## by = Exp) + s(Time, by = str_type, m = 1) + s(Time, Subject,
## bs = "fs", m = 1) + s(Time, Item, bs = "fs", m = 1)
##
## Model m2b preferred: lower fREML score (43.722), and lower df (18.000).
## -----
## Model Score Edf Difference Df
## 1 m2c 1648583 41
## 2 m2b 1648539 23 -43.722 18.000
##
## AIC difference: -40.04, model m2c has lower AIC.
The least complex model has lower fREML with fewer degrees of freedom and is therefore preferred.
The following graph plots the two experiment-specific smooths from the latest (most complex) model, adjusted for intercept and fixed effects of experiment, along with the corresponding confidence intervals.
par(mfrow=c(1,1))
plot (NA,NA,type="n",xlim=c(0,1000),ylim=c(0,3),xlab="Time (ms)",ylab="Target vs. competitor looks (log odds)",las=1)
polygon(c(c2[[2]]$x,rev(c2[[2]]$x)),c(c2[[2]]$fit[,1]+m2c$coefficients[1]-c2[[2]]$se,rev(c2[[2]]$fit[,1]+m2c$coefficients[1]+c2[[2]]$se)),col=rgb(0.5,0.5,1,0.5),border=NA)
polygon(c(c2[[3]]$x,rev(c2[[3]]$x)),c(c2[[3]]$fit[,1]+m2c$coefficients[1]+m2c$coefficients[7]-c2[[3]]$se,rev(c2[[3]]$fit[,1]+m2c$coefficients[1]+m2c$coefficients[7]+c2[[3]]$se)),col=rgb(0.5,1,0.5,0.5),border=NA)
lines(c2[[2]]$x,c2[[2]]$fit[,1]+m2c$coefficients[1],col="blue",lwd=3)
lines(c2[[3]]$x,c2[[3]]$fit[,1]+m2c$coefficients[1]+m2c$coefficients[7],col="darkgreen",lwd=3)
text( 970,3,"Experiment 2",col="blue",pos=2,cex=1.25)
text(1020,1.25,"Experiment 3",col="darkgreen",pos=2,cex=1.25)
The difference between these two smooths becomes statistically significant after 818 ms.
plot_diff(m2c,view="Time",comp=list(Exp=c("Exp2","Exp3")),rm.ranef=T,main="Experiment 2 minus Experiment 3",ylab="Difference in target looks (logits)",ylim=c(-1,1.5),hide.label=T,print.summary=F)
sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] itsadug_2.3 plotfunctions_1.3 mgcv_1.8-31 nlme_3.1-144
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.3 lattice_0.20-40 digest_0.6.25 grid_3.6.2
## [5] magrittr_1.5 evaluate_0.14 rlang_0.4.2 stringi_1.4.6
## [9] Matrix_1.2-18 rmarkdown_2.1 splines_3.6.2 tools_3.6.2
## [13] stringr_1.4.0 xfun_0.12 yaml_2.2.1 compiler_3.6.2
## [17] htmltools_0.4.0 knitr_1.28