Background information

Paper: Yan, Zhou, Shu, Yusupu, Miao, Kruegel, & Kliegl, R. (2014). Eye Movements Guided by Morphological Structure: Evidence from the Uighur Language. Cognition, 132 (2), 181-215.

Script is based on uighur_package.v2.R, available on Potsdam Mind Research Repository (PMR2), also a preprint of the paper; see script for details about computation of various derived variables (e.g., centering for LMMs, grouping for figures). Script was adapted to new lme4 (1.1-7) and R 3.0.3; e.g., use only REML=FALSE in lmer().

doi: http://dx.doi.org/10.1016/j.cognition.2014.03.008
URL: http://www.sciencedirect.com/science/article/pii/S001002771400047X
PMR2: https://read.psych.uni-potsdam.de/Joomla/administrator/index.php?option=com_content&sectionid=3&task=edit&cid[]=132 

VARIABLES

- subj:     subject identifier
- sent:     sentence identifier (Exp 1)
- word:     word identifier (Exp 1)
- item:     item (Exp 2)
- wn:       word number in a sentence
- nw:       number of words in a sentence
- flp:      first-fixation landing position
- ffd:      first-fixation duration
- sfd:      second-fixation duration
- gd:       gaze duration
- ifre:     if a word fixated 1 or not 0
- ifskip:   if a word skipped 1 or not 0
- wl:       word length in letters
- ls:       launch site
- wf:       log10 word frequency
- rl:       the number of root letters/root length
- sn:       the number of suffixes
- suftype1: type of 1st suffix: 0 no suffix, 1 derivational suffix, 2 inflectional suffix
- suftype2: type of 2nd suffix

Experiment 1: corpus analyses

LMMs for fixation landing position (flp)

Estimate main variance compenents and correlation parameters for subjects, intercepts for sentences, and words

## Baseline LMM
print(summary(m1a <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + 
                                (1+wl.c+ls.c+sn.c+wf.c+wl.c*sn.c|subj) + (1|sent) + (1|word), 
                          data=uighur1, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c * ls.c * sn.c * wf.c + (1 + wl.c + ls.c + sn.c + wf.c +  
##     wl.c * sn.c | subj) + (1 | sent) + (1 | word)
##    Data: uighur1
## 
##      AIC      BIC   logLik deviance df.resid 
##    50222    50522   -25071    50142    13483 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.073 -0.613  0.007  0.611  6.232 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr                         
##  word     (Intercept) 0.42780  0.6541                                
##  sent     (Intercept) 0.02851  0.1688                                
##  subj     (Intercept) 0.46182  0.6796                                
##           wl.c        0.21818  0.4671    0.97                        
##           ls.c        0.01137  0.1066   -0.28 -0.39                  
##           sn.c        0.00650  0.0806   -0.68 -0.54  0.27            
##           wf.c        0.00132  0.0363   -0.18 -0.13  0.53  0.68      
##           wl.c:sn.c   0.02152  0.1467   -0.06 -0.02  0.39  0.56  0.97
##  Residual             2.14652  1.4651                                
## Number of obs: 13523, groups:  word, 679 sent, 120 subj, 48
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)          3.66628    0.10507    34.9
## wl.c                 1.58345    0.10265    15.4
## ls.c                -0.43173    0.01761   -24.5
## sn.c                -0.10186    0.04138    -2.5
## wf.c                 0.06231    0.04606     1.4
## wl.c:ls.c           -0.18383    0.02004    -9.2
## wl.c:sn.c            0.09385    0.05481     1.7
## ls.c:sn.c            0.00379    0.01044     0.4
## wl.c:wf.c            0.00545    0.09238     0.1
## ls.c:wf.c           -0.00697    0.01187    -0.6
## sn.c:wf.c            0.01371    0.05650     0.2
## wl.c:ls.c:sn.c       0.02300    0.01451     1.6
## wl.c:ls.c:wf.c      -0.00696    0.02510    -0.3
## wl.c:sn.c:wf.c      -0.05715    0.07002    -0.8
## ls.c:sn.c:wf.c       0.01696    0.01477     1.1
## wl.c:ls.c:sn.c:wf.c  0.00478    0.01925     0.2
## Expanded LMM (due to reviewer request)
print(summary(m1b <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c * bigram.c + 
                                (1+wl.c+ls.c+sn.c+wf.c+wl.c*sn.c|subj) + (1|sent) + (1|word), 
                          data=uighur1, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c * ls.c * sn.c * wf.c * bigram.c + (1 + wl.c + ls.c +  
##     sn.c + wf.c + wl.c * sn.c | subj) + (1 | sent) + (1 | word)
##    Data: uighur1
## 
##      AIC      BIC   logLik deviance df.resid 
##    50241    50662   -25065    50129    13467 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.086 -0.613  0.002  0.609  6.255 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr                         
##  word     (Intercept) 0.42607  0.6527                                
##  sent     (Intercept) 0.02806  0.1675                                
##  subj     (Intercept) 0.46079  0.6788                                
##           wl.c        0.21116  0.4595    1.00                        
##           ls.c        0.01134  0.1065   -0.30 -0.30                  
##           sn.c        0.01068  0.1033   -0.54 -0.54  0.03            
##           wf.c        0.00126  0.0355   -0.17 -0.17  0.69  0.20      
##           wl.c:sn.c   0.02033  0.1426   -0.05 -0.05  0.51  0.35  0.95
##  Residual             2.14565  1.4648                                
## Number of obs: 13523, groups:  word, 679 sent, 120 subj, 48
## 
## Fixed effects:
##                              Estimate Std. Error t value
## (Intercept)                   3.66165    0.10510    34.8
## wl.c                          1.65194    0.10501    15.7
## ls.c                         -0.43162    0.01768   -24.4
## sn.c                         -0.10052    0.04359    -2.3
## wf.c                          0.06694    0.04809     1.4
## bigram.c                      0.04150    0.07860     0.5
## wl.c:ls.c                    -0.20181    0.02098    -9.6
## wl.c:sn.c                     0.11962    0.05873     2.0
## ls.c:sn.c                     0.01240    0.01101     1.1
## wl.c:wf.c                     0.01657    0.09790     0.2
## ls.c:wf.c                    -0.00980    0.01242    -0.8
## sn.c:wf.c                     0.00502    0.05945     0.1
## wl.c:bigram.c                -0.13908    0.20275    -0.7
## ls.c:bigram.c                 0.00900    0.01950     0.5
## sn.c:bigram.c                 0.00831    0.09704     0.1
## wf.c:bigram.c                -0.03923    0.09184    -0.4
## wl.c:ls.c:sn.c                0.02623    0.01518     1.7
## wl.c:ls.c:wf.c               -0.00885    0.02661    -0.3
## wl.c:sn.c:wf.c               -0.01658    0.07307    -0.2
## ls.c:sn.c:wf.c                0.02390    0.01560     1.5
## wl.c:ls.c:bigram.c            0.00719    0.05308     0.1
## wl.c:sn.c:bigram.c           -0.35344    0.14584    -2.4
## ls.c:sn.c:bigram.c           -0.04182    0.02616    -1.6
## wl.c:wf.c:bigram.c           -0.19257    0.19080    -1.0
## ls.c:wf.c:bigram.c            0.01921    0.02509     0.8
## sn.c:wf.c:bigram.c           -0.00257    0.09988     0.0
## wl.c:ls.c:sn.c:wf.c           0.00315    0.01997     0.2
## wl.c:ls.c:sn.c:bigram.c       0.00638    0.03955     0.2
## wl.c:ls.c:wf.c:bigram.c       0.08901    0.05274     1.7
## wl.c:sn.c:wf.c:bigram.c       0.01819    0.17405     0.1
## ls.c:sn.c:wf.c:bigram.c      -0.04191    0.02711    -1.5
## wl.c:ls.c:sn.c:wf.c:bigram.c -0.04870    0.04756    -1.0
anova(m1a, m1b)
## Data: uighur1
## Models:
## m1a: flp ~ wl.c * ls.c * sn.c * wf.c + (1 + wl.c + ls.c + sn.c + wf.c + 
## m1a:     wl.c * sn.c | subj) + (1 | sent) + (1 | word)
## m1b: flp ~ wl.c * ls.c * sn.c * wf.c * bigram.c + (1 + wl.c + ls.c + 
## m1b:     sn.c + wf.c + wl.c * sn.c | subj) + (1 | sent) + (1 | word)
##     Df   AIC   BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1a 40 50222 50522 -25071    50142                        
## m1b 56 50241 50662 -25065    50129  12.7     16       0.69

Results m1a / m1b

  1. Model m1b probably not optimal:
  1. does not converge correctly w/ lme4 (1.1-7),

  2. no effects associated w/ word frequency (wf),

  3. smallest variance component for wf,

  4. model is degenerate; see:

VarCorr(m1b)
##  Groups   Name        Std.Dev. Corr                         
##  word     (Intercept) 0.6527                                
##  sent     (Intercept) 0.1675                                
##  subj     (Intercept) 0.6788                                
##           wl.c        0.4595    1.00                        
##           ls.c        0.1065   -0.30 -0.30                  
##           sn.c        0.1033   -0.54 -0.54  0.03            
##           wf.c        0.0355   -0.17 -0.17  0.69  0.20      
##           wl.c:sn.c   0.1426   -0.05 -0.05  0.51  0.35  0.95
##  Residual             1.4648
chf <- matrix(0, nrow=6, ncol=6)
(chf[lower.tri(chf, diag=TRUE)] <-  getME(m1b, "theta")[3:23])
##           subj.(Intercept)      subj.wl.c.(Intercept)      subj.ls.c.(Intercept) 
##                   0.463416                   0.313706                  -0.021458 
##      subj.sn.c.(Intercept)      subj.wf.c.(Intercept) subj.wl.c:sn.c.(Intercept) 
##                  -0.038017                  -0.004011                  -0.004986 
##                  subj.wl.c             subj.ls.c.wl.c             subj.sn.c.wl.c 
##                   0.000000                   0.066041                  -0.019519 
##             subj.wf.c.wl.c        subj.wl.c:sn.c.wl.c                  subj.ls.c 
##                   0.018175                   0.055365                   0.021542 
##             subj.sn.c.ls.c             subj.wf.c.ls.c        subj.wl.c:sn.c.ls.c 
##                   0.028180                  -0.003214                  -0.008282 
##                  subj.sn.c             subj.wf.c.sn.c        subj.wl.c:sn.c.sn.c 
##                   0.048531                   0.013059                   0.072556 
##                  subj.wf.c        subj.wl.c:sn.c.wf.c             subj.wl.c:sn.c 
##                   0.007674                   0.032455                   0.000000
sv <- svd(chf)
round(sv$d^2/sum(sv$d^2)*100)
## [1] 94  4  1  0  0  0
  1. Leave out word frequency
print(summary(m1c <- lmer(flp ~ wl.c * ls.c * sn.c * bigram.c + 
                                (1+wl.c+ls.c+sn.c+wl.c:sn.c|subj) + (1|sent) + (1|word), 
                          data=uighur1, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c * ls.c * sn.c * bigram.c + (1 + wl.c + ls.c + sn.c +  
##     wl.c:sn.c | subj) + (1 | sent) + (1 | word)
##    Data: uighur1
## 
##      AIC      BIC   logLik deviance df.resid 
##    50208    50463   -25070    50140    13489 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.072 -0.611  0.006  0.613  6.317 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr                   
##  word     (Intercept) 0.42733  0.6537                          
##  sent     (Intercept) 0.02908  0.1705                          
##  subj     (Intercept) 0.46174  0.6795                          
##           wl.c        0.21717  0.4660    0.97                  
##           ls.c        0.01145  0.1070   -0.28 -0.41            
##           sn.c        0.00613  0.0783   -0.68 -0.56  0.24      
##           wl.c:sn.c   0.02199  0.1483   -0.05 -0.06  0.41  0.53
##  Residual             2.14643  1.4651                          
## Number of obs: 13523, groups:  word, 679 sent, 120 subj, 48
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)              3.65433    0.10486    34.9
## wl.c                     1.61493    0.09879    16.3
## ls.c                    -0.43306    0.01759   -24.6
## sn.c                    -0.10366    0.04105    -2.5
## bigram.c                 0.06107    0.06792     0.9
## wl.c:ls.c               -0.18228    0.01873    -9.7
## wl.c:sn.c                0.11885    0.05232     2.3
## ls.c:sn.c                0.00585    0.01036     0.6
## wl.c:bigram.c           -0.10743    0.16794    -0.6
## ls.c:bigram.c            0.00346    0.01769     0.2
## sn.c:bigram.c            0.00584    0.08637     0.1
## wl.c:ls.c:sn.c           0.01860    0.01347     1.4
## wl.c:ls.c:bigram.c      -0.00663    0.04621    -0.1
## wl.c:sn.c:bigram.c      -0.30703    0.12451    -2.5
## ls.c:sn.c:bigram.c      -0.01659    0.02306    -0.7
## wl.c:ls.c:sn.c:bigram.c -0.00486    0.03549    -0.1
anova(m1a, m1b, m1c)
## Data: uighur1
## Models:
## m1c: flp ~ wl.c * ls.c * sn.c * bigram.c + (1 + wl.c + ls.c + sn.c + 
## m1c:     wl.c:sn.c | subj) + (1 | sent) + (1 | word)
## m1a: flp ~ wl.c * ls.c * sn.c * wf.c + (1 + wl.c + ls.c + sn.c + wf.c + 
## m1a:     wl.c * sn.c | subj) + (1 | sent) + (1 | word)
## m1b: flp ~ wl.c * ls.c * sn.c * wf.c * bigram.c + (1 + wl.c + ls.c + 
## m1b:     sn.c + wf.c + wl.c * sn.c | subj) + (1 | sent) + (1 | word)
##     Df   AIC   BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1c 34 50208 50463 -25070    50140                        
## m1a 40 50222 50522 -25071    50142   0.0      6       1.00
## m1b 56 50241 50662 -25065    50129  12.7     16       0.69

Results m1c

  1. no warnings about convergence,

  2. no loss in goodness in fit,

  3. vc’s are significant in drop1-LRTs, but model is still degenerate (at most three vc’s are supported by data); not sure what do to. See:

VarCorr(m1c)
##  Groups   Name        Std.Dev. Corr                   
##  word     (Intercept) 0.6537                          
##  sent     (Intercept) 0.1705                          
##  subj     (Intercept) 0.6795                          
##           wl.c        0.4660    0.97                  
##           ls.c        0.1070   -0.28 -0.41            
##           sn.c        0.0783   -0.68 -0.56  0.24      
##           wl.c:sn.c   0.1483   -0.05 -0.06  0.41  0.53
##  Residual             1.4651
chf <- matrix(0, nrow=5, ncol=5)
(chf[lower.tri(chf, diag=TRUE)] <-  getME(m1c, "theta")[3:17])
##           subj.(Intercept)      subj.wl.c.(Intercept)      subj.ls.c.(Intercept) 
##                  4.638e-01                  3.100e-01                 -2.042e-02 
##      subj.sn.c.(Intercept) subj.wl.c:sn.c.(Intercept)                  subj.wl.c 
##                 -3.653e-02                 -5.474e-03                  7.130e-02 
##             subj.ls.c.wl.c             subj.sn.c.wl.c        subj.wl.c:sn.c.wl.c 
##                 -4.395e-02                  2.540e-02                 -3.268e-03 
##                  subj.ls.c             subj.sn.c.ls.c        subj.wl.c:sn.c.ls.c 
##                  5.465e-02                  2.412e-02                  5.064e-02 
##                  subj.sn.c        subj.wl.c:sn.c.sn.c             subj.wl.c:sn.c 
##                  1.722e-02                  8.742e-02                  4.790e-08
sv <- svd(chf)
round(sv$d^2/sum(sv$d^2)*100)
## [1] 94  4  2  0  0
  1. Leave out word frequency and bigram frequency
print(summary(m1d <- lmer(flp ~ wl.c * ls.c * sn.c + 
                                        (1+wl.c+ls.c+sn.c+wl.c:sn.c|subj) + (1|sent) + (1|word), 
                                        data=uighur1, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c * ls.c * sn.c + (1 + wl.c + ls.c + sn.c + wl.c:sn.c |  
##     subj) + (1 | sent) + (1 | word)
##    Data: uighur1
## 
##      AIC      BIC   logLik deviance df.resid 
##    50201    50396   -25074    50149    13497 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.070 -0.612  0.006  0.611  6.279 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr                   
##  word     (Intercept) 0.43115  0.6566                          
##  sent     (Intercept) 0.02907  0.1705                          
##  subj     (Intercept) 0.46247  0.6801                          
##           wl.c        0.21867  0.4676    0.97                  
##           ls.c        0.01145  0.1070   -0.28 -0.40            
##           sn.c        0.00615  0.0784   -0.69 -0.56  0.25      
##           wl.c:sn.c   0.02277  0.1509   -0.05 -0.05  0.41  0.53
##  Residual             2.14705  1.4653                          
## Number of obs: 13523, groups:  word, 679 sent, 120 subj, 48
## 
## Fixed effects:
##                Estimate Std. Error t value
## (Intercept)     3.65887    0.10494    34.9
## wl.c            1.58721    0.09838    16.1
## ls.c           -0.43309    0.01758   -24.6
## sn.c           -0.10597    0.04097    -2.6
## wl.c:ls.c      -0.18188    0.01851    -9.8
## wl.c:sn.c       0.09014    0.05092     1.8
## ls.c:sn.c       0.00481    0.01027     0.5
## wl.c:ls.c:sn.c  0.01671    0.01320     1.3
anova(m1a, m1d)  # No decrease in goodness of fit
## Data: uighur1
## Models:
## m1d: flp ~ wl.c * ls.c * sn.c + (1 + wl.c + ls.c + sn.c + wl.c:sn.c | 
## m1d:     subj) + (1 | sent) + (1 | word)
## m1a: flp ~ wl.c * ls.c * sn.c * wf.c + (1 + wl.c + ls.c + sn.c + wf.c + 
## m1a:     wl.c * sn.c | subj) + (1 | sent) + (1 | word)
##     Df   AIC   BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1d 26 50201 50396 -25074    50149                        
## m1a 40 50222 50522 -25071    50142  6.82     14       0.94

Results m1d

  1. no warnings about convergence,

  2. no loss in goodness in fit (relative to model m1a),

  3. vc’s are significant in drop1-LRTs, but model is still degenerate (at most three vc’s are supported by data); still not sure what do to. See:

VarCorr(m1d)
##  Groups   Name        Std.Dev. Corr                   
##  word     (Intercept) 0.6566                          
##  sent     (Intercept) 0.1705                          
##  subj     (Intercept) 0.6801                          
##           wl.c        0.4676    0.97                  
##           ls.c        0.1070   -0.28 -0.40            
##           sn.c        0.0784   -0.69 -0.56  0.25      
##           wl.c:sn.c   0.1509   -0.05 -0.05  0.41  0.53
##  Residual             1.4653
chf <- matrix(0, nrow=5, ncol=5)
(chf[lower.tri(chf, diag=TRUE)] <-  getME(m1d, "theta")[3:17])
##           subj.(Intercept)      subj.wl.c.(Intercept)      subj.ls.c.(Intercept) 
##                  4.641e-01                  3.111e-01                 -2.032e-02 
##      subj.sn.c.(Intercept) subj.wl.c:sn.c.(Intercept)                  subj.wl.c 
##                 -3.674e-02                 -5.513e-03                  7.119e-02 
##             subj.ls.c.wl.c             subj.sn.c.wl.c        subj.wl.c:sn.c.wl.c 
##                 -4.371e-02                  2.539e-02                 -1.081e-03 
##                  subj.ls.c             subj.sn.c.ls.c        subj.wl.c:sn.c.ls.c 
##                  5.484e-02                  2.419e-02                  5.306e-02 
##                  subj.sn.c        subj.wl.c:sn.c.sn.c             subj.wl.c:sn.c 
##                  1.687e-02                  8.809e-02                  5.738e-05
sv <- svd(chf)
round(sv$d^2/sum(sv$d^2)*100)
## [1] 94  4  2  0  0

We keep model m1d as optimal model for now. (Note: For figures in publication model m1a was used because of theoretical motivation of variance components. Probably not a good idea.)

Compute partial FLP effects

Main effects

# WL
uighur1$DV.wl <- remef(m1d, keep=TRUE, grouping=TRUE, fix=c(1, "wl.c"), ran=NULL)
ddply(uighur1, .(round(log2(wl))), summarise, M=mean(flp), SD=sd(flp), SE=sd(flp)/sqrt(length(flp)), N=length(flp))
##   round(log2(wl))     M     SD      SE    N
## 1               1 1.040 0.6391 0.10507   37
## 2               2 2.613 1.3643 0.02698 2558
## 3               3 3.814 1.9818 0.02036 9475
## 4               4 4.794 2.5016 0.06563 1453
ddply(uighur1, .(round(log2(wl))), summarise, M=mean(DV.wl), SD=sd(DV.wl), SE=sd(DV.wl)/sqrt(length(DV.wl)), N=length(DV.wl))
##   round(log2(wl))      M     SD      SE    N
## 1               1 0.7564 0.5617 0.09235   37
## 2               2 2.4693 1.1838 0.02341 2558
## 3               3 3.8014 1.5102 0.01552 9475
## 4               4 4.8975 1.8597 0.04879 1453
# LS
uighur1$DV.ls <- remef(m1d, keep=TRUE, grouping=TRUE, fix=c(1, "ls.c"), ran=NULL)

# SN -- shows increase in observed means, decrease in estimated means
uighur1$DV.sn <- remef(m1d, keep=TRUE, grouping=TRUE, fix=c(1, "sn.c"), ran=NULL)
ddply(uighur1, .(sn), summarise, M_obs=mean(flp), M_est=mean(DV.sn))
##   sn M_obs M_est
## 1  0 3.135 3.780
## 2  1 3.597 3.674
## 3  2 4.108 3.557
## 4  3 4.683 3.481
## 5  4 5.168 3.342
## 6  5 5.777 3.263

Significant interactions

# WL x LS
uighur1$DV.wl_ls <- remef(m1d, keep=TRUE, grouping=TRUE, fix=c(1, "wl.c", "ls.c", "wl.c:ls.c"), ran=NULL)

# WL x SN
uighur1$DV.wl_sn <- remef(m1d, keep=TRUE, grouping=TRUE, fix=c(1, "wl.c", "sn.c", "wl.c:sn.c"), ran=NULL)

Visualization of FLP effects and interaction

Figure 2: WL x LS plot

ybreaks = seq(1, 7, 2)

ddply(uighur1, .(wl.f4, ls.f), summarise, Obs = mean(flp), Est = mean(DV.wl_ls), N=length(flp) )
##           wl.f4 ls.f   Obs   Est   N
## 1   short (2-5)    1 2.980 3.224 168
## 2   short (2-5)    2 3.235 3.237 401
## 3   short (2-5)    3 2.963 2.912 473
## 4   short (2-5)    4 2.636 2.536 469
## 5   short (2-5)    5 2.337 2.186 389
## 6   short (2-5)    6 2.131 1.824 309
## 7   short (2-5)    7 1.984 1.643 151
## 8   short (2-5)    8 1.821 1.352 108
## 9   short (2-5)    9 1.806 1.278  94
## 10  short (2-5)   10 1.655 1.026  33
## 11 medium (6-7)    1 3.965 4.269 212
## 12 medium (6-7)    2 4.010 4.211 578
## 13 medium (6-7)    3 3.848 3.912 648
## 14 medium (6-7)    4 3.312 3.383 647
## 15 medium (6-7)    5 2.894 2.910 561
## 16 medium (6-7)    6 2.713 2.595 418
## 17 medium (6-7)    7 2.388 2.196 257
## 18 medium (6-7)    8 2.196 1.852 177
## 19 medium (6-7)    9 2.124 1.773 105
## 20 medium (6-7)   10 1.868 1.509  41
## 21 medium (8-9)    1 4.765 5.073 205
## 22 medium (8-9)    2 4.991 5.135 526
## 23 medium (8-9)    3 4.547 4.599 686
## 24 medium (8-9)    4 4.070 4.098 679
## 25 medium (8-9)    5 3.647 3.647 576
## 26 medium (8-9)    6 3.162 3.074 328
## 27 medium (8-9)    7 2.796 2.630 260
## 28 medium (8-9)    8 2.322 2.229 159
## 29 medium (8-9)    9 2.285 1.903  90
## 30 medium (8-9)   10 1.453 1.097  29
## 31 long (10-17)    1 5.742 6.021 185
## 32 long (10-17)    2 5.738 5.860 572
## 33 long (10-17)    3 5.382 5.360 701
## 34 long (10-17)    4 4.853 4.871 681
## 35 long (10-17)    5 4.187 4.143 579
## 36 long (10-17)    6 3.730 3.591 428
## 37 long (10-17)    7 3.265 3.099 258
## 38 long (10-17)    8 2.908 2.679 196
## 39 long (10-17)    9 3.008 2.447 105
## 40 long (10-17)   10 2.815 2.105  41
p1 <- qplot(data=uighur1, x = ls, y = DV.wl_ls, xlab="Launch site", ylab="Fixation landing position", 
        group=wl.f4, linetype=wl.f4, shape=wl.f4, geom="smooth", method="lm") + 
        geom_smooth(method=lm,color="black") +
        scale_colour_manual(name = "Word length", values = c(1,2,3,4)) + 
        scale_linetype_manual(name = "Word length", values = c(1,2,3,4)) + 
        scale_shape_manual(name = "Word length", values = c(1,2,3,4)) + 
        stat_summary (fun.y = mean, geom="point", size=2, mapping = aes (y=flp, x=ls.f, group=wl.f4) ) +
        coord_cartesian(ylim=c(0, 7)) + theme_bw() + 
        theme(legend.justification= c(1,1), legend.position=c(1,1) )

p1

plot of chunk fig2

Figure 3: SN plot

ddply(uighur1, .(sn), summarize, N=length(flp))     # not many fixations with sn > 3
##   sn    N
## 1  0 4088
## 2  1 4987
## 3  2 3201
## 4  3 1085
## 5  4  147
## 6  5   15
ddply(uighur1, .(sn.f4), summarize, N=length(flp))  # collaps sn 3 to 5
##   sn.f4    N
## 1     0 4088
## 2     1 4987
## 3     2 3201
## 4   3-5 1247
# ... aggregate to subject x sn table
sn_obs <- ddply(uighur1, .(subj, sn.f4), summarize, M=mean(flp))
sn_GM <- mean(sn_obs$M)

# ... remove between-subject variance, add GM back to data
sn_obs2 <- ddply(sn_obs, .(subj), transform, M.w = M - mean(M) + sn_GM)  

# ... compute M and within-SEs after removal of between-subject variance
sn_obs3 <- ddply(sn_obs2, .(sn.f4), summarize, M = mean(M), SE=sd(M.w)/sqrt(length(M.w)), N=length(M.w))  

# ... LMM estimates
sn_est <- ddply(uighur1, .(sn.f4), summarize, M=mean(DV.sn), SE=sd(DV.sn)/sqrt(length(DV.sn)), N=length(DV.sn))

# ... combine obs and est
sn_Means <- rbind(sn_obs3, sn_est)
sn_Means$Type <- factor(c(rep("observed", 4), rep("estimated", 4)), levels=c("observed", "estimated" ))

# ... plot
p2 <- qplot(data=sn_Means, x=sn.f4, y=M, xlab= "Number of suffixes", ylab="Fixation landing position",
        group=Type, linetype=Type, shape=Type, geom=c("point")) +
        geom_smooth(method=lm,color="black", se=FALSE) +
        geom_errorbar(aes(ymax=M+2*SE, ymin=M-2*SE), width=0.1, linetype=1) + 
        coord_cartesian(ylim=c(2, 5)) + theme_bw() + 
        theme(legend.title=element_blank(), legend.justification= c(0,1), legend.position=c(0,1))
p2

plot of chunk fig3

Experiment 2: target words: WL(6:9)x SN(0,2)

LMMs for fixation landing position (flp)

Estimate main variance compenents and correlation parameters for subjects, intercepts for items

## Baseline LMM
print(summary(m2a <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1+wl.c+ls.c+sn.c+wf.c|subj) + (1|item), 
                           data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c * ls.c * sn.c * wf.c + (1 + wl.c + ls.c + sn.c + wf.c |  
##     subj) + (1 | item)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6667     6851    -3300     6601     1920 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.597 -0.600 -0.003  0.596  3.951 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr                   
##  item     (Intercept) 0.32784  0.5726                          
##  subj     (Intercept) 0.27541  0.5248                          
##           wl.c        0.01489  0.1220    0.47                  
##           ls.c        0.01130  0.1063    0.25  0.00            
##           sn.c        0.00412  0.0642   -0.38 -0.42 -0.80      
##           wf.c        0.04380  0.2093    0.41  0.24 -0.49  0.58
##  Residual             1.49223  1.2216                          
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)          3.20179    0.12112   26.44
## wl.c                 0.19531    0.06850    2.85
## ls.c                -0.40436    0.02834  -14.27
## sn.c                -0.05445    0.03457   -1.58
## wf.c                 0.26820    0.08183    3.28
## wl.c:ls.c           -0.00348    0.01809   -0.19
## wl.c:sn.c            0.03767    0.02895    1.30
## ls.c:sn.c            0.02002    0.01956    1.02
## wl.c:wf.c            0.02359    0.06554    0.36
## ls.c:wf.c            0.07568    0.03505    2.16
## sn.c:wf.c            0.11409    0.06755    1.69
## wl.c:ls.c:sn.c       0.00176    0.01740    0.10
## wl.c:ls.c:wf.c       0.05007    0.02927    1.71
## wl.c:sn.c:wf.c       0.08889    0.05468    1.63
## ls.c:sn.c:wf.c       0.07389    0.03446    2.14
## wl.c:ls.c:sn.c:wf.c  0.06272    0.02912    2.15
VarCorr(m2a)
##  Groups   Name        Std.Dev. Corr                   
##  item     (Intercept) 0.5726                          
##  subj     (Intercept) 0.5248                          
##           wl.c        0.1220    0.47                  
##           ls.c        0.1063    0.25  0.00            
##           sn.c        0.0642   -0.38 -0.42 -0.80      
##           wf.c        0.2093    0.41  0.24 -0.49  0.58
##  Residual             1.2216
chf <- matrix(0, nrow=5, ncol=5)
(chf[lower.tri(chf, diag=TRUE)] <-  getME(m2a, "theta")[2:16])
##      subj.(Intercept) subj.wl.c.(Intercept) subj.ls.c.(Intercept) subj.sn.c.(Intercept) 
##               0.42961               0.04701               0.02212              -0.02012 
## subj.wf.c.(Intercept)             subj.wl.c        subj.ls.c.wl.c        subj.sn.c.wl.c 
##               0.06968               0.08813              -0.01184              -0.01457 
##        subj.wf.c.wl.c             subj.ls.c        subj.sn.c.ls.c        subj.wf.c.ls.c 
##               0.01038               0.08331              -0.04062              -0.10418 
##             subj.sn.c        subj.wf.c.sn.c             subj.wf.c 
##               0.02221               0.11635               0.00000
sv <- svd(chf)
round(sv$d^2/sum(sv$d^2)*100)
## [1] 82 13  3  1  0
# Model is degenerate

print(summary(m2b    <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1|subj) + (1|item) +
                                   (0+wl.c|subj) + (0+ls.c|subj) + (0+sn.c|subj) + (0+wf.c|subj), 
                             data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 +  
##     wl.c | subj) + (0 + ls.c | subj) + (0 + sn.c | subj) + (0 +      wf.c | subj)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6656     6785    -3305     6610     1930 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.396 -0.602  0.001  0.603  3.893 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  item     (Intercept) 0.3248   0.570   
##  subj     wf.c        0.0400   0.200   
##  subj.1   sn.c        0.0000   0.000   
##  subj.2   ls.c        0.0112   0.106   
##  subj.3   wl.c        0.0123   0.111   
##  subj.4   (Intercept) 0.2555   0.505   
##  Residual             1.4991   1.224   
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)          3.19867    0.11820   27.06
## wl.c                 0.19302    0.06764    2.85
## ls.c                -0.40713    0.02831  -14.38
## sn.c                -0.05132    0.03250   -1.58
## wf.c                 0.26546    0.08142    3.26
## wl.c:ls.c           -0.00521    0.01811   -0.29
## wl.c:sn.c            0.03749    0.02898    1.29
## ls.c:sn.c            0.01962    0.01956    1.00
## wl.c:wf.c            0.02028    0.06563    0.31
## ls.c:wf.c            0.07293    0.03532    2.06
## sn.c:wf.c            0.10886    0.06766    1.61
## wl.c:ls.c:sn.c       0.00142    0.01746    0.08
## wl.c:ls.c:wf.c       0.04427    0.02936    1.51
## wl.c:sn.c:wf.c       0.08667    0.05484    1.58
## ls.c:sn.c:wf.c       0.07161    0.03469    2.06
## wl.c:ls.c:sn.c:wf.c  0.05748    0.02913    1.97
anova(m2b, m2a)
## Data: uighur2
## Models:
## m2b: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 + 
## m2b:     wl.c | subj) + (0 + ls.c | subj) + (0 + sn.c | subj) + (0 + 
## m2b:     wf.c | subj)
## m2a: flp ~ wl.c * ls.c * sn.c * wf.c + (1 + wl.c + ls.c + sn.c + wf.c | 
## m2a:     subj) + (1 | item)
##     Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2b 23 6656 6785  -3305     6610                        
## m2a 33 6667 6851  -3301     6601  9.43     10       0.49
# Ensemble of correlation parameters is not significant

# ... drop1 VC at a time
print(summary(m2b.wl    <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1|subj) + (1|item) +
                                      (0+ls.c|subj) + (0+sn.c|subj) + (0+wf.c|subj), 
                                data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 +  
##     ls.c | subj) + (0 + sn.c | subj) + (0 + wf.c | subj)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6658     6781    -3307     6614     1931 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.350 -0.605  0.005  0.602  3.890 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  item     (Intercept) 0.3236   0.569   
##  subj     wf.c        0.0417   0.204   
##  subj.1   sn.c        0.0000   0.000   
##  subj.2   ls.c        0.0109   0.104   
##  subj.3   (Intercept) 0.2509   0.501   
##  Residual             1.5135   1.230   
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)          3.19802    0.11750   27.22
## wl.c                 0.19144    0.06440    2.97
## ls.c                -0.40776    0.02808  -14.52
## sn.c                -0.05391    0.03239   -1.66
## wf.c                 0.26044    0.08188    3.18
## wl.c:ls.c           -0.00422    0.01792   -0.24
## wl.c:sn.c            0.03759    0.02906    1.29
## ls.c:sn.c            0.02029    0.01955    1.04
## wl.c:wf.c            0.01848    0.06577    0.28
## ls.c:wf.c            0.07079    0.03536    2.00
## sn.c:wf.c            0.10654    0.06785    1.57
## wl.c:ls.c:sn.c       0.00198    0.01746    0.11
## wl.c:ls.c:wf.c       0.04191    0.02934    1.43
## wl.c:sn.c:wf.c       0.08641    0.05501    1.57
## ls.c:sn.c:wf.c       0.07265    0.03479    2.09
## wl.c:ls.c:sn.c:wf.c  0.05781    0.02920    1.98
anova(m2b.wl, m2b)  # p = .062
## Data: uighur2
## Models:
## m2b.wl: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 + 
## m2b.wl:     ls.c | subj) + (0 + sn.c | subj) + (0 + wf.c | subj)
## m2b: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 + 
## m2b:     wl.c | subj) + (0 + ls.c | subj) + (0 + sn.c | subj) + (0 + 
## m2b:     wf.c | subj)
##        Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2b.wl 22 6658 6781  -3307     6614                        
## m2b    23 6656 6785  -3305     6610   3.5      1      0.062
print(summary(m2b.ls    <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1|subj) + (1|item) +
                                      (0+wl.c|subj) + (0+sn.c|subj) + (0+wf.c|subj),
                                data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 +  
##     wl.c | subj) + (0 + sn.c | subj) + (0 + wf.c | subj)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6668     6790    -3312     6624     1931 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.372 -0.613 -0.011  0.606  4.655 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  item     (Intercept) 0.3182   0.564   
##  subj     wf.c        0.0415   0.204   
##  subj.1   sn.c        0.0000   0.000   
##  subj.2   wl.c        0.0114   0.107   
##  subj.3   (Intercept) 0.2812   0.530   
##  Residual             1.5297   1.237   
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)          3.20586    0.12131   26.43
## wl.c                 0.19607    0.06693    2.93
## ls.c                -0.40584    0.02057  -19.73
## sn.c                -0.04598    0.03269   -1.41
## wf.c                 0.27163    0.08195    3.31
## wl.c:ls.c           -0.00962    0.01809   -0.53
## wl.c:sn.c            0.03631    0.02903    1.25
## ls.c:sn.c            0.01916    0.01961    0.98
## wl.c:wf.c            0.01584    0.06582    0.24
## ls.c:wf.c            0.07148    0.03537    2.02
## sn.c:wf.c            0.10182    0.06803    1.50
## wl.c:ls.c:sn.c      -0.00292    0.01733   -0.17
## wl.c:ls.c:wf.c       0.04811    0.02938    1.64
## wl.c:sn.c:wf.c       0.08784    0.05522    1.59
## ls.c:sn.c:wf.c       0.07163    0.03471    2.06
## wl.c:ls.c:sn.c:wf.c  0.06080    0.02913    2.09
anova(m2b.ls, m2b)  # p = .005
## Data: uighur2
## Models:
## m2b.ls: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 + 
## m2b.ls:     wl.c | subj) + (0 + sn.c | subj) + (0 + wf.c | subj)
## m2b: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 + 
## m2b:     wl.c | subj) + (0 + ls.c | subj) + (0 + sn.c | subj) + (0 + 
## m2b:     wf.c | subj)
##        Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2b.ls 22 6668 6790  -3312     6624                        
## m2b    23 6656 6785  -3305     6610  13.1      1    0.00029
print(summary(m2b.sn    <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1|subj) + (1|item) +
                                      (0+wl.c|subj) + (0+ls.c|subj) + (0+wf.c|subj),
                                data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 +  
##     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6654     6777    -3305     6610     1931 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.396 -0.602  0.001  0.603  3.893 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  item     (Intercept) 0.3248   0.570   
##  subj     wf.c        0.0400   0.200   
##  subj.1   ls.c        0.0112   0.106   
##  subj.2   wl.c        0.0123   0.111   
##  subj.3   (Intercept) 0.2555   0.505   
##  Residual             1.4991   1.224   
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)          3.19867    0.11820   27.06
## wl.c                 0.19302    0.06764    2.85
## ls.c                -0.40713    0.02831  -14.38
## sn.c                -0.05132    0.03250   -1.58
## wf.c                 0.26546    0.08142    3.26
## wl.c:ls.c           -0.00521    0.01811   -0.29
## wl.c:sn.c            0.03749    0.02898    1.29
## ls.c:sn.c            0.01962    0.01956    1.00
## wl.c:wf.c            0.02028    0.06563    0.31
## ls.c:wf.c            0.07293    0.03532    2.06
## sn.c:wf.c            0.10886    0.06766    1.61
## wl.c:ls.c:sn.c       0.00142    0.01746    0.08
## wl.c:ls.c:wf.c       0.04427    0.02936    1.51
## wl.c:sn.c:wf.c       0.08667    0.05484    1.58
## ls.c:sn.c:wf.c       0.07161    0.03469    2.06
## wl.c:ls.c:sn.c:wf.c  0.05748    0.02913    1.97
anova(m2b.sn, m2b)  # p = 1
## Data: uighur2
## Models:
## m2b.sn: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 + 
## m2b.sn:     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj)
## m2b: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 + 
## m2b:     wl.c | subj) + (0 + ls.c | subj) + (0 + sn.c | subj) + (0 + 
## m2b:     wf.c | subj)
##        Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2b.sn 22 6654 6777  -3305     6610                        
## m2b    23 6656 6785  -3305     6610     0      1          1
print(summary(m2b.wf    <- lmer(flp ~ wl.c * ls.c * sn.c * wf.c + (1|subj) + (1|item) +
                                              (0+wl.c|subj) + (0+ls.c|subj) + (0+sn.c|subj), 
                                               data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 +  
##     wl.c | subj) + (0 + ls.c | subj) + (0 + sn.c | subj)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6660     6782    -3308     6616     1931 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.720 -0.601 -0.004  0.607  3.859 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  item     (Intercept) 0.3246   0.570   
##  subj     sn.c        0.0000   0.000   
##  subj.1   ls.c        0.0114   0.107   
##  subj.2   wl.c        0.0133   0.115   
##  subj.3   (Intercept) 0.2554   0.505   
##  Residual             1.5139   1.230   
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)          3.20167    0.11820   27.09
## wl.c                 0.19516    0.06792    2.87
## ls.c                -0.40791    0.02848  -14.32
## sn.c                -0.05062    0.03262   -1.55
## wf.c                 0.25598    0.07229    3.54
## wl.c:ls.c           -0.00587    0.01815   -0.32
## wl.c:sn.c            0.03754    0.02908    1.29
## ls.c:sn.c            0.02001    0.01963    1.02
## wl.c:wf.c            0.02097    0.06572    0.32
## ls.c:wf.c            0.08378    0.03457    2.42
## sn.c:wf.c            0.11210    0.06771    1.66
## wl.c:ls.c:sn.c       0.00127    0.01751    0.07
## wl.c:ls.c:wf.c       0.04713    0.02926    1.61
## wl.c:sn.c:wf.c       0.09148    0.05476    1.67
## ls.c:sn.c:wf.c       0.06664    0.03449    1.93
## wl.c:ls.c:sn.c:wf.c  0.05486    0.02887    1.90
anova(m2b.wf, m2b)  # p = .021
## Data: uighur2
## Models:
## m2b.wf: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 + 
## m2b.wf:     wl.c | subj) + (0 + ls.c | subj) + (0 + sn.c | subj)
## m2b: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 + 
## m2b:     wl.c | subj) + (0 + ls.c | subj) + (0 + sn.c | subj) + (0 + 
## m2b:     wf.c | subj)
##        Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2b.wf 22 6660 6783  -3308     6616                        
## m2b    23 6656 6785  -3305     6610  5.34      1      0.021
# ... fixed effect structure
print(summary(m2c <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c + (1|subj) + (1|item) +
                                (0+wl.c|subj) + (0+ls.c|subj) + (0+wf.c|subj), 
                           data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 +  
##     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6646     6707    -3312     6624     1942 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.366 -0.609 -0.001  0.600  4.039 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  item     (Intercept) 0.3249   0.570   
##  subj     wf.c        0.0448   0.212   
##  subj.1   ls.c        0.0116   0.108   
##  subj.2   wl.c        0.0118   0.109   
##  subj.3   (Intercept) 0.2541   0.504   
##  Residual             1.5090   1.228   
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.1982     0.1175   27.23
## wl.c          0.1908     0.0673    2.83
## ls.c         -0.4036     0.0267  -15.11
## sn.c         -0.0801     0.0286   -2.81
## wf.c          0.1924     0.0761    2.53
anova(m2c, m2b)
## Data: uighur2
## Models:
## m2c: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 + 
## m2c:     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj)
## m2b: flp ~ wl.c * ls.c * sn.c * wf.c + (1 | subj) + (1 | item) + (0 + 
## m2b:     wl.c | subj) + (0 + ls.c | subj) + (0 + sn.c | subj) + (0 + 
## m2b:     wf.c | subj)
##     Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2c 11 6646 6707  -3312     6624                        
## m2b 23 6656 6785  -3305     6610  13.3     12       0.34
# ... final model
print(summary(m2c <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c + (1|subj) + (1|item) +
                                (0+wl.c|subj) + (0+ls.c|subj) + (0+wf.c|subj), 
                          data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 +  
##     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6646     6707    -3312     6624     1942 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.366 -0.609 -0.001  0.600  4.039 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  item     (Intercept) 0.3249   0.570   
##  subj     wf.c        0.0448   0.212   
##  subj.1   ls.c        0.0116   0.108   
##  subj.2   wl.c        0.0118   0.109   
##  subj.3   (Intercept) 0.2541   0.504   
##  Residual             1.5090   1.228   
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.1982     0.1175   27.23
## wl.c          0.1908     0.0673    2.83
## ls.c         -0.4036     0.0267  -15.11
## sn.c         -0.0801     0.0286   -2.81
## wf.c          0.1924     0.0761    2.53
VarCorr(m2c)
##  Groups   Name        Std.Dev.
##  item     (Intercept) 0.570   
##  subj     wf.c        0.212   
##  subj.1   ls.c        0.108   
##  subj.2   wl.c        0.109   
##  subj.3   (Intercept) 0.504   
##  Residual             1.228
chf2 <-  diag(unname(getME(m2c, "theta")[1:5]), 5)
sv2 <- svd(chf2)
round(sv2$d^2/sum(sv2$d^2)*100)
## [1] 50 39  7  2  2
# SVDs look ok

Morphological complexity as random effect

Add subject-related random effect for morphological complexity to the final model, as reqested by reviewer, because of theoretical relevance of morph. complexity

## Baseline LMM

print(summary(m2c.x <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c + (1|subj) + (1|item) +
                                  (0+wl.c|subj) + (0+ls.c|subj) + (0+wf.c|subj) +
                                  (0+sn.c|subj),
                            data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 +  
##     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj) + (0 +      sn.c | subj)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6648     6715    -3312     6624     1941 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.366 -0.609 -0.001  0.600  4.039 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  item     (Intercept) 0.3249   0.570   
##  subj     sn.c        0.0000   0.000   
##  subj.1   wf.c        0.0448   0.212   
##  subj.2   ls.c        0.0116   0.108   
##  subj.3   wl.c        0.0118   0.109   
##  subj.4   (Intercept) 0.2541   0.504   
##  Residual             1.5090   1.228   
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.1982     0.1175   27.23
## wl.c          0.1908     0.0673    2.83
## ls.c         -0.4036     0.0267  -15.11
## sn.c         -0.0801     0.0286   -2.81
## wf.c          0.1924     0.0761    2.53
anova(m2c, m2c.x) # not significant
## Data: uighur2
## Models:
## m2c: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 + 
## m2c:     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj)
## m2c.x: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 + 
## m2c.x:     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj) + (0 + 
## m2c.x:     sn.c | subj)
##       Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2c   11 6646 6707  -3312     6624                        
## m2c.x 12 6648 6715  -3312     6624     0      1          1
print(summary(m2c.y <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c +  (1|subj) + (1|item) + 
                                   (0+wl.c|subj) + (0+ls.c|subj) + (0+wf.c|subj) + 
                                   (0+sn.c|item),
                             data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 +  
##     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj) + (0 +      sn.c | item)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6646     6713    -3311     6622     1941 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.297 -0.602 -0.001  0.593  4.030 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  item     sn.c        0.0155   0.124   
##  item.1   (Intercept) 0.3237   0.569   
##  subj     wf.c        0.0447   0.211   
##  subj.1   ls.c        0.0114   0.107   
##  subj.2   wl.c        0.0119   0.109   
##  subj.3   (Intercept) 0.2545   0.505   
##  Residual             1.4935   1.222   
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.1997     0.1174   27.25
## wl.c          0.1899     0.0673    2.82
## ls.c         -0.4035     0.0266  -15.17
## sn.c         -0.0805     0.0315   -2.56
## wf.c          0.1866     0.0808    2.31
anova(m2c, m2c.y) # not significant
## Data: uighur2
## Models:
## m2c: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 + 
## m2c:     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj)
## m2c.y: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 + 
## m2c.y:     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj) + (0 + 
## m2c.y:     sn.c | item)
##       Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2c   11 6646 6707  -3312     6624                        
## m2c.y 12 6646 6713  -3311     6622  1.81      1       0.18
print(summary(m2c.z <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c + (1|subj) + (1|item) +
                                  (0+wl.c|subj) + (0+ls.c|subj) + (0+wf.c|subj) + 
                                  (0+sn.c|subj) + (0+sn.c|item),
                            data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 +  
##     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj) + (0 +  
##     sn.c | subj) + (0 + sn.c | item)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6648     6720    -3311     6622     1940 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.297 -0.602 -0.001  0.593  4.030 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  item     sn.c        0.0155   0.124   
##  item.1   (Intercept) 0.3237   0.569   
##  subj     sn.c        0.0000   0.000   
##  subj.1   wf.c        0.0447   0.211   
##  subj.2   ls.c        0.0114   0.107   
##  subj.3   wl.c        0.0119   0.109   
##  subj.4   (Intercept) 0.2545   0.505   
##  Residual             1.4935   1.222   
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.1997     0.1174   27.25
## wl.c          0.1899     0.0673    2.82
## ls.c         -0.4035     0.0266  -15.17
## sn.c         -0.0805     0.0315   -2.56
## wf.c          0.1866     0.0808    2.31
anova(m2c, m2c.z) # not significant
## Data: uighur2
## Models:
## m2c: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 + 
## m2c:     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj)
## m2c.z: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 + 
## m2c.z:     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj) + (0 + 
## m2c.z:     sn.c | subj) + (0 + sn.c | item)
##       Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2c   11 6646 6707  -3312     6624                        
## m2c.z 13 6648 6721  -3311     6622  1.81      2        0.4
# svd does NOT look ok: No variance of (0+sn.c|subj)
VarCorr(m2c.z)
##  Groups   Name        Std.Dev.
##  item     sn.c        0.124   
##  item.1   (Intercept) 0.569   
##  subj     sn.c        0.000   
##  subj.1   wf.c        0.211   
##  subj.2   ls.c        0.107   
##  subj.3   wl.c        0.109   
##  subj.4   (Intercept) 0.505   
##  Residual             1.222
chf3 <-  diag(unname(getME(m2c.z, "theta")[1:7]), 7)
sv3 <- svd(chf3)
round(sv3$d^2/sum(sv3$d^2)*100)
## [1] 49 38  7  2  2  2  0

Keeping non-significant vc in the model is a bad idea if it leads to a degenerate model. It is probably better to choose the model w/o (0+sn.c|subj); 6 vc’s supported by svd, but probably best to stay with model m2c.

Bigram freuquency

Added as requested by reviewer, because of theoretical relevance of bigram frequency.

print(summary(m2d <- lmer(flp ~ wl.c + ls.c + sn.c + wf.c + bigram + (1|subj) + (1|item) +
                                  (0+wl.c|subj) + (0+ls.c|subj) + (0+wf.c|subj), 
                            data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ wl.c + ls.c + sn.c + wf.c + bigram + (1 | subj) + (1 |  
##     item) + (0 + wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c |      subj)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6648     6715    -3312     6624     1941 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.369 -0.608 -0.002  0.599  4.043 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  item     (Intercept) 0.3238   0.569   
##  subj     wf.c        0.0448   0.212   
##  subj.1   ls.c        0.0116   0.108   
##  subj.2   wl.c        0.0118   0.109   
##  subj.3   (Intercept) 0.2540   0.504   
##  Residual             1.5092   1.228   
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.2831     0.3271   10.04
## wl.c          0.1925     0.0675    2.85
## ls.c         -0.4035     0.0267  -15.10
## sn.c         -0.0814     0.0289   -2.81
## wf.c          0.1916     0.0762    2.51
## bigram       -0.0246     0.0884   -0.28
print(summary(m2d.x <- lmer(flp ~ (wl.c + ls.c + sn.c + wf.c) * bigram + (1|subj) + (1|item) +
                                (0+wl.c|subj) + (0+ls.c|subj) + (0+wf.c|subj), 
                          data=uighur2, REML=FALSE)), corr=FALSE)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: flp ~ (wl.c + ls.c + sn.c + wf.c) * bigram + (1 | subj) + (1 |  
##     item) + (0 + wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c |      subj)
##    Data: uighur2
## 
##      AIC      BIC   logLik deviance df.resid 
##     6650     6739    -3309     6618     1937 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.379 -0.612 -0.007  0.604  4.002 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  item     (Intercept) 0.3133   0.560   
##  subj     wf.c        0.0446   0.211   
##  subj.1   ls.c        0.0115   0.107   
##  subj.2   wl.c        0.0129   0.114   
##  subj.3   (Intercept) 0.2507   0.501   
##  Residual             1.5058   1.227   
## Number of obs: 1953, groups:  item, 86 subj, 30
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  3.17774    0.36767    8.64
## wl.c        -0.19027    0.35272   -0.54
## ls.c        -0.61850    0.13526   -4.57
## sn.c         0.32995    0.26057    1.27
## wf.c        -0.21457    0.60924   -0.35
## bigram       0.00103    0.10008    0.01
## wl.c:bigram  0.11047    0.09955    1.11
## ls.c:bigram  0.06248    0.03856    1.62
## sn.c:bigram -0.11921    0.07613   -1.57
## wf.c:bigram  0.11622    0.17086    0.68
anova(m2c,m2d, m2d.x )
## Data: uighur2
## Models:
## m2c: flp ~ wl.c + ls.c + sn.c + wf.c + (1 | subj) + (1 | item) + (0 + 
## m2c:     wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | subj)
## m2d: flp ~ wl.c + ls.c + sn.c + wf.c + bigram + (1 | subj) + (1 | 
## m2d:     item) + (0 + wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | 
## m2d:     subj)
## m2d.x: flp ~ (wl.c + ls.c + sn.c + wf.c) * bigram + (1 | subj) + (1 | 
## m2d.x:     item) + (0 + wl.c | subj) + (0 + ls.c | subj) + (0 + wf.c | 
## m2d.x:     subj)
##       Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2c   11 6646 6707  -3312     6624                        
## m2d   12 6648 6715  -3312     6624  0.08      1       0.78
## m2d.x 16 6650 6739  -3309     6618  6.03      4       0.20

Result: Bigram frequency is not significant and does not change the other effects (unless diluted by interactions).

Plots

Residuals of LMM: Looks ok

p0 <-qplot(x=fitted(m2c), y=resid(m2c), geom="point",  shape=I("."), size=8, 
      xlab="Fitted values", ylab="Standardized residuals") + 
      geom_hline(yintercept=0) + theme_bw() 
p0

plot of chunk residual

lattice::qqmath(resid(m2c))

plot of chunk residual

Partial effects and plot

Word length

ybreaks = seq(1, 7, 2)
matrix(names(fixef(m2c)))
##      [,1]         
## [1,] "(Intercept)"
## [2,] "wl.c"       
## [3,] "ls.c"       
## [4,] "sn.c"       
## [5,] "wf.c"
uighur2$DV.wl <- remef(m2c, keep=TRUE, grouping=TRUE, fix=c(1, 2), ran=NULL)
p1 <- qplot(data=uighur2, x = wl, y = DV.wl, xlab="Word length", ylab="Landing position", group=1, 
            geom="smooth", method="lm") +
      geom_smooth(method=lm,color="black") +
      stat_summary (fun.y = mean, geom="point", colour="black", size=2, mapping = aes (x=wl) ) +
      stat_summary (fun.y = mean, geom="point", shape=2,  size=3, mapping = aes (y=flp, x=wl) ) + 
      coord_cartesian(ylim=c(0, 5))

Launch site

uighur2$DV.ls <- remef(m2c, keep=TRUE, grouping=TRUE, fix=c(1, 3), ran=NULL)
p2 <- qplot(data=uighur2, x = ls, y = DV.ls, xlab="Launch site", ylab="Landing position", group=1,
            geom="smooth", method="lm") +
      geom_smooth(method=lm,color="black") +
      stat_summary (fun.y = mean, geom="point", colour="black",  size=2, mapping = aes (x=ls.f) ) +
      stat_summary (fun.y = mean, geom="point", shape=2,   size=3, mapping = aes (y=flp, x=ls.f) ) +
      coord_cartesian(ylim=c(0, 5)) 

Morph. complexity

uighur2$DV.sn <- remef(m2c, keep=TRUE, grouping=TRUE, fix=c(1, 4), ran=NULL)
p3 <- qplot(data=uighur2, x = sn, y = DV.sn, xlab="Suffixes", ylab="Landing position", group=1,
            geom="smooth", method="lm") +
      geom_smooth(method=lm,color="black") +
      stat_summary (fun.y = mean, geom="point", colour="black", size=2, mapping = aes (x=sn) ) +       
      stat_summary (fun.y = mean, geom="point", shape=2, size=3, mapping = aes (y=flp, x=sn) ) + 
      coord_cartesian(ylim=c(0, 5)) 

Word frequency

uighur2$DV.wf <- remef(m2c, keep=TRUE, grouping=TRUE, fix=c(1, 5), ran=NULL)
p4 <- qplot(data=uighur2, x = wf10, y = DV.wf, xlab="Word frequency", ylab="Landing position", group=1, geom="smooth", method="lm") +
      geom_smooth(method=lm,color="black") +
      stat_summary (fun.y = mean, geom="point", colour="black",  size=2, mapping = aes (x=wf.f) ) +
      stat_summary (fun.y = mean, geom="point", shape=2,   size=3, mapping = aes (y=flp, x=wf.f) ) + 
      coord_cartesian(ylim=c(0, 5))  

Figure 4 (Exp 2)

grid.newpage()  
pushViewport(viewport(layout = grid.layout(2, 2))) 
print(p1, vp=vplayout(1,1))
print(p2, vp=vplayout(1,2))
print(p3, vp=vplayout(2,1))
print(p4, vp=vplayout(2,2))

plot of chunk fig4