lexical decision latencies

# trial by trial reaction time data


# load packages
pacman::p_load(languageR, lattice, tidyverse, nlme, lmtest)

# load data
data(lexdec, package="languageR")
heid$Trial <- as.numeric(unlist(apply(as.matrix(table(heid$Subject)), 1, seq)))
# query data info
?lexdec

## starting httpd help server ... done

# see first 6 lines of data
head(lexdec)

##   Subject       RT Trial Sex NativeLanguage Correct PrevType PrevCorrect
## 1      A1 6.340359    23   F        English correct     word     correct
## 2      A1 6.308098    27   F        English correct  nonword     correct
## 3      A1 6.349139    29   F        English correct  nonword     correct
## 4      A1 6.186209    30   F        English correct     word     correct
## 5      A1 6.025866    32   F        English correct  nonword     correct
## 6      A1 6.180017    33   F        English correct     word     correct
##         Word Frequency FamilySize SynsetCount Length  Class FreqSingular
## 1        owl  4.859812  1.3862944   0.6931472      3 animal           54
## 2       mole  4.605170  1.0986123   1.9459101      4 animal           69
## 3     cherry  4.997212  0.6931472   1.6094379      6  plant           83
## 4       pear  4.727388  0.0000000   1.0986123      4  plant           44
## 5        dog  7.667626  3.1354942   2.0794415      3 animal         1233
## 6 blackberry  4.060443  0.6931472   1.3862944     10  plant           26
##   FreqPlural DerivEntropy Complex      rInfl meanRT SubjFreq meanSize
## 1         74       0.7912 simplex -0.3101549 6.3582     3.12   3.4758
## 2         30       0.6968 simplex  0.8145080 6.4150     2.40   2.9999
## 3         49       0.4754 simplex  0.5187938 6.3426     3.88   1.6278
## 4         68       0.0000 simplex -0.4274440 6.3353     4.52   1.9908
## 5        828       1.2129 simplex  0.3977961 6.2956     6.04   4.6429
## 6         31       0.3492 complex -0.1698990 6.3959     3.28   1.5831
##   meanWeight      BNCw      BNCc       BNCd BNCcRatio BNCdRatio
## 1     3.1806 12.057065  0.000000   6.175602  0.000000  0.512198
## 2     2.6112  5.738806  4.062251   2.850278  0.707856  0.496667
## 3     1.2081  5.716520  3.249801  12.588727  0.568493  2.202166
## 4     1.6114  2.050370  1.462410   7.363218  0.713242  3.591166
## 5     4.5167 74.838494 50.859385 241.561040  0.679589  3.227765
## 6     1.1365  1.270338  0.162490   1.187616  0.127911  0.934882

# check data structure
str(lexdec)

## 'data.frame':    1659 obs. of  28 variables:
##  $ Subject       : Factor w/ 21 levels "A1","A2","A3",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ RT            : num  6.34 6.31 6.35 6.19 6.03 ...
##  $ Trial         : int  23 27 29 30 32 33 34 38 41 42 ...
##  $ Sex           : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
##  $ NativeLanguage: Factor w/ 2 levels "English","Other": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Correct       : Factor w/ 2 levels "correct","incorrect": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PrevType      : Factor w/ 2 levels "nonword","word": 2 1 1 2 1 2 2 1 1 2 ...
##  $ PrevCorrect   : Factor w/ 2 levels "correct","incorrect": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Word          : Factor w/ 79 levels "almond","ant",..: 55 47 20 58 25 12 71 69 62 1 ...
##  $ Frequency     : num  4.86 4.61 5 4.73 7.67 ...
##  $ FamilySize    : num  1.386 1.099 0.693 0 3.135 ...
##  $ SynsetCount   : num  0.693 1.946 1.609 1.099 2.079 ...
##  $ Length        : int  3 4 6 4 3 10 10 8 6 6 ...
##  $ Class         : Factor w/ 2 levels "animal","plant": 1 1 2 2 1 2 2 1 2 2 ...
##  $ FreqSingular  : int  54 69 83 44 1233 26 50 63 11 24 ...
##  $ FreqPlural    : int  74 30 49 68 828 31 65 47 9 42 ...
##  $ DerivEntropy  : num  0.791 0.697 0.475 0 1.213 ...
##  $ Complex       : Factor w/ 2 levels "complex","simplex": 2 2 2 2 2 1 1 2 2 2 ...
##  $ rInfl         : num  -0.31 0.815 0.519 -0.427 0.398 ...
##  $ meanRT        : num  6.36 6.42 6.34 6.34 6.3 ...
##  $ SubjFreq      : num  3.12 2.4 3.88 4.52 6.04 3.28 5.04 2.8 3.12 3.72 ...
##  $ meanSize      : num  3.48 3 1.63 1.99 4.64 ...
##  $ meanWeight    : num  3.18 2.61 1.21 1.61 4.52 ...
##  $ BNCw          : num  12.06 5.74 5.72 2.05 74.84 ...
##  $ BNCc          : num  0 4.06 3.25 1.46 50.86 ...
##  $ BNCd          : num  6.18 2.85 12.59 7.36 241.56 ...
##  $ BNCcRatio     : num  0 0.708 0.568 0.713 0.68 ...
##  $ BNCdRatio     : num  0.512 0.497 2.202 3.591 3.228 ...

# select a subset of variables for current example
dta <- heid %>% select(Subject, Trial, RT, BaseFrequency)

# set plot theme
ot <- theme_set(theme_bw())

# word frequency effect
ggplot(data = dta, aes(x = BaseFrequency, y = RT)) +
 geom_point() +
 stat_smooth(method="lm") +
 labs(x = "BaseFrequency", y = "Reaction Time (transformed)")

## `geom_smooth()` using formula 'y ~ x'

# trial effect by subject
ggplot(data = dta, aes(x = Trial, y = RT)) +
 geom_point(pch =1, size = rel(.5)) +
 geom_line() +
 facet_wrap(~ Subject) +
 labs(x = "Trial ID", y = "Reaction Time (transformed)")

# autocorrelation by individual
acf.fnc(dta, group = "Subject", time = "Trial", x = "RT", plot = TRUE)

# simple linear model ignoring trial dependence - least-squares
summary(m0 <- lm(RT ~ BaseFrequency, data = dta))

## 
## Call:
## lm(formula = RT ~ BaseFrequency, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51664 -0.21576 -0.05251  0.15991  0.89027 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6.641720   0.020730 320.392  < 2e-16 ***
## BaseFrequency -0.015470   0.004701  -3.291  0.00104 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.282 on 830 degrees of freedom
## Multiple R-squared:  0.01288,    Adjusted R-squared:  0.01169 
## F-statistic: 10.83 on 1 and 830 DF,  p-value: 0.001041

# MLE of the model above
summary(m0a <- gls(RT ~ BaseFrequency, data = dta, method = "ML"))

## Generalized least squares fit by maximum likelihood
##   Model: RT ~ BaseFrequency 
##   Data: dta 
##        AIC      BIC   logLik
##   258.4621 272.6336 -126.231
## 
## Coefficients:
##                  Value   Std.Error  t-value p-value
## (Intercept)    6.64172 0.020730012 320.3915   0.000
## BaseFrequency -0.01547 0.004701192  -3.2908   0.001
## 
##  Correlation: 
##               (Intr)
## BaseFrequency -0.882
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.8345863 -0.7661546 -0.1864644  0.5678517  3.1613281 
## 
## Residual standard error: 0.2816138 
## Degrees of freedom: 832 total; 830 residual

# simple linear model with autocorrelation error using gls
summary(m1 <- update(m0a, correlation = corAR1(form = ~ Trial | Subject)))

## Generalized least squares fit by maximum likelihood
##   Model: RT ~ BaseFrequency 
##   Data: dta 
##        AIC      BIC    logLik
##   64.26212 83.15745 -28.13106
## 
## Correlation Structure: AR(1)
##  Formula: ~Trial | Subject 
##  Parameter estimate(s):
##       Phi 
## 0.4621332 
## 
## Coefficients:
##                   Value   Std.Error   t-value p-value
## (Intercept)    6.635164 0.021934244 302.50253   0e+00
## BaseFrequency -0.013854 0.003868156  -3.58149   4e-04
## 
##  Correlation: 
##               (Intr)
## BaseFrequency -0.699
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.8345660 -0.7652167 -0.1837375  0.5862265  3.1590365 
## 
## Residual standard error: 0.2811809 
## Degrees of freedom: 832 total; 830 residual

# compare models
anova(m0a, m1)

##     Model df       AIC       BIC     logLik   Test L.Ratio p-value
## m0a     1  3 258.46209 272.63359 -126.23105                       
## m1      2  4  64.26212  83.15745  -28.13106 1 vs 2   196.2  <.0001

#
# collect residuals from the two models above
dta <- dta %>% mutate(res0 = rstandard(m0),
                      res1 = resid(m1, type = "normalized"))

# Durbin-Watson test of serial correlation
dwtest(res0 ~ BaseFrequency, data = dta)

## 
##  Durbin-Watson test
## 
## data:  res0 ~ BaseFrequency
## DW = 1.064, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

#
dwtest(res1 ~ BaseFrequency, data = dta)

## 
##  Durbin-Watson test
## 
## data:  res1 ~ BaseFrequency
## DW = 2.1854, p-value = 0.9963
## alternative hypothesis: true autocorrelation is greater than 0

# check acf for residuals
acf.fnc(dta, group = "Subject", time = "Trial", x = "res1", plot = TRUE)

###

lexical decision latencies

Szu-Yu Chen

27 September 2020