# trial by trial reaction time data
# load packages
pacman::p_load(languageR, lattice, tidyverse, nlme, lmtest)
# load data
data(lexdec, package="languageR")
heid$Trial <- as.numeric(unlist(apply(as.matrix(table(heid$Subject)), 1, seq)))
# query data info
?lexdec
## starting httpd help server ... done
# see first 6 lines of data
head(lexdec)
## Subject RT Trial Sex NativeLanguage Correct PrevType PrevCorrect
## 1 A1 6.340359 23 F English correct word correct
## 2 A1 6.308098 27 F English correct nonword correct
## 3 A1 6.349139 29 F English correct nonword correct
## 4 A1 6.186209 30 F English correct word correct
## 5 A1 6.025866 32 F English correct nonword correct
## 6 A1 6.180017 33 F English correct word correct
## Word Frequency FamilySize SynsetCount Length Class FreqSingular
## 1 owl 4.859812 1.3862944 0.6931472 3 animal 54
## 2 mole 4.605170 1.0986123 1.9459101 4 animal 69
## 3 cherry 4.997212 0.6931472 1.6094379 6 plant 83
## 4 pear 4.727388 0.0000000 1.0986123 4 plant 44
## 5 dog 7.667626 3.1354942 2.0794415 3 animal 1233
## 6 blackberry 4.060443 0.6931472 1.3862944 10 plant 26
## FreqPlural DerivEntropy Complex rInfl meanRT SubjFreq meanSize
## 1 74 0.7912 simplex -0.3101549 6.3582 3.12 3.4758
## 2 30 0.6968 simplex 0.8145080 6.4150 2.40 2.9999
## 3 49 0.4754 simplex 0.5187938 6.3426 3.88 1.6278
## 4 68 0.0000 simplex -0.4274440 6.3353 4.52 1.9908
## 5 828 1.2129 simplex 0.3977961 6.2956 6.04 4.6429
## 6 31 0.3492 complex -0.1698990 6.3959 3.28 1.5831
## meanWeight BNCw BNCc BNCd BNCcRatio BNCdRatio
## 1 3.1806 12.057065 0.000000 6.175602 0.000000 0.512198
## 2 2.6112 5.738806 4.062251 2.850278 0.707856 0.496667
## 3 1.2081 5.716520 3.249801 12.588727 0.568493 2.202166
## 4 1.6114 2.050370 1.462410 7.363218 0.713242 3.591166
## 5 4.5167 74.838494 50.859385 241.561040 0.679589 3.227765
## 6 1.1365 1.270338 0.162490 1.187616 0.127911 0.934882
# check data structure
str(lexdec)
## 'data.frame': 1659 obs. of 28 variables:
## $ Subject : Factor w/ 21 levels "A1","A2","A3",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ RT : num 6.34 6.31 6.35 6.19 6.03 ...
## $ Trial : int 23 27 29 30 32 33 34 38 41 42 ...
## $ Sex : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
## $ NativeLanguage: Factor w/ 2 levels "English","Other": 1 1 1 1 1 1 1 1 1 1 ...
## $ Correct : Factor w/ 2 levels "correct","incorrect": 1 1 1 1 1 1 1 1 1 1 ...
## $ PrevType : Factor w/ 2 levels "nonword","word": 2 1 1 2 1 2 2 1 1 2 ...
## $ PrevCorrect : Factor w/ 2 levels "correct","incorrect": 1 1 1 1 1 1 1 1 1 1 ...
## $ Word : Factor w/ 79 levels "almond","ant",..: 55 47 20 58 25 12 71 69 62 1 ...
## $ Frequency : num 4.86 4.61 5 4.73 7.67 ...
## $ FamilySize : num 1.386 1.099 0.693 0 3.135 ...
## $ SynsetCount : num 0.693 1.946 1.609 1.099 2.079 ...
## $ Length : int 3 4 6 4 3 10 10 8 6 6 ...
## $ Class : Factor w/ 2 levels "animal","plant": 1 1 2 2 1 2 2 1 2 2 ...
## $ FreqSingular : int 54 69 83 44 1233 26 50 63 11 24 ...
## $ FreqPlural : int 74 30 49 68 828 31 65 47 9 42 ...
## $ DerivEntropy : num 0.791 0.697 0.475 0 1.213 ...
## $ Complex : Factor w/ 2 levels "complex","simplex": 2 2 2 2 2 1 1 2 2 2 ...
## $ rInfl : num -0.31 0.815 0.519 -0.427 0.398 ...
## $ meanRT : num 6.36 6.42 6.34 6.34 6.3 ...
## $ SubjFreq : num 3.12 2.4 3.88 4.52 6.04 3.28 5.04 2.8 3.12 3.72 ...
## $ meanSize : num 3.48 3 1.63 1.99 4.64 ...
## $ meanWeight : num 3.18 2.61 1.21 1.61 4.52 ...
## $ BNCw : num 12.06 5.74 5.72 2.05 74.84 ...
## $ BNCc : num 0 4.06 3.25 1.46 50.86 ...
## $ BNCd : num 6.18 2.85 12.59 7.36 241.56 ...
## $ BNCcRatio : num 0 0.708 0.568 0.713 0.68 ...
## $ BNCdRatio : num 0.512 0.497 2.202 3.591 3.228 ...
# select a subset of variables for current example
dta <- heid %>% select(Subject, Trial, RT, BaseFrequency)
# set plot theme
ot <- theme_set(theme_bw())
# word frequency effect
ggplot(data = dta, aes(x = BaseFrequency, y = RT)) +
geom_point() +
stat_smooth(method="lm") +
labs(x = "BaseFrequency", y = "Reaction Time (transformed)")
## `geom_smooth()` using formula 'y ~ x'

# trial effect by subject
ggplot(data = dta, aes(x = Trial, y = RT)) +
geom_point(pch =1, size = rel(.5)) +
geom_line() +
facet_wrap(~ Subject) +
labs(x = "Trial ID", y = "Reaction Time (transformed)")

# autocorrelation by individual
acf.fnc(dta, group = "Subject", time = "Trial", x = "RT", plot = TRUE)

# simple linear model ignoring trial dependence - least-squares
summary(m0 <- lm(RT ~ BaseFrequency, data = dta))
##
## Call:
## lm(formula = RT ~ BaseFrequency, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51664 -0.21576 -0.05251 0.15991 0.89027
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.641720 0.020730 320.392 < 2e-16 ***
## BaseFrequency -0.015470 0.004701 -3.291 0.00104 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.282 on 830 degrees of freedom
## Multiple R-squared: 0.01288, Adjusted R-squared: 0.01169
## F-statistic: 10.83 on 1 and 830 DF, p-value: 0.001041
# MLE of the model above
summary(m0a <- gls(RT ~ BaseFrequency, data = dta, method = "ML"))
## Generalized least squares fit by maximum likelihood
## Model: RT ~ BaseFrequency
## Data: dta
## AIC BIC logLik
## 258.4621 272.6336 -126.231
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 6.64172 0.020730012 320.3915 0.000
## BaseFrequency -0.01547 0.004701192 -3.2908 0.001
##
## Correlation:
## (Intr)
## BaseFrequency -0.882
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.8345863 -0.7661546 -0.1864644 0.5678517 3.1613281
##
## Residual standard error: 0.2816138
## Degrees of freedom: 832 total; 830 residual
# simple linear model with autocorrelation error using gls
summary(m1 <- update(m0a, correlation = corAR1(form = ~ Trial | Subject)))
## Generalized least squares fit by maximum likelihood
## Model: RT ~ BaseFrequency
## Data: dta
## AIC BIC logLik
## 64.26212 83.15745 -28.13106
##
## Correlation Structure: AR(1)
## Formula: ~Trial | Subject
## Parameter estimate(s):
## Phi
## 0.4621332
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 6.635164 0.021934244 302.50253 0e+00
## BaseFrequency -0.013854 0.003868156 -3.58149 4e-04
##
## Correlation:
## (Intr)
## BaseFrequency -0.699
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.8345660 -0.7652167 -0.1837375 0.5862265 3.1590365
##
## Residual standard error: 0.2811809
## Degrees of freedom: 832 total; 830 residual
# compare models
anova(m0a, m1)
## Model df AIC BIC logLik Test L.Ratio p-value
## m0a 1 3 258.46209 272.63359 -126.23105
## m1 2 4 64.26212 83.15745 -28.13106 1 vs 2 196.2 <.0001
#
# collect residuals from the two models above
dta <- dta %>% mutate(res0 = rstandard(m0),
res1 = resid(m1, type = "normalized"))
# Durbin-Watson test of serial correlation
dwtest(res0 ~ BaseFrequency, data = dta)
##
## Durbin-Watson test
##
## data: res0 ~ BaseFrequency
## DW = 1.064, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
#
dwtest(res1 ~ BaseFrequency, data = dta)
##
## Durbin-Watson test
##
## data: res1 ~ BaseFrequency
## DW = 2.1854, p-value = 0.9963
## alternative hypothesis: true autocorrelation is greater than 0
# check acf for residuals
acf.fnc(dta, group = "Subject", time = "Trial", x = "res1", plot = TRUE)

###