trial by trial reaction time data

load packages

pacman::p_load(languageR, lattice, tidyverse, nlme, lmtest)

load data

data(heid, package="languageR")
heid$Trial <- as.numeric(unlist(apply(as.matrix(table(heid$Subject)), 1, seq)))

query data info

?heid

## starting httpd help server ... done

see first 6 lines of data

head(heid)

##   Subject         Word   RT BaseFrequency Trial
## 1     pp1   basaalheid 6.69          3.56     1
## 2     pp1  markantheid 6.81          5.16     2
## 3     pp1 ontroerdheid 6.51          5.55     3
## 4     pp1  contentheid 6.58          4.50     4
## 5     pp1    riantheid 6.86          4.53     5
## 6     pp1  tembaarheid 6.35          0.00     6

check data structure

str(heid)

## 'data.frame':    832 obs. of  5 variables:
##  $ Subject      : Factor w/ 26 levels "pp1","pp10","pp11",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Word         : Factor w/ 40 levels "aftandsheid",..: 4 24 28 10 33 38 25 5 16 30 ...
##  $ RT           : num  6.69 6.81 6.51 6.58 6.86 6.35 6.69 7.17 6.58 6.98 ...
##  $ BaseFrequency: num  3.56 5.16 5.55 4.5 4.53 0 1.61 3.61 2.56 5.86 ...
##  $ Trial        : num  1 2 3 4 5 6 7 8 9 10 ...

select a subset of variables for current example

dta <- heid %>% select(Subject, Trial, RT, BaseFrequency)

選取變數。

set plot theme

ot <- theme_set(theme_bw())

word frequency effect

ggplot(data = dta, aes(x = BaseFrequency, y = RT)) +
 geom_point() +
 stat_smooth(method="lm") +
 labs(x = "BaseFrequency", y = "Reaction Time (transformed)")

## `geom_smooth()` using formula 'y ~ x'

trial effect by subject

ggplot(data = dta, aes(x = Trial, y = RT)) +
 geom_point(pch =1, size = rel(.5)) +
 geom_line() +
 facet_wrap(~ Subject) +
 labs(x = "Trial ID", y = "Reaction Time (transformed)")

autocorrelation by individual

acf.fnc(dta, group = "Subject", time = "Trial", x = "RT", plot = TRUE)

simple linear model ignoring trial dependence - least-squares

summary(m0 <- lm(RT ~ BaseFrequency, data = dta))

## 
## Call:
## lm(formula = RT ~ BaseFrequency, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51664 -0.21576 -0.05251  0.15991  0.89027 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6.641720   0.020730 320.392  < 2e-16 ***
## BaseFrequency -0.015470   0.004701  -3.291  0.00104 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.282 on 830 degrees of freedom
## Multiple R-squared:  0.01288,    Adjusted R-squared:  0.01169 
## F-statistic: 10.83 on 1 and 830 DF,  p-value: 0.001041

MLE of the model above

summary(m0a <- gls(RT ~ BaseFrequency, data = dta, method = "ML"))

## Generalized least squares fit by maximum likelihood
##   Model: RT ~ BaseFrequency 
##   Data: dta 
##        AIC      BIC   logLik
##   258.4621 272.6336 -126.231
## 
## Coefficients:
##                  Value   Std.Error  t-value p-value
## (Intercept)    6.64172 0.020730012 320.3915   0.000
## BaseFrequency -0.01547 0.004701192  -3.2908   0.001
## 
##  Correlation: 
##               (Intr)
## BaseFrequency -0.882
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.8345863 -0.7661546 -0.1864644  0.5678517  3.1613281 
## 
## Residual standard error: 0.2816138 
## Degrees of freedom: 832 total; 830 residual

simple linear model with autocorrelation error using gls

summary(m1 <- update(m0a, correlation = corAR1(form = ~ Trial | Subject)))

## Generalized least squares fit by maximum likelihood
##   Model: RT ~ BaseFrequency 
##   Data: dta 
##        AIC      BIC    logLik
##   64.26212 83.15745 -28.13106
## 
## Correlation Structure: AR(1)
##  Formula: ~Trial | Subject 
##  Parameter estimate(s):
##       Phi 
## 0.4621332 
## 
## Coefficients:
##                   Value   Std.Error   t-value p-value
## (Intercept)    6.635164 0.021934244 302.50253   0e+00
## BaseFrequency -0.013854 0.003868156  -3.58149   4e-04
## 
##  Correlation: 
##               (Intr)
## BaseFrequency -0.699
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.8345660 -0.7652167 -0.1837375  0.5862265  3.1590365 
## 
## Residual standard error: 0.2811809 
## Degrees of freedom: 832 total; 830 residual

compare models

anova(m0a, m1)

##     Model df       AIC       BIC     logLik   Test L.Ratio p-value
## m0a     1  3 258.46209 272.63359 -126.23105                       
## m1      2  4  64.26212  83.15745  -28.13106 1 vs 2   196.2  <.0001

collect residuals from the two models above

dta <- dta %>% mutate(res0 = rstandard(m0),
                      res1 = resid(m1, type = "normalized"))

Durbin-Watson test of serial correlation

dwtest(res0 ~ BaseFrequency, data = dta)

## 
##  Durbin-Watson test
## 
## data:  res0 ~ BaseFrequency
## DW = 1.064, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

dwtest(res1 ~ BaseFrequency, data = dta)

## 
##  Durbin-Watson test
## 
## data:  res1 ~ BaseFrequency
## DW = 2.1854, p-value = 0.9963
## alternative hypothesis: true autocorrelation is greater than 0

check acf for residuals

acf.fnc(dta, group = "Subject", time = "Trial", x = "res1", plot = TRUE)

WEEK3 homework 2：Regression with dependent errors

吳靜芳

27 September 2020

trial by trial reaction time data

load packages

load data

query data info

see first 6 lines of data

check data structure

select a subset of variables for current example

set plot theme

word frequency effect

trial effect by subject

autocorrelation by individual

simple linear model ignoring trial dependence - least-squares

MLE of the model above

simple linear model with autocorrelation error using gls

compare models

collect residuals from the two models above

Durbin-Watson test of serial correlation

check acf for residuals