1 load packages

pacman::p_load(languageR, lattice, tidyverse, nlme, lmtest)

2 load data

data(lexdec, package="languageR")

3 see first 6 lines of data

head(lexdec)

##   Subject       RT Trial Sex NativeLanguage Correct PrevType PrevCorrect
## 1      A1 6.340359    23   F        English correct     word     correct
## 2      A1 6.308098    27   F        English correct  nonword     correct
## 3      A1 6.349139    29   F        English correct  nonword     correct
## 4      A1 6.186209    30   F        English correct     word     correct
## 5      A1 6.025866    32   F        English correct  nonword     correct
## 6      A1 6.180017    33   F        English correct     word     correct
##         Word Frequency FamilySize SynsetCount Length  Class FreqSingular
## 1        owl  4.859812  1.3862944   0.6931472      3 animal           54
## 2       mole  4.605170  1.0986123   1.9459101      4 animal           69
## 3     cherry  4.997212  0.6931472   1.6094379      6  plant           83
## 4       pear  4.727388  0.0000000   1.0986123      4  plant           44
## 5        dog  7.667626  3.1354942   2.0794415      3 animal         1233
## 6 blackberry  4.060443  0.6931472   1.3862944     10  plant           26
##   FreqPlural DerivEntropy Complex      rInfl meanRT SubjFreq meanSize
## 1         74       0.7912 simplex -0.3101549 6.3582     3.12   3.4758
## 2         30       0.6968 simplex  0.8145080 6.4150     2.40   2.9999
## 3         49       0.4754 simplex  0.5187938 6.3426     3.88   1.6278
## 4         68       0.0000 simplex -0.4274440 6.3353     4.52   1.9908
## 5        828       1.2129 simplex  0.3977961 6.2956     6.04   4.6429
## 6         31       0.3492 complex -0.1698990 6.3959     3.28   1.5831
##   meanWeight      BNCw      BNCc       BNCd BNCcRatio BNCdRatio
## 1     3.1806 12.057065  0.000000   6.175602  0.000000  0.512198
## 2     2.6112  5.738806  4.062251   2.850278  0.707856  0.496667
## 3     1.2081  5.716520  3.249801  12.588727  0.568493  2.202166
## 4     1.6114  2.050370  1.462410   7.363218  0.713242  3.591166
## 5     4.5167 74.838494 50.859385 241.561040  0.679589  3.227765
## 6     1.1365  1.270338  0.162490   1.187616  0.127911  0.934882

4 check data structure

str(lexdec)

## 'data.frame':    1659 obs. of  28 variables:
##  $ Subject       : Factor w/ 21 levels "A1","A2","A3",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ RT            : num  6.34 6.31 6.35 6.19 6.03 ...
##  $ Trial         : int  23 27 29 30 32 33 34 38 41 42 ...
##  $ Sex           : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
##  $ NativeLanguage: Factor w/ 2 levels "English","Other": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Correct       : Factor w/ 2 levels "correct","incorrect": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PrevType      : Factor w/ 2 levels "nonword","word": 2 1 1 2 1 2 2 1 1 2 ...
##  $ PrevCorrect   : Factor w/ 2 levels "correct","incorrect": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Word          : Factor w/ 79 levels "almond","ant",..: 55 47 20 58 25 12 71 69 62 1 ...
##  $ Frequency     : num  4.86 4.61 5 4.73 7.67 ...
##  $ FamilySize    : num  1.386 1.099 0.693 0 3.135 ...
##  $ SynsetCount   : num  0.693 1.946 1.609 1.099 2.079 ...
##  $ Length        : int  3 4 6 4 3 10 10 8 6 6 ...
##  $ Class         : Factor w/ 2 levels "animal","plant": 1 1 2 2 1 2 2 1 2 2 ...
##  $ FreqSingular  : int  54 69 83 44 1233 26 50 63 11 24 ...
##  $ FreqPlural    : int  74 30 49 68 828 31 65 47 9 42 ...
##  $ DerivEntropy  : num  0.791 0.697 0.475 0 1.213 ...
##  $ Complex       : Factor w/ 2 levels "complex","simplex": 2 2 2 2 2 1 1 2 2 2 ...
##  $ rInfl         : num  -0.31 0.815 0.519 -0.427 0.398 ...
##  $ meanRT        : num  6.36 6.42 6.34 6.34 6.3 ...
##  $ SubjFreq      : num  3.12 2.4 3.88 4.52 6.04 3.28 5.04 2.8 3.12 3.72 ...
##  $ meanSize      : num  3.48 3 1.63 1.99 4.64 ...
##  $ meanWeight    : num  3.18 2.61 1.21 1.61 4.52 ...
##  $ BNCw          : num  12.06 5.74 5.72 2.05 74.84 ...
##  $ BNCc          : num  0 4.06 3.25 1.46 50.86 ...
##  $ BNCd          : num  6.18 2.85 12.59 7.36 241.56 ...
##  $ BNCcRatio     : num  0 0.708 0.568 0.713 0.68 ...
##  $ BNCdRatio     : num  0.512 0.497 2.202 3.591 3.228 ...

lexdec$Trial <- as.numeric(unlist(apply(as.matrix(table(lexdec$Subject)), 1, seq)))

5 select a subset of variables for current example

dta_ex2 <- lexdec[,-2]

library(ggplot2)

6 word frequency effect

ggplot(data = dta_ex2, aes(x = Frequency, y = meanRT)) +
 geom_point() +
 stat_smooth(method="lm") +
 labs(x = "Word Frequency", y = "Reaction Time (transformed)") +
 theme_bw()

## `geom_smooth()` using formula 'y ~ x'

7 trial effect by subject

ggplot(data = dta_ex2, aes(x = Trial, y = meanRT)) +
 geom_point(pch =1, size = rel(.5)) +
 geom_line() +
 facet_wrap(~ Subject) +
 labs(x = "Trial ID", y = "Reaction Time (transformed)")

8 autocorrelation by individual

acf.fnc(dta_ex2, group = "Subject", time = "Trial", x = "meanRT", plot = TRUE)

9 simple linear model ignoring trial dependence - least-squares

summary(m0 <- lm(meanRT ~ Frequency, data = dta_ex2))

## 
## Call:
## lm(formula = meanRT ~ Frequency, data = dta_ex2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.125857 -0.031267 -0.008982  0.030641  0.223503 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.557861   0.005685  1153.5   <2e-16 ***
## Frequency   -0.037675   0.001156   -32.6   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06 on 1657 degrees of freedom
## Multiple R-squared:  0.3907, Adjusted R-squared:  0.3904 
## F-statistic:  1063 on 1 and 1657 DF,  p-value: < 2.2e-16

10 MLE of the model above

summary(m0a <- gls(meanRT ~ Frequency, data = dta_ex2, method = "ML"))

## Generalized least squares fit by maximum likelihood
##   Model: meanRT ~ Frequency 
##   Data: dta_ex2 
##        AIC       BIC   logLik
##   -4622.63 -4606.388 2314.315
## 
## Coefficients:
##                 Value   Std.Error   t-value p-value
## (Intercept)  6.557861 0.005685406 1153.4551       0
## Frequency   -0.037675 0.001155777  -32.5975       0
## 
##  Correlation: 
##           (Intr)
## Frequency -0.966
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.0987407 -0.5214005 -0.1497792  0.5109541  3.7270392 
## 
## Residual standard error: 0.05996797 
## Degrees of freedom: 1659 total; 1657 residual

11 simple linear model with autocorrelation error using gls

summary(m1 <- update(m0a, correlation = corAR1(form = ~ Trial | Subject)))

## Generalized least squares fit by maximum likelihood
##   Model: meanRT ~ Frequency 
##   Data: dta_ex2 
##         AIC       BIC   logLik
##   -4621.723 -4600.067 2314.862
## 
## Correlation Structure: AR(1)
##  Formula: ~Trial | Subject 
##  Parameter estimate(s):
##         Phi 
## -0.02596498 
## 
## Coefficients:
##                 Value   Std.Error   t-value p-value
## (Intercept)  6.558081 0.005674439 1155.7231       0
## Frequency   -0.037721 0.001155472  -32.6455       0
## 
##  Correlation: 
##           (Intr)
## Frequency -0.967
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.1000860 -0.5214532 -0.1503597  0.5107850  3.7265878 
## 
## Residual standard error: 0.05996819 
## Degrees of freedom: 1659 total; 1657 residual

12 compare models

anova(m0a, m1)

##     Model df       AIC       BIC   logLik   Test  L.Ratio p-value
## m0a     1  3 -4622.630 -4606.388 2314.315                        
## m1      2  4 -4621.723 -4600.067 2314.862 1 vs 2 1.092902  0.2958

13 collect residuals from the two models above

dta_ex2 <- dta_ex2 %>% mutate(res0 = rstandard(m0),
                      res1 = resid(m1, type = "normalized"))

14 Durbin-Watson test of serial correlation

dwtest(res0 ~ Frequency, data = dta_ex2)

## 
##  Durbin-Watson test
## 
## data:  res0 ~ Frequency
## DW = 2.044, p-value = 0.8152
## alternative hypothesis: true autocorrelation is greater than 0

15

dwtest(res1 ~ Frequency, data = dta_ex2)

## 
##  Durbin-Watson test
## 
## data:  res1 ~ Frequency
## DW = 1.9925, p-value = 0.4396
## alternative hypothesis: true autocorrelation is greater than 0

16 check acf for residuals

acf.fnc(dta_ex2, group = "Subject", time = "Trial", x = "res1", plot = TRUE)

Homework exercise 2

Hank Lee

2020-09-27

1 load packages

2 load data

3 see first 6 lines of data

4 check data structure

5 select a subset of variables for current example

6 word frequency effect

7 trial effect by subject

8 autocorrelation by individual

9 simple linear model ignoring trial dependence - least-squares

10 MLE of the model above

11 simple linear model with autocorrelation error using gls

12 compare models

13 collect residuals from the two models above

14 Durbin-Watson test of serial correlation

15

16 check acf for residuals

16.0.1