Tablet comparison script.

Set up some functions.

## Loading required package: Matrix
## Loading required package: Rcpp

Load data and preliminaries.

tab <- read.csv("~/Projects/tablet_norming/data/tabletstudyresults.csv")
tab$reaction.time <- as.numeric(as.character(tab$reaction.time))

## Warning: NAs introduced by coercion

tab$trial.type <- factor(tab$trial.type, c("rec","MEcontrol","MEexperimental"))

Now exclude test subjects from the tablet…

tab <- subset(tab, subject.id %in% 
                levels(tab$subject.id)[grepl("-",
                                             as.character(levels(tab$subject.id)))] &
                 trial.type != "filler")

Now add demographics.

demo <- read.csv("~/Projects/tablet_norming/data/tablet_demographics.csv")
demo$age <- demo$age_group
demo$age.group <- floor(demo$age)
tab <- merge(tab, demo)

tab <- subset(tab, exclude==0)

Look at age distribution.

qplot(age.group, 
  data=subset(demo,exclude==0))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-5

Reaction time distribution. Based on this plot, we prune at +/- 2SDs in log space (red lines).

m <- mean(log(tab$reaction.time)) 
s <- sd(log(tab$reaction.time))

qplot(reaction.time/1000,
      data=tab) + 
  geom_vline(xintercept=exp(m - 2*s)/1000, col="red",lty=2) + 
  geom_vline(xintercept=exp(m + 2*s)/1000, col="red",lty=2) + 
  scale_x_log10(breaks=c(1,2,5,10,20,50))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-6

mean(tab$reaction.time)

## [1] 2832

median(tab$reaction.time)

## [1] 1717

exp(m-(2*s))

## [1] 521.4

exp(m+(2*s))

## [1] 7550

mean(tab$reaction.time > exp(m + 2*s) |
                    tab$reaction.time < exp(m - 2*s))

## [1] 0.06359

tab$reaction.time[tab$reaction.time > exp(m + 2*s) |
                    tab$reaction.time < exp(m - 2*s)] <- NA

How many participants do we have?

length(unique(tab$subject.id))

## [1] 63

Actual analysis starts here

Aggregation.

mss <- ddply(tab, .(subject.id, trial.type, age.group), summarise,
      RT=mean(reaction.time, na.rm=TRUE), 
      correct=sum(response=="Y") / sum(response=="Y" | response=="N"))

ms <- ddply(mss, .(trial.type), summarise, 
            rt=mean(RT,na.rm=TRUE), 
            rt.cih=ci.high(RT,na.rm=TRUE),
            rt.cil=ci.low(RT,na.rm=TRUE),
            acc=mean(correct),
            acc.cih=ci.high(correct),
            acc.cil=ci.low(correct))

First basic plots. Accuracy.

qplot(trial.type, acc,
      fill=trial.type,
      ymin=acc-acc.cil,
      ymax=acc+acc.cih,
      geom=c("bar","linerange"),
      stat="identity",
      data=ms)

plot of chunk unnamed-chunk-9

RT.

qplot(trial.type, rt,
      fill=trial.type,
      ymin=rt-rt.cil,
      ymax=rt+rt.cih,
      geom=c("bar","linerange"),
      stat="identity",
      data=ms)

plot of chunk unnamed-chunk-10

Now break it down by age.

Accuracy:

msa <- ddply(mss, .(trial.type, age.group), summarise, 
            rt=mean(RT,na.rm=TRUE), 
            rt.cih=ci.high(RT),
            rt.cil=ci.low(RT),
            acc=mean(correct,na.rm=TRUE),
            acc.cih=ci.high(correct),
            acc.cil=ci.low(correct)) 

levels(msa$trial.type) <- c("Familiar Word","ME Control","ME Inference")
# pdf("~/Projects/tablet_norming/writeup/figures/accuracy.pdf",width=5,height=3)
qplot(age.group, acc,
      fill=trial.type,
      ymin=acc-acc.cil,
      ymax=acc+acc.cih,
      position=position_dodge(width=.9), 
      geom=c("bar","linerange"),
      stat="identity",
      data=msa) + 
  xlab("Age (Years)") + 
  ylab("Accuracy") + 
  scale_fill_discrete(name="Trial Type") + 
  geom_hline(yintercept=.5,lty=2)

plot of chunk unnamed-chunk-11

# dev.off()

and RT:

# pdf("~/Projects/tablet_norming/writeup/figures/rt.pdf",width=5,height=3)
qplot(age.group, rt/1000,
      fill=trial.type,
      ymin=rt/1000-rt.cil/1000,
      ymax=rt/1000+rt.cih/1000,
      position=position_dodge(width=.9), 
      geom=c("bar","linerange"),
      stat="identity",
      data=msa) + 
  xlab("Age (Years)") + 
  ylab("Reaction Time (s)") + 
  scale_fill_discrete(name="Trial Type")

plot of chunk unnamed-chunk-12

# dev.off()

Continuous age analysis

Accuracy continuous:

mss <- ddply(subset(tab,trial.type!="MEexperimental"), 
             .(subject.id, age), summarise,
      rt=mean(reaction.time, na.rm=TRUE), 
      correct=sum(response=="Y") / sum(response=="Y" | response=="N"))

qplot(age,correct, 
      data=mss) + 
  scale_y_continuous(breaks=seq(0,1,.25), limits=c(0,1.1)) + 
  geom_hline(yintercept=.5,lty=2) +
  geom_smooth()

## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-13

Now with all trial types. (Consolidate rec and ME control).

tab$trial.type.simple <- tab$trial.type
levels(tab$trial.type.simple) <- c("Familiar","Familiar","ME")
mss <- ddply(tab, 
             .(subject.id, trial.type.simple, age), summarise,
      rt=mean(reaction.time, na.rm=TRUE), 
      rt.cih=ci.high(reaction.time),
      rt.cil=ci.low(reaction.time),
      correct=sum(response=="Y") / sum(response=="Y" | response=="N"))

qplot(age,correct, col=trial.type.simple, 
      data=mss) + 
  scale_y_continuous(breaks=seq(0,1,.25), limits=c(0,1.1)) + 
  geom_hline(yintercept=.5,lty=2) +
  geom_smooth()

## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-14

and RT

# pdf("~/Projects/tablet_norming/writeup/figures/individuals.pdf",width=5,height=3)
qplot(age, rt/1000, geom="pointrange",
      ymin=rt/1000-rt.cil/1000,
      ymax=rt/1000+rt.cih/1000,
      data=subset(mss, trial.type.simple="Familiar")) + 
  ylim(c(0,7)) + 
  ylab("Reaction Time (s)") +
  xlab("Age Group (Years)") + 
  geom_smooth()

## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

# dev.off()

Item group analysis

Accuracy:

tab$easy.item <- tab$word %in% c("dog","cat","cookie","bottle",
                                 "cup","car","shoe","apple")
mss <- ddply(subset(tab,trial.type!="MEexperimental"), 
             .(subject.id, trial.type, easy.item, age.group), summarise,
      RT=mean(reaction.time, na.rm=TRUE), 
      correct=sum(response=="Y") / sum(response=="Y" | response=="N"))

msa <- ddply(mss, .(age.group, easy.item), summarise, 
            rt=mean(RT,na.rm=TRUE), 
            rt.cih=ci.high(RT),
            rt.cil=ci.low(RT),
            acc=mean(correct,na.rm=TRUE),
            acc.cih=ci.high(correct),
            acc.cil=ci.low(correct)) 

qplot(age.group, acc,
      fill=easy.item,
      ymin=acc-acc.cil,
      ymax=acc+acc.cih,
      position=position_dodge(width=.9), 
      geom=c("bar","linerange"),
      stat="identity",
      data=msa) + 
  geom_hline(yintercept=.5,lty=2)

plot of chunk unnamed-chunk-16

and RT:

qplot(age.group, rt,
      fill=easy.item,
      ymin=rt-rt.cil,
      ymax=rt+rt.cih,
      position=position_dodge(width=.9), 
      geom=c("bar","linerange"),
      stat="identity",
      data=msa)

plot of chunk unnamed-chunk-17

Item analysis

mssi <- ddply(tab, .(subject.id, trial.type, word), summarise,
      RT=mean(reaction.time, na.rm=TRUE), 
      correct=sum(response=="Y") / sum(response=="Y" | response=="N"))

msi <- ddply(mssi, .(trial.type, word), summarise, 
            rt=mean(RT,na.rm=TRUE), 
            rt.cih=ci.high(RT),
            rt.cil=ci.low(RT),
            acc=mean(correct),
            acc.cih=ci.high(correct),
            acc.cil=ci.low(correct)) 

msi$word <- reorder(msi$word, msi$acc)

qplot(word, acc,
       ymin=acc-acc.cil,
      ymax=acc+acc.cih,
      geom="pointrange",
      data=msi) + 
  facet_wrap(~trial.type,scales="free_x") + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=.5))

plot of chunk unnamed-chunk-18

and RT:

msi$word <- reorder(msi$word, msi$rt)

qplot(word, rt,
       ymin=rt-rt.cil,
      ymax=rt+rt.cih,
      geom="pointrange",
      data=msi) + 
  facet_wrap(~trial.type,scales="free_x") + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=.5))

plot of chunk unnamed-chunk-19

Number of trials

Nearly everyone sticks it out to the end!

tab$trial.number <- as.numeric(as.character(tab$trial.number))
mst <- ddply(tab, .(subject.id,age.group), summarise,
      max.trial=max(trial.number))

qplot(max.trial, facets=~age.group, data=mst)

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-20

and means across groups.

mstm <- ddply(mst, .(age.group), summarise, 
              trials = mean(max.trial), 
              trials.cih = ci.high(max.trial), 
              trials.cil = ci.low(max.trial))
qplot(age.group, trials,
      fill=factor(age.group),
      ymin=trials-trials.cil,
      ymax=trials+trials.cih,
      position=position_dodge(width=.9), 
      geom=c("bar","linerange"),
      stat="identity",
      data=mstm) + 
  geom_hline(yintercept=28, lty=2)

plot of chunk unnamed-chunk-21

and RT means across trials:

mst <- ddply(tab, .(trial.number, age.group), summarise,
      RT=mean(reaction.time, na.rm=TRUE))

qplot(trial.number, RT, data=mst) + 
  geom_smooth(method="lm")

plot of chunk unnamed-chunk-22

Statisics

library(xtable)
rt.mod <- lmer(reaction.time ~ trial.number + age * trial.type + 
       (trial.type | subject.id), 
     data=tab)
summary(rt.mod)

## Linear mixed model fit by REML ['lmerMod']
## Formula: reaction.time ~ trial.number + age * trial.type + (trial.type |  
##     subject.id)
##    Data: tab
## 
## REML criterion at convergence: 22152
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.912 -0.503 -0.206  0.217  5.842 
## 
## Random effects:
##  Groups     Name                     Variance Std.Dev. Corr     
##  subject.id (Intercept)               77778   279               
##             trial.typeMEcontrol       27008   164      0.93     
##             trial.typeMEexperimental 151591   389      0.62 0.86
##  Residual                            850125   922               
## Number of obs: 1340, groups: subject.id, 63
## 
## Fixed effects:
##                              Estimate Std. Error t value
## (Intercept)                   3211.30     215.32   14.91
## trial.number                    -6.93       3.13   -2.22
## age                           -383.21      61.19   -6.26
## trial.typeMEcontrol            261.28     245.95    1.06
## trial.typeMEexperimental       848.77     294.57    2.88
## age:trial.typeMEcontrol        -69.84      70.76   -0.99
## age:trial.typeMEexperimental   -82.37      85.06   -0.97
## 
## Correlation of Fixed Effects:
##               (Intr) trl.nm age    trl.typMEc trl.typMEx ag:trl.typMEc
## trial.numbr   -0.170                                                  
## age           -0.943 -0.036                                           
## trl.typMEcn   -0.411 -0.003  0.396                                    
## trl.typMExp   -0.270 -0.010  0.259  0.556                             
## ag:trl.typMEc  0.391  0.003 -0.399 -0.964     -0.536                  
## ag:trl.typMEx  0.254  0.004 -0.256 -0.534     -0.963      0.554

xtable(summary(rt.mod)$coef)

## % latex table generated in R 3.0.2 by xtable 1.7-3 package
## % Wed Jul 30 21:55:23 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrr}
##   \hline
##  & Estimate & Std. Error & t value \\ 
##   \hline
## (Intercept) & 3211.30 & 215.32 & 14.91 \\ 
##   trial.number & -6.93 & 3.13 & -2.22 \\ 
##   age & -383.21 & 61.19 & -6.26 \\ 
##   trial.typeMEcontrol & 261.28 & 245.95 & 1.06 \\ 
##   trial.typeMEexperimental & 848.77 & 294.57 & 2.88 \\ 
##   age:trial.typeMEcontrol & -69.84 & 70.76 & -0.99 \\ 
##   age:trial.typeMEexperimental & -82.37 & 85.06 & -0.97 \\ 
##    \hline
## \end{tabular}
## \end{table}

tab$acc <- tab$response=="Y"
acc.mod <- lmer(acc ~ age.group * trial.type + 
       (trial.type | subject.id), 
       family="binomial",
     data=tab)

## Warning: calling lmer with 'family' is deprecated; please use glmer() instead
## Warning: Model failed to converge with max|grad| = 1.42978 (tol = 0.001)

summary(acc.mod)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial ( logit )
## Formula: acc ~ age.group * trial.type + (trial.type | subject.id)
##    Data: tab
## Control: 
## structure(list(optimizer = c("bobyqa", "Nelder_Mead"), calc.derivs = TRUE,  
##     use.last.params = FALSE, restart_edge = FALSE, boundary.tol = 1e-05,  
##     tolPwrss = 1e-07, compDev = TRUE, checkControl = structure(list( 
##         check.nobs.vs.rankZ = "warningSmall", check.nobs.vs.nlev = "stop",  
##         check.nlev.gtreq.5 = "ignore", check.nlev.gtr.1 = "stop",  
##         check.nobs.vs.nRE = "stop", check.rankX = "message+drop.cols",  
##         check.scaleX = "warning", check.formula.LHS = "stop"), .Names = c("check.nobs.vs.rankZ",  
##     "check.nobs.vs.nlev", "check.nlev.gtreq.5", "check.nlev.gtr.1",  
##     "check.nobs.vs.nRE", "check.rankX", "check.scaleX", "check.formula.LHS" 
##     )), checkConv = structure(list(check.conv.grad = structure(list( 
##         action = "warning", tol = 0.001, relTol = NULL), .Names = c("action",  
##     "tol", "relTol")), check.conv.singular = structure(list(action = "ignore",  
##         tol = 1e-04), .Names = c("action", "tol")), check.conv.hess = structure(list( 
##         action = "warning", tol = 1e-06), .Names = c("action",  
##     "tol"))), .Names = c("check.conv.grad", "check.conv.singular",  
##     "check.conv.hess")), optCtrl = list()), .Names = c("optimizer",  
## "calc.derivs", "use.last.params", "restart_edge", "boundary.tol",  
## "tolPwrss", "compDev", "checkControl", "checkConv", "optCtrl" 
## ), class = c("glmerControl", "merControl"))
## 
##      AIC      BIC   logLik deviance df.resid 
##    731.3    794.5   -353.7    707.3     1419 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -9.106  0.070  0.141  0.289  1.822 
## 
## Random effects:
##  Groups     Name                     Variance Std.Dev. Corr       
##  subject.id (Intercept)              2.740    1.655               
##             trial.typeMEcontrol      1.229    1.109    -0.86      
##             trial.typeMEexperimental 0.818    0.904    -0.99  0.90
## Number of obs: 1431, groups: subject.id, 63
## 
## Fixed effects:
##                                    Estimate Std. Error z value Pr(>|z|)
## (Intercept)                           1.000      0.850    1.18  0.23936
## age.group                             1.201      0.360    3.34  0.00084
## trial.typeMEcontrol                  -1.434      0.868   -1.65  0.09868
## trial.typeMEexperimental             -1.809      0.746   -2.42  0.01537
## age.group:trial.typeMEcontrol         0.378      0.415    0.91  0.36257
## age.group:trial.typeMEexperimental   -0.197      0.338   -0.58  0.55969
##                                       
## (Intercept)                           
## age.group                          ***
## trial.typeMEcontrol                .  
## trial.typeMEexperimental           *  
## age.group:trial.typeMEcontrol         
## age.group:trial.typeMEexperimental    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                   (Intr) ag.grp trl.typMEc trl.typMEx ag.grp:trl.typMEc
## age.group         -0.792                                               
## trl.typMEcn       -0.749  0.599                                        
## trl.typMExp       -0.862  0.693  0.707                                 
## ag.grp:trl.typMEc  0.514 -0.713 -0.780     -0.494                      
## ag.grp:trl.typMEx  0.629 -0.881 -0.523     -0.761      0.678

xtable(summary(acc.mod)$coef)

## % latex table generated in R 3.0.2 by xtable 1.7-3 package
## % Wed Jul 30 21:55:34 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
##   \hline
##  & Estimate & Std. Error & z value & Pr($>$$|$z$|$) \\ 
##   \hline
## (Intercept) & 1.00 & 0.85 & 1.18 & 0.24 \\ 
##   age.group & 1.20 & 0.36 & 3.34 & 0.00 \\ 
##   trial.typeMEcontrol & -1.43 & 0.87 & -1.65 & 0.10 \\ 
##   trial.typeMEexperimental & -1.81 & 0.75 & -2.42 & 0.02 \\ 
##   age.group:trial.typeMEcontrol & 0.38 & 0.42 & 0.91 & 0.36 \\ 
##   age.group:trial.typeMEexperimental & -0.20 & 0.34 & -0.58 & 0.56 \\ 
##    \hline
## \end{tabular}
## \end{table}

tab$trial.number <- as.numeric(as.character(tab$trial.number))
mst <- ddply(tab, .(subject.id,age.group), summarise,
      max.trial=max(trial.number))

msta <- ddply(mst, .(age.group), summarise,
      finished = mean(max.trial==28), 
      num.trials = mean(max.trial))
# xtable(msta)

Reliability

Fairest analysis – split halves where 1 half is the odd-numbered Familiar trials and even numbered ME trials, and the other half the remaining trials Control trials.

library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, desc, failwith, id, mutate, summarise, summarize
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

bytrial <- tab %>%
  select(age.group,subject.id,trial.type,trial.number,acc,reaction.time) %>%
  group_by(age.group,trial.type,subject.id) %>%
  filter(trial.type != "MEexperimental") %>%
  arrange(trial.number) %>%
  mutate(trial.order = 1:length(trial.number),
    even.half = trial.order %% 2 == 0,
    first.half = (even.half & trial.type == "rec" ) | 
      (!even.half & trial.type != "rec" ))

type.trials <- bytrial %>%
  group_by(age.group,subject.id,trial.type,add=FALSE) %>%
  summarise(acc = mean(acc),
            rt = na.median(reaction.time))
type.trials <- reshape(type.trials, idvar = c("subject.id","age.group"),
                             timevar = "trial.type", direction="wide")

half.trials <- bytrial %>%
  group_by(age.group,subject.id,first.half,add=FALSE) %>%
  summarise(acc = mean(acc),
            rt = na.median(reaction.time))
half.trials <- reshape(half.trials, idvar = c("subject.id","age.group"),
                             timevar = "first.half", direction="wide")

Statistics

reliability.half <- half.trials %>%
  group_by(age.group) %>%
  summarise(acc.corr = cor(acc.TRUE,acc.FALSE,use="complete.obs"),
            acc.corr.t = cor.test(acc.TRUE,acc.FALSE,
                                  use="complete.obs")$statistic,
            acc.corr.p = cor.test(acc.TRUE,acc.FALSE,
                                  use="complete.obs")$p.value,
            rt.corr = cor(rt.TRUE,rt.FALSE,use="complete.obs"),
            rt.corr.t = cor.test(rt.TRUE,rt.FALSE,
                                 use="complete.obs")$statistic,
            rt.corr.p = cor.test(rt.TRUE,rt.FALSE,
                                 use="complete.obs")$p.value)

reliability.type <- type.trials %>%
  group_by(age.group) %>%
  summarise(acc.corr = cor(acc.rec,acc.MEcontrol,use="complete.obs"),
            acc.corr.t = cor.test(acc.rec,acc.MEcontrol,
                                  use="complete.obs")$statistic,
            acc.corr.p = cor.test(acc.rec,acc.MEcontrol,
                                  use="complete.obs")$p.value,
            rt.corr = cor(rt.rec,rt.MEcontrol,use="complete.obs"),
            rt.corr.t = cor.test(rt.rec,rt.MEcontrol,
                                 use="complete.obs")$statistic,
            rt.corr.p = cor.test(rt.rec,rt.MEcontrol,
                                 use="complete.obs")$p.value)

print(reliability.half)

## Source: local data frame [4 x 7]
## 
##   age.group acc.corr acc.corr.t acc.corr.p rt.corr rt.corr.t rt.corr.p
## 1         1  0.39672     1.2965    0.22704  0.4905    1.6884  0.125594
## 2         2  0.39197     1.4759    0.16572 -0.2453   -0.8767  0.397861
## 3         3 -0.09685    -0.4352    0.66811  0.4199    2.0690  0.051717
## 4         4  0.42857     1.7748    0.09766  0.6825    3.4938  0.003579

print(reliability.type)

## Source: local data frame [4 x 7]
## 
##   age.group acc.corr acc.corr.t acc.corr.p rt.corr rt.corr.t rt.corr.p
## 1         1   0.4482     1.5041    0.16681  0.4973     1.621  0.143631
## 2         2   0.2984     1.0831    0.30006  0.4218     1.612  0.132990
## 3         3  -0.1029    -0.4625    0.64872  0.5867     3.240  0.004104
## 4         4   0.5375     2.3848    0.03178  0.7292     3.987  0.001350

TAB_COMP

Tablet comparison script.

Actual analysis starts here

Now break it down by age.

Continuous age analysis

Item group analysis

Item analysis

Number of trials

Statisics

Reliability