Tablet comparison script.

Set up some functions.

## Loading required package: Matrix
## Loading required package: Rcpp

Load data and preliminaries.

tab <- read.csv("~/Projects/tablet_norming/data/tabletstudyresults.csv")
tab$reaction.time <- as.numeric(as.character(tab$reaction.time))
## Warning: NAs introduced by coercion
tab$trial.type <- factor(tab$trial.type, c("rec","MEcontrol","MEexperimental"))

Now exclude test subjects from the tablet…

tab <- subset(tab, subject.id %in% 
                levels(tab$subject.id)[grepl("-",
                                             as.character(levels(tab$subject.id)))] &
                 trial.type != "filler")

Now add demographics.

demo <- read.csv("~/Projects/tablet_norming/data/tablet_demographics.csv")
demo$age <- demo$age_group
demo$age.group <- floor(demo$age)
tab <- merge(tab, demo)

tab <- subset(tab, exclude==0)

Look at age distribution.

qplot(age.group, 
  data=subset(demo,exclude==0))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-5

Reaction time distribution. Based on this plot, we prune at +/- 2SDs in log space (red lines).

m <- mean(log(tab$reaction.time)) 
s <- sd(log(tab$reaction.time))

qplot(reaction.time/1000,
      data=tab) + 
  geom_vline(xintercept=exp(m - 2*s)/1000, col="red",lty=2) + 
  geom_vline(xintercept=exp(m + 2*s)/1000, col="red",lty=2) + 
  scale_x_log10(breaks=c(1,2,5,10,20,50))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-6

tab$reaction.time[tab$reaction.time > exp(m + 2*s) |
                    tab$reaction.time < exp(m - 2*s)] <- NA

How many participants do we have?

length(unique(tab$subject.id))
## [1] 63

Actual analysis starts here

Aggregation.

mss <- ddply(tab, .(subject.id, trial.type, age.group), summarise,
      RT=mean(reaction.time, na.rm=TRUE), 
      correct=sum(response=="Y") / sum(response=="Y" | response=="N"))

ms <- ddply(mss, .(trial.type), summarise, 
            rt=mean(RT,na.rm=TRUE), 
            rt.cih=ci.high(RT,na.rm=TRUE),
            rt.cil=ci.low(RT,na.rm=TRUE),
            acc=mean(correct),
            acc.cih=ci.high(correct),
            acc.cil=ci.low(correct))

First basic plots. Accuracy.

qplot(trial.type, acc,
      fill=trial.type,
      ymin=acc-acc.cil,
      ymax=acc+acc.cih,
      geom=c("bar","linerange"),
      stat="identity",
      data=ms)

plot of chunk unnamed-chunk-9

RT.

qplot(trial.type, rt,
      fill=trial.type,
      ymin=rt-rt.cil,
      ymax=rt+rt.cih,
      geom=c("bar","linerange"),
      stat="identity",
      data=ms)

plot of chunk unnamed-chunk-10

Now break it down by age.

Accuracy:

msa <- ddply(mss, .(trial.type, age.group), summarise, 
            rt=mean(RT,na.rm=TRUE), 
            rt.cih=ci.high(RT),
            rt.cil=ci.low(RT),
            acc=mean(correct,na.rm=TRUE),
            acc.cih=ci.high(correct),
            acc.cil=ci.low(correct)) 

levels(msa$trial.type) <- c("Familiar Word","ME Control","ME Inference")
# pdf("~/Projects/tablet_norming/writeup/figures/accuracy.pdf",width=5,height=3)
qplot(age.group, acc,
      fill=trial.type,
      ymin=acc-acc.cil,
      ymax=acc+acc.cih,
      position=position_dodge(width=.9), 
      geom=c("bar","linerange"),
      stat="identity",
      data=msa) + 
  xlab("Age (Years)") + 
  ylab("Accuracy") + 
  scale_fill_discrete(name="Trial Type") + 
  geom_hline(yintercept=.5,lty=2)

plot of chunk unnamed-chunk-11

# dev.off()

and RT:

# pdf("~/Projects/tablet_norming/writeup/figures/rt.pdf",width=5,height=3)
qplot(age.group, rt/1000,
      fill=trial.type,
      ymin=rt/1000-rt.cil/1000,
      ymax=rt/1000+rt.cih/1000,
      position=position_dodge(width=.9), 
      geom=c("bar","linerange"),
      stat="identity",
      data=msa) + 
  xlab("Age (Years)") + 
  ylab("Reaction Time (s)") + 
  scale_fill_discrete(name="Trial Type") 

plot of chunk unnamed-chunk-12

# dev.off()

Continuous age analysis

Accuracy continuous:

mss <- ddply(subset(tab,trial.type!="MEexperimental"), 
             .(subject.id, age), summarise,
      rt=mean(reaction.time, na.rm=TRUE), 
      correct=sum(response=="Y") / sum(response=="Y" | response=="N"))

qplot(age,correct, 
      data=mss) + 
  scale_y_continuous(breaks=seq(0,1,.25), limits=c(0,1.1)) + 
  geom_hline(yintercept=.5,lty=2) +
  geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-13

Now with all trial types. (Consolidate rec and ME control).

tab$trial.type.simple <- tab$trial.type
levels(tab$trial.type.simple) <- c("Familiar","Familiar","ME")
mss <- ddply(tab, 
             .(subject.id, trial.type.simple, age), summarise,
      rt=mean(reaction.time, na.rm=TRUE), 
      rt.cih=ci.high(reaction.time),
      rt.cil=ci.low(reaction.time),
      correct=sum(response=="Y") / sum(response=="Y" | response=="N"))

qplot(age,correct, col=trial.type.simple, 
      data=mss) + 
  scale_y_continuous(breaks=seq(0,1,.25), limits=c(0,1.1)) + 
  geom_hline(yintercept=.5,lty=2) +
  geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-14

and RT

# pdf("~/Projects/tablet_norming/writeup/figures/individuals.pdf",width=5,height=3)
qplot(age, rt/1000, geom="pointrange",
      ymin=rt/1000-rt.cil/1000,
      ymax=rt/1000+rt.cih/1000,
      data=subset(mss, trial.type.simple="Familiar")) + 
  ylim(c(0,7)) + 
  ylab("Reaction Time (s)") +
  xlab("Age Group (Years)") + 
  geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-15

# dev.off()

RT

Item group analysis

Accuracy:

tab$easy.item <- tab$word %in% c("dog","cat","cookie","bottle",
                                 "cup","car","shoe","apple")
mss <- ddply(subset(tab,trial.type!="MEexperimental"), 
             .(subject.id, trial.type, easy.item, age.group), summarise,
      RT=mean(reaction.time, na.rm=TRUE), 
      correct=sum(response=="Y") / sum(response=="Y" | response=="N"))

msa <- ddply(mss, .(age.group, easy.item), summarise, 
            rt=mean(RT,na.rm=TRUE), 
            rt.cih=ci.high(RT),
            rt.cil=ci.low(RT),
            acc=mean(correct,na.rm=TRUE),
            acc.cih=ci.high(correct),
            acc.cil=ci.low(correct)) 

qplot(age.group, acc,
      fill=easy.item,
      ymin=acc-acc.cil,
      ymax=acc+acc.cih,
      position=position_dodge(width=.9), 
      geom=c("bar","linerange"),
      stat="identity",
      data=msa) + 
  geom_hline(yintercept=.5,lty=2)

plot of chunk unnamed-chunk-16

and RT:

qplot(age.group, rt,
      fill=easy.item,
      ymin=rt-rt.cil,
      ymax=rt+rt.cih,
      position=position_dodge(width=.9), 
      geom=c("bar","linerange"),
      stat="identity",
      data=msa)

plot of chunk unnamed-chunk-17

Item analysis

mssi <- ddply(tab, .(subject.id, trial.type, word), summarise,
      RT=mean(reaction.time, na.rm=TRUE), 
      correct=sum(response=="Y") / sum(response=="Y" | response=="N"))

msi <- ddply(mssi, .(trial.type, word), summarise, 
            rt=mean(RT,na.rm=TRUE), 
            rt.cih=ci.high(RT),
            rt.cil=ci.low(RT),
            acc=mean(correct),
            acc.cih=ci.high(correct),
            acc.cil=ci.low(correct)) 

msi$word <- reorder(msi$word, msi$acc)

qplot(word, acc,
       ymin=acc-acc.cil,
      ymax=acc+acc.cih,
      geom="pointrange",
      data=msi) + 
  facet_wrap(~trial.type,scales="free_x") + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=.5))

plot of chunk unnamed-chunk-18

and RT:

msi$word <- reorder(msi$word, msi$rt)

qplot(word, rt,
       ymin=rt-rt.cil,
      ymax=rt+rt.cih,
      geom="pointrange",
      data=msi) + 
  facet_wrap(~trial.type,scales="free_x") + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=.5))

plot of chunk unnamed-chunk-19

Number of trials

Nearly everyone sticks it out to the end!

tab$trial.number <- as.numeric(as.character(tab$trial.number))
mst <- ddply(tab, .(subject.id,age.group), summarise,
      max.trial=max(trial.number))

qplot(max.trial, facets=~age.group, data=mst)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-20

and means across groups.

mstm <- ddply(mst, .(age.group), summarise, 
              trials = mean(max.trial), 
              trials.cih = ci.high(max.trial), 
              trials.cil = ci.low(max.trial))
qplot(age.group, trials,
      fill=factor(age.group),
      ymin=trials-trials.cil,
      ymax=trials+trials.cih,
      position=position_dodge(width=.9), 
      geom=c("bar","linerange"),
      stat="identity",
      data=mstm) + 
  geom_hline(yintercept=28, lty=2)

plot of chunk unnamed-chunk-21

and RT means across trials:

mst <- ddply(tab, .(trial.number, age.group), summarise,
      RT=mean(reaction.time, na.rm=TRUE))

qplot(trial.number, RT, data=mst) + 
  geom_smooth(method="lm")

plot of chunk unnamed-chunk-22

Statisics

rt.mod <- lmer(reaction.time ~ trial.number + age * trial.type + 
       (trial.type | subject.id), 
     data=tab)
summary(rt.mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: reaction.time ~ trial.number + age * trial.type + (trial.type |  
##     subject.id)
##    Data: tab
## 
## REML criterion at convergence: 22152
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.912 -0.503 -0.206  0.217  5.842 
## 
## Random effects:
##  Groups     Name                     Variance Std.Dev. Corr     
##  subject.id (Intercept)               77778   279               
##             trial.typeMEcontrol       27008   164      0.93     
##             trial.typeMEexperimental 151591   389      0.62 0.86
##  Residual                            850125   922               
## Number of obs: 1340, groups:  subject.id, 63
## 
## Fixed effects:
##                              Estimate Std. Error t value
## (Intercept)                   3211.30     215.32   14.91
## trial.number                    -6.93       3.13   -2.22
## age                           -383.21      61.19   -6.26
## trial.typeMEcontrol            261.28     245.95    1.06
## trial.typeMEexperimental       848.77     294.57    2.88
## age:trial.typeMEcontrol        -69.84      70.76   -0.99
## age:trial.typeMEexperimental   -82.37      85.06   -0.97
## 
## Correlation of Fixed Effects:
##               (Intr) trl.nm age    trl.typMEc trl.typMEx ag:trl.typMEc
## trial.numbr   -0.170                                                  
## age           -0.943 -0.036                                           
## trl.typMEcn   -0.411 -0.003  0.396                                    
## trl.typMExp   -0.270 -0.010  0.259  0.556                             
## ag:trl.typMEc  0.391  0.003 -0.399 -0.964     -0.536                  
## ag:trl.typMEx  0.254  0.004 -0.256 -0.534     -0.963      0.554
tab$acc <- tab$response=="Y"
acc.mod <- lmer(acc ~ trial.number + age * trial.type + 
       (trial.type | subject.id), 
     data=tab)
## Warning: Model failed to converge with max|grad| = 0.205757 (tol = 0.002, component 3)
## Warning: Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
summary(acc.mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: acc ~ trial.number + age * trial.type + (trial.type | subject.id)
##    Data: tab
## 
## REML criterion at convergence: 376.8
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.890 -0.008  0.150  0.398  2.491 
## 
## Random effects:
##  Groups     Name                     Variance Std.Dev. Corr     
##  subject.id (Intercept)              0.00000  0.000             
##             trial.typeMEcontrol      0.00756  0.087     NaN     
##             trial.typeMEexperimental 0.01870  0.137     NaN 0.80
##  Residual                            0.06914  0.263             
## Number of obs: 1431, groups:  subject.id, 63
## 
## Fixed effects:
##                               Estimate Std. Error t value
## (Intercept)                   0.750106   0.045504   16.48
## trial.number                  0.001048   0.000857    1.22
## age                           0.056321   0.012975    4.34
## trial.typeMEcontrol          -0.127173   0.072106   -1.76
## trial.typeMEexperimental     -0.426444   0.085673   -4.98
## age:trial.typeMEcontrol       0.032416   0.021113    1.54
## age:trial.typeMEexperimental  0.089364   0.025119    3.56
## 
## Correlation of Fixed Effects:
##               (Intr) trl.nm age    trl.typMEc trl.typMEx ag:trl.typMEc
## trial.numbr   -0.211                                                  
## age           -0.927 -0.054                                           
## trl.typMEcn   -0.602 -0.006  0.593                                    
## trl.typMExp   -0.505 -0.010  0.499  0.609                             
## ag:trl.typMEc  0.576  0.004 -0.613 -0.960     -0.584                  
## ag:trl.typMEx  0.483  0.007 -0.515 -0.584     -0.959      0.609
tab$trial.number <- as.numeric(as.character(tab$trial.number))
mst <- ddply(tab, .(subject.id,age.group), summarise,
      max.trial=max(trial.number))

msta <- ddply(mst, .(age.group), summarise,
      finished = mean(max.trial==28), 
      num.trials = mean(max.trial))
# xtable(msta)