Set up some functions.
## Loading required package: Matrix
## Loading required package: Rcpp
Load data and preliminaries.
tab <- read.csv("~/Projects/tablet_norming/data/tabletstudyresults.csv")
tab$reaction.time <- as.numeric(as.character(tab$reaction.time))
## Warning: NAs introduced by coercion
tab$trial.type <- factor(tab$trial.type, c("rec","MEcontrol","MEexperimental"))
Now exclude test subjects from the tablet…
tab <- subset(tab, subject.id %in%
levels(tab$subject.id)[grepl("-",
as.character(levels(tab$subject.id)))] &
trial.type != "filler")
Now add demographics.
demo <- read.csv("~/Projects/tablet_norming/data/tablet_demographics.csv")
demo$age <- demo$age_group
demo$age.group <- floor(demo$age)
tab <- merge(tab, demo)
tab <- subset(tab, exclude==0)
Look at age distribution.
qplot(age.group,
data=subset(demo,exclude==0))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Reaction time distribution. Based on this plot, we prune at +/- 2SDs in log space (red lines).
m <- mean(log(tab$reaction.time))
s <- sd(log(tab$reaction.time))
qplot(reaction.time/1000,
data=tab) +
geom_vline(xintercept=exp(m - 2*s)/1000, col="red",lty=2) +
geom_vline(xintercept=exp(m + 2*s)/1000, col="red",lty=2) +
scale_x_log10(breaks=c(1,2,5,10,20,50))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
mean(tab$reaction.time)
## [1] 2832
median(tab$reaction.time)
## [1] 1717
exp(m-(2*s))
## [1] 521.4
exp(m+(2*s))
## [1] 7550
mean(tab$reaction.time > exp(m + 2*s) |
tab$reaction.time < exp(m - 2*s))
## [1] 0.06359
tab$reaction.time[tab$reaction.time > exp(m + 2*s) |
tab$reaction.time < exp(m - 2*s)] <- NA
How many participants do we have?
length(unique(tab$subject.id))
## [1] 63
Aggregation.
mss <- ddply(tab, .(subject.id, trial.type, age.group), summarise,
RT=mean(reaction.time, na.rm=TRUE),
correct=sum(response=="Y") / sum(response=="Y" | response=="N"))
ms <- ddply(mss, .(trial.type), summarise,
rt=mean(RT,na.rm=TRUE),
rt.cih=ci.high(RT,na.rm=TRUE),
rt.cil=ci.low(RT,na.rm=TRUE),
acc=mean(correct),
acc.cih=ci.high(correct),
acc.cil=ci.low(correct))
First basic plots. Accuracy.
qplot(trial.type, acc,
fill=trial.type,
ymin=acc-acc.cil,
ymax=acc+acc.cih,
geom=c("bar","linerange"),
stat="identity",
data=ms)
RT.
qplot(trial.type, rt,
fill=trial.type,
ymin=rt-rt.cil,
ymax=rt+rt.cih,
geom=c("bar","linerange"),
stat="identity",
data=ms)
Accuracy:
msa <- ddply(mss, .(trial.type, age.group), summarise,
rt=mean(RT,na.rm=TRUE),
rt.cih=ci.high(RT),
rt.cil=ci.low(RT),
acc=mean(correct,na.rm=TRUE),
acc.cih=ci.high(correct),
acc.cil=ci.low(correct))
levels(msa$trial.type) <- c("Familiar Word","ME Control","ME Inference")
# pdf("~/Projects/tablet_norming/writeup/figures/accuracy.pdf",width=5,height=3)
qplot(age.group, acc,
fill=trial.type,
ymin=acc-acc.cil,
ymax=acc+acc.cih,
position=position_dodge(width=.9),
geom=c("bar","linerange"),
stat="identity",
data=msa) +
xlab("Age (Years)") +
ylab("Accuracy") +
scale_fill_discrete(name="Trial Type") +
geom_hline(yintercept=.5,lty=2)
# dev.off()
and RT:
# pdf("~/Projects/tablet_norming/writeup/figures/rt.pdf",width=5,height=3)
qplot(age.group, rt/1000,
fill=trial.type,
ymin=rt/1000-rt.cil/1000,
ymax=rt/1000+rt.cih/1000,
position=position_dodge(width=.9),
geom=c("bar","linerange"),
stat="identity",
data=msa) +
xlab("Age (Years)") +
ylab("Reaction Time (s)") +
scale_fill_discrete(name="Trial Type")
# dev.off()
Accuracy continuous:
mss <- ddply(subset(tab,trial.type!="MEexperimental"),
.(subject.id, age), summarise,
rt=mean(reaction.time, na.rm=TRUE),
correct=sum(response=="Y") / sum(response=="Y" | response=="N"))
qplot(age,correct,
data=mss) +
scale_y_continuous(breaks=seq(0,1,.25), limits=c(0,1.1)) +
geom_hline(yintercept=.5,lty=2) +
geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
Now with all trial types. (Consolidate rec and ME control).
tab$trial.type.simple <- tab$trial.type
levels(tab$trial.type.simple) <- c("Familiar","Familiar","ME")
mss <- ddply(tab,
.(subject.id, trial.type.simple, age), summarise,
rt=mean(reaction.time, na.rm=TRUE),
rt.cih=ci.high(reaction.time),
rt.cil=ci.low(reaction.time),
correct=sum(response=="Y") / sum(response=="Y" | response=="N"))
qplot(age,correct, col=trial.type.simple,
data=mss) +
scale_y_continuous(breaks=seq(0,1,.25), limits=c(0,1.1)) +
geom_hline(yintercept=.5,lty=2) +
geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
and RT
# pdf("~/Projects/tablet_norming/writeup/figures/individuals.pdf",width=5,height=3)
qplot(age, rt/1000, geom="pointrange",
ymin=rt/1000-rt.cil/1000,
ymax=rt/1000+rt.cih/1000,
data=subset(mss, trial.type.simple="Familiar")) +
ylim(c(0,7)) +
ylab("Reaction Time (s)") +
xlab("Age Group (Years)") +
geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
# dev.off()
RT
Accuracy:
tab$easy.item <- tab$word %in% c("dog","cat","cookie","bottle",
"cup","car","shoe","apple")
mss <- ddply(subset(tab,trial.type!="MEexperimental"),
.(subject.id, trial.type, easy.item, age.group), summarise,
RT=mean(reaction.time, na.rm=TRUE),
correct=sum(response=="Y") / sum(response=="Y" | response=="N"))
msa <- ddply(mss, .(age.group, easy.item), summarise,
rt=mean(RT,na.rm=TRUE),
rt.cih=ci.high(RT),
rt.cil=ci.low(RT),
acc=mean(correct,na.rm=TRUE),
acc.cih=ci.high(correct),
acc.cil=ci.low(correct))
qplot(age.group, acc,
fill=easy.item,
ymin=acc-acc.cil,
ymax=acc+acc.cih,
position=position_dodge(width=.9),
geom=c("bar","linerange"),
stat="identity",
data=msa) +
geom_hline(yintercept=.5,lty=2)
and RT:
qplot(age.group, rt,
fill=easy.item,
ymin=rt-rt.cil,
ymax=rt+rt.cih,
position=position_dodge(width=.9),
geom=c("bar","linerange"),
stat="identity",
data=msa)
mssi <- ddply(tab, .(subject.id, trial.type, word), summarise,
RT=mean(reaction.time, na.rm=TRUE),
correct=sum(response=="Y") / sum(response=="Y" | response=="N"))
msi <- ddply(mssi, .(trial.type, word), summarise,
rt=mean(RT,na.rm=TRUE),
rt.cih=ci.high(RT),
rt.cil=ci.low(RT),
acc=mean(correct),
acc.cih=ci.high(correct),
acc.cil=ci.low(correct))
msi$word <- reorder(msi$word, msi$acc)
qplot(word, acc,
ymin=acc-acc.cil,
ymax=acc+acc.cih,
geom="pointrange",
data=msi) +
facet_wrap(~trial.type,scales="free_x") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=.5))
and RT:
msi$word <- reorder(msi$word, msi$rt)
qplot(word, rt,
ymin=rt-rt.cil,
ymax=rt+rt.cih,
geom="pointrange",
data=msi) +
facet_wrap(~trial.type,scales="free_x") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=.5))
Nearly everyone sticks it out to the end!
tab$trial.number <- as.numeric(as.character(tab$trial.number))
mst <- ddply(tab, .(subject.id,age.group), summarise,
max.trial=max(trial.number))
qplot(max.trial, facets=~age.group, data=mst)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
and means across groups.
mstm <- ddply(mst, .(age.group), summarise,
trials = mean(max.trial),
trials.cih = ci.high(max.trial),
trials.cil = ci.low(max.trial))
qplot(age.group, trials,
fill=factor(age.group),
ymin=trials-trials.cil,
ymax=trials+trials.cih,
position=position_dodge(width=.9),
geom=c("bar","linerange"),
stat="identity",
data=mstm) +
geom_hline(yintercept=28, lty=2)
and RT means across trials:
mst <- ddply(tab, .(trial.number, age.group), summarise,
RT=mean(reaction.time, na.rm=TRUE))
qplot(trial.number, RT, data=mst) +
geom_smooth(method="lm")
library(xtable)
rt.mod <- lmer(reaction.time ~ trial.number + age * trial.type +
(trial.type | subject.id),
data=tab)
summary(rt.mod)
## Linear mixed model fit by REML ['lmerMod']
## Formula: reaction.time ~ trial.number + age * trial.type + (trial.type |
## subject.id)
## Data: tab
##
## REML criterion at convergence: 22152
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.912 -0.503 -0.206 0.217 5.842
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject.id (Intercept) 77778 279
## trial.typeMEcontrol 27008 164 0.93
## trial.typeMEexperimental 151591 389 0.62 0.86
## Residual 850125 922
## Number of obs: 1340, groups: subject.id, 63
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3211.30 215.32 14.91
## trial.number -6.93 3.13 -2.22
## age -383.21 61.19 -6.26
## trial.typeMEcontrol 261.28 245.95 1.06
## trial.typeMEexperimental 848.77 294.57 2.88
## age:trial.typeMEcontrol -69.84 70.76 -0.99
## age:trial.typeMEexperimental -82.37 85.06 -0.97
##
## Correlation of Fixed Effects:
## (Intr) trl.nm age trl.typMEc trl.typMEx ag:trl.typMEc
## trial.numbr -0.170
## age -0.943 -0.036
## trl.typMEcn -0.411 -0.003 0.396
## trl.typMExp -0.270 -0.010 0.259 0.556
## ag:trl.typMEc 0.391 0.003 -0.399 -0.964 -0.536
## ag:trl.typMEx 0.254 0.004 -0.256 -0.534 -0.963 0.554
xtable(summary(rt.mod)$coef)
## % latex table generated in R 3.0.2 by xtable 1.7-3 package
## % Wed Jul 30 21:55:23 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrr}
## \hline
## & Estimate & Std. Error & t value \\
## \hline
## (Intercept) & 3211.30 & 215.32 & 14.91 \\
## trial.number & -6.93 & 3.13 & -2.22 \\
## age & -383.21 & 61.19 & -6.26 \\
## trial.typeMEcontrol & 261.28 & 245.95 & 1.06 \\
## trial.typeMEexperimental & 848.77 & 294.57 & 2.88 \\
## age:trial.typeMEcontrol & -69.84 & 70.76 & -0.99 \\
## age:trial.typeMEexperimental & -82.37 & 85.06 & -0.97 \\
## \hline
## \end{tabular}
## \end{table}
tab$acc <- tab$response=="Y"
acc.mod <- lmer(acc ~ age.group * trial.type +
(trial.type | subject.id),
family="binomial",
data=tab)
## Warning: calling lmer with 'family' is deprecated; please use glmer() instead
## Warning: Model failed to converge with max|grad| = 1.42978 (tol = 0.001)
summary(acc.mod)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: acc ~ age.group * trial.type + (trial.type | subject.id)
## Data: tab
## Control:
## structure(list(optimizer = c("bobyqa", "Nelder_Mead"), calc.derivs = TRUE,
## use.last.params = FALSE, restart_edge = FALSE, boundary.tol = 1e-05,
## tolPwrss = 1e-07, compDev = TRUE, checkControl = structure(list(
## check.nobs.vs.rankZ = "warningSmall", check.nobs.vs.nlev = "stop",
## check.nlev.gtreq.5 = "ignore", check.nlev.gtr.1 = "stop",
## check.nobs.vs.nRE = "stop", check.rankX = "message+drop.cols",
## check.scaleX = "warning", check.formula.LHS = "stop"), .Names = c("check.nobs.vs.rankZ",
## "check.nobs.vs.nlev", "check.nlev.gtreq.5", "check.nlev.gtr.1",
## "check.nobs.vs.nRE", "check.rankX", "check.scaleX", "check.formula.LHS"
## )), checkConv = structure(list(check.conv.grad = structure(list(
## action = "warning", tol = 0.001, relTol = NULL), .Names = c("action",
## "tol", "relTol")), check.conv.singular = structure(list(action = "ignore",
## tol = 1e-04), .Names = c("action", "tol")), check.conv.hess = structure(list(
## action = "warning", tol = 1e-06), .Names = c("action",
## "tol"))), .Names = c("check.conv.grad", "check.conv.singular",
## "check.conv.hess")), optCtrl = list()), .Names = c("optimizer",
## "calc.derivs", "use.last.params", "restart_edge", "boundary.tol",
## "tolPwrss", "compDev", "checkControl", "checkConv", "optCtrl"
## ), class = c("glmerControl", "merControl"))
##
## AIC BIC logLik deviance df.resid
## 731.3 794.5 -353.7 707.3 1419
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.106 0.070 0.141 0.289 1.822
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject.id (Intercept) 2.740 1.655
## trial.typeMEcontrol 1.229 1.109 -0.86
## trial.typeMEexperimental 0.818 0.904 -0.99 0.90
## Number of obs: 1431, groups: subject.id, 63
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.000 0.850 1.18 0.23936
## age.group 1.201 0.360 3.34 0.00084
## trial.typeMEcontrol -1.434 0.868 -1.65 0.09868
## trial.typeMEexperimental -1.809 0.746 -2.42 0.01537
## age.group:trial.typeMEcontrol 0.378 0.415 0.91 0.36257
## age.group:trial.typeMEexperimental -0.197 0.338 -0.58 0.55969
##
## (Intercept)
## age.group ***
## trial.typeMEcontrol .
## trial.typeMEexperimental *
## age.group:trial.typeMEcontrol
## age.group:trial.typeMEexperimental
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ag.grp trl.typMEc trl.typMEx ag.grp:trl.typMEc
## age.group -0.792
## trl.typMEcn -0.749 0.599
## trl.typMExp -0.862 0.693 0.707
## ag.grp:trl.typMEc 0.514 -0.713 -0.780 -0.494
## ag.grp:trl.typMEx 0.629 -0.881 -0.523 -0.761 0.678
xtable(summary(acc.mod)$coef)
## % latex table generated in R 3.0.2 by xtable 1.7-3 package
## % Wed Jul 30 21:55:34 2014
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
## \hline
## & Estimate & Std. Error & z value & Pr($>$$|$z$|$) \\
## \hline
## (Intercept) & 1.00 & 0.85 & 1.18 & 0.24 \\
## age.group & 1.20 & 0.36 & 3.34 & 0.00 \\
## trial.typeMEcontrol & -1.43 & 0.87 & -1.65 & 0.10 \\
## trial.typeMEexperimental & -1.81 & 0.75 & -2.42 & 0.02 \\
## age.group:trial.typeMEcontrol & 0.38 & 0.42 & 0.91 & 0.36 \\
## age.group:trial.typeMEexperimental & -0.20 & 0.34 & -0.58 & 0.56 \\
## \hline
## \end{tabular}
## \end{table}
tab$trial.number <- as.numeric(as.character(tab$trial.number))
mst <- ddply(tab, .(subject.id,age.group), summarise,
max.trial=max(trial.number))
msta <- ddply(mst, .(age.group), summarise,
finished = mean(max.trial==28),
num.trials = mean(max.trial))
# xtable(msta)
Fairest analysis – split halves where 1 half is the odd-numbered Familiar trials and even numbered ME trials, and the other half the remaining trials Control trials.
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, desc, failwith, id, mutate, summarise, summarize
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
bytrial <- tab %>%
select(age.group,subject.id,trial.type,trial.number,acc,reaction.time) %>%
group_by(age.group,trial.type,subject.id) %>%
filter(trial.type != "MEexperimental") %>%
arrange(trial.number) %>%
mutate(trial.order = 1:length(trial.number),
even.half = trial.order %% 2 == 0,
first.half = (even.half & trial.type == "rec" ) |
(!even.half & trial.type != "rec" ))
type.trials <- bytrial %>%
group_by(age.group,subject.id,trial.type,add=FALSE) %>%
summarise(acc = mean(acc),
rt = na.median(reaction.time))
type.trials <- reshape(type.trials, idvar = c("subject.id","age.group"),
timevar = "trial.type", direction="wide")
half.trials <- bytrial %>%
group_by(age.group,subject.id,first.half,add=FALSE) %>%
summarise(acc = mean(acc),
rt = na.median(reaction.time))
half.trials <- reshape(half.trials, idvar = c("subject.id","age.group"),
timevar = "first.half", direction="wide")
Statistics
reliability.half <- half.trials %>%
group_by(age.group) %>%
summarise(acc.corr = cor(acc.TRUE,acc.FALSE,use="complete.obs"),
acc.corr.t = cor.test(acc.TRUE,acc.FALSE,
use="complete.obs")$statistic,
acc.corr.p = cor.test(acc.TRUE,acc.FALSE,
use="complete.obs")$p.value,
rt.corr = cor(rt.TRUE,rt.FALSE,use="complete.obs"),
rt.corr.t = cor.test(rt.TRUE,rt.FALSE,
use="complete.obs")$statistic,
rt.corr.p = cor.test(rt.TRUE,rt.FALSE,
use="complete.obs")$p.value)
reliability.type <- type.trials %>%
group_by(age.group) %>%
summarise(acc.corr = cor(acc.rec,acc.MEcontrol,use="complete.obs"),
acc.corr.t = cor.test(acc.rec,acc.MEcontrol,
use="complete.obs")$statistic,
acc.corr.p = cor.test(acc.rec,acc.MEcontrol,
use="complete.obs")$p.value,
rt.corr = cor(rt.rec,rt.MEcontrol,use="complete.obs"),
rt.corr.t = cor.test(rt.rec,rt.MEcontrol,
use="complete.obs")$statistic,
rt.corr.p = cor.test(rt.rec,rt.MEcontrol,
use="complete.obs")$p.value)
print(reliability.half)
## Source: local data frame [4 x 7]
##
## age.group acc.corr acc.corr.t acc.corr.p rt.corr rt.corr.t rt.corr.p
## 1 1 0.39672 1.2965 0.22704 0.4905 1.6884 0.125594
## 2 2 0.39197 1.4759 0.16572 -0.2453 -0.8767 0.397861
## 3 3 -0.09685 -0.4352 0.66811 0.4199 2.0690 0.051717
## 4 4 0.42857 1.7748 0.09766 0.6825 3.4938 0.003579
print(reliability.type)
## Source: local data frame [4 x 7]
##
## age.group acc.corr acc.corr.t acc.corr.p rt.corr rt.corr.t rt.corr.p
## 1 1 0.4482 1.5041 0.16681 0.4973 1.621 0.143631
## 2 2 0.2984 1.0831 0.30006 0.4218 1.612 0.132990
## 3 3 -0.1029 -0.4625 0.64872 0.5867 3.240 0.004104
## 4 4 0.5375 2.3848 0.03178 0.7292 3.987 0.001350