Various packages.
rm(list=ls())
library(ggplot2)
library(tidyr)
suppressPackageStartupMessages(library(dplyr))
library(bootstrap)
suppressPackageStartupMessages(library(lme4))
library(knitr)
theme_set(theme_bw())
opts_chunk$set(fig.width = 8, fig.height = 5,
               echo = TRUE, warning = FALSE, message = FALSE, cache = TRUE)
Helper functions.
## Resampling statistic for the bootstrap: mean of the resampled values.
theta <- function(x, xdata, na.rm = T) {mean(xdata[x], na.rm = na.rm)}
## Distances from the sample mean to the 2.5% and 97.5% bootstrap quantiles,
## for use as error-bar half-widths.
ci.low <- function(x, na.rm = T) {
  mean(x, na.rm = na.rm) -
    quantile(bootstrap(1:length(x), 1000, theta, x, na.rm = na.rm)$thetastar,
             .025, na.rm = na.rm)}
ci.high <- function(x, na.rm = T) {
  quantile(bootstrap(1:length(x), 1000, theta, x, na.rm = na.rm)$thetastar,
           .975, na.rm = na.rm) -
    mean(x, na.rm = na.rm)}
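As a quick sanity check on these helpers, here's a minimal sketch on simulated data (the vector x is hypothetical, not part of the dataset):
set.seed(42)
x <- rbinom(50, 1, .7)                     # 50 simulated correct/incorrect responses
mean(x)                                    # point estimate
c(lower = ci.low(x), upper = ci.high(x))   # half-widths to subtract/add around the mean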
First read in the data, as distributed by the journal.
d <- read.csv("d.csv")
Rename variables.
d <- d %>%
  rename(participant = pt,
         trial_type = X) %>%
  select(-count)
ms <- d %>%
  group_by(age, cond, participant) %>%
  summarise(correct = mean(correct)) %>%   # per-participant accuracy
  summarise(mean = mean(correct),          # then collapse across participants
            n = n(),
            ci_lower = ci.low(correct),
            ci_upper = ci.high(correct))
ggplot(ms, aes(x = cond, y = mean, col = age)) +
  geom_pointrange(aes(ymin = mean - ci_lower, ymax = mean + ci_upper),
                  position = position_dodge(width = .1)) +
  geom_line(aes(group = age)) +
  ylim(c(0, 1))
What’s the story with trial_type?
ms <- d %>%
  group_by(age, cond, trial_type, participant) %>%
  summarise(correct = mean(correct)) %>%
  summarise(mean = mean(correct),
            n = n(),
            ci_lower = ci.low(correct),
            ci_upper = ci.high(correct))
ggplot(ms, aes(x = cond, y = mean, col = age)) +
  geom_pointrange(aes(ymin = mean - ci_lower, ymax = mean + ci_upper),
                  position = position_dodge(width = .1)) +
  geom_line(aes(group = age)) +
  facet_wrap(~trial_type) +
  ylim(c(0, 1))
Not sure; it seems like this variable might be accounting for some variability across conditions, but I don’t know enough about the design to tell.
How about old_new? I guess these are generalization trials?
ms <- d %>%
  group_by(age, cond, old_new, participant) %>%
  summarise(correct = mean(correct)) %>%
  summarise(mean = mean(correct),
            n = n(),
            ci_lower = ci.low(correct),
            ci_upper = ci.high(correct))
ggplot(ms, aes(x = cond, y = mean, col = age)) +
  geom_pointrange(aes(ymin = mean - ci_lower, ymax = mean + ci_upper),
                  position = position_dodge(width = .1)) +
  geom_line(aes(group = age)) +
  facet_wrap(~old_new) +
  ylim(c(0, 1))
Looks like this makes a big difference in the data.
Basic GLMER.
ds <- filter(d, cond %in% c("nosklex","mixed"))
kable(summary(glmer(correct ~ cond * age + (1 | participant),
                    data = ds,
                    family = "binomial"))$coefficients,
      digits = 3)
|                      | Estimate | Std. Error | z value | Pr(>|z|) |
|:---------------------|---------:|-----------:|--------:|---------:|
| (Intercept)          |    1.084 |      0.275 |   3.941 |    0.000 |
| condnosklex          |    0.724 |      0.393 |   1.844 |    0.065 |
| agechild             |   -0.424 |      0.351 |  -1.210 |    0.226 |
| condnosklex:agechild |   -0.290 |      0.493 |  -0.589 |    0.556 |
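As a cross-check on the condition terms, one could also compare nested models with a likelihood-ratio test. A minimal sketch (the model names m_full and m_nocond are my additions):
m_full <- glmer(correct ~ cond * age + (1 | participant),
                data = ds, family = "binomial")
m_nocond <- glmer(correct ~ age + (1 | participant),
                  data = ds, family = "binomial")
anova(m_nocond, m_full)  # joint LRT for the condition main effect and interaction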
Note that there is a trend toward significance for condition (p = .065). Let’s add old_new.
kable(summary(glmer(correct ~ cond * age * old_new +
                      (old_new | participant),
                    data = ds,
                    family = "binomial"))$coefficients,
      digits = 3)
|                                 | Estimate | Std. Error | z value | Pr(>|z|) |
|:--------------------------------|---------:|-----------:|--------:|---------:|
| (Intercept)                     |    2.413 |      0.542 |   4.451 |    0.000 |
| condnosklex                     |    0.731 |      0.777 |   0.941 |    0.347 |
| agechild                        |   -1.988 |      0.644 |  -3.085 |    0.002 |
| old_newold                      |   -1.895 |      0.598 |  -3.168 |    0.002 |
| condnosklex:agechild            |   -0.360 |      0.923 |  -0.390 |    0.697 |
| condnosklex:old_newold          |    0.423 |      0.856 |   0.494 |    0.621 |
| agechild:old_newold             |    2.509 |      0.728 |   3.446 |    0.001 |
| condnosklex:agechild:old_newold |   -0.402 |      1.032 |  -0.389 |    0.697 |
Convergence errors. Let’s drop the by-participant random slope for old_new.
kable(summary(glmer(correct ~ cond * age * old_new +
                      (1 | participant),
                    data = ds,
                    family = "binomial"))$coefficients,
      digits = 3)
|                                 | Estimate | Std. Error | z value | Pr(>|z|) |
|:--------------------------------|---------:|-----------:|--------:|---------:|
| (Intercept)                     |    1.863 |      0.354 |   5.260 |    0.000 |
| condnosklex                     |    0.680 |      0.532 |   1.280 |    0.201 |
| agechild                        |   -1.449 |      0.434 |  -3.340 |    0.001 |
| old_newold                      |   -1.356 |      0.328 |  -4.138 |    0.000 |
| condnosklex:agechild            |   -0.459 |      0.637 |  -0.720 |    0.472 |
| condnosklex:old_newold          |    0.366 |      0.477 |   0.767 |    0.443 |
| agechild:old_newold             |    1.860 |      0.405 |   4.595 |    0.000 |
| condnosklex:agechild:old_newold |   -0.136 |      0.572 |  -0.237 |    0.813 |
In this model we’re not seeing much of a condition effect. It looks like for old trials the adults are better in nosklex, but that’s the only noticeable effect, and that interaction doesn’t come out significant.
So we could in principle turn to a Bayesian approach to try to assess the evidence for \(H_0\) (no difference between conditions).
This code is adapted from Liz’s code.
library(rjags)
Clean up data.
ms <- d %>%
  filter(cond %in% c("mixed", "nosklex"),
         age == "adult",
         old_new == "old") %>%
  group_by(cond, participant) %>%
  summarise(z = sum(correct),
            N = n())
Specify the data as a list, in the form the JAGS model expects.
dataList = list(
  nCond = length(unique(ms$cond)),
  nSubj = length(unique(ms$participant)),
  CondOfSubj = factor(ms$cond),  # integer codes do the nested indexing: 1 = mixed, 2 = nosklex
  nTrlOfSubj = ms$N,
  nCorrOfSubj = ms$z
)
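A quick sanity check (my addition, not in the original) that the factor coding lines up with the nested indexing used in the model below:
table(dataList$CondOfSubj)    # subjects contributing per condition
levels(dataList$CondOfSubj)   # level order determines the index: "mixed" = 1, "nosklex" = 2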
modelstring = "
model {
  for (i in 1:nSubj) {
    # Likelihood:
    nCorrOfSubj[i] ~ dbin(theta[i], nTrlOfSubj[i])
    # Prior on theta (notice nested indexing):
    theta[i] ~ dbeta(aBeta[CondOfSubj[i]], bBeta[CondOfSubj[i]])T(0.0001,0.9999)
  }
  # Re-parameterization of aBeta[j],bBeta[j] in terms of mu and kappa:
  for (j in 1:nCond) {
    # Model 1: Distinct mu[j] each group. Model 2: Shared mu0 all groups.
    aBeta[j] <- ( mu[j]*equals(mdlIdx,1) + mu0*equals(mdlIdx,2) ) *kappa[j]
    bBeta[j] <- (1-( mu[j]*equals(mdlIdx,1) + mu0*equals(mdlIdx,2) ))*kappa[j]
  }
  # Hyperpriors for mu and kappa:
  for (j in 1:nCond) {
    mu[j] ~ dbeta( a[j,mdlIdx] , b[j,mdlIdx] ) ### DIFFERENT
  }
  for (j in 1:nCond) {
    kappa[j] ~ dgamma( shk , rak ) ### DIFFERENT
  }
  mu0 ~ dbeta(a0[mdlIdx], b0[mdlIdx])
  # Constants for hyperprior:
  ##shk <- 1.0
  ##rak <- 0.1
  shk <- pow(meanGamma,2)/pow(sdGamma,2)
  rak <- meanGamma/pow(sdGamma,2)
  meanGamma ~ dunif(0.01, 30) ## added
  sdGamma ~ dunif(0.01, 30) ## added
  aP <- 1
  bP <- 1
  ## PSEUDOS: these just set out starting places for the chains
  ## (shouldn't be biasing); I based them on the average of
  ## child and adult means for each condition and overall
  a0[1] <- .71*200      # pseudo
  b0[1] <- (1-.71)*200  # pseudo
  a0[2] <- aP           # true
  b0[2] <- bP           # true
  a[1,1] <- aP          # true
  a[2,1] <- aP          # true
  b[1,1] <- bP          # true
  b[2,1] <- bP          # true
  a[1,2] <- .65*100     # pseudo
  a[2,2] <- .72*100     # pseudo
  b[1,2] <- (1-.65)*100 # pseudo
  b[2,2] <- (1-.72)*100 # pseudo
  # Hyperprior on model index:
  mdlIdx ~ dcat(modelProb[])
  modelProb[1] <- .5
  modelProb[2] <- .5
}" # close quote for modelstring
writeLines(modelstring, con="model.txt") # Write model to a file.
Parameters. Note, there’s no reason to thin here. These should be easy models to fit; I think the earlier settings (5000 + 5000 + 15000 steps per chain) are probably overkill.
Also, I don’t think you want to monitor the thetas and the gamma hyperpriors; it just makes life difficult, since it gets hard to print everything if you keep track of each subject’s parameter estimates.
parameters = c("mu","kappa","mu0","mdlIdx")
adaptSteps = 1000 # Number of steps to "tune" the samplers.
burnInSteps = 2000 # Number of steps to "burn-in" the samplers.
nChains = 3 # Number of chains to run.
thinSteps = 1
nPerChain = 2000 # Steps per chain.
Create, initialize, and adapt the model.
jagsModel <- jags.model("model.txt", data = dataList, # inits=initsList ,
                        n.chains = nChains, n.adapt = adaptSteps)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 30
## Unobserved stochastic nodes: 38
## Total graph size: 269
##
## Initializing model
Burn-in.
update(jagsModel, n.iter=burnInSteps)
The saved MCMC chain.
codaSamples <- coda.samples(jagsModel, variable.names = parameters,
                            n.iter = nPerChain, thin = thinSteps)
Diagnostics. These look pretty good. The gelman plots run earlier suggested that a burn-in of 2000 steps is reasonable.
summary(codaSamples)
##
## Iterations = 3001:5000
## Thinning interval = 1
## Number of chains = 3
## Sample size per chain = 2000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## kappa[1] 33.1051 26.02869 0.3360290 1.310655
## kappa[2] 2.2640 1.30497 0.0168471 0.045868
## mdlIdx 1.2987 0.45771 0.0059090 0.023398
## mu[1] 0.6207 0.05389 0.0006957 0.002268
## mu[2] 0.7752 0.07150 0.0009230 0.002418
## mu0 0.6945 0.04280 0.0005526 0.001622
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## kappa[1] 5.4680 14.9718 25.0335 43.3177 103.0965
## kappa[2] 0.6384 1.3515 1.9679 2.8367 5.5582
## mdlIdx 1.0000 1.0000 1.0000 2.0000 2.0000
## mu[1] 0.5144 0.5845 0.6210 0.6591 0.7199
## mu[2] 0.6419 0.7248 0.7725 0.8235 0.9215
## mu0 0.6008 0.6683 0.6983 0.7252 0.7667
plot(codaSamples)
gelman.plot(codaSamples)
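For a numeric complement to the plots, coda’s standard diagnostics could be run as well (a sketch, not part of the original analysis):
gelman.diag(codaSamples, multivariate = FALSE)  # PSRF; values near 1 indicate convergence
effectiveSize(codaSamples)                      # effective sample size after autocorrelation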
mdlIdx_adults <- mean(as.matrix(codaSamples)[,"mdlIdx"]==1)
The mixture weight on model 1 (distinct condition means) is 0.701 for adults.
Do the same for kids.
ms <- d %>%
  filter(cond %in% c("mixed", "nosklex"),
         age == "child",
         old_new == "old") %>%
  group_by(cond, participant) %>%
  summarise(z = sum(correct),
            N = n())
dataList = list(
  nCond = length(unique(ms$cond)),
  nSubj = length(unique(ms$participant)),
  CondOfSubj = factor(ms$cond),
  nTrlOfSubj = ms$N,
  nCorrOfSubj = ms$z
)
jagsModel <- jags.model("model.txt", data = dataList, # inits=initsList ,
                        n.chains = nChains, n.adapt = adaptSteps)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 49
## Unobserved stochastic nodes: 57
## Total graph size: 383
##
## Initializing model
update(jagsModel, n.iter=burnInSteps)
codaSamples <- coda.samples(jagsModel, variable.names = parameters,
                            n.iter = nPerChain, thin = thinSteps)
mdlIdx_kids <- mean(as.matrix(codaSamples)[,"mdlIdx"]==1)
The mixture weight on model 1 (distinct condition means) is 0.205 for kids.
So there is some tendency for the adults to show more support for a condition difference (model 1) and for the kids to show less. But neither group puts 95%+ of its posterior mass on one model or the other, so the result is not particularly decisive.
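Since the two models get equal prior probability (modelProb = .5/.5), the posterior odds equal the Bayes factor, so these weights convert directly (using the quantities computed above):
# Prior odds = 1, so posterior odds = Bayes factor.
bf_adults <- mdlIdx_adults / (1 - mdlIdx_adults)  # ~2.3 in favor of a condition difference
bf_kids   <- mdlIdx_kids / (1 - mdlIdx_kids)      # ~0.26, i.e., ~3.9:1 in favor of the null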
A limitation of this analysis is that it only considers the “old” trials right now; you’d have to make the model even more complex to incorporate more condition differences.
Overall, I don’t see the Bayesian analysis adding much here over the frequentist mixed-effects model. There is some small numerical difference between conditions (more for the adults than the kids). But the biggest trend is that adults do much better in the ME condition.