Prelims and libraries.
Load in data.
Participant exclusions
Reject subjects who don’t understand scale (based on positive sentences):
reject <- all.data %>%
filter(sent.type=="positive") %>% #Only look at positive sentences
group_by(subid) %>%
mutate(total = n()) %>% #get total # of positive sentences child saw
group_by(subid, condition, truth, total, resp2) %>%
filter((truth=="True" & resp2==3) | (truth=="False" & resp2==1)) %>% #Get # "good" for true pos and "bad" for false pos
summarize(counts = n()) %>%
group_by(subid, condition, total) %>%
summarize(counts = sum(counts)) %>% #total # "correct" responses
mutate(prop = counts/total) %>% #proportion correct
filter(prop < .6) #reject kids who got < .6 "correct" (this allows for 2/6 "mistakes")
for (i in reject$subid) {
all.data <- filter(all.data, subid !=i)
}
Make sure there aren’t any kids who just used one side of scale. Reject kids who only chose a single data point
scaleUse <- aggregate(resp2 ~ subid, all.data, n.unique)
table(scaleUse$resp2) #Are any resp2=1
##
## 2 3
## 49 20
Categorize kids based on response type
tn_responses <- all.data %>%
filter(sent.type=="negative" & truth=="True") %>%
group_by(subid) %>%
mutate(total = n()) %>%
group_by(subid, condition, total, resp2) %>%
summarize(counts = n()) %>%
mutate(prop = counts/total)
category <- dcast(tn_responses, subid + condition ~ resp2)
## Using prop as value column: use value.var to override.
names(category) <- c("subid","condition","bad","neutral","good")
category[is.na(category)] <- 0
category$type <- "other"
#category[category$neutral > .6,]$type <- "tn_neutral"
category[category$bad > .6,]$type <- "tn_bad"
category[category$good > .6,]$type <- "tn_good"
cat_counts <- category %>%
group_by(condition, type) %>%
summarise(counts = n())
cat_counts$condition <- factor(cat_counts$condition, levels=c("none","target"), labels=c("None","Target"))
cat_counts$type <- factor(cat_counts$type, levels=c("tn_bad","tn_good","other"), labels=c("True Negatives = Bad", "True Negatives = Good", "Inconsistent/Other"))
qplot(data=cat_counts, x=condition, y=counts, fill=type,
stat="identity", position="dodge", geom="bar") +
scale_fill_hue("Response Type") +
ylab("Count") + xlab("Context Condition") +
plot.style

Main Analysis
First aross ages
ms <- all.data %>%
group_by(subid, condition, sent.type, truth) %>%
summarise(subm = mean(resp)) %>%
group_by(condition, sent.type, truth) %>%
summarise(m = mean(subm),
cih = ci.high(subm),
cil = ci.low(subm))
ms$condition <- factor(ms$condition, labels=c("None","Target"))
ms$truth <- factor(ms$truth, levels=c("True","False"))
qplot(data=subset(ms, sent.type=="negative"),
x=condition, y=m, facets=~truth,
stat="identity", position="dodge", geom="bar") +
geom_errorbar(aes(ymin=cil, ymax=cih),
position=position_dodge(.9), width=0) +
scale_fill_grey("") +
xlab("Context") + ylab("Response") +
scale_y_continuous(limits=c(0, 5), breaks=seq(1,5,1)) +
#coord_equal(1/1.5) +
plot.style

Histogram of responses
truenegs <- filter(ms, truth=="True" & sent.type == "negative")
#make df for histogram (for formatting reasons)
hist_data <- all.data %>%
filter(truth=="True" & sent.type=="negative") %>%
group_by(condition, resp) %>%
summarise(count = n())
hist_data$condition <- factor(hist_data$condition, labels=c("None","Target"))
qplot(data=hist_data, y=count, x=resp,
fill = condition, width=.5,
geom="bar", position = position_dodge(.6), stat="identity") +
geom_point(data=truenegs, aes(x=m, y=c(41, 42), color=condition)) +
geom_segment(data=truenegs, aes(x=cil, xend=cih, y=c(41, 42), yend=c(41, 42), color=condition)) +
scale_fill_grey("Condition") + scale_color_grey("Condition") +
xlab("Response") + ylab("Count") +
#ggtitle("True Negatives, 3-5-year-olds (N=43)") +
plot.style

Break down by age.
ms <- all.data %>%
group_by(subid, agegroup, condition, sent.type, truth) %>%
summarise(subm = mean(resp)) %>%
group_by(condition, agegroup, sent.type, truth) %>%
summarise(m = mean(subm),
cih = ci.high(subm),
cil = ci.low(subm))
ms$condition <- factor(ms$condition, labels=c("None","Target"))
ms$truth <- factor(ms$truth, levels=c("True","False"))
qplot(data=subset(ms, sent.type=="negative"),
x=condition, y=m, facets=agegroup~truth,
stat="identity", position="dodge", geom="bar") +
geom_errorbar(aes(ymin=cil, ymax=cih),
position=position_dodge(.9), width=0) +
scale_fill_grey("") +
xlab("Context") + ylab("Response") +
scale_y_continuous(limits=c(0, 5), breaks=seq(1,5,1)) +
#coord_equal(1/1.5) +
plot.style

Histogram of responses
trueneg_3s <- filter(ms, truth=="True" & sent.type == "negative" & agegroup == "3")
trueneg_4s <- filter(ms, truth=="True" & sent.type == "negative" & agegroup == "4")
hist_data_3s <- all.data %>%
filter(truth=="True" & sent.type=="negative" & agegroup=="3") %>%
group_by(condition, resp) %>%
summarise(count = n())
hist_data_3s$condition <- factor(hist_data_3s$condition, labels=c("None","Target"))
#quartz()
qplot(data=hist_data_3s, y=count, x=resp,
fill = condition, width=.5,
geom="bar", position = position_dodge(.6), stat="identity") +
geom_point(data=trueneg_3s, aes(x=m, y=c(40, 41), color=condition)) +
geom_segment(data=trueneg_3s, aes(x=cil, xend=cih, y=c(40, 41), yend=c(40, 41), color=condition)) +
scale_fill_grey("Condition") + scale_color_grey("Condition") +
xlab("Response") + ylab("Count") +
ylim(c(0, 80)) +
#ggtitle("True Negatives, 3-year-olds (N=35)") +
plot.style

hist_data_4s <- all.data %>%
filter(truth=="True" & sent.type=="negative" & agegroup=="4") %>%
group_by(condition, resp) %>%
summarise(count = n())
hist_data_4s$condition <- factor(hist_data_4s$condition, labels=c("None","Target"))
#quartz()
qplot(data=hist_data_4s, y=count, x=resp,
fill = condition, width=.5,
geom="bar", position = position_dodge(.6), stat="identity") +
geom_point(data=trueneg_4s, aes(x=m, y=c(40, 41), color=condition)) +
geom_segment(data=trueneg_4s, aes(x=cil, xend=cih, y=c(40, 41), yend=c(40, 41), color=condition)) +
scale_fill_grey("Condition") + scale_color_grey("Condition") +
xlab("Response") + ylab("Count") +
ylim(c(0, 80)) +
#ggtitle("True Negatives, 4-year-olds (N=34)") +
plot.style

Playing around
ms <- all.data %>%
group_by(sent.type, truth, condition, agegroup, subid) %>%
summarise(resp = mean(resp)) %>%
group_by(sent.type, truth, condition, agegroup) %>%
summarise(cih = ci.high(resp),
cil = ci.low(resp),
m = mean(resp))
ggplot(ms, aes(x = sent.type:truth, y = m, fill = condition)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = cil, ymax = cih),
position = position_dodge(width = .9)) +
facet_grid(.~agegroup)

Statistics
Basic continuous models.
summary(lmer(resp ~ condition * agegroup * truth + (1|subid)
+ (1|item),
data = filter(all.data, sent.type == "negative")))
## Linear mixed model fit by REML ['lmerMod']
## Formula: resp ~ condition * agegroup * truth + (1 | subid) + (1 | item)
## Data: filter(all.data, sent.type == "negative")
##
## REML criterion at convergence: 2275
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.456 -0.526 -0.060 0.639 3.405
##
## Random effects:
## Groups Name Variance Std.Dev.
## subid (Intercept) 1.16 1.08
## item (Intercept) 0.00 0.00
## Residual 1.52 1.23
## Number of obs: 655, groups: subid, 69; item, 16
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.563 1.657 2.15
## conditiontarget -0.256 2.402 -0.11
## agegroup -0.457 0.474 -0.96
## truthTrue 1.216 1.183 1.03
## conditiontarget:agegroup 0.150 0.679 0.22
## conditiontarget:truthTrue -1.004 1.722 -0.58
## agegroup:truthTrue -0.125 0.338 -0.37
## conditiontarget:agegroup:truthTrue 0.434 0.486 0.90
##
## Correlation of Fixed Effects:
## (Intr) cndtnt agegrp trthTr cndtn: cndt:T aggr:T
## conditntrgt -0.690
## agegroup -0.990 0.683
## truthTrue -0.570 0.393 0.563
## cndtntrgt:g 0.691 -0.990 -0.698 -0.393
## cndtntrgt:T 0.391 -0.577 -0.387 -0.687 0.569
## aggrp:trthT 0.564 -0.389 -0.569 -0.990 0.398 0.680
## cndtntrg::T -0.393 0.571 0.396 0.689 -0.575 -0.990 -0.696
summary(lmer(resp ~ condition * agegroup +
(1|subid) + (1|item),
data = filter(all.data, sent.type == "negative" & truth == "True")))
## Linear mixed model fit by REML ['lmerMod']
## Formula: resp ~ condition * agegroup + (1 | subid) + (1 | item)
## Data: filter(all.data, sent.type == "negative" & truth == "True")
##
## REML criterion at convergence: 1642
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.572 -0.152 0.087 0.453 3.049
##
## Random effects:
## Groups Name Variance Std.Dev.
## subid (Intercept) 1.90 1.379
## item (Intercept) 0.00 0.000
## Residual 0.93 0.965
## Number of obs: 525, groups: subid, 69; item, 16
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.750 1.688 2.81
## conditiontarget -1.260 2.429 -0.52
## agegroup -0.575 0.483 -1.19
## conditiontarget:agegroup 0.588 0.688 0.86
##
## Correlation of Fixed Effects:
## (Intr) cndtnt agegrp
## conditntrgt -0.695
## agegroup -0.990 0.688
## cndtntrgt:g 0.695 -0.990 -0.702
Some simpler models, including within-agegroup.
model <- lmer(resp ~ condition*agegroup +
(1 | subid) +
(1 | item),
data=subset(all.data, sent.type == "negative" & truth == "True"))
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: resp ~ condition * agegroup + (1 | subid) + (1 | item)
## Data: subset(all.data, sent.type == "negative" & truth == "True")
##
## REML criterion at convergence: 1642
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.572 -0.152 0.087 0.453 3.049
##
## Random effects:
## Groups Name Variance Std.Dev.
## subid (Intercept) 1.90 1.379
## item (Intercept) 0.00 0.000
## Residual 0.93 0.965
## Number of obs: 525, groups: subid, 69; item, 16
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.750 1.688 2.81
## conditiontarget -1.260 2.429 -0.52
## agegroup -0.575 0.483 -1.19
## conditiontarget:agegroup 0.588 0.688 0.86
##
## Correlation of Fixed Effects:
## (Intr) cndtnt agegrp
## conditntrgt -0.695
## agegroup -0.990 0.688
## cndtntrgt:g 0.695 -0.990 -0.702
threes <- filter(all.data, agegroup == "3")
threes_subs <- aggregate(resp ~ subid + condition, threes, mean)
t.test(resp ~ condition, threes_subs)
##
## Welch Two Sample t-test
##
## data: resp by condition
## t = -0.9181, df = 32.36, p-value = 0.3654
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.7716 0.2920
## sample estimates:
## mean in group none mean in group target
## 3.186 3.426
fours <- filter(all.data, agegroup == "4")
fours_subs <- aggregate(resp ~ subid + condition, fours, mean)
t.test(resp ~ condition, fours_subs)
##
## Welch Two Sample t-test
##
## data: resp by condition
## t = -2.069, df = 30.75, p-value = 0.04701
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.12949 -0.00801
## sample estimates:
## mean in group none mean in group target
## 2.806 3.375
subs <- aggregate(resp ~ subid + condition, all.data, mean)
t.test(resp ~ condition, subs)
##
## Welch Two Sample t-test
##
## data: resp by condition
## t = -2.048, df = 66.8, p-value = 0.04445
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.763101 -0.009868
## sample estimates:
## mean in group none mean in group target
## 3.013 3.399
Discrete models
all.data$bin.resp <- all.data$resp > 3
ms <- all.data %>%
group_by(sent.type, truth, condition, agegroup, subid) %>%
summarise(resp = mean(bin.resp)) %>%
group_by(sent.type, truth, condition, agegroup) %>%
summarise(cih = ci.high(resp),
cil = ci.low(resp),
m = mean(resp))
ggplot(ms, aes(x = sent.type:truth, y = m, fill = condition)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = cil, ymax = cih),
position = position_dodge(width = .9)) +
facet_grid(.~agegroup)

summary(glmer(bin.resp ~ (condition + agegroup + truth)^3 + (1|subid)
+ (1|item), family = "binomial",
data = filter(all.data, sent.type == "negative")))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: bin.resp ~ (condition + agegroup + truth)^3 + (1 | subid) + (1 |
## item)
## Data: filter(all.data, sent.type == "negative")
##
## AIC BIC logLik deviance df.resid
## 667.9 712.7 -323.9 647.9 645
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.250 -0.452 -0.197 0.462 6.489
##
## Random effects:
## Groups Name Variance Std.Dev.
## subid (Intercept) 4.17e+00 2.04090
## item (Intercept) 6.31e-06 0.00251
## Number of obs: 655, groups: subid, 69; item, 16
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.060 4.195 1.21 0.228
## conditiontarget -4.148 5.487 -0.76 0.450
## agegroup -2.381 1.256 -1.90 0.058 .
## truthTrue -0.505 3.497 -0.14 0.885
## conditiontarget:agegroup 1.642 1.600 1.03 0.305
## conditiontarget:truthTrue 0.776 4.459 0.17 0.862
## agegroup:truthTrue 0.748 1.055 0.71 0.478
## conditiontarget:agegroup:truthTrue -0.226 1.313 -0.17 0.863
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cndtnt agegrp trthTr cndtn: cndt:T aggr:T
## conditntrgt -0.764
## agegroup -0.990 0.756
## truthTrue -0.737 0.563 0.740
## cndtntrgt:g 0.775 -0.990 -0.782 -0.581
## cndtntrgt:T 0.578 -0.692 -0.581 -0.784 0.696
## aggrp:trthT 0.736 -0.563 -0.754 -0.991 0.591 0.777
## cndtntrg::T -0.591 0.690 0.604 0.796 -0.707 -0.990 -0.803
Subjectwise mean distribution
ms <- all.data %>%
group_by(sent.type, truth, condition, agegroup, subid) %>%
summarise(m = mean(resp))
qplot(round(m),
fill = condition,
facets = ~ agegroup,
position = "dodge",
binwidth = .5,
data = filter(ms, truth == "True" &
sent.type == "negative"))

Beta regression