This document was created from an R Markdown file. The R Markdown file can be found here. All analyses and plots can be reproduced from the raw data with the code in this file. Experiments conducted online can be viewed here.
A note on exclusions: There are several possible criteria for categorizing a participant as a subordinate or basic-level generalizer. X&T find that participants respond consistently across questions as either a subordinate or a basic generalizer for each of the two trials. In contrast, we observed varied responses across questions by participants in our sample. Thus, we adopt two criteria in analyzing our data: a “basic strict” criteria where we only include participants if they responded consistently (“yes” or “no” to all basic-level questions and “yes” or “no” to all subordinate level questions, “basic strict” = T), and a “liberal” criteria where we counted a particpant as a basic generalizer if they responded “yes” to any of the basic level questions (“basic strict” = F). The results of the liberal criteria are reported in the Main Text, and the results of the strict criteria are reported in Appendix A. In a set of post-hoc analyses, we also analyzed the data by varying the exclusion criteria for consistency in the subordinate category (“subordinate strict”).
## [1] "Basic strict criteria: FALSE"
## [1] "Subordinate strict criteria: TRUE"
Get data - adults.
N_adults = 14
N_per_condition = N_adults/2 # 2 conditions (teacher + learner)
# Note: The proportions described in the original report were obtained by aggregating across *trials* not participants (2 trials/participant). We correct for this here by taking the proportion of participants, which results in slightly different proportions than those reported in the original paper.
# make raw data
prop_sub_teacher = .928
n_sub_teacher = round(prop_sub_teacher * N_per_condition)
n_basic_teacher = N_per_condition - n_sub_teacher
teacher_xt_data = as.factor(c(rep("sub", n_sub_teacher),
rep("basic", n_basic_teacher)))
prop_sub_learner = .357
n_sub_learner = round(prop_sub_learner * N_per_condition)
n_basic_learner = N_per_condition - n_sub_learner
learner_xt_data = as.factor(c(rep("sub", n_sub_learner),
rep("basic", n_basic_learner)))
xt_data.adults = data.frame(response = c(as.character(teacher_xt_data),
as.character(learner_xt_data)))
xt_data.adults$Answer.sample[1:N_per_condition] = "teacher"
xt_data.adults$Answer.sample[(N_per_condition + 1):
(N_per_condition * 2)] = "learner"
Get props.
# get props
xt.a.props = get_bootstrapped_props(xt_data.adults, "response",
"X&T (2007b) adults")
Get data - children.
N_child = 24
N_per_condition = N_child /2 # 2 conditions
# make raw data
prop_sub_teacher = .71
n_sub_teacher = round(prop_sub_teacher * N_per_condition)
n_basic_teacher = N_per_condition - n_sub_teacher
teacher_xt_data = as.factor(c(rep("sub", n_sub_teacher),
rep("basic", n_basic_teacher)))
prop_sub_learner = .29
n_sub_learner = round(prop_sub_learner * N_per_condition)
n_basic_learner = N_per_condition - n_sub_learner
learner_xt_data = as.factor(c(rep("sub", n_sub_learner),
rep("basic", n_basic_learner)))
xt_data.children = data.frame(response = c(as.character(teacher_xt_data),
as.character(learner_xt_data)))
xt_data.children$Answer.sample[1:N_per_condition] = "teacher"
xt_data.children$Answer.sample[(N_per_condition + 1):
(N_per_condition * 2)] = "learner"
Get props.
xt.c.props = get_bootstrapped_props(xt_data.children,
"response", "X&T (2007b) children")
N = 14 adults and 24 children in this study.
xt = rbind(xt.c.props, xt.a.props)
# re-order levels so consistent with other exps
xt$Answer.sample <- ordered(xt$Answer.sample,
levels = c("teacher", "learner"))
ggplot(xt, aes(x = response.cat, y = prop, fill = Answer.sample)) +
facet_grid(. ~ exp) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_linerange(aes(ymin = cill, ymax = ciul), position = position_dodge(.9)) +
ylim(0,1) +
ylab("Prop. participants") +
xlab("Generalization pattern") +
scale_fill_brewer(name = "Sampling\nCondition", palette = "Set1") +
ggtitle("Original Xu and Tenenbaum Data") +
themeML
Stats: Pearson’s Chi-squared test with Yates’ continuity correction
Adults.
xta_tab = table(xt_data.adults$Answer.sample, xt_data.adults$response)
kable(tidy(chisq.test(xta_tab)))
| statistic | p.value | parameter |
|---|---|---|
| 2.625 | 0.1051925 | 1 |
Children.
xtc_tab = table(xt_data.children$Answer.sample, xt_data.children$response)
kable(tidy(chisq.test(xtc_tab)))
| statistic | p.value | parameter |
|---|---|---|
| 4.166667 | 0.0412268 | 1 |
Read in data and pre-process.
t1 = read.csv("../data/anonymized/turk_replication_1_A.csv")
# make factors
t1$Answer.sample <- factor(t1$Answer.sample, labels=c('teacher','learner')) # sample0 = teacher, sample1 = learner
t1$Answer.label <- factor(t1$Answer.label, labels=c('nolabel','label')) # 0 = nolabel, 1 = label
t1 <- colwise(as.factor)(t1)
Distribution of participants in preliminary exclusion criteria.
# get number of exclusions by category
t1.exclusion.ns = t1 %>%
summarise(n_noLabelParticipants = length(which(Answer.label != 'label')),
n_repeatWorkers = length(which(duplicated(workerids))),
n_badTraining = length(which(Answer.click1 == "\"false\"" |
Answer.click2 == "\"false\"")),
n_badGeneralize = length(which(Answer.Qwcheck != 0)),
n_badFilter = length(which(Answer.question1 == 'FALSE')))
kable(gather(t1.exclusion.ns,"Exclusion category", "n"))
| Exclusion category | n |
|---|---|
| n_noLabelParticipants | 48 |
| n_repeatWorkers | 8 |
| n_badTraining | 21 |
| n_badGeneralize | 8 |
| n_badFilter | 0 |
total_n = dim(t1)[1] - t1.exclusion.ns[1,"n_noLabelParticipants"] - t1.exclusion.ns[1,"n_repeatWorkers"]
# subset data
t1.f = t1 %>%
filter(Answer.label == 'label') %>% #remove nolabel participants (run 4)
filter(!duplicated(workerids)) %>% #participants who completed multiple runs
filter(Answer.click1 == "\"correct\"" &
Answer.click2 == "\"correct\"") %>% #correct training items (learning only)
filter(Answer.Qwcheck == 0) %>% #check generalization question
filter(Answer.question1 == 'TRUE') #filter question
Some participants were in another, not analyzed condition (“noLabelParticipants”). The total number of participants before any relevant exclusions is N = 294. After preliminary exclusions, the total number of participants is N = 274.
Categorize response patterns based on response-pattern exclusion criterion.
if (sub_strict){
sub <- t1.f$Answer.Qwsmm1 == 0 & t1.f$Answer.Qwsmm2 == 0 &
t1.f$Answer.Qwsm1 == 1 & t1.f$Answer.Qwsm2 == 1
} else {
sub <- t1.f$Answer.Qwsmm1 == 0 & t1.f$Answer.Qwsmm2 == 0 &
(t1.f$Answer.Qwsm1 == 1 | t1.f$Answer.Qwsm2 == 1)
}
if (basic_strict){
basic <- (t1.f$Answer.Qwsmm1 == 1 & t1.f$Answer.Qwsmm2 == 1) &
t1.f$Answer.Qwsm1 == 1 & t1.f$Answer.Qwsm2 == 1
} else {
basic <- (t1.f$Answer.Qwsmm1 == 1 | t1.f$Answer.Qwsmm2 == 1) &
t1.f$Answer.Qwsm1 == 1 & t1.f$Answer.Qwsm2 == 1
}
t1.f$w.ans.cat <- "other"
t1.f$w.ans.cat[sub] <- "sub"
t1.f$w.ans.cat[basic] <- "basic"
t1.f$w.ans.cat = factor(t1.f$w.ans.cat, levels = c("sub", "basic", "other"))
# get other responeses
others.t1 = filter(t1.f, w.ans.cat == "other")
# filter out other responses
t1.f = t1.f[t1.f$w.ans.cat != "other",]
t1.f$w.ans.cat = droplevels(t1.f$w.ans.cat)
The number of particpants whose generalization strategy could not be catgorized was 79. The final sample was N = 195.
Distribution of final sample of participants across conditions.
final.cond.counts.t1 = t1.f %>%
rename(Condition = Answer.sample) %>%
group_by(Condition) %>%
summarise(n = n())
kable(final.cond.counts.t1)
| Condition | n |
|---|---|
| teacher | 94 |
| learner | 101 |
Get props.
t1.f.props = get_bootstrapped_props(t1.f, "w.ans.cat",
"Turk #1")
t1.f.props %>%
ggplot(aes(x = response.cat, y = prop, fill = Answer.sample)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_linerange(aes(ymin = cill, ymax = ciul), position = position_dodge(.9)) +
ylim(0,1) +
ylab("Prop. participants") +
xlab("Generalization pattern") +
ggtitle("Turk Replication #1") +
theme(legend.position=c(.85,.8)) +
themeML +
scale_fill_brewer(name = "Sampling\nCondition", palette = "Set1")
Stats: Pearson’s Chi-squared test with Yates’ continuity correction
t1_tab = table(t1.f$w.ans.cat, t1.f$Answer.sample)[c("sub", "basic"),]
kable(tidy(chisq.test(t1_tab)))
| statistic | p.value | parameter |
|---|---|---|
| 0.6245227 | 0.4293716 | 1 |
Read in data and pre-process.
t2 <- read.csv("../data/anonymized/turk_replication_2_A.csv", header = T)
t2 <- t2 [,c(-1:-19,-21:-32,-36,-41:-43,-54:-55,-56)] # remove unnecessary columns
# make factors
t2$Answer.sample <- factor(t2$Answer.sample,
labels = c('teacher','learner')) # sample0 = teacher, sample1 = learner
t2 <- colwise(as.factor)(t2)
The total before any exclusions is N = 150.
Distribution of participants in preliminary exclusion criteria.
# get number of exclusions by category
t2.exclusion.ns = t2 %>%
summarise(n_badTraining = length(which(Answer.click1 != "\"correct\"" |
Answer.click2 != "\"correct\"")),
n_badGeneralize = length(which(Answer.Qcheck1 != 0 | Answer.Qcheck2 != 0)),
n_badFilter = length(which(Answer.question1 != 'true' |
Answer.question2 != 'true' |
Answer.question3 != 'true' |
Answer.question4 !='true')))
kable(gather(t2.exclusion.ns,"Exclusion category", "n"))
| Exclusion category | n |
|---|---|
| n_badTraining | 22 |
| n_badGeneralize | 15 |
| n_badFilter | 9 |
# subset data
t2.f = t2 %>%
filter(Answer.click1 == "\"correct\"" &
Answer.click2 == "\"correct\"") %>% # take out those who click on wrong training items
filter(Answer.Qcheck1 == 0 & Answer.Qcheck2 == 0) %>% # take out if missed check generalization question
filter(Answer.question1 != 'true' | Answer.question2 != 'true' |
Answer.question3 == 'true' & Answer.question4 == 'true') # take out those who missed attention check questions
The total after preliminary exclusions is N = 118.
Categorize response patterns based on response-pattern exclusion criterion.
if (sub_strict) {
sub <-t2.f$Answer.Qproper1 == 1 & t2.f$Answer.Qproper2 == 1 &
t2.f$Answer.Qproper3 == 1 & t2.f$Answer.Qsub1 == 1 &
t2.f$Answer.Qsub2 == 1 & t2.f$Answer.Qbasic1 == 0 &
t2.f$Answer.Qbasic2 == 0 & t2.f$Answer.Qbasic3 == 0
} else {
sub <- t2.f$Answer.Qproper1 == 1 & t2.f$Answer.Qproper2 == 1 &
t2.f$Answer.Qproper3 == 1 & (t2.f$Answer.Qsub1 == 1 |
t2.f$Answer.Qsub2 == 1) & t2.f$Answer.Qbasic1 == 0 &
t2.f$Answer.Qbasic2 == 0 & t2.f$Answer.Qbasic3 == 0
}
if (basic_strict){
basic <- t2.f$Answer.Qproper1 == 1 & t2.f$Answer.Qproper2 == 1 &
t2.f$Answer.Qproper3 == 1 & t2.f$Answer.Qsub1 == 1 &
t2.f$Answer.Qsub2 == 1 & t2.f$Answer.Qbasic1 == 1 &
t2.f$Answer.Qbasic2 == 1 & t2.f$Answer.Qbasic3 == 1
} else {
basic <- t2.f$Answer.Qproper1 == 1 & t2.f$Answer.Qproper2 == 1 &
t2.f$Answer.Qproper3 == 1 & t2.f$Answer.Qsub1 == 1 &
t2.f$Answer.Qsub2 == 1 & (t2.f$Answer.Qbasic1 == 1 |
t2.f$Answer.Qbasic2 == 1 | t2.f$Answer.Qbasic3 == 1)
}
t2.f$w.ans.cat <- "other"
t2.f$w.ans.cat[sub] <- "sub"
t2.f$w.ans.cat[basic] <- "basic"
t2.f$w.ans.cat = factor(t2.f$w.ans.cat, levels = c("sub", "basic", "other"))
# get other responeses
others.t2 = filter(t2.f, w.ans.cat == "other")
# filter out other responses
t2.f = t2.f[t2.f$w.ans.cat != "other",]
t2.f$w.ans.cat = droplevels(t2.f$w.ans.cat)
The number of particpants whose generalization strategy could not be catgorized was 28. The final sample was N = 90.
Distribution of final sample of participants across conditions.
final.cond.counts.t2 = t2.f %>%
rename(Condition = Answer.sample) %>%
group_by(Condition) %>%
summarise(n = n())
kable(final.cond.counts.t2)
| Condition | n |
|---|---|
| teacher | 53 |
| learner | 37 |
Get props.
t2.f.props = get_bootstrapped_props(t2.f, "w.ans.cat",
"Turk #2")
t2.f.props %>%
ggplot(aes(x = response.cat, y = prop, fill = Answer.sample)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_linerange(aes(ymin = cill, ymax = ciul), position = position_dodge(.9)) +
ylim(0,1) +
ylab("Prop. participants") +
xlab("Generalization pattern") +
ggtitle("Turk Replication #2") +
themeML +
theme(legend.position = c(.85, .8)) +
scale_fill_brewer(name = "Sampling\nCondition", palette = "Set1")
Stats: Pearson’s Chi-squared test with Yates’ continuity correction
t2_tab = table(t2.f$w.ans.cat, t2.f$Answer.sample)[c("sub", "basic"),]
kable(tidy(chisq.test(t2_tab)))
| statistic | p.value | parameter |
|---|---|---|
| 0.8939538 | 0.3444081 | 1 |
Read in data and pre-process.
l1 <- read.csv("../data/anonymized/inlab_replication.csv")
# make factors
l1 <- colwise(as.factor)(l1)
names(l1)[names(l1) == "sample"] = "Answer.sample"
The total before any exclusions is N = 41.
Distribution of participants in preliminary exclusion criteria.
l1.exclusion.ns = l1 %>%
summarise(n_badTraining = length(
which(t2_a == 0 | t3_a == 0 |t2_b == 0 | t2_b == 0)))
kable(gather(l1.exclusion.ns,"Exclusion category", "n"))
| Exclusion category | n |
|---|---|
| n_badTraining | 1 |
# subset data to those with correct training
l1.f = l1 %>%
filter(t2_a == 1 & t3_a == 1 & t2_b == 1 & t2_b == 1)
The total after preliminary exclusions is N = 40.
Categorize response patterns based on response-pattern exclusion criterion.
if (sub_strict) {
sub <- l1.f$basic1_a == 0 & l1.f$basic2_a == 0 &
l1.f$basic1_b == 0 & l1.f$basic2_b == 0 &
(l1.f$sub1_a == 1 & l1.f$sub2_a == 1 &
l1.f$sub1_b == 1 & l1.f$sub2_b == 1)
} else {
sub <- l1.f$basic1_a == 0 & l1.f$basic2_a == 0 &
l1.f$basic1_b == 0 & l1.f$basic2_b == 0 &
(l1.f$sub1_a == 1 | l1.f$sub2_a == 1) &
(l1.f$sub1_b == 1 | l1.f$sub2_b == 1)
}
if (basic_strict){
basic <- (l1.f$basic1_a == 1 & l1.f$basic2_a == 1 &
l1.f$basic1_b == 1 & l1.f$basic2_b == 1) &
(l1.f$sub1_a == 1 & l1.f$sub2_a == 1 &
l1.f$sub1_b == 1 & l1.f$sub2_b == 1)
} else {
basic <- (l1.f$basic1_a == 1 | l1.f$basic2_a == 1 |
l1.f$basic1_b == 1 | l1.f$basic2_b == 1) &
(l1.f$sub1_a == 1 & l1.f$sub2_a == 1 &
l1.f$sub1_b == 1 & l1.f$sub2_b == 1)
}
l1.f$w.ans.cat <- "other"
l1.f$w.ans.cat[sub] <- "sub"
l1.f$w.ans.cat[basic] <- "basic"
l1.f$w.ans.cat <- factor(l1.f$w.ans.cat, c("sub","basic","other"))
# get other responeses
others.l1 = filter(l1.f, w.ans.cat == "other")
# filter out other responses
l1.f = l1.f[l1.f$w.ans.cat != "other",]
l1.f$w.ans.cat = droplevels(l1.f$w.ans.cat)
The number of particpants whose generalization strategy could not be catgorized was 9. The final sample was N = 31.
Distribution of final sample of participants across conditions.
final.cond.counts.l1 = l1.f %>%
rename(Condition = Answer.sample) %>%
group_by(Condition) %>%
summarise(n = n())
kable(final.cond.counts.l1)
| Condition | n |
|---|---|
| learner | 13 |
| teacher | 18 |
Get props.
l1.f.props = get_bootstrapped_props(l1.f, "w.ans.cat",
"In person")
l1.f.props %>%
ggplot(aes(x=response.cat, y = prop, fill = Answer.sample)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_linerange(aes(ymin=cill, ymax=ciul), position = position_dodge(.9)) +
ylim(0,1) +
ylab("Prop. participants") +
xlab("Generalization pattern") +
ggtitle("In person") +
themeML +
scale_fill_brewer(name = "Sampling\nCondition", palette = "Set1")
Stats: Pearson’s Chi-squared test with Yates’ continuity correction
l1_tab = table(l1.f$w.ans.cat, l1.f$sample)[c("sub", "basic"),]
kable(tidy(chisq.test(l1_tab)))
| statistic | p.value | parameter |
|---|---|---|
| 0.4926903 | 0.4827297 | 1 |
Read in data and pre-process.
t3 <- read.csv("../data/anonymized/turk_replication_3_A.csv")
# make factors
t3$Answer.sample <- factor(t3$Answer.sample,
labels=c('teacher','learner')) # sample0 = teacher, sample1 = learner
t3 <- colwise(as.factor)(t3)
The total before any exclusions is N = 200.
Distribution of participants in preliminary exclusion criteria.
t3.exclusion.ns = t3 %>%
summarise(n_badTraining = length(which(Answer.click1 != "\"correct\"" |
Answer.click2 != "\"correct\"")),
n_badGeneralize = length(which(Answer.Qcheck1 != 0 | Answer.Qcheck2 != 0)),
n_badFilter = length(which(Answer.question1 != 'TRUE' | Answer.question2 != 'TRUE'|
Answer.question3 != 'TRUE' | Answer.question4 != 'TRUE')))
kable(gather(t3.exclusion.ns,"Exclusion category", "n"))
| Exclusion category | n |
|---|---|
| n_badTraining | 27 |
| n_badGeneralize | 17 |
| n_badFilter | 10 |
# subset data
t3.f = t3 %>%
filter(Answer.click1 == "\"correct\"" &
Answer.click2 == "\"correct\"") %>% # take out those who click on wrong training items
filter(Answer.Qcheck1 == 0 & Answer.Qcheck2 == 0) %>% # take out if missed check generalization question
filter(Answer.question1 == 'TRUE' & Answer.question2 == 'TRUE' &
Answer.question3 == 'TRUE' & Answer.question4 == 'TRUE') # take out those who missed filter question
The total after preliminary exclusions is N = 161.
Categorize response patterns based on response-pattern exclusion criterion.
if(sub_strict){
sub <- t3.f$Answer.Qproper1 == 1 & t3.f$Answer.Qproper2 == 1 &
t3.f$Answer.Qproper3 == 1 & t3.f$Answer.Qsub1 == 1 &
t3.f$Answer.Qsub2 == 1 & t3.f$Answer.Qbasic1 == 0 &
t3.f$Answer.Qbasic2 == 0 & t3.f$Answer.Qbasic3 == 0
} else {
sub <- t3.f$Answer.Qproper1 == 1 & t3.f$Answer.Qproper2 == 1 &
t3.f$Answer.Qproper3 == 1 & (t3.f$Answer.Qsub1 == 1 |
t3.f$Answer.Qsub2 == 1) & t3.f$Answer.Qbasic1 == 0 &
t3.f$Answer.Qbasic2 == 0 & t3.f$Answer.Qbasic3 == 0
}
if (basic_strict){
basic <- t3.f$Answer.Qproper1 == 1 & t3.f$Answer.Qproper2 == 1 &
t3.f$Answer.Qproper3 == 1 & t3.f$Answer.Qsub1 == 1 &
t3.f$Answer.Qsub2 == 1 & t3.f$Answer.Qbasic1 == 1 &
t3.f$Answer.Qbasic2 == 1 & t3.f$Answer.Qbasic3 == 1
} else {
basic <- t3.f$Answer.Qproper1 == 1 & t3.f$Answer.Qproper2 == 1 &
t3.f$Answer.Qproper3 == 1 & t3.f$Answer.Qsub1 == 1 &
t3.f$Answer.Qsub2 == 1 & (t3.f$Answer.Qbasic1 == 1 |
t3.f$Answer.Qbasic2 == 1 | t3.f$Answer.Qbasic3 == 1)
}
t3.f$w.ans.cat <- "other"
t3.f$w.ans.cat[sub] <- "sub"
t3.f$w.ans.cat[basic] <- "basic"
t3.f$w.ans.cat = factor(t3.f$w.ans.cat, levels = c("sub", "basic", "other"))
# get other responses
others.t3 = filter(t3.f, w.ans.cat == "other")
# filter out other responses
t3.f = t3.f[t3.f$w.ans.cat != "other",]
t3.f$w.ans.cat = droplevels(t3.f$w.ans.cat)
The number of particpants whose generalization strategy could not be catgorized was 21. The final sample was N = 140.
Distribution of final sample of participants across conditions.
final.cond.counts.t3 = t3.f %>%
rename(Condition = Answer.sample) %>%
group_by(Condition) %>%
summarise(n = n())
kable(final.cond.counts.t3)
| Condition | n |
|---|---|
| teacher | 76 |
| learner | 64 |
Get props.
t3.f.props = get_bootstrapped_props(t3.f, "w.ans.cat",
"Turk #3")
t3.f.props %>%
ggplot(aes(x = response.cat, y = prop, fill = Answer.sample)) +
geom_bar(stat = "identity", position=position_dodge()) +
geom_linerange(aes(ymin = cill, ymax = ciul),
position = position_dodge(.9)) +
ylim(0,1) +
ylab("Prop. participants") +
xlab("Generalization pattern") +
ggtitle("Turk Replication #3") +
themeML +
theme(legend.position = c(.85, .8)) +
scale_fill_brewer(name = "Sampling\nCondition", palette = "Set1")
Stats: Pearson’s Chi-squared test with Yates’ continuity correction
t3_tab = table(t3.f$w.ans.cat, t3.f$Answer.sample)[c("sub", "basic"),]
kable(tidy(chisq.test(t3_tab)))
| statistic | p.value | parameter |
|---|---|---|
| 6.42033 | 0.0112821 | 1 |
Read in data and pre-process.
t4 <- read.csv("../data/anonymized/turk_replication_4_A.csv")
# make factors
t4$Answer.sample <- factor(t4$Answer.sample,
labels = c('teacher','learner')) # sample0 = teacher, sample1 = learner
t4 <- colwise(as.factor)(t4)
The total before any exclusions is N = 500.
Distribution of participants in preliminary exclusion criteria.
t4.exclusion.ns = t4 %>%
summarise(n_badTraining = length(which(Answer.click1 != "\"correct\"" |
Answer.click2 != "\"correct\"")),
n_badGeneralize = length(which(Answer.Qcheck1 != 0 | Answer.Qcheck2 != 0)),
n_badFilter = length(which(Answer.question1 != 'TRUE' | Answer.question2 !='TRUE'|
Answer.question3 != 'TRUE' | Answer.question4 !='TRUE')))
kable(gather(t4.exclusion.ns, "Exclusion category", "n"))
| Exclusion category | n |
|---|---|
| n_badTraining | 63 |
| n_badGeneralize | 40 |
| n_badFilter | 21 |
# subset data
t4.f = t4 %>%
filter(Answer.click1 == "\"correct\"" &
Answer.click2 == "\"correct\"") %>% # take out those who click on wrong training items
filter(Answer.Qcheck1 == 0 & Answer.Qcheck2 == 0) %>% # take out if missed check generalization question
filter(Answer.question1 == 'TRUE' & Answer.question2 == 'TRUE' &
Answer.question3 == 'TRUE' & Answer.question4 == 'TRUE')# take out those who missed filter question
The total after preliminary exclusions is N = 408.
Categorize response patterns based on response-pattern exclusion criterion.
if (sub_strict) {
sub <- t4.f$Answer.Qproper1 == 1 & t4.f$Answer.Qproper2 == 1 &
t4.f$Answer.Qproper3 == 1 & t4.f$Answer.Qsub1 == 1 &
t4.f$Answer.Qsub2 == 1 & t4.f$Answer.Qbasic1 == 0 &
t4.f$Answer.Qbasic2 == 0 & t4.f$Answer.Qbasic3 == 0
} else {
sub <- t4.f$Answer.Qproper1 == 1 & t4.f$Answer.Qproper2 == 1 &
t4.f$Answer.Qproper3 == 1 & (t4.f$Answer.Qsub1 == 1 |
t4.f$Answer.Qsub2 == 1) & t4.f$Answer.Qbasic1 == 0 &
t4.f$Answer.Qbasic2 == 0 & t4.f$Answer.Qbasic3 == 0
}
if (basic_strict){
basic <- t4.f$Answer.Qproper1 == 1 & t4.f$Answer.Qproper2 == 1 &
t4.f$Answer.Qproper3 == 1 & t4.f$Answer.Qsub1 == 1 &
t4.f$Answer.Qsub2 == 1 & t4.f$Answer.Qbasic1 == 1 &
t4.f$Answer.Qbasic2 == 1 & t4.f$Answer.Qbasic3 == 1
} else {
basic <- t4.f$Answer.Qproper1 == 1 & t4.f$Answer.Qproper2 == 1 &
t4.f$Answer.Qproper3 == 1 & t4.f$Answer.Qsub1 == 1 &
t4.f$Answer.Qsub2 == 1 & (t4.f$Answer.Qbasic1 == 1 |
t4.f$Answer.Qbasic2 == 1 | t4.f$Answer.Qbasic3 == 1)
}
t4.f$w.ans.cat <- "other"
t4.f$w.ans.cat[sub] <- "sub"
t4.f$w.ans.cat[basic] <- "basic"
t4.f$w.ans.cat = factor(t4.f$w.ans.cat,
levels = c("sub", "basic", "other"))
# filter out "other" responses
others.t4 = filter(t4.f, w.ans.cat == "other")
# filter out other responses
t4.f = t4.f[t4.f$w.ans.cat != "other",]
t4.f$w.ans.cat = droplevels(t4.f$w.ans.cat)
The number of particpants whose generalization strategy could not be catgorized was 61. The final sample was N = 347.
Distribution of final sample of participants across conditions.
final.cond.counts.t4 = t4.f %>%
rename(Condition = Answer.sample) %>%
group_by(Condition) %>%
summarise(n = n())
kable(final.cond.counts.t4)
| Condition | n |
|---|---|
| teacher | 193 |
| learner | 154 |
Get props.
t4.f.props = get_bootstrapped_props(t4.f, "w.ans.cat",
"Turk #4")
t4.f.props %>%
ggplot(aes(x = response.cat, y = prop, fill = Answer.sample)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_linerange(aes(ymin = cill, ymax = ciul),
position = position_dodge(.9)) +
ylim(0,1) +
ylab("Prop. participants") +
xlab("Generalization pattern") +
ggtitle("Turk Replication #4") +
themeML +
theme(legend.position = c(.85,.8)) +
scale_fill_brewer(name = "Sampling\nCondition", palette = "Set1")
Stats: Pearson’s Chi-squared test with Yates’ continuity correction
t4_tab = table(t4.f$w.ans.cat, t4.f$Answer.sample)[c("sub", "basic"),]
kable(tidy(chisq.test(t4_tab)))
| statistic | p.value | parameter |
|---|---|---|
| 24.49194 | 7e-07 | 1 |
# merge together all raw data
xt_data.adults$exp = "X&T adults"
xt_data.children$exp = "X&T children"
t1.f$exp = "Exp. 1"
t2.f$exp = "Exp. 2"
l1.f$exp = "Exp. 3"
t3.f$exp = "Exp. 4"
t4.f$exp = "Exp. 5"
xt_data.adults$w.ans.cat = xt_data.adults$response
xt_data.children$w.ans.cat = xt_data.children$response
all.data.f = rbind(xt_data.children[,c("Answer.sample", "w.ans.cat", "exp")],
xt_data.adults[,c("Answer.sample", "w.ans.cat", "exp")],
t1.f[,c("Answer.sample", "w.ans.cat", "exp")],
t2.f[,c("Answer.sample", "w.ans.cat", "exp")],
l1.f[,c("Answer.sample", "w.ans.cat", "exp")],
t3.f[,c("Answer.sample", "w.ans.cat", "exp")],
t4.f[,c("Answer.sample", "w.ans.cat", "exp")])
all.data.f <- colwise(as.factor)(all.data.f)
# get props by experiment
all.data.f.props = all.data.f %>%
group_by(exp) %>%
do(get_bootstrapped_props(.,"w.ans.cat", .$exp[1]))
# re-order experiments
all.data.f.props$exp = factor(all.data.f.props$exp,
levels(all.data.f.props$exp)[c(7, 6, 1:5)])
all.data.f.props$response.cat = revalue(all.data.f.props$response.cat,
c("sub" = "sub."))
#pdf("../writeup/figures/FIG_2.pdf", height = 4, width = 12)
all.data.f.props %>%
ggplot(aes(x = response.cat, y = prop,
fill = Answer.sample)) +
geom_bar(stat = "identity",
position = position_dodge()) +
facet_grid(. ~ exp) +
geom_rect(data = all.data.f.props[21:28,],
fill = "gray",
xmin = -Inf, xmax = Inf, ymin = -Inf,
ymax = Inf, alpha = 0.07) +
geom_linerange(aes(ymin = cill, ymax = ciul), position = position_dodge(.9)) +
ylim(0, 1) +
ylab("Prop. Participants") +
xlab("Generalization Pattern") +
themeML +
scale_fill_brewer(name = "Sampling\nCondition",
palette = "Set1")
#dev.off()
# spread props and n_cond
all.data.f.props.basic = all.data.f.props %>%
filter(response.cat == "basic") %>%
select(-cill, -ciul, -response.cat, -prop, -n) %>%
spread(Answer.sample, n_cond, fill = F) %>%
rename(teacher_n = teacher, learner_n = learner)
all.data.f.props.basic = all.data.f.props %>%
filter(response.cat == "basic") %>%
select(-cill, -ciul, -response.cat,
-n_cond, -n) %>%
spread(Answer.sample, prop, fill = F) %>%
rename(teacher_prop = teacher,
learner_prop = learner) %>%
left_join(all.data.f.props.basic)
# get all effect sizes (compute.es package)
all.es = propes(all.data.f.props.basic$learner_prop,
all.data.f.props.basic$teacher_prop,
all.data.f.props.basic$learner_n,
all.data.f.props.basic$teacher_n,
verbose = F)
all.es$exp = all.data.f.props.basic$exp # add exp ids
Fixed effects
# metafor package
fixed.effects.d = rma(d, var.d, data = all.es, method = "FE")
fixed.effects.d
##
## Fixed-Effects Model (k = 7)
##
## Test for Heterogeneity:
## Q(df = 6) = 9.8747, p-val = 0.1300
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.5257 0.0888 5.9211 <.0001 0.3517 0.6998 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Random effects
random.effects.d = rma(d, var.d, data = all.es)
random.effects.d
##
## Random-Effects Model (k = 7; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 0.0375 (SE = 0.0596)
## tau (square root of estimated tau^2 value): 0.1936
## I^2 (total heterogeneity / total variability): 36.89%
## H^2 (total variability / sampling variability): 1.58
##
## Test for Heterogeneity:
## Q(df = 6) = 9.8747, p-val = 0.1300
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.5333 0.1273 4.1880 <.0001 0.2837 0.7829 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#pdf("../writeup/figures/FIG_3.pdf", height = 5, width = 8)
par(cex = 1, font = 1)
forest(random.effects.d,
slab = all.es$exp,
mlab = "All",
xlab = "Cohen's d")
par(font = 2)
text(-3.6, 8.55, "Experiment")
text(5.8, 8.55, "Cohen's d [95% CI]")
addpoly(random.effects.d, row = -1, cex = .75,
annotate = F, col = "red", mlab = "", efac = 2)
#dev.off()
Fixed effects
fixed.effects.d.sub= rma(d, var.d, data = all.es[c(-3:-5),], method = "FE")
fixed.effects.d.sub
##
## Fixed-Effects Model (k = 4)
##
## Test for Heterogeneity:
## Q(df = 3) = 2.2724, p-val = 0.5178
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.7209 0.1151 6.2620 <.0001 0.4952 0.9465 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Random effects
random.effects.d.sub = rma(d, var.d, data = all.es[c(-3:-5),])
random.effects.d.sub
##
## Random-Effects Model (k = 4; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 0 (SE = 0.0476)
## tau (square root of estimated tau^2 value): 0
## I^2 (total heterogeneity / total variability): 0.00%
## H^2 (total variability / sampling variability): 1.00
##
## Test for Heterogeneity:
## Q(df = 3) = 2.2724, p-val = 0.5178
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.7209 0.1151 6.2620 <.0001 0.4952 0.9465 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#pdf("../writeup/figures/FIG_4.pdf", height = 5, width = 8)
par(cex = 1, font = 1)
forest(random.effects.d.sub,
slab = all.es[c(-3:-5),]$exp,
mlab = "All",
xlab = "Cohen's d")
par(font = 2)
text(-2.5, 5.8, "Experiment")
text(5, 5.8, "Cohen's d [95% CI]")
addpoly(random.effects.d.sub, row = -1, cex = .75,
annotate = F, col = "red", mlab = "", efac = 2)
#dev.off()
ES based on X&T adults and Exp 1-4
# Note: Here we use r to estimate effect size, which is formally equivalent to phi. Phi is formally equivalent to omega for 2 x 2 tables, which is the effect size estimator used in the pwr.chisq.test function. Elsewhere, we work with effect sizes using d.
# fixed effects
fixed.effects = rma(r, var.r, data = all.es[2:6,], method = "FE")
power.analysis = pwr.chisq.test(w = fixed.effects$b,
df = 1,
sig.level = 0.05,
power = .95)
power.analysis
##
## Chi squared power calculation
##
## w = 0.215
## N = 281.1186
## df = 1
## sig.level = 0.05
## power = 0.95
##
## NOTE: N is the number of observations
print(paste("N for .95 power (fixed effects):",
power.analysis$N))
## [1] "N for .95 power (fixed effects): 281.118636803261"
print(paste("Extra: ", .35 * power.analysis$N))
## [1] "Extra: 98.3915228811413"
# random effects
random.effects = rma(r, var.r, data = all.es[2:6,])
power.analysis = pwr.chisq.test(w = random.effects$b,
df = 1,
sig.level = 0.05,
power = .95)
power.analysis
##
## Chi squared power calculation
##
## w = 0.2264005
## N = 253.5198
## df = 1
## sig.level = 0.05
## power = 0.95
##
## NOTE: N is the number of observations
print(paste("N for .95 power (random effects):",
power.analysis$N))
## [1] "N for .95 power (random effects): 253.519827834981"
print(paste("Extra: ", .35 * power.analysis$N))
## [1] "Extra: 88.7319397422435"
Thus, by the most conservative estimate (criteria: basic_strict & sub_strict = T; fixed effect), we need a N of 363 to achieve 95% power. With approximately 35% data loss, this means we need to run an extra 127 to achieve this power. This is approximately 500 participants.
One possible reason for the difference between our observed effect sizes and the original is the presence of moderators. There are two notable differences between our experiments and the original: the online nature of several of our experiments (“person effect”), and the different stimuli used in Experiments 1-3 relative to Experiments 4 and 5 (“stimuli effect”). Here we test whether these moderators are reliable, and the number of participants that would be needed in an additional experiment that would allows us to directly test the role of this moderator.
# Welch's t-test
welch_t_test <- function(d1, d2, s1, s2, n1, n2){ # s = variance
t = (d1 - d2)/sqrt((s1/n1) + (s2/n2))
df = ((s1/n1) + (s2/n2))^2 / (((s1/n1)^2/(n1-1)) + ((s2/n2)^2/(n2-1)))
p = 2*(1-pt(abs(t),df))
results = c(t, df, p)
names(results) = c("t", "df", "p")
results
}
## compare Exp 1 and 2 (online) to Exp. 3 (in lab)
# Exp. 1 and 2
mod = predict(rma(d, var.d, data = all.es[3:4,])) # FE vs. RE doesn't matter.
d1 = mod$pred
n1 = (all.es[all.es$exp == "Exp. 1", "N.total"] +
all.es[all.es$exp == "Exp. 2", "N.total"])/2 # mean(exp1, exp2)
s1 = mod$se^2
# Exp. 3
d2 = all.es[all.es$exp == "Exp. 3", "d"]
n2 = all.es[all.es$exp == "Exp. 3", "N.total"]
s2 = all.es[all.es$exp == "Exp. 3", "var.d"]
# compare
welch_t_test(d1, d2, s1, s2, n1, n2)
## t df p
## -3.061776631 31.598338647 0.004467422
Get person effect using Exp 3 vs. 1 and 2.
min_person_effect = all.es[all.es$exp == "Exp. 3", "d"] -
all.es[all.es$exp == "Exp. 2", "d"]
max_person_effect = all.es[all.es$exp == "Exp. 3", "d"] -
all.es[all.es$exp == "Exp. 1", "d"]
Get sample size of experiment 4 and 5
N_4_and_5 = all.es[all.es$exp == "Exp. 4", "N.total"] +
all.es[all.es$exp == "Exp. 5", "N.total"]
Do power analysis to estimate Exp. 6 n, based on the min effect size estimate.
try(pwr.t2n.test(d = min_person_effect, n1 = N_4_and_5, power = .8))
Do power analysis to estimate Exp. 6 n, based on the max effect size estimate.
pwr.t2n.test(d = max_person_effect, n1 = N_4_and_5, power = .8)
##
## t test power calculation
##
## n1 = 487
## n2 = 126.5189
## d = 0.28
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
#basic_strict & sub_strict = T; max_person_effect) 80.4
#basic_strict & sub_strict = T; min_person_effect) 45
#basic_strict & sub_strict = F; max_person_effect) NA
#basic_strict & sub_strict = F; min_person_effect) NA
#basic_strict = T & sub_strict = F; max_person_effect) 212.707
#basic_strict = T & sub_strict = F; min_person_effect) 1185.991
#basic_strict = F & sub_strict = T; max_person_effect) 126.51
#basic_strict = F & sub_strict = T; min_person_effect) NA
min_E6_n = 45 + (.25 * 45)
max_E6_n = 126.51+ (.25 * 126.51)
Depending on the criteria used, we would need somewhere between 45 - 1186 participants to detect a person effect. Assuming 25% data loss (based on Exp. 3 experiments), we would need approximatiely 60-160 participants. Under the criteria reported in the paper (“liberal”; basic_strict = F & sub_strict = T), we would need approximately 160 participants, again assuming 25% data loss.
response.patterns.t1 = others.t1 %>%
select(workerids, Answer.sample, c(13,14,21,23)) %>%
mutate(pattern = paste(Answer.Qwsmm1, Answer.Qwsmm2,
Answer.Qwsm1, Answer.Qwsm2, sep = "")) %>%
mutate(pattern_basic = paste(Answer.Qwsmm1, Answer.Qwsmm2, sep = "")) %>%
mutate(pattern_basic_null = ifelse(pattern_basic == "00", "no basic, inconsistent sub.",
"some basic, inconsistent sub.")) %>%
select(workerids, Answer.sample, pattern, pattern_basic, pattern_basic_null) %>%
mutate(pattern = as.factor(pattern))
pattern.sum.t1 = response.patterns.t1 %>%
group_by(Answer.sample, pattern_basic_null) %>%
summarise(n = n()) %>%
mutate(prop = n/sum(n))%>%
mutate(exp = "Exp. 1")
response.patterns.t2 = others.t2 %>%
select(workerids, Answer.sample, c(2,3,5,6,7,15,16,17)) %>%
mutate(pattern = paste(Answer.Qbasic1, Answer.Qbasic2,
Answer.Qbasic3, Answer.Qsub1, Answer.Qsub2, Answer.Qproper1,
Answer.Qproper2, Answer.Qproper3, sep = "")) %>%
mutate(pattern_basic = paste(Answer.Qbasic1, Answer.Qbasic2,
Answer.Qbasic3, sep = "")) %>%
mutate(pattern_basic_null = ifelse(pattern_basic == "000", "no basic, inconsistent sub.",
"some basic, inconsistent sub.")) %>%
select(workerids, Answer.sample, pattern, pattern_basic, pattern_basic_null) %>%
mutate(pattern = as.factor(pattern))
pattern.sum.t2 = response.patterns.t2 %>%
group_by(Answer.sample, pattern_basic_null) %>%
summarise(n = n()) %>%
mutate(prop = n/sum(n)) %>%
mutate(exp = "Exp. 2")
response.patterns.l1 = others.l1 %>%
select(subjno, Answer.sample, c(10,12:15,17:19)) %>%
mutate(pattern = paste(basic1_a, basic1_b, basic2_a, basic2_b,
sub1_a, sub1_b, sub2_a, sub2_b, sep = "")) %>%
mutate(pattern_basic = paste(basic1_a, basic1_b, basic2_a, basic2_b, sep = "")) %>%
mutate(pattern_basic_null = ifelse(pattern_basic == "0000", "no basic, inconsistent sub.",
"some basic, inconsistent sub.")) %>%
select(subjno, Answer.sample, pattern, pattern_basic, pattern_basic_null) %>%
mutate(pattern = as.factor(pattern))
pattern.sum.l1 = response.patterns.l1 %>%
group_by(Answer.sample, pattern_basic_null) %>%
summarise(n = n()) %>%
mutate(prop = n/sum(n)) %>%
mutate(exp = "Exp. 3")
response.patterns.t3 = others.t3 %>%
select(workerids, Answer.sample, c(18,20:27)) %>%
mutate(pattern = paste(Answer.Qbasic1, Answer.Qbasic2,
Answer.Qbasic3, Answer.Qsub1, Answer.Qsub2, Answer.Qproper1,
Answer.Qproper2, Answer.Qproper3, sep = "")) %>%
mutate(pattern_basic = paste(Answer.Qbasic1, Answer.Qbasic2,
Answer.Qbasic3, sep = "")) %>%
mutate(pattern_basic_null = ifelse(pattern_basic == "000", "no basic, inconsistent sub.",
"some basic, inconsistent sub.")) %>%
select(workerids, Answer.sample, pattern, pattern_basic, pattern_basic_null) %>%
mutate(pattern = as.factor(pattern))
pattern.sum.t3 = response.patterns.t3 %>%
group_by(Answer.sample, pattern_basic_null) %>%
summarise(n = n()) %>%
mutate(prop = n/sum(n)) %>%
mutate(exp = "Exp. 4")
response.patterns.t4 = others.t4 %>%
select(workerids, Answer.sample, c(18,20:27)) %>%
mutate(pattern = paste(Answer.Qbasic1, Answer.Qbasic2,
Answer.Qbasic3, Answer.Qsub1,
Answer.Qsub2, Answer.Qproper1,
Answer.Qproper2, Answer.Qproper3, sep = "")) %>%
mutate(pattern_basic = paste(Answer.Qbasic1, Answer.Qbasic2,
Answer.Qbasic3, sep = "")) %>%
mutate(pattern_basic_null = ifelse(pattern_basic == "000", "no basic, inconsistent sub.",
"some basic, inconsistent sub.")) %>%
select(workerids, Answer.sample, pattern, pattern_basic, pattern_basic_null) %>%
mutate(pattern = as.factor(pattern))
pattern.sum.t4 = response.patterns.t4 %>%
group_by(Answer.sample, pattern_basic_null) %>%
summarise(n = n()) %>%
mutate(prop = n/sum(n)) %>%
mutate(exp = "Exp. 5")
#Exp.1: [basic-1, basic-2, sub-1, sub-2]
#Exp. 2, 4 & 5: [basic-1, basic-2, basic-3, sub-1, sub-2, proper-1, proper-2, proper-3]
#Exp. 3: [basic-1a, basic-1b, basic-2a, basic-2b, sub-1a, sub-1b, sub-2a, sub-2b ]
all.pattern_basic_null = rbind(pattern.sum.t1, pattern.sum.t2,
pattern.sum.l1, pattern.sum.t3, pattern.sum.t4)
all.pattern_basic_null %>%
group_by(pattern_basic_null) %>%
summarise(n = sum(n)) %>%
mutate(prop = n/sum(n))
## Source: local data frame [2 x 3]
##
## pattern_basic_null n prop
## (chr) (int) (dbl)
## 1 no basic, inconsistent sub. 188 0.94949495
## 2 some basic, inconsistent sub. 10 0.05050505
ggplot(all.pattern_basic_null, aes(x = Answer.sample,
y= prop,
fill = pattern_basic_null)) +
geom_bar(stat = "identity", position = "stack") +
facet_grid(.~exp) +
scale_fill_discrete(name="") +
ylab("Prop excluded participants") +
xlab("Condition") +
ggtitle("Exclusion patterns") +
scale_fill_brewer(palette = "Set1") +
theme(legend.position="bottom", legend.title=element_blank()) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
themeML