This document was created from an R Markdown file. The R Markdown file can be found here. All analyses and plots can be reproduced from the raw data with the code in this file. Experiments conducted online can be viewed here.

A note on exclusions: There are several possible criteria for categorizing a participant as a subordinate or basic-level generalizer. X&T find that participants respond consistently across questions as either a subordinate or a basic generalizer for each of the two trials. In contrast, we observed varied responses across questions by participants in our sample. Thus, we adopt two criteria in analyzing our data: a “basic strict” criteria where we only include participants if they responded consistently (“yes” or “no” to all basic-level questions and “yes” or “no” to all subordinate level questions, “basic strict” = T), and a “liberal” criteria where we counted a particpant as a basic generalizer if they responded “yes” to any of the basic level questions (“basic strict” = F). The results of the liberal criteria are reported in the Main Text, and the results of the strict criteria are reported in Appendix A. In a set of post-hoc analyses, we also analyzed the data by varying the exclusion criteria for consistency in the subordinate category (“subordinate strict”).

## [1] "Basic strict criteria:  FALSE"
## [1] "Subordinate strict criteria:  TRUE"



Experimental Data

X&T (2007b)

Get data - adults.

N_adults = 14
N_per_condition = N_adults/2  # 2 conditions (teacher + learner)
# Note: The proportions described in the original report were obtained by aggregating across *trials* not participants (2 trials/participant). We correct for this here by taking the proportion of participants, which results in slightly different proportions than those reported in the original paper.

# make raw data
prop_sub_teacher = .928 
n_sub_teacher = round(prop_sub_teacher * N_per_condition)
n_basic_teacher = N_per_condition - n_sub_teacher 
teacher_xt_data = as.factor(c(rep("sub", n_sub_teacher),
                              rep("basic", n_basic_teacher)))

prop_sub_learner = .357
n_sub_learner = round(prop_sub_learner * N_per_condition)
n_basic_learner = N_per_condition - n_sub_learner
learner_xt_data = as.factor(c(rep("sub", n_sub_learner), 
                              rep("basic", n_basic_learner)))

xt_data.adults = data.frame(response = c(as.character(teacher_xt_data),
                                       as.character(learner_xt_data)))
xt_data.adults$Answer.sample[1:N_per_condition] = "teacher"
xt_data.adults$Answer.sample[(N_per_condition + 1):
                              (N_per_condition * 2)] = "learner"

Get props.

# get props
xt.a.props = get_bootstrapped_props(xt_data.adults, "response",
                                    "X&T (2007b) adults")

Get data - children.

N_child = 24
N_per_condition = N_child /2 # 2 conditions

# make raw data
prop_sub_teacher = .71
n_sub_teacher = round(prop_sub_teacher * N_per_condition)
n_basic_teacher = N_per_condition - n_sub_teacher 
teacher_xt_data = as.factor(c(rep("sub", n_sub_teacher),
                              rep("basic", n_basic_teacher)))

prop_sub_learner = .29
n_sub_learner = round(prop_sub_learner * N_per_condition)
n_basic_learner = N_per_condition - n_sub_learner
learner_xt_data = as.factor(c(rep("sub", n_sub_learner), 
                              rep("basic", n_basic_learner)))

xt_data.children = data.frame(response = c(as.character(teacher_xt_data),
                                       as.character(learner_xt_data)))
xt_data.children$Answer.sample[1:N_per_condition] = "teacher"
xt_data.children$Answer.sample[(N_per_condition + 1):
                               (N_per_condition * 2)] = "learner"

Get props.

xt.c.props = get_bootstrapped_props(xt_data.children,
                                    "response", "X&T (2007b) children")

N = 14 adults and 24 children in this study.

xt = rbind(xt.c.props, xt.a.props)

# re-order levels so consistent with other exps
xt$Answer.sample <- ordered(xt$Answer.sample, 
                    levels = c("teacher", "learner"))

ggplot(xt, aes(x = response.cat, y = prop, fill = Answer.sample)) +
  facet_grid(. ~ exp) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_linerange(aes(ymin = cill, ymax = ciul), position = position_dodge(.9)) +
  ylim(0,1) +
  ylab("Prop. participants") +
  xlab("Generalization pattern") +
  scale_fill_brewer(name = "Sampling\nCondition", palette = "Set1") +
  ggtitle("Original Xu and Tenenbaum Data") +
  themeML  

Stats: Pearson’s Chi-squared test with Yates’ continuity correction

Adults.

xta_tab = table(xt_data.adults$Answer.sample, xt_data.adults$response)
kable(tidy(chisq.test(xta_tab)))
statistic p.value parameter
2.625 0.1051925 1

Children.

xtc_tab = table(xt_data.children$Answer.sample, xt_data.children$response)
kable(tidy(chisq.test(xtc_tab)))
statistic p.value parameter
4.166667 0.0412268 1

Exp. 1

Read in data and pre-process.

t1 = read.csv("../data/anonymized/turk_replication_1_A.csv")

# make factors
t1$Answer.sample <- factor(t1$Answer.sample, labels=c('teacher','learner')) # sample0 = teacher, sample1 = learner
t1$Answer.label <- factor(t1$Answer.label, labels=c('nolabel','label')) # 0 = nolabel, 1 = label
t1 <- colwise(as.factor)(t1)

Distribution of participants in preliminary exclusion criteria.

# get number of exclusions by category
t1.exclusion.ns = t1 %>%
              summarise(n_noLabelParticipants = length(which(Answer.label != 'label')),
                        n_repeatWorkers = length(which(duplicated(workerids))),
                        n_badTraining = length(which(Answer.click1 == "\"false\"" | 
                                                       Answer.click2 == "\"false\"")),
                        n_badGeneralize = length(which(Answer.Qwcheck != 0)),
                        n_badFilter = length(which(Answer.question1 == 'FALSE')))

kable(gather(t1.exclusion.ns,"Exclusion category", "n"))
Exclusion category n
n_noLabelParticipants 48
n_repeatWorkers 8
n_badTraining 21
n_badGeneralize 8
n_badFilter 0
total_n = dim(t1)[1] - t1.exclusion.ns[1,"n_noLabelParticipants"] - t1.exclusion.ns[1,"n_repeatWorkers"]

# subset data
t1.f = t1 %>%
       filter(Answer.label == 'label') %>% #remove nolabel participants (run 4)
       filter(!duplicated(workerids)) %>% #participants who completed multiple runs
       filter(Answer.click1 == "\"correct\"" & 
                Answer.click2 == "\"correct\"") %>% #correct training items (learning only)
       filter(Answer.Qwcheck == 0)  %>% #check generalization question
       filter(Answer.question1 == 'TRUE') #filter question

Some participants were in another, not analyzed condition (“noLabelParticipants”). The total number of participants before any relevant exclusions is N = 294. After preliminary exclusions, the total number of participants is N = 274.

Categorize response patterns based on response-pattern exclusion criterion.

if (sub_strict){
  sub <- t1.f$Answer.Qwsmm1 == 0 & t1.f$Answer.Qwsmm2 == 0 & 
  t1.f$Answer.Qwsm1 == 1 & t1.f$Answer.Qwsm2 == 1
} else {
  sub <- t1.f$Answer.Qwsmm1 == 0 & t1.f$Answer.Qwsmm2 == 0 & 
  (t1.f$Answer.Qwsm1 == 1 | t1.f$Answer.Qwsm2 == 1)
}

if (basic_strict){
  basic <- (t1.f$Answer.Qwsmm1 == 1 & t1.f$Answer.Qwsmm2 == 1) &
      t1.f$Answer.Qwsm1 == 1 & t1.f$Answer.Qwsm2 == 1 
} else {
  basic <- (t1.f$Answer.Qwsmm1 == 1 | t1.f$Answer.Qwsmm2 == 1) &
       t1.f$Answer.Qwsm1 == 1 & t1.f$Answer.Qwsm2 == 1 
}

t1.f$w.ans.cat <- "other"
t1.f$w.ans.cat[sub] <- "sub"
t1.f$w.ans.cat[basic] <- "basic"
t1.f$w.ans.cat = factor(t1.f$w.ans.cat, levels = c("sub", "basic", "other"))

# get other responeses
others.t1 = filter(t1.f, w.ans.cat == "other")

# filter out other responses
t1.f = t1.f[t1.f$w.ans.cat != "other",]
t1.f$w.ans.cat = droplevels(t1.f$w.ans.cat)

The number of particpants whose generalization strategy could not be catgorized was 79. The final sample was N = 195.

Distribution of final sample of participants across conditions.

final.cond.counts.t1 = t1.f %>%
  rename(Condition = Answer.sample) %>%
  group_by(Condition) %>%
  summarise(n = n())
  
kable(final.cond.counts.t1)
Condition n
teacher 94
learner 101

Get props.

t1.f.props = get_bootstrapped_props(t1.f, "w.ans.cat",
                                    "Turk #1")
t1.f.props %>%
  ggplot(aes(x = response.cat, y = prop, fill = Answer.sample)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_linerange(aes(ymin = cill, ymax = ciul), position = position_dodge(.9)) +
  ylim(0,1) +
  ylab("Prop. participants") +
  xlab("Generalization pattern") +
  ggtitle("Turk Replication #1") +
  theme(legend.position=c(.85,.8)) + 
  themeML  +
  scale_fill_brewer(name = "Sampling\nCondition", palette = "Set1")

Stats: Pearson’s Chi-squared test with Yates’ continuity correction

t1_tab = table(t1.f$w.ans.cat, t1.f$Answer.sample)[c("sub", "basic"),]
kable(tidy(chisq.test(t1_tab)))
statistic p.value parameter
0.6245227 0.4293716 1

Exp. 2

Read in data and pre-process.

t2 <- read.csv("../data/anonymized/turk_replication_2_A.csv", header = T)
t2  <- t2 [,c(-1:-19,-21:-32,-36,-41:-43,-54:-55,-56)] # remove unnecessary columns

# make factors
t2$Answer.sample <- factor(t2$Answer.sample,
                           labels = c('teacher','learner')) # sample0 = teacher, sample1 = learner
t2 <- colwise(as.factor)(t2)

The total before any exclusions is N = 150.

Distribution of participants in preliminary exclusion criteria.

# get number of exclusions by category
t2.exclusion.ns = t2 %>%
              summarise(n_badTraining = length(which(Answer.click1 != "\"correct\"" |
                                                       Answer.click2 != "\"correct\"")),
                        n_badGeneralize = length(which(Answer.Qcheck1 != 0 | Answer.Qcheck2 != 0)),
                        n_badFilter = length(which(Answer.question1 != 'true' | 
                                                     Answer.question2 != 'true' | 
                                                     Answer.question3 != 'true' | 
                                                     Answer.question4 !='true')))
kable(gather(t2.exclusion.ns,"Exclusion category", "n"))
Exclusion category n
n_badTraining 22
n_badGeneralize 15
n_badFilter 9
# subset data
t2.f = t2 %>%
       filter(Answer.click1 == "\"correct\"" &
                Answer.click2 == "\"correct\"") %>% # take out those who click on wrong training items
      filter(Answer.Qcheck1 == 0 & Answer.Qcheck2 == 0) %>% # take out if missed check generalization question
      filter(Answer.question1 != 'true' | Answer.question2 != 'true' | 
               Answer.question3 == 'true' & Answer.question4 == 'true')  # take out those who missed attention check questions

The total after preliminary exclusions is N = 118.

Categorize response patterns based on response-pattern exclusion criterion.

if (sub_strict) {
  sub <-t2.f$Answer.Qproper1 == 1 & t2.f$Answer.Qproper2 == 1 & 
   t2.f$Answer.Qproper3 == 1 & t2.f$Answer.Qsub1 == 1 &
   t2.f$Answer.Qsub2 == 1 & t2.f$Answer.Qbasic1 == 0 &
   t2.f$Answer.Qbasic2 == 0 & t2.f$Answer.Qbasic3 == 0 
} else {
   sub <-  t2.f$Answer.Qproper1 == 1 & t2.f$Answer.Qproper2 == 1 & 
   t2.f$Answer.Qproper3 == 1 & (t2.f$Answer.Qsub1 == 1 |
   t2.f$Answer.Qsub2 == 1) & t2.f$Answer.Qbasic1 == 0 &
   t2.f$Answer.Qbasic2 == 0 & t2.f$Answer.Qbasic3 == 0 
}

if (basic_strict){
  basic <- t2.f$Answer.Qproper1 == 1 & t2.f$Answer.Qproper2 == 1 & 
    t2.f$Answer.Qproper3 == 1 & t2.f$Answer.Qsub1 == 1 &
    t2.f$Answer.Qsub2 == 1 & t2.f$Answer.Qbasic1 == 1 &
    t2.f$Answer.Qbasic2 == 1 & t2.f$Answer.Qbasic3 == 1 
} else {
  basic <- t2.f$Answer.Qproper1 == 1 & t2.f$Answer.Qproper2 == 1 & 
    t2.f$Answer.Qproper3 == 1 & t2.f$Answer.Qsub1 == 1 &
    t2.f$Answer.Qsub2 == 1 & (t2.f$Answer.Qbasic1 == 1 |
    t2.f$Answer.Qbasic2 == 1 | t2.f$Answer.Qbasic3 == 1)
}

t2.f$w.ans.cat <- "other"
t2.f$w.ans.cat[sub] <- "sub"
t2.f$w.ans.cat[basic] <- "basic"
t2.f$w.ans.cat = factor(t2.f$w.ans.cat, levels = c("sub", "basic", "other"))

# get other responeses
others.t2 = filter(t2.f, w.ans.cat == "other")

# filter out other responses
t2.f = t2.f[t2.f$w.ans.cat != "other",]
t2.f$w.ans.cat = droplevels(t2.f$w.ans.cat)

The number of particpants whose generalization strategy could not be catgorized was 28. The final sample was N = 90.

Distribution of final sample of participants across conditions.

final.cond.counts.t2 = t2.f %>%
  rename(Condition = Answer.sample) %>%
  group_by(Condition) %>%
  summarise(n = n())
  
kable(final.cond.counts.t2)
Condition n
teacher 53
learner 37

Get props.

t2.f.props = get_bootstrapped_props(t2.f, "w.ans.cat",
                                    "Turk #2")
t2.f.props %>%
  ggplot(aes(x = response.cat, y = prop, fill = Answer.sample)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_linerange(aes(ymin = cill, ymax = ciul), position = position_dodge(.9)) +
  ylim(0,1) +
  ylab("Prop. participants") +
  xlab("Generalization pattern") +
  ggtitle("Turk Replication #2") +
  themeML + 
  theme(legend.position = c(.85, .8)) + 
  scale_fill_brewer(name = "Sampling\nCondition", palette = "Set1")

Stats: Pearson’s Chi-squared test with Yates’ continuity correction

t2_tab = table(t2.f$w.ans.cat, t2.f$Answer.sample)[c("sub", "basic"),]
kable(tidy(chisq.test(t2_tab)))
statistic p.value parameter
0.8939538 0.3444081 1

Exp. 3

Read in data and pre-process.

l1 <- read.csv("../data/anonymized/inlab_replication.csv")

# make factors
l1 <- colwise(as.factor)(l1)
names(l1)[names(l1) == "sample"] = "Answer.sample"

The total before any exclusions is N = 41.

Distribution of participants in preliminary exclusion criteria.

l1.exclusion.ns = l1 %>%
                  summarise(n_badTraining = length(
                    which(t2_a == 0 | t3_a == 0 |t2_b == 0 | t2_b == 0)))
kable(gather(l1.exclusion.ns,"Exclusion category", "n"))
Exclusion category n
n_badTraining 1
# subset data to those with correct training
l1.f = l1 %>%
       filter(t2_a == 1 & t3_a == 1 & t2_b == 1 & t2_b == 1)

The total after preliminary exclusions is N = 40.

Categorize response patterns based on response-pattern exclusion criterion.

if (sub_strict) {
  sub <- l1.f$basic1_a == 0 & l1.f$basic2_a == 0 & 
       l1.f$basic1_b == 0 & l1.f$basic2_b == 0 & 
       (l1.f$sub1_a == 1 & l1.f$sub2_a == 1 & 
          l1.f$sub1_b == 1 & l1.f$sub2_b == 1)
} else {
    sub <- l1.f$basic1_a == 0 & l1.f$basic2_a == 0 & 
       l1.f$basic1_b == 0 & l1.f$basic2_b == 0 & 
       (l1.f$sub1_a == 1 | l1.f$sub2_a == 1) &
          (l1.f$sub1_b == 1 | l1.f$sub2_b == 1)
}
if (basic_strict){
    basic <- (l1.f$basic1_a == 1 & l1.f$basic2_a == 1 &
            l1.f$basic1_b == 1 & l1.f$basic2_b == 1) & 
           (l1.f$sub1_a == 1 & l1.f$sub2_a == 1 & 
              l1.f$sub1_b == 1 & l1.f$sub2_b == 1)
  
} else {
  basic <- (l1.f$basic1_a == 1 | l1.f$basic2_a == 1 | 
            l1.f$basic1_b == 1 | l1.f$basic2_b == 1) & 
           (l1.f$sub1_a == 1 & l1.f$sub2_a == 1 & 
              l1.f$sub1_b == 1 & l1.f$sub2_b == 1)
}

l1.f$w.ans.cat <- "other"
l1.f$w.ans.cat[sub] <- "sub"
l1.f$w.ans.cat[basic] <- "basic"
l1.f$w.ans.cat <- factor(l1.f$w.ans.cat, c("sub","basic","other"))

# get other responeses
others.l1 = filter(l1.f, w.ans.cat == "other")

# filter out other responses
l1.f = l1.f[l1.f$w.ans.cat != "other",]
l1.f$w.ans.cat = droplevels(l1.f$w.ans.cat)

The number of particpants whose generalization strategy could not be catgorized was 9. The final sample was N = 31.

Distribution of final sample of participants across conditions.

final.cond.counts.l1 = l1.f %>%
  rename(Condition = Answer.sample) %>%
  group_by(Condition) %>%
  summarise(n = n())
  
kable(final.cond.counts.l1)
Condition n
learner 13
teacher 18

Get props.

l1.f.props = get_bootstrapped_props(l1.f, "w.ans.cat",
                                    "In person")
l1.f.props %>%
  ggplot(aes(x=response.cat, y = prop, fill = Answer.sample)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_linerange(aes(ymin=cill, ymax=ciul), position = position_dodge(.9)) +
  ylim(0,1) +
  ylab("Prop. participants") +
  xlab("Generalization pattern") +
  ggtitle("In person") +
  themeML + 
  scale_fill_brewer(name = "Sampling\nCondition", palette = "Set1")

Stats: Pearson’s Chi-squared test with Yates’ continuity correction

l1_tab = table(l1.f$w.ans.cat, l1.f$sample)[c("sub", "basic"),]
kable(tidy(chisq.test(l1_tab)))
statistic p.value parameter
0.4926903 0.4827297 1

Exp. 4

Read in data and pre-process.

t3 <- read.csv("../data/anonymized/turk_replication_3_A.csv")

# make factors
t3$Answer.sample <- factor(t3$Answer.sample, 
                           labels=c('teacher','learner')) # sample0 = teacher, sample1 = learner
t3 <- colwise(as.factor)(t3)

The total before any exclusions is N = 200.

Distribution of participants in preliminary exclusion criteria.

t3.exclusion.ns = t3 %>%
              summarise(n_badTraining = length(which(Answer.click1 != "\"correct\"" | 
                                                       Answer.click2 != "\"correct\"")),
                        n_badGeneralize = length(which(Answer.Qcheck1 != 0 | Answer.Qcheck2 != 0)),
                        n_badFilter = length(which(Answer.question1 != 'TRUE' | Answer.question2 != 'TRUE'| 
                                Answer.question3 != 'TRUE' | Answer.question4 != 'TRUE')))

kable(gather(t3.exclusion.ns,"Exclusion category", "n"))
Exclusion category n
n_badTraining 27
n_badGeneralize 17
n_badFilter 10
# subset data
t3.f = t3 %>%
      filter(Answer.click1 == "\"correct\"" & 
                Answer.click2 == "\"correct\"") %>% # take out those who click on wrong training items
      filter(Answer.Qcheck1 == 0 & Answer.Qcheck2 == 0) %>% # take out if missed check generalization question
      filter(Answer.question1 == 'TRUE' & Answer.question2 == 'TRUE' & 
            Answer.question3 == 'TRUE' & Answer.question4 == 'TRUE') # take out those who missed filter question

The total after preliminary exclusions is N = 161.

Categorize response patterns based on response-pattern exclusion criterion.

if(sub_strict){
  sub <-  t3.f$Answer.Qproper1 == 1 & t3.f$Answer.Qproper2 == 1 & 
    t3.f$Answer.Qproper3 == 1 & t3.f$Answer.Qsub1 == 1 &
    t3.f$Answer.Qsub2 == 1 & t3.f$Answer.Qbasic1 == 0 &
    t3.f$Answer.Qbasic2 == 0 & t3.f$Answer.Qbasic3 == 0 
} else {
    sub <-  t3.f$Answer.Qproper1 == 1 & t3.f$Answer.Qproper2 == 1 & 
    t3.f$Answer.Qproper3 == 1 & (t3.f$Answer.Qsub1 == 1 |
    t3.f$Answer.Qsub2 == 1) & t3.f$Answer.Qbasic1 == 0 &
    t3.f$Answer.Qbasic2 == 0 & t3.f$Answer.Qbasic3 == 0 
}

if (basic_strict){
  basic <- t3.f$Answer.Qproper1 == 1 & t3.f$Answer.Qproper2 == 1 & 
    t3.f$Answer.Qproper3 == 1 & t3.f$Answer.Qsub1 == 1 &
    t3.f$Answer.Qsub2 == 1 & t3.f$Answer.Qbasic1 == 1 &
    t3.f$Answer.Qbasic2 == 1 & t3.f$Answer.Qbasic3 == 1 
} else {
  basic <- t3.f$Answer.Qproper1 == 1 & t3.f$Answer.Qproper2 == 1 & 
      t3.f$Answer.Qproper3 == 1 & t3.f$Answer.Qsub1 == 1 &
      t3.f$Answer.Qsub2 == 1 & (t3.f$Answer.Qbasic1 == 1 |
      t3.f$Answer.Qbasic2 == 1 | t3.f$Answer.Qbasic3 == 1)
}

t3.f$w.ans.cat <- "other"
t3.f$w.ans.cat[sub] <- "sub"
t3.f$w.ans.cat[basic] <- "basic"
t3.f$w.ans.cat = factor(t3.f$w.ans.cat, levels = c("sub", "basic", "other"))

# get other responses
others.t3 = filter(t3.f, w.ans.cat == "other")

# filter out other responses
t3.f = t3.f[t3.f$w.ans.cat != "other",]
t3.f$w.ans.cat = droplevels(t3.f$w.ans.cat)

The number of particpants whose generalization strategy could not be catgorized was 21. The final sample was N = 140.

Distribution of final sample of participants across conditions.

final.cond.counts.t3 = t3.f %>%
  rename(Condition = Answer.sample) %>%
  group_by(Condition) %>%
  summarise(n = n())
  
kable(final.cond.counts.t3)
Condition n
teacher 76
learner 64

Get props.

t3.f.props = get_bootstrapped_props(t3.f, "w.ans.cat", 
                                    "Turk #3")
t3.f.props %>%
  ggplot(aes(x = response.cat, y = prop, fill = Answer.sample)) +
  geom_bar(stat = "identity", position=position_dodge()) +
  geom_linerange(aes(ymin = cill, ymax = ciul), 
                 position = position_dodge(.9)) +
  ylim(0,1) +
  ylab("Prop. participants") +
  xlab("Generalization pattern") +
  ggtitle("Turk Replication #3") +
  themeML + 
  theme(legend.position = c(.85, .8)) + 
  scale_fill_brewer(name = "Sampling\nCondition", palette = "Set1")

Stats: Pearson’s Chi-squared test with Yates’ continuity correction

t3_tab = table(t3.f$w.ans.cat, t3.f$Answer.sample)[c("sub", "basic"),]
kable(tidy(chisq.test(t3_tab)))
statistic p.value parameter
6.42033 0.0112821 1

Exp. 5

Read in data and pre-process.

t4 <- read.csv("../data/anonymized/turk_replication_4_A.csv")

# make factors
t4$Answer.sample <- factor(t4$Answer.sample, 
                           labels = c('teacher','learner')) # sample0 = teacher, sample1 = learner
t4 <- colwise(as.factor)(t4)

The total before any exclusions is N = 500.

Distribution of participants in preliminary exclusion criteria.

t4.exclusion.ns = t4 %>%
              summarise(n_badTraining = length(which(Answer.click1 != "\"correct\"" | 
                                                       Answer.click2 != "\"correct\"")),
                        n_badGeneralize = length(which(Answer.Qcheck1 != 0 | Answer.Qcheck2 != 0)),
                        n_badFilter = length(which(Answer.question1 != 'TRUE' | Answer.question2 !='TRUE'| 
                                Answer.question3 != 'TRUE' | Answer.question4 !='TRUE')))
kable(gather(t4.exclusion.ns, "Exclusion category", "n"))
Exclusion category n
n_badTraining 63
n_badGeneralize 40
n_badFilter 21
# subset data
t4.f = t4 %>%
      filter(Answer.click1 == "\"correct\"" & 
                Answer.click2 == "\"correct\"") %>% # take out those who click on wrong training items
      filter(Answer.Qcheck1 == 0 & Answer.Qcheck2 == 0) %>% # take out if missed check generalization question
      filter(Answer.question1 == 'TRUE' & Answer.question2 == 'TRUE' & 
            Answer.question3 == 'TRUE' & Answer.question4 == 'TRUE')# take out those who missed filter question

The total after preliminary exclusions is N = 408.

Categorize response patterns based on response-pattern exclusion criterion.

if (sub_strict) {
  sub <-  t4.f$Answer.Qproper1 == 1 & t4.f$Answer.Qproper2 == 1 & 
    t4.f$Answer.Qproper3 == 1 & t4.f$Answer.Qsub1 == 1 &
    t4.f$Answer.Qsub2 == 1 & t4.f$Answer.Qbasic1 == 0 &
    t4.f$Answer.Qbasic2 == 0 & t4.f$Answer.Qbasic3 == 0 
} else {
    sub <-  t4.f$Answer.Qproper1 == 1 & t4.f$Answer.Qproper2 == 1 & 
    t4.f$Answer.Qproper3 == 1 & (t4.f$Answer.Qsub1 == 1 |
    t4.f$Answer.Qsub2 == 1) & t4.f$Answer.Qbasic1 == 0 &
    t4.f$Answer.Qbasic2 == 0 & t4.f$Answer.Qbasic3 == 0 
}

if (basic_strict){
  basic <- t4.f$Answer.Qproper1 == 1 & t4.f$Answer.Qproper2 == 1 & 
    t4.f$Answer.Qproper3 == 1 & t4.f$Answer.Qsub1 == 1 &
    t4.f$Answer.Qsub2 == 1 & t4.f$Answer.Qbasic1 == 1 &
    t4.f$Answer.Qbasic2 == 1 & t4.f$Answer.Qbasic3 == 1 
} else {
  basic <- t4.f$Answer.Qproper1 == 1 & t4.f$Answer.Qproper2 == 1 & 
      t4.f$Answer.Qproper3 == 1 & t4.f$Answer.Qsub1 == 1 &
      t4.f$Answer.Qsub2 == 1 & (t4.f$Answer.Qbasic1 == 1 |
      t4.f$Answer.Qbasic2 == 1 | t4.f$Answer.Qbasic3 == 1)
}

t4.f$w.ans.cat <- "other"
t4.f$w.ans.cat[sub] <- "sub"
t4.f$w.ans.cat[basic] <- "basic"
t4.f$w.ans.cat = factor(t4.f$w.ans.cat, 
                        levels = c("sub", "basic", "other"))

# filter out "other" responses
others.t4 = filter(t4.f, w.ans.cat == "other")

# filter out other responses
t4.f = t4.f[t4.f$w.ans.cat != "other",]
t4.f$w.ans.cat = droplevels(t4.f$w.ans.cat)

The number of particpants whose generalization strategy could not be catgorized was 61. The final sample was N = 347.

Distribution of final sample of participants across conditions.

final.cond.counts.t4 = t4.f %>%
  rename(Condition = Answer.sample) %>%
  group_by(Condition) %>%
  summarise(n = n())
  
kable(final.cond.counts.t4)
Condition n
teacher 193
learner 154

Get props.

t4.f.props = get_bootstrapped_props(t4.f, "w.ans.cat",
                                    "Turk #4")
t4.f.props %>%
  ggplot(aes(x = response.cat, y = prop, fill = Answer.sample)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_linerange(aes(ymin = cill, ymax = ciul), 
                 position = position_dodge(.9)) +
  ylim(0,1) +
  ylab("Prop. participants") +
  xlab("Generalization pattern") +
  ggtitle("Turk Replication #4") +
  themeML + 
  theme(legend.position = c(.85,.8)) + 
  scale_fill_brewer(name = "Sampling\nCondition", palette = "Set1")

Stats: Pearson’s Chi-squared test with Yates’ continuity correction

t4_tab = table(t4.f$w.ans.cat, t4.f$Answer.sample)[c("sub", "basic"),]
kable(tidy(chisq.test(t4_tab)))
statistic p.value parameter
24.49194 7e-07 1



Meta-Analytic Synthesis

All proportions

# merge together all raw data 
xt_data.adults$exp = "X&T adults"
xt_data.children$exp = "X&T children"
t1.f$exp = "Exp. 1"
t2.f$exp = "Exp. 2"
l1.f$exp = "Exp. 3"
t3.f$exp = "Exp. 4"
t4.f$exp = "Exp. 5"
xt_data.adults$w.ans.cat = xt_data.adults$response
xt_data.children$w.ans.cat = xt_data.children$response

all.data.f = rbind(xt_data.children[,c("Answer.sample", "w.ans.cat", "exp")],
                  xt_data.adults[,c("Answer.sample", "w.ans.cat", "exp")],
                   t1.f[,c("Answer.sample", "w.ans.cat", "exp")],
                   t2.f[,c("Answer.sample", "w.ans.cat", "exp")],
                   l1.f[,c("Answer.sample", "w.ans.cat", "exp")],
                   t3.f[,c("Answer.sample", "w.ans.cat", "exp")],
                   t4.f[,c("Answer.sample", "w.ans.cat", "exp")])
all.data.f <- colwise(as.factor)(all.data.f)

# get props by experiment
all.data.f.props = all.data.f %>%
                    group_by(exp) %>%
                    do(get_bootstrapped_props(.,"w.ans.cat", .$exp[1])) 

# re-order experiments
all.data.f.props$exp = factor(all.data.f.props$exp,
                           levels(all.data.f.props$exp)[c(7, 6, 1:5)]) 
all.data.f.props$response.cat  = revalue(all.data.f.props$response.cat,
                                         c("sub" = "sub."))

#pdf("../writeup/figures/FIG_2.pdf", height = 4, width = 12)
all.data.f.props %>%
  ggplot(aes(x = response.cat, y = prop, 
                             fill = Answer.sample)) +
  geom_bar(stat = "identity", 
           position = position_dodge()) +
  facet_grid(. ~ exp) +
  geom_rect(data = all.data.f.props[21:28,],
            fill = "gray", 
            xmin = -Inf, xmax = Inf, ymin = -Inf,
            ymax = Inf, alpha = 0.07) +
  geom_linerange(aes(ymin = cill, ymax = ciul), position = position_dodge(.9)) +
  ylim(0, 1) +
  ylab("Prop. Participants") +
  xlab("Generalization Pattern") +
  themeML + 
  scale_fill_brewer(name = "Sampling\nCondition", 
                    palette = "Set1") 

#dev.off()

Grand effect size estimate

# spread props and n_cond
all.data.f.props.basic = all.data.f.props %>%
                        filter(response.cat == "basic") %>%
                        select(-cill, -ciul, -response.cat, -prop, -n) %>%
                        spread(Answer.sample, n_cond, fill = F) %>%
                        rename(teacher_n = teacher, learner_n = learner)

all.data.f.props.basic = all.data.f.props  %>% 
                        filter(response.cat == "basic") %>%
                        select(-cill, -ciul, -response.cat,
                               -n_cond, -n) %>%
                        spread(Answer.sample, prop, fill = F) %>%
                        rename(teacher_prop = teacher,
                               learner_prop = learner) %>%
                        left_join(all.data.f.props.basic)

# get all effect sizes (compute.es package)
all.es = propes(all.data.f.props.basic$learner_prop, 
           all.data.f.props.basic$teacher_prop, 
           all.data.f.props.basic$learner_n,
           all.data.f.props.basic$teacher_n,
           verbose = F)

all.es$exp = all.data.f.props.basic$exp # add exp ids

All experiments

Fixed effects

# metafor package
fixed.effects.d = rma(d, var.d, data = all.es, method = "FE")
fixed.effects.d
## 
## Fixed-Effects Model (k = 7)
## 
## Test for Heterogeneity: 
## Q(df = 6) = 9.8747, p-val = 0.1300
## 
## Model Results:
## 
## estimate       se     zval     pval    ci.lb    ci.ub          
##   0.5257   0.0888   5.9211   <.0001   0.3517   0.6998      *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Random effects

random.effects.d = rma(d, var.d, data = all.es)
random.effects.d
## 
## Random-Effects Model (k = 7; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 0.0375 (SE = 0.0596)
## tau (square root of estimated tau^2 value):      0.1936
## I^2 (total heterogeneity / total variability):   36.89%
## H^2 (total variability / sampling variability):  1.58
## 
## Test for Heterogeneity: 
## Q(df = 6) = 9.8747, p-val = 0.1300
## 
## Model Results:
## 
## estimate       se     zval     pval    ci.lb    ci.ub          
##   0.5333   0.1273   4.1880   <.0001   0.2837   0.7829      *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#pdf("../writeup/figures/FIG_3.pdf", height = 5, width = 8)
par(cex = 1, font = 1)
forest(random.effects.d, 
       slab = all.es$exp,
       mlab = "All", 
       xlab = "Cohen's d")
par(font = 2)
text(-3.6, 8.55, "Experiment")
text(5.8, 8.55, "Cohen's d [95% CI]")

addpoly(random.effects.d, row = -1, cex = .75, 
        annotate = F,  col = "red", mlab = "", efac = 2)

#dev.off()

Experiments with “good” stimuli only

Fixed effects

fixed.effects.d.sub= rma(d, var.d, data = all.es[c(-3:-5),], method = "FE")
fixed.effects.d.sub
## 
## Fixed-Effects Model (k = 4)
## 
## Test for Heterogeneity: 
## Q(df = 3) = 2.2724, p-val = 0.5178
## 
## Model Results:
## 
## estimate       se     zval     pval    ci.lb    ci.ub          
##   0.7209   0.1151   6.2620   <.0001   0.4952   0.9465      *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Random effects

random.effects.d.sub = rma(d, var.d, data = all.es[c(-3:-5),])
random.effects.d.sub
## 
## Random-Effects Model (k = 4; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 0 (SE = 0.0476)
## tau (square root of estimated tau^2 value):      0
## I^2 (total heterogeneity / total variability):   0.00%
## H^2 (total variability / sampling variability):  1.00
## 
## Test for Heterogeneity: 
## Q(df = 3) = 2.2724, p-val = 0.5178
## 
## Model Results:
## 
## estimate       se     zval     pval    ci.lb    ci.ub          
##   0.7209   0.1151   6.2620   <.0001   0.4952   0.9465      *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#pdf("../writeup/figures/FIG_4.pdf", height = 5, width = 8)
par(cex = 1, font = 1)
forest(random.effects.d.sub, 
       slab = all.es[c(-3:-5),]$exp,
       mlab = "All", 
       xlab = "Cohen's d")
par(font = 2)
text(-2.5, 5.8, "Experiment")
text(5, 5.8, "Cohen's d [95% CI]")

addpoly(random.effects.d.sub, row = -1, cex = .75, 
        annotate = F,  col = "red", mlab = "", efac = 2)

#dev.off()

Power analysis for Exp. 5

ES based on X&T adults and Exp 1-4

# Note: Here we use r to estimate effect size, which is formally equivalent to phi. Phi is formally equivalent to omega for 2 x 2 tables, which is the effect size estimator used in the pwr.chisq.test function. Elsewhere, we work with effect sizes using d. 

# fixed effects
fixed.effects = rma(r, var.r, data = all.es[2:6,], method = "FE")
power.analysis = pwr.chisq.test(w = fixed.effects$b,
                                df = 1, 
                                sig.level = 0.05,
                                power = .95)
power.analysis
## 
##      Chi squared power calculation 
## 
##               w = 0.215
##               N = 281.1186
##              df = 1
##       sig.level = 0.05
##           power = 0.95
## 
## NOTE: N is the number of observations
print(paste("N for .95 power (fixed effects):",
            power.analysis$N))
## [1] "N for .95 power (fixed effects): 281.118636803261"
print(paste("Extra: ", .35 * power.analysis$N))
## [1] "Extra:  98.3915228811413"
# random effects
random.effects = rma(r, var.r, data = all.es[2:6,])
power.analysis = pwr.chisq.test(w = random.effects$b, 
                                df = 1, 
                                sig.level = 0.05, 
                                power = .95)
power.analysis
## 
##      Chi squared power calculation 
## 
##               w = 0.2264005
##               N = 253.5198
##              df = 1
##       sig.level = 0.05
##           power = 0.95
## 
## NOTE: N is the number of observations
print(paste("N for .95 power (random effects):",
            power.analysis$N))
## [1] "N for .95 power (random effects): 253.519827834981"
print(paste("Extra: ", .35 * power.analysis$N))
## [1] "Extra:  88.7319397422435"

Thus, by the most conservative estimate (criteria: basic_strict & sub_strict = T; fixed effect), we need a N of 363 to achieve 95% power. With approximately 35% data loss, this means we need to run an extra 127 to achieve this power. This is approximately 500 participants.



Additonal Analyses

Moderators

One possible reason for the difference between our observed effect sizes and the original is the presence of moderators. There are two notable differences between our experiments and the original: the online nature of several of our experiments (“person effect”), and the different stimuli used in Experiments 1-3 relative to Experiments 4 and 5 (“stimuli effect”). Here we test whether these moderators are reliable, and the number of participants that would be needed in an additional experiment that would allows us to directly test the role of this moderator.

Person effect – Does online vs. in lab matter?

# Welch's t-test
welch_t_test <- function(d1, d2, s1, s2, n1, n2){ # s = variance
  t = (d1 - d2)/sqrt((s1/n1) + (s2/n2))
  df = ((s1/n1) + (s2/n2))^2 / (((s1/n1)^2/(n1-1)) + ((s2/n2)^2/(n2-1)))
  p = 2*(1-pt(abs(t),df))
  results = c(t, df, p)
  names(results) = c("t", "df", "p")
  
  results
}

## compare Exp 1 and 2 (online) to Exp. 3 (in lab)
# Exp. 1 and 2
mod = predict(rma(d, var.d, data = all.es[3:4,])) # FE vs. RE doesn't matter.

d1 = mod$pred  
n1 = (all.es[all.es$exp == "Exp. 1", "N.total"] +
        all.es[all.es$exp == "Exp. 2", "N.total"])/2 # mean(exp1, exp2)
s1 = mod$se^2

# Exp. 3
d2 = all.es[all.es$exp == "Exp. 3", "d"]  
n2 = all.es[all.es$exp == "Exp. 3", "N.total"]  
s2 = all.es[all.es$exp == "Exp. 3", "var.d"]

# compare
welch_t_test(d1, d2, s1, s2, n1, n2)
##            t           df            p 
## -3.061776631 31.598338647  0.004467422

Person effect power – How many additional participants would we need to detect this difference?

Get person effect using Exp 3 vs. 1 and 2.

min_person_effect = all.es[all.es$exp == "Exp. 3", "d"] - 
  all.es[all.es$exp == "Exp. 2", "d"] 

max_person_effect = all.es[all.es$exp == "Exp. 3", "d"] - 
  all.es[all.es$exp == "Exp. 1", "d"] 

Get sample size of experiment 4 and 5

N_4_and_5 = all.es[all.es$exp == "Exp. 4", "N.total"] + 
  all.es[all.es$exp == "Exp. 5", "N.total"] 

Do power analysis to estimate Exp. 6 n, based on the min effect size estimate.

try(pwr.t2n.test(d = min_person_effect, n1 = N_4_and_5, power = .8))

Do power analysis to estimate Exp. 6 n, based on the max effect size estimate.

pwr.t2n.test(d = max_person_effect, n1 = N_4_and_5, power = .8)
## 
##      t test power calculation 
## 
##              n1 = 487
##              n2 = 126.5189
##               d = 0.28
##       sig.level = 0.05
##           power = 0.8
##     alternative = two.sided
#basic_strict & sub_strict = T; max_person_effect) 80.4
#basic_strict & sub_strict = T; min_person_effect) 45
#basic_strict & sub_strict = F; max_person_effect) NA
#basic_strict & sub_strict = F; min_person_effect) NA
#basic_strict = T & sub_strict = F; max_person_effect) 212.707
#basic_strict = T & sub_strict = F; min_person_effect) 1185.991
#basic_strict = F & sub_strict = T; max_person_effect)  126.51
#basic_strict  = F & sub_strict = T; min_person_effect) NA

min_E6_n = 45 + (.25  * 45)
max_E6_n = 126.51+ (.25 * 126.51)

Depending on the criteria used, we would need somewhere between 45 - 1186 participants to detect a person effect. Assuming 25% data loss (based on Exp. 3 experiments), we would need approximatiely 60-160 participants. Under the criteria reported in the paper (“liberal”; basic_strict = F & sub_strict = T), we would need approximately 160 participants, again assuming 25% data loss.

Pattern of “other” responses

response.patterns.t1 = others.t1 %>%
  select(workerids, Answer.sample, c(13,14,21,23)) %>%
  mutate(pattern = paste(Answer.Qwsmm1,  Answer.Qwsmm2, 
                         Answer.Qwsm1, Answer.Qwsm2, sep = "")) %>%
  mutate(pattern_basic = paste(Answer.Qwsmm1,  Answer.Qwsmm2, sep = "")) %>%
  mutate(pattern_basic_null = ifelse(pattern_basic == "00", "no basic, inconsistent sub.", 
                                     "some basic, inconsistent sub.")) %>%
  select(workerids, Answer.sample, pattern, pattern_basic, pattern_basic_null) %>%
  mutate(pattern = as.factor(pattern))

pattern.sum.t1 = response.patterns.t1 %>%
  group_by(Answer.sample, pattern_basic_null) %>%
  summarise(n = n()) %>%
  mutate(prop = n/sum(n))%>%
  mutate(exp = "Exp. 1")

response.patterns.t2 = others.t2 %>%
  select(workerids, Answer.sample, c(2,3,5,6,7,15,16,17)) %>%
  mutate(pattern = paste(Answer.Qbasic1,  Answer.Qbasic2, 
                         Answer.Qbasic3, Answer.Qsub1, Answer.Qsub2, Answer.Qproper1,
                         Answer.Qproper2, Answer.Qproper3, sep = "")) %>%
  mutate(pattern_basic = paste(Answer.Qbasic1,  Answer.Qbasic2, 
                         Answer.Qbasic3, sep = "")) %>%
  mutate(pattern_basic_null = ifelse(pattern_basic == "000", "no basic, inconsistent sub.", 
                                     "some basic, inconsistent sub.")) %>%
  select(workerids, Answer.sample, pattern, pattern_basic, pattern_basic_null) %>%
  mutate(pattern = as.factor(pattern))

pattern.sum.t2 = response.patterns.t2 %>%
  group_by(Answer.sample, pattern_basic_null) %>%
  summarise(n = n()) %>%
  mutate(prop = n/sum(n)) %>%
  mutate(exp = "Exp. 2")

response.patterns.l1 = others.l1 %>%
  select(subjno, Answer.sample, c(10,12:15,17:19)) %>%
  mutate(pattern = paste(basic1_a, basic1_b, basic2_a, basic2_b,
                         sub1_a, sub1_b, sub2_a, sub2_b, sep = "")) %>%
    mutate(pattern_basic = paste(basic1_a, basic1_b, basic2_a, basic2_b, sep = "")) %>%
   mutate(pattern_basic_null = ifelse(pattern_basic == "0000", "no basic, inconsistent sub.", 
                                     "some basic, inconsistent sub.")) %>%
  select(subjno, Answer.sample, pattern, pattern_basic, pattern_basic_null) %>%
  mutate(pattern = as.factor(pattern))

pattern.sum.l1 = response.patterns.l1 %>%
  group_by(Answer.sample, pattern_basic_null) %>%
  summarise(n = n()) %>%
  mutate(prop = n/sum(n)) %>%
  mutate(exp = "Exp. 3")

response.patterns.t3 = others.t3 %>%
  select(workerids, Answer.sample, c(18,20:27)) %>%
  mutate(pattern = paste(Answer.Qbasic1,  Answer.Qbasic2, 
                         Answer.Qbasic3, Answer.Qsub1, Answer.Qsub2, Answer.Qproper1,
                         Answer.Qproper2, Answer.Qproper3, sep = "")) %>%
  mutate(pattern_basic = paste(Answer.Qbasic1,  Answer.Qbasic2, 
                         Answer.Qbasic3, sep = "")) %>%
  mutate(pattern_basic_null = ifelse(pattern_basic == "000", "no basic, inconsistent sub.", 
                                     "some basic, inconsistent sub.")) %>%
  select(workerids, Answer.sample, pattern, pattern_basic, pattern_basic_null) %>%
  mutate(pattern = as.factor(pattern))

pattern.sum.t3 = response.patterns.t3 %>%
  group_by(Answer.sample, pattern_basic_null) %>%
  summarise(n = n()) %>%
  mutate(prop = n/sum(n)) %>%
  mutate(exp = "Exp. 4")

response.patterns.t4 = others.t4 %>%
  select(workerids, Answer.sample, c(18,20:27)) %>%
  mutate(pattern = paste(Answer.Qbasic1,  Answer.Qbasic2, 
                         Answer.Qbasic3, Answer.Qsub1, 
                         Answer.Qsub2, Answer.Qproper1,
                         Answer.Qproper2, Answer.Qproper3, sep = "")) %>%
  mutate(pattern_basic = paste(Answer.Qbasic1,  Answer.Qbasic2, 
                         Answer.Qbasic3, sep = "")) %>%
  mutate(pattern_basic_null = ifelse(pattern_basic == "000", "no basic, inconsistent sub.", 
                                     "some basic, inconsistent sub.")) %>%
  select(workerids, Answer.sample, pattern, pattern_basic, pattern_basic_null) %>%
  mutate(pattern = as.factor(pattern))

pattern.sum.t4 = response.patterns.t4 %>%
  group_by(Answer.sample, pattern_basic_null) %>%
  summarise(n = n()) %>%
  mutate(prop = n/sum(n)) %>%
  mutate(exp = "Exp. 5")

#Exp.1: [basic-1, basic-2, sub-1, sub-2]
#Exp. 2, 4 & 5: [basic-1, basic-2, basic-3, sub-1, sub-2, proper-1, proper-2, proper-3]
#Exp. 3: [basic-1a, basic-1b, basic-2a, basic-2b, sub-1a, sub-1b, sub-2a, sub-2b ]

all.pattern_basic_null = rbind(pattern.sum.t1, pattern.sum.t2, 
                               pattern.sum.l1, pattern.sum.t3, pattern.sum.t4)

all.pattern_basic_null %>%
  group_by(pattern_basic_null) %>%
  summarise(n = sum(n)) %>%
  mutate(prop = n/sum(n)) 
## Source: local data frame [2 x 3]
## 
##              pattern_basic_null     n       prop
##                           (chr) (int)      (dbl)
## 1   no basic, inconsistent sub.   188 0.94949495
## 2 some basic, inconsistent sub.    10 0.05050505
ggplot(all.pattern_basic_null, aes(x = Answer.sample, 
                                   y= prop,
                                   fill = pattern_basic_null)) + 
  geom_bar(stat = "identity", position = "stack") +
  facet_grid(.~exp) +
  scale_fill_discrete(name="") +
  ylab("Prop excluded participants") +
  xlab("Condition") +
  ggtitle("Exclusion patterns") +
  scale_fill_brewer(palette = "Set1") +
  theme(legend.position="bottom", legend.title=element_blank()) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  themeML