Xu and Tenenbaum 2007b Replication

Bayesian models of word learning predict that learners use rational statistical inference to identify the meaning of a new word. Under these models, learners infer a distribution over all possible word meanings based on observing a limited set of exemplars of a word. One prediction of this account is that different causal models underlying the generation of data might lead to different inferences about word meaning. These different causal models (formally instantiated in the model’s likelihood) are known as sampling conditions. Under ‘weak sampling,’ examples are understood to be drawn randomly from all possible examples. In contrast, under ‘strong sampling,’ examples are understood to be drawn only from the concept’s extension. Critically, the model predicts that learners should generalize more narrowly under strong sampling than under weak sampling.

In the present studies, we explored these predictions by replicating Xu and Tenenbaum (2007b) in a manipulation of the communicative context of word learning. In the weak sampling condition, the naive learner guesses about the training set of exemplars. In the strong sampling condition, a knowledgeable teacher identifies a training set of exemplars. As predicted by the model, Xu and Tenenbaum (2007b) found that both adults and children overwhelmingly generalized more narrowly under strong sampling, than under weak sampling. We completed two online and an in-lab replication of this study, and found evidence for a pattern similar to the original data, but with far more variability across participant than observed in the original study.

Studies:
     Turk #1
     Turk #2
     In lab

Turk Replication #1

Read in data and pre-process.

Filter.

# subset data
d1 <- subset(d1, d1$Answer.label == 'label') # take out nolabel trials (study 4)
d1 <- subset(d1, !duplicated(d1$workerid)) # take out those who did it twice
d1 <- subset(d1, Answer.click1 == "\"correct\"" & Answer.click2 == "\"correct\"") # take out if selected wrong training items
d1 <- subset(d1, Answer.Qwcheck == 0) # take out if missed check generalization question
d1 <- subset(d1, Answer.question1 == 'true') # take out those who missed filter question

Categorize response patterns based on criterion.

criterion = "unstrict1"  # category criterion

d1 <- d1[d1$Answer.label== "label",]
numsubj = dim(d1)[1]

if (criterion == "strict"){
  proper <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 & 
    (d1$Answer.Qwsm1 == 0 & d1$Answer.Qwsm2 == 0)
  sub <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 & 
    d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1
  basic <- (d1$Answer.Qwsmm1 == 1 & d1$Answer.Qwsmm2 == 1) &
    d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1 
  
} else if (criterion == "unstrict2"){
  proper <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 & 
    (d1$Answer.Qwsm1 == 0 & d1$Answer.Qwsm2 == 0)
  sub <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 & 
    (d1$Answer.Qwsm1 == 1 | d1$Answer.Qwsm2 == 1)
  basic <- (d1$Answer.Qwsmm1 == 1 | d1$Answer.Qwsmm2 == 1) &
    (d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1)
  
} else if (criterion == "unstrict1"){
  proper <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 & 
    (d1$Answer.Qwsm1 == 0 | d1$Answer.Qwsm2 == 0)
  sub <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 & 
    d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1
  basic <- (d1$Answer.Qwsmm1 == 1 | d1$Answer.Qwsmm2 == 1) &
    d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1     
  
} else if(criterion == "x&t"){
  sub <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 & 
    d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1
  basic <- (d1$Answer.Qwsmm1 == 1 & d1$Answer.Qwsmm2 == 1) &
    d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1         
}
d1$w.ans.cat <- "other"
d1$w.ans.cat[sub] <- "sub"
d1$w.ans.cat[basic] <- "basic"
d1$w.ans.cat[proper] <- "proper"
d1$w.ans.cat = factor(d1$w.ans.cat, levels=c("proper", "sub", "basic", "other"))

# get proportions and bootstrapped CIS
M <- data.frame()

for (k in 1:length(levels(d1$w.ans.cat))){
    M = rbind(M, ddply(d1, .(Answer.sample), 
                     function (d) {getPs(d$w.ans.cat, levels(d$w.ans.cat)[k])}))
}

Plot.

Turk Replication #2

Read in data and pre-process

Filter.

# subset data
d2 <- subset(d2, Answer.click1 == "\"correct\"" & Answer.click2 == "\"correct\"") # take out those who click on wrong training items
d2 <- subset(d2, Answer.Qcheck1 == 0 & Answer.Qcheck2 == 0) # take out if missed check generalization question
d2 <- subset(d2, Answer.question1 == 'true' & Answer.question2 =='true' & 
  Answer.question3 == 'true' & Answer.question4=='true' ) # take out those who missed filter question

Categorize response patterns based on criterion.

criterion = "unstrictold" 
numsubj = dim(d2)[1]

if (criterion == "unstrictold"){
  proper <- (d2$Answer.Qproper1 == 1 & d2$Answer.Qproper2 == 1 & 
    d2$Answer.Qproper3 == 1) & d2$Answer.Qsub1 == 0 &
        d2$Answer.Qsub2 == 0 & d2$Answer.Qbasic1 == 0 &
        d2$Answer.Qbasic2 == 0 & d2$Answer.Qbasic3 == 0 
    sub <-  d2$Answer.Qproper1 == 1 & d2$Answer.Qproper2 == 1 & 
        d2$Answer.Qproper3 == 1 & d2$Answer.Qsub1 == 1 &
        d2$Answer.Qsub2 == 1 & d2$Answer.Qbasic1 == 0 &
        d2$Answer.Qbasic2 == 0 & d2$Answer.Qbasic3 == 0 
    basic <- d2$Answer.Qproper1 == 1 & d2$Answer.Qproper2 == 1 & 
        d2$Answer.Qproper3 == 1 & d2$Answer.Qsub1 == 1 &
        d2$Answer.Qsub2 == 1 & d2$Answer.Qbasic1 == 1 &
        d2$Answer.Qbasic2 == 1 & d2$Answer.Qbasic3 == 1 
} else if (criterion == "unstrictnew") { 
    proper <- (d2$Answer.Qproper1 == 1 & d2$Answer.Qproper2 == 1 & 
        d2$Answer.Qproper3 == 1) & (d2$Answer.Qsub1 == 0 |
        d2$Answer.Qsub2 == 0) & d2$Answer.Qbasic1 == 0 &
        d2$Answer.Qbasic2 == 0 & d2$Answer.Qbasic3 == 0 
    sub <-  d2$Answer.Qproper1 == 1 & d2$Answer.Qproper2 == 1 & 
        d2$Answer.Qproper3 == 1 & d2$Answer.Qsub1 == 1 &
        d2$Answer.Qsub2 == 1 & d2$Answer.Qbasic1 == 0 &
        d2$Answer.Qbasic2 == 0 & d2$Answer.Qbasic3 == 0 
    basic <- d2$Answer.Qproper1 == 1 & d2$Answer.Qproper2 == 1 & 
        d2$Answer.Qproper3 == 1 & d2$Answer.Qsub1 == 1 &
        d2$Answer.Qsub2 == 1 & (d2$Answer.Qbasic1 == 1 |
        d2$Answer.Qbasic2 == 1 | d2$Answer.Qbasic3 == 1)
    
}

d2$w.ans.cat <- "other"
d2$w.ans.cat[proper] <- "proper"
d2$w.ans.cat[sub] <- "sub"
d2$w.ans.cat[basic] <- "basic"
d2$w.ans.cat <- factor(d2$w.ans.cat,c("proper","sub","basic","other"))

# get proportions and bootstrapped CIS
M2 <- data.frame()
for (k in 1:length(levels(d2$w.ans.cat))){
    M2 = rbind(M2, ddply(d2, .(Answer.sample), 
                     function (d) {getPs(d$w.ans.cat, levels(d$w.ans.cat)[k])}))
}

Plot.

In Lab Replication

l1 <- read.csv("../data/inlab_replication.csv")

proper <- l1$basic1_a == 0 & l1$basic2_a == 0 & 
  l1$basic1_b == 0 & l1$basic2_b == 0 & 
  (l1$sub1_a == 0 | l1$sub2_a == 0 | l1$sub1_b == 0 | l1$sub2_b == 0)
sub <- l1$basic1_a == 0 & l1$basic2_a == 0 & 
  l1$basic1_b == 0 & l1$basic2_b == 0 & 
  (l1$sub1_a == 1 & l1$sub2_a == 1 & l1$sub1_b == 1 & l1$sub2_b == 1)
basic <- (l1$basic1_a == 1 | l1$basic2_a == 1 | 
            l1$basic1_b == 1 | l1$basic2_b == 1) & 
  (l1$sub1_a == 1 & l1$sub2_a == 1 & l1$sub1_b == 1 & l1$sub2_b == 1)

l1$ans.cat <- "other"
l1$ans.cat[proper] <- "proper"
l1$ans.cat[sub] <- "sub"
l1$ans.cat[basic] <- "basic"
l1$ans.cat <- factor(l1$ans.cat,c("proper","sub","basic","other"))

# get proportions and bootstrapped CIS
M3 <- data.frame()
for (k in 1:length(levels(l1$ans.cat))){
    M3 = rbind(M3, ddply(l1, .(sample), 
                     function (d) {getPs(d$ans.cat, levels(d$ans.cat)[k])}))
}

Plot.