==== May 06, 2015 ====
In the present studies, we attempted to replicate Xu and Tenenbaum (2007b) in a manipulation of the communicative context of word learning. In the weak sampling condition, the naive learner guesses about the training set of exemplars. In the strong sampling condition, a knowledgeable teacher identifies a training set of exemplars. As predicted by the model, Xu and Tenenbaum found that both adults and children overwhelmingly generalized more narrowly under strong sampling, than under weak sampling. We completed two online and an in-lab replication of this study, and found evidence for a pattern similar to the original data, but with far more variability across participant than observed in the original study.
Note here that we are using a more liberal definition of basic generalization than Xu and Tenenbaum (2007b) – we count a generalization as basic if the participant generalization to the basic level on any basic level question.
Studies:
     (1) Turk #1
     (2) Turk #2
     (3) In lab
Read in data and pre-process.
Filter.
# subset data
d1 <- subset(d1, d1$Answer.label == 'label') # take out nolabel trials (study 4)
d1 <- subset(d1, !duplicated(d1$workerid)) # take out those who did it twice
d1 <- subset(d1, Answer.click1 == "\"correct\"" & Answer.click2 == "\"correct\"") # take out if selected wrong training items
d1 <- subset(d1, Answer.Qwcheck == 0) # take out if missed check generalization question
d1 <- subset(d1, Answer.question1 == 'true') # take out those who missed filter question
Categorize response patterns based on criterion.
criterion = "unstrict1" # category criterion
d1 <- d1[d1$Answer.label== "label",]
numsubj = dim(d1)[1]
if (criterion == "strict"){
proper <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 &
(d1$Answer.Qwsm1 == 0 & d1$Answer.Qwsm2 == 0)
sub <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 &
d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1
basic <- (d1$Answer.Qwsmm1 == 1 & d1$Answer.Qwsmm2 == 1) &
d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1
} else if (criterion == "unstrict2"){
proper <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 &
(d1$Answer.Qwsm1 == 0 & d1$Answer.Qwsm2 == 0)
sub <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 &
(d1$Answer.Qwsm1 == 1 | d1$Answer.Qwsm2 == 1)
basic <- (d1$Answer.Qwsmm1 == 1 | d1$Answer.Qwsmm2 == 1) &
(d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1)
} else if (criterion == "unstrict1"){
proper <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 &
(d1$Answer.Qwsm1 == 0 | d1$Answer.Qwsm2 == 0)
sub <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 &
d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1
basic <- (d1$Answer.Qwsmm1 == 1 | d1$Answer.Qwsmm2 == 1) &
d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1
} else if(criterion == "x&t"){
sub <- d1$Answer.Qwsmm1 == 0 & d1$Answer.Qwsmm2 == 0 &
d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1
basic <- (d1$Answer.Qwsmm1 == 1 & d1$Answer.Qwsmm2 == 1) &
d1$Answer.Qwsm1 == 1 & d1$Answer.Qwsm2 == 1
}
d1$w.ans.cat <- "other"
d1$w.ans.cat[sub] <- "sub"
d1$w.ans.cat[basic] <- "basic"
#d1$w.ans.cat[proper] <- "proper"
d1$w.ans.cat = factor(d1$w.ans.cat, levels=c("proper", "sub", "basic", "other"))
d1 = d1[d1$w.ans.cat != "other",]
# get proportions and bootstrapped CIS
M <- data.frame()
for (k in 1:length(levels(d1$w.ans.cat))){
M = rbind(M, ddply(d1, .(Answer.sample),
function (d) {getPs(d$w.ans.cat, levels(d$w.ans.cat)[k])}))
}
M$exp = "Turk #1"
Plot.
Stats No difference.
d1_counts= table(d1$w.ans.cat, d1$Answer.sample)[2:3,]
chisq.test(d1_counts)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: d1_counts
## X-squared = 0.6245, df = 1, p-value = 0.4294
Read in data and pre-process
Filter.
# subset data
d2 <- subset(d2, Answer.click1 == "\"correct\"" & Answer.click2 == "\"correct\"") # take out those who click on wrong training items
d2 <- subset(d2, Answer.Qcheck1 == 0 & Answer.Qcheck2 == 0) # take out if missed check generalization question
d2 <- subset(d2, Answer.question1 == 'true' & Answer.question2 =='true' &
Answer.question3 == 'true' & Answer.question4=='true' ) # take out those who missed filter question
Categorize response patterns based on criterion.
criterion = "unstrictnew"
numsubj = dim(d2)[1]
if (criterion == "unstrictold"){
proper <- (d2$Answer.Qproper1 == 1 & d2$Answer.Qproper2 == 1 &
d2$Answer.Qproper3 == 1) & d2$Answer.Qsub1 == 0 &
d2$Answer.Qsub2 == 0 & d2$Answer.Qbasic1 == 0 &
d2$Answer.Qbasic2 == 0 & d2$Answer.Qbasic3 == 0
sub <- d2$Answer.Qproper1 == 1 & d2$Answer.Qproper2 == 1 &
d2$Answer.Qproper3 == 1 & d2$Answer.Qsub1 == 1 &
d2$Answer.Qsub2 == 1 & d2$Answer.Qbasic1 == 0 &
d2$Answer.Qbasic2 == 0 & d2$Answer.Qbasic3 == 0
basic <- d2$Answer.Qproper1 == 1 & d2$Answer.Qproper2 == 1 &
d2$Answer.Qproper3 == 1 & d2$Answer.Qsub1 == 1 &
d2$Answer.Qsub2 == 1 & d2$Answer.Qbasic1 == 1 &
d2$Answer.Qbasic2 == 1 & d2$Answer.Qbasic3 == 1
} else if (criterion == "unstrictnew") {
proper <- (d2$Answer.Qproper1 == 1 & d2$Answer.Qproper2 == 1 &
d2$Answer.Qproper3 == 1) & (d2$Answer.Qsub1 == 0 |
d2$Answer.Qsub2 == 0) & d2$Answer.Qbasic1 == 0 &
d2$Answer.Qbasic2 == 0 & d2$Answer.Qbasic3 == 0
sub <- d2$Answer.Qproper1 == 1 & d2$Answer.Qproper2 == 1 &
d2$Answer.Qproper3 == 1 & d2$Answer.Qsub1 == 1 &
d2$Answer.Qsub2 == 1 & d2$Answer.Qbasic1 == 0 &
d2$Answer.Qbasic2 == 0 & d2$Answer.Qbasic3 == 0
basic <- d2$Answer.Qproper1 == 1 & d2$Answer.Qproper2 == 1 &
d2$Answer.Qproper3 == 1 & d2$Answer.Qsub1 == 1 &
d2$Answer.Qsub2 == 1 & (d2$Answer.Qbasic1 == 1 |
d2$Answer.Qbasic2 == 1 | d2$Answer.Qbasic3 == 1)
}
d2$w.ans.cat <- "other"
#d2$w.ans.cat[proper] <- "proper"
d2$w.ans.cat[sub] <- "sub"
d2$w.ans.cat[basic] <- "basic"
d2$w.ans.cat <- factor(d2$w.ans.cat,c("proper","sub","basic","other"))
d2 = d2[d2$w.ans.cat != "other",]
# get proportions and bootstrapped CIS
M2 <- data.frame()
for (k in 1:length(levels(d2$w.ans.cat))){
M2 = rbind(M2, ddply(d2, .(Answer.sample),
function (d) {getPs(d$w.ans.cat, levels(d$w.ans.cat)[k])}))
}
M2$exp = "Turk #2"
Plot.
Stats No difference.
d2_counts= table(d2$w.ans.cat, d2$Answer.sample)[2:3,]
chisq.test(d2_counts)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: d2_counts
## X-squared = 0.894, df = 1, p-value = 0.3444
l1 <- read.csv("../data/inlab_replication.csv")
#proper <- l1$basic1_a == 0 & l1$basic2_a == 0 &
# l1$basic1_b == 0 & l1$basic2_b == 0 &
# (l1$sub1_a == 0 | l1$sub2_a == 0 | l1$sub1_b == 0 | l1$sub2_b == 0)
sub <- l1$basic1_a == 0 & l1$basic2_a == 0 &
l1$basic1_b == 0 & l1$basic2_b == 0 &
(l1$sub1_a == 1 & l1$sub2_a == 1 & l1$sub1_b == 1 & l1$sub2_b == 1)
basic <- (l1$basic1_a == 1 | l1$basic2_a == 1 |
l1$basic1_b == 1 | l1$basic2_b == 1) &
(l1$sub1_a == 1 & l1$sub2_a == 1 & l1$sub1_b == 1 & l1$sub2_b == 1)
l1$ans.cat <- "other"
#l1$ans.cat[proper] <- "proper"
l1$ans.cat[sub] <- "sub"
l1$ans.cat[basic] <- "basic"
l1$ans.cat <- factor(l1$ans.cat,c("proper","sub","basic","other"))
l1 = l1[l1$ans.cat != "other",]
# get proportions and bootstrapped CIS
M3 <- data.frame()
for (k in 1:length(levels(l1$ans.cat))){
M3 = rbind(M3, ddply(l1, .(sample),
function (d) {getPs(d$ans.cat, levels(d$ans.cat)[k])}))
}
M3$exp = "In Lab"
Plot.
Stats No difference.
l1_counts= table(l1$ans.cat, l1$sample)[2:3,]
chisq.test(l1_counts)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: l1_counts
## X-squared = 0.4076, df = 1, p-value = 0.5232
X and T data
# adults
n_responses = 14 # N=7/condition, 2 responses/participant
prop_sub_teacher= .928
n_sub_teacher = round(prop_sub_teacher * n_responses)
n_basic_teacher = n_responses - n_sub_teacher
teacher_xt_data = as.factor(c(rep("sub", n_sub_teacher), rep("basic", n_basic_teacher)))
prop_sub_learner = .357
n_sub_learner = round(prop_sub_learner * n_responses)
n_basic_learner= n_responses - n_sub_learner
learner_xt_data = as.factor(c(rep("sub", n_sub_learner), rep("basic", n_basic_learner)))
#xt_data = data.frame(teacher = teacher_xt_data, learner = learner_xt_data )
xt_data = data.frame(response=c(as.character(teacher_xt_data),as.character(learner_xt_data)))
xt_data$sample[1:14] = "teacher"
xt_data$sample[15:28] = "learner"
# get proportions and bootstrapped CIS
M4 <- data.frame()
for (k in 1:length(levels(learner_xt_data))){
M4 = rbind(M4, ddply(xt_data, .(sample),
function (d) {getPs(d$response, levels(d$response)[k])}))
}
M4$exp = "X&T (2007b)"
Plot of all experiments
Effect sizes
e1 = d.fc(d1$w.ans.cat[d1$w.ans.cat != "other"], d1$Answer.sample[d1$w.ans.cat != "other"])
e2 = d.fc(d2$w.ans.cat[d2$w.ans.cat != "other"], d2$Answer.sample[d2$w.ans.cat != "other"])
e3= d.fc(l1$ans.cat[l1$ans.cat != "other"], l1$sample[l1$ans.cat != "other"])
e4 = d.fc(xt_data$response, xt_data$sample)