Xu and Tenenbaum 2007b Replication

Supplementary Information

TABLE OF CONTENTS
     (1) Xu and Tenenabum 2007b (original)
     (2) Exp. 1 - online
     (3) Exp. 2 - online
     (4) Exp. 3 - in person
     (5) Exp. 4 - online
     (5) All

This document was created from an R Markdown file. The R Markdown file can be found here. All analyses and plots can be reproduced from the raw data with the code in this file. This document also contains links to the experimental tasks.

There are different possible criteria for categorizing a participant as a subordinate or basic-level generalizer. X&T find that participants respond consistently across questions as either a subordinate or a basic generalizer for each of the two trials. In contrast, in our sample, we observed a wide variety of responses across questions by participants. Thus, we adopt two criteria in analyzing our data: a “strict” criteria where we only include participants if they responded correctly, and a “nonstrict” criteria where we counted a particpant as a basic generalizer if they responded “yes” to any of the basic level questions.

## [1] "Strict criteria:  FALSE"

Original Xu and Tenenbaum 2007b

X&T 2007b data - adults

N = 14
N_per_condition = (N/2) * 2 # 7/condition

prop_sub_teacher= .928 
n_sub_teacher = round(prop_sub_teacher * N_per_condition)
n_basic_teacher = N_per_condition - n_sub_teacher 
teacher_xt_data = as.factor(c(rep("sub", n_sub_teacher),
                              rep("basic", n_basic_teacher)))

prop_sub_learner = .357
n_sub_learner = round(prop_sub_learner * N_per_condition)
n_basic_learner= N_per_condition - n_sub_learner
learner_xt_data = as.factor(c(rep("sub", n_sub_learner), 
                              rep("basic", n_basic_learner)))

xt_data.adults = data.frame(response=c(as.character(teacher_xt_data),
                                       as.character(learner_xt_data)))
xt_data.adults$Answer.sample[1:N_per_condition] = "teacher"
xt_data.adults$Answer.sample[(N_per_condition+1):
                              (N_per_condition*2)] = "learner"

# get proportions and bootstrapped CIS
M <- data.frame()
for (k in 1:length(levels(learner_xt_data))){
    M = rbind(M, ddply(xt_data.adults, .(Answer.sample), 
                     function (d) {get.prop.CIs(d$response, levels(d$response)[k])}))
}

xt.a.props = M
names(xt.a.props)[names(xt.a.props) == "ps.boot"] = "prop"
xt.a.props$exp ="X&T (2007b)"

# add n's
xt.a.props[xt.a.props$Answer.sample == "teacher" &
           xt.a.props$w.ans.cat == "sub", "n" ] = n_sub_teacher 
xt.a.props[xt.a.props$Answer.sample == "teacher" & 
           xt.a.props$w.ans.cat == "basic", "n"] = n_basic_teacher 
xt.a.props[xt.a.props$Answer.sample == "learner" & 
           xt.a.props$w.ans.cat == "sub", "n" ] = n_sub_learner 
xt.a.props[xt.a.props$Answer.sample == "learner" & 
           xt.a.props$w.ans.cat == "basic", "n" ] = n_basic_learner

X&T 2007b data - children

N = 24
N_per_condition = (N/2) * 2 # 12/condition, 2 trials per participant

prop_sub_teacher= .71
n_sub_teacher = round(prop_sub_teacher * N_per_condition)
n_basic_teacher = N_per_condition - n_sub_teacher 
teacher_xt_data = as.factor(c(rep("sub", n_sub_teacher),
                              rep("basic", n_basic_teacher)))

prop_sub_learner = .29
n_sub_learner = round(prop_sub_learner * N_per_condition)
n_basic_learner= N_per_condition - n_sub_learner
learner_xt_data = as.factor(c(rep("sub", n_sub_learner), 
                              rep("basic", n_basic_learner)))

xt_data.children = data.frame(response=c(as.character(teacher_xt_data),
                                       as.character(learner_xt_data)))
xt_data.children$Answer.sample[1:N_per_condition] = "teacher"
xt_data.children$Answer.sample[(N_per_condition+1):
                               (2*N_per_condition)] = "learner"

# get proportions and bootstrapped CIS
M <- data.frame()
for (k in 1:length(levels(learner_xt_data))){
    M = rbind(M, ddply(xt_data.children, .(Answer.sample), 
                     function (d) {get.prop.CIs(d$response, levels(d$response)[k])}))
}

xt.c.props = M
names(xt.c.props)[names(xt.c.props) == "ps.boot"] = "prop"
xt.c.props$exp ="X&T (2007b) children"

# add n's
xt.c.props[xt.c.props$Answer.sample == "teacher" &
           xt.c.props$w.ans.cat== "sub", "n" ] = n_sub_teacher 
xt.c.props[xt.c.props$Answer.sample == "teacher" & 
           xt.c.props$w.ans.cat== "basic", "n"] = n_basic_teacher 
xt.c.props[xt.c.props$Answer.sample == "learner" & 
           xt.c.props$w.ans.cat== "sub", "n" ] = n_sub_learner 
xt.c.props[xt.c.props$Answer.sample == "learner" & 
           xt.c.props$w.ans.cat== "basic", "n" ] = n_basic_learner

Experiment 1

Read in data and pre-process.

t1 = read.csv("../data/turk_replication_1.csv")

# make factors
t1$Answer.sample <- factor(t1$Answer.sample, labels=c('teacher','learner')) # sample0= teacher, sample1=learner
t1$Answer.label <- factor(t1$Answer.label, labels=c('nolabel','label')) # 0=nolabel, 1=label

t1 <- colwise(as.factor)(t1)

Filter.

# get number of exclusions by category
t1.exclusion.ns = t1 %>%
              summarise(n_noLabelTrials = length(which(Answer.label != 'label')),
                        n_repeatWorkers = length(which(duplicated(workerid))),
                        n_badTraining = length(which(Answer.click1 == "\"false\"" | 
                                                       Answer.click2 == "\"false\"")),
                        n_badGeneralize = length(which(Answer.Qwcheck != 0)),
                        n_badFilter = length(which(Answer.question1 == 'FALSE')))

t1.exclusion.ns

##   n_noLabelTrials n_repeatWorkers n_badTraining n_badGeneralize
## 1              48               8            21               8
##   n_badFilter
## 1           0

# subset data
t1.f = t1 %>%
       filter(Answer.label == 'label') %>% #remove nolabel trials (run 4)
       filter(!duplicated(workerid)) %>% #participants who completed multiple runs
       filter(Answer.click1 == "\"correct\"" & 
                Answer.click2 == "\"correct\"") %>% #correct training items (learning only)
       filter(Answer.Qwcheck == 0)  %>% #check generalization question
       filter(Answer.question1 == 'TRUE') #filter question

dim(t1)[1] # total

## [1] 350

dim(t1.f)[1] # total with exclusions

## [1] 274

Categorize response patterns based on criterion.

sub <- t1.f$Answer.Qwsmm1 == 0 & t1.f$Answer.Qwsmm2 == 0 & 
  t1.f$Answer.Qwsm1 == 1 & t1.f$Answer.Qwsm2 == 1
if (strict){
  basic <- (t1.f$Answer.Qwsmm1 == 1 & t1.f$Answer.Qwsmm2 == 1) &
      t1.f$Answer.Qwsm1 == 1 & t1.f$Answer.Qwsm2 == 1 
} else {
  basic <- (t1.f$Answer.Qwsmm1 == 1 | t1.f$Answer.Qwsmm2 == 1) &
       t1.f$Answer.Qwsm1 == 1 & t1.f$Answer.Qwsm2 == 1 
}

t1.f$w.ans.cat <- "other"
t1.f$w.ans.cat[sub] <- "sub"
t1.f$w.ans.cat[basic] <- "basic"

if (proper){
  #proper <- t1.f$Answer.Qwsmm1 == 0 & t1.f$Answer.Qwsmm2 == 0 & 
  #      (t1.f$Answer.Qwsm1 == 0 & t1.f$Answer.Qwsm2 == 0)
  proper <- t1.f$Answer.Qwsmm1 == 0 & t1.f$Answer.Qwsmm2 == 0 & 
        (t1.f$Answer.Qwsm1 == 0 | t1.f$Answer.Qwsm2 == 0)
  t1.f$w.ans.cat[proper] <- "proper"
  t1.f$w.ans.cat = factor(t1.f$w.ans.cat, levels=c("proper", "sub", "basic", "other"))
} else {
  t1.f$w.ans.cat = factor(t1.f$w.ans.cat, levels=c("sub", "basic", "other"))
}

# filter out other responses
length(which(t1.f$w.ans.cat == "other"))

## [1] 79

t1.f = t1.f[t1.f$w.ans.cat != "other",]
t1.f$w.ans.cat = droplevels(t1.f$w.ans.cat)

# get proportions and bootstrapped CIS
M <- data.frame()
for (k in 1:length(levels(t1.f$w.ans.cat))){
    M = rbind(M, ddply(t1.f, .(Answer.sample), 
                     function (d) {get.prop.CIs(d$w.ans.cat, levels(d$w.ans.cat)[k])}))
}

t1.f.props = t1.f %>%
          group_by(Answer.sample, w.ans.cat) %>%
          summarise (n = n()) %>%
          mutate(prop = n / sum(n),
                 exp = "Turk #1") %>%
          left_join(M)

Stats

t1_tab = table(t1.f$w.ans.cat, t1.f$Answer.sample)[c("sub", "basic"),]
chisq.test(t1_tab)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  t1_tab
## X-squared = 0.62452, df = 1, p-value = 0.4294

Experiment #2

Read in data and pre-process

t2 <- read.csv("../data/turk_replication_2.csv", sep="\t", header=T)
t2  <- t2 [,c(-1:-19,-21:-32,-36,-41:-43,-54:-55,-56)] # remove unnecessary columns

# make factors
t2$Answer.sample <- factor(t2$Answer.sample, labels=c('teacher','learner')) # sample0 = teacher, sample1=learner
t2 <- colwise(as.factor)(t2)

Filter.

# get number of exclusions by category
t2.exclusion.ns = t2 %>%
              summarise(n_badTraining = length(which(Answer.click1 != "\"correct\"" |
                                                       Answer.click2 != "\"correct\"")),
                        n_badGeneralize = length(which(Answer.Qcheck1 != 0 | Answer.Qcheck2 != 0)),
                        n_badFilter = length(which(Answer.question3 != 'true' | Answer.question4 !='true')))
t2.exclusion.ns

##   n_badTraining n_badGeneralize n_badFilter
## 1            22              15           8

# subset data
t2.f = t2 %>%
       filter(Answer.click1 == "\"correct\"" &
                Answer.click2 == "\"correct\"") %>% # take out those who click on wrong training items
      filter(Answer.Qcheck1 == 0 & Answer.Qcheck2 == 0) %>% # take out if missed check generalization question
      filter(Answer.question3 == 'true' & Answer.question4 =='true')  # take out those who missed filter question

dim(t2)[1] # total

## [1] 150

dim(t2.f)[1] # total with exclusions

## [1] 118

Categorize response patterns based on criterion.

sub <-  t2.f$Answer.Qproper1 == 1 & t2.f$Answer.Qproper2 == 1 & 
  t2.f$Answer.Qproper3 == 1 & t2.f$Answer.Qsub1 == 1 &
  t2.f$Answer.Qsub2 == 1 & t2.f$Answer.Qbasic1 == 0 &
  t2.f$Answer.Qbasic2 == 0 & t2.f$Answer.Qbasic3 == 0 

if (strict){
  basic <- t2.f$Answer.Qproper1 == 1 & t2.f$Answer.Qproper2 == 1 & 
    t2.f$Answer.Qproper3 == 1 & t2.f$Answer.Qsub1 == 1 &
    t2.f$Answer.Qsub2 == 1 & t2.f$Answer.Qbasic1 == 1 &
    t2.f$Answer.Qbasic2 == 1 & t2.f$Answer.Qbasic3 == 1 
} else {
  basic <- t2.f$Answer.Qproper1 == 1 & t2.f$Answer.Qproper2 == 1 & 
    t2.f$Answer.Qproper3 == 1 & t2.f$Answer.Qsub1 == 1 &
    t2.f$Answer.Qsub2 == 1 & (t2.f$Answer.Qbasic1 == 1 |
    t2.f$Answer.Qbasic2 == 1 | t2.f$Answer.Qbasic3 == 1)
}

t2.f$w.ans.cat <- "other"
t2.f$w.ans.cat[sub] <- "sub"
t2.f$w.ans.cat[basic] <- "basic"

if(proper){
  proper <- (t2.f$Answer.Qproper1 == 1 & t2.f$Answer.Qproper2 == 1 & 
    t2.f$Answer.Qproper3 == 1) & t2.f$Answer.Qsub1 == 0 &
    t2.f$Answer.Qsub2 == 0 & t2.f$Answer.Qbasic1 == 0 &
    t2.f$Answer.Qbasic2 == 0 & t2.f$Answer.Qbasic3 == 0 
    
    t2.f$w.ans.cat[proper] <- "proper"
    t2.f$w.ans.cat = factor(t2.f$w.ans.cat, levels=c("proper", "sub", "basic", "other"))

} else {
  t2.f$w.ans.cat = factor(t2.f$w.ans.cat, levels=c("sub", "basic", "other"))
}

# filter out "other" responses
length(which(t2.f$w.ans.cat == "other"))

## [1] 28

t2.f = t2.f[t2.f$w.ans.cat != "other",]
t2.f$w.ans.cat = droplevels(t2.f$w.ans.cat)

# get proportions and bootstrapped CIS
M <- data.frame()
for (k in 1:length(levels(t2.f$w.ans.cat))){
    M = rbind(M, ddply(t2.f, .(Answer.sample), 
                     function (d) {get.prop.CIs(d$w.ans.cat, levels(d$w.ans.cat)[k])}))
}

t2.f.props = t2.f %>%
          group_by(Answer.sample, w.ans.cat) %>%
          summarise (n = n()) %>%
          mutate(prop = n / sum(n),
                 exp = "Turk #2") %>%
          left_join(M)

Stats

t2_tab = table(t2.f$w.ans.cat, t2.f$Answer.sample)[c("sub", "basic"),]
chisq.test(t2_tab)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  t2_tab
## X-squared = 0.89395, df = 1, p-value = 0.3444

Experiment 3 - In Lab Replication

l1 <- read.csv("../data/inlab_replication.csv")

# make factors
l1 <- colwise(as.factor)(l1)
names(l1)[names(l1) == "sample"] = "Answer.sample"

Filter.

l1.exclusion.ns = l1 %>%
                  summarise(n_badTraining = length(which(t2_a == 0 | t3_a == 0 |
                                                           t2_b == 0 | t2_b == 0)))
l1.exclusion.ns

##   n_badTraining
## 1             1

# subset data
l1.f = l1 %>%
       filter(t2_a == 1 & t3_a == 1 & t2_b == 1 & t2_b == 1)

dim(l1)[1] # total

## [1] 41

dim(l1.f)[1] # total with exclusions

## [1] 40

Categorize response patterns based on criterion.

sub <- l1.f$basic1_a == 0 & l1.f$basic2_a == 0 & 
       l1.f$basic1_b == 0 & l1.f$basic2_b == 0 & 
       (l1.f$sub1_a == 1 & l1.f$sub2_a == 1 & 
          l1.f$sub1_b == 1 & l1.f$sub2_b == 1)
if (strict){
    basic <- (l1.f$basic1_a == 1 & l1.f$basic2_a == 1 &
            l1.f$basic1_b == 1 & l1.f$basic2_b == 1) & 
           (l1.f$sub1_a == 1 & l1.f$sub2_a == 1 & 
              l1.f$sub1_b == 1 & l1.f$sub2_b == 1)
  
} else {
  basic <- (l1.f$basic1_a == 1 | l1.f$basic2_a == 1 | 
            l1.f$basic1_b == 1 | l1.f$basic2_b == 1) & 
           (l1.f$sub1_a == 1 & l1.f$sub2_a == 1 & 
              l1.f$sub1_b == 1 & l1.f$sub2_b == 1)
}

l1.f$w.ans.cat <- "other"
l1.f$w.ans.cat[sub] <- "sub"
l1.f$w.ans.cat[basic] <- "basic"

if (proper){
  proper <- l1.f$basic1_a == 0 & l1.f$basic2_a == 0 & 
            l1.f$basic1_b == 0 & l1.f$basic2_b == 0 & 
            (l1.f$sub1_a == 0 | l1.f$sub2_a == 0 |
               l1.f$sub1_b == 0 | l1.f$sub2_b == 0)
  l1.f$w.ans.cat[proper] <- "proper"
  l1.f$w.ans.cat <- factor(l1.f$w.ans.cat,c("proper","sub","basic","other"))
} else {
  l1.f$w.ans.cat <- factor(l1.f$w.ans.cat,c("sub","basic","other"))
}

# filter out "other"" responses
length(which(l1.f$w.ans.cat  == "other"))

## [1] 9

l1.f = l1.f[l1.f$w.ans.cat != "other",]
l1.f$w.ans.cat = droplevels(l1.f$w.ans.cat)

# get proportions and bootstrapped CIS
M <- data.frame()
for (k in 1:length(levels(l1.f$w.ans.cat))){
    M = rbind(M, ddply(l1.f, .(Answer.sample), 
                     function (d) {get.prop.CIs(d$w.ans.cat, levels(d$w.ans.cat)[k])}))
}

l1.f.props = l1.f %>%
          group_by(Answer.sample, w.ans.cat) %>%
          summarise (n = n()) %>%
          mutate(prop = n / sum(n),
                 exp = "In Lab") %>%
          left_join(M)

# re-order levels so consistent with other exps
l1.f.props$Answer.sample <- ordered(l1.f.props$Answer.sample, 
                    levels = c("teacher", "learner"))

Stats

l1_tab = table(l1.f$w.ans.cat, l1.f$sample)[c("sub", "basic"),]
chisq.test(l1_tab)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  l1_tab
## X-squared = 0.49269, df = 1, p-value = 0.4827

Expeiment 4 - Turk Replication #3

Read in data and pre-process

t3 <- read.csv("../data/turk_replication_3.csv")

# make factors
t3$Answer.sample <- factor(t3$Answer.sample, labels=c('teacher','learner')) # sample0 = teacher, sample1=learner
t3 <- colwise(as.factor)(t3)

Filter.

t3.exclusion.ns = t3 %>%
              summarise(n_badTraining = length(which(Answer.click1 != "\"correct\"" | 
                                                       Answer.click2 != "\"correct\"")),
                        n_badGeneralize = length(which(Answer.Qcheck1 != 0 | Answer.Qcheck2 != 0)),
                        n_badFilter = length(which(Answer.question1 != 'TRUE' | Answer.question2 !='TRUE'| 
                                Answer.question3 != 'TRUE' | Answer.question4 !='TRUE')))
t3.exclusion.ns

##   n_badTraining n_badGeneralize n_badFilter
## 1            27              17          10

# subset data
t3.f= t3 %>%
      filter(Answer.click1 == "\"correct\"" & 
                Answer.click2 == "\"correct\"") %>% # take out those who click on wrong training items
      filter(Answer.Qcheck1 == 0 & Answer.Qcheck2 == 0) %>% # take out if missed check generalization question
      filter(Answer.question1 == 'TRUE' & Answer.question2 =='TRUE' & 
            Answer.question3 == 'TRUE' & Answer.question4 =='TRUE')  # take out those who missed filter question

dim(t3)[1] # total

## [1] 200

dim(t3.f)[1] # total with exclusions

## [1] 161

Categorize response patterns based on criterion.

sub <-  t3.f$Answer.Qproper1 == 1 & t3.f$Answer.Qproper2 == 1 & 
    t3.f$Answer.Qproper3 == 1 & t3.f$Answer.Qsub1 == 1 &
    t3.f$Answer.Qsub2 == 1 & t3.f$Answer.Qbasic1 == 0 &
    t3.f$Answer.Qbasic2 == 0 & t3.f$Answer.Qbasic3 == 0 

if (strict){
  basic <- t3.f$Answer.Qproper1 == 1 & t3.f$Answer.Qproper2 == 1 & 
    t3.f$Answer.Qproper3 == 1 & t3.f$Answer.Qsub1 == 1 &
    t3.f$Answer.Qsub2 == 1 & t3.f$Answer.Qbasic1 == 1 &
    t3.f$Answer.Qbasic2 == 1 & t3.f$Answer.Qbasic3 == 1 
} else {
  basic <- t3.f$Answer.Qproper1 == 1 & t3.f$Answer.Qproper2 == 1 & 
      t3.f$Answer.Qproper3 == 1 & t3.f$Answer.Qsub1 == 1 &
      t3.f$Answer.Qsub2 == 1 & (t3.f$Answer.Qbasic1 == 1 |
      t3.f$Answer.Qbasic2 == 1 | t3.f$Answer.Qbasic3 == 1)
}

t3.f$w.ans.cat <- "other"
t3.f$w.ans.cat[sub] <- "sub"
t3.f$w.ans.cat[basic] <- "basic"

if (proper){
  proper <- (t3.f$Answer.Qproper1 == 1 & t3.f$Answer.Qproper2 == 1 & 
            t3.f$Answer.Qproper3 == 1) & t3.f$Answer.Qsub1 == 0 &
            t3.f$Answer.Qsub2 == 0 & t3.f$Answer.Qbasic1 == 0 &
            t3.f$Answer.Qbasic2 == 0 & t3.f$Answer.Qbasic3 == 0 
  
  t3.f$w.ans.cat[proper] <- "proper"
  t3.f$w.ans.cat = factor(t3.f$w.ans.cat, levels=c("proper", "sub", "basic", "other"))
} else { 
  t3.f$w.ans.cat = factor(t3.f$w.ans.cat, levels=c("sub", "basic", "other"))
}

# filter out "other"" responses
length(which(t3.f$w.ans.cat == "other"))

## [1] 21

t3.f = t3.f[t3.f$w.ans.cat != "other",]
t3.f$w.ans.cat = droplevels(t3.f$w.ans.cat)

# get proportions and bootstrapped CIS
M <- data.frame()
for (k in 1:length(levels(t3.f$w.ans.cat))){
    M = rbind(M, ddply(t3.f, .(Answer.sample), 
                     function (d) {get.prop.CIs(d$w.ans.cat, levels(d$w.ans.cat)[k])}))
}

t3.f.props = t3.f %>%
          group_by(Answer.sample, w.ans.cat) %>%
          summarise (n = n()) %>%
          mutate(prop = n / sum(n),
                 exp = "Turk #3") %>%
          left_join(M)

ggplot(t3.f.props, aes(x=factor(w.ans.cat), y=prop, fill=Answer.sample)) +
  geom_bar(stat="identity", position=position_dodge()) +
  geom_linerange(aes(ymin=ciwl,ymax=ciul), position=position_dodge(.9)) +
  ylim(0,1) +
  ylab("Prop. participants") +
  xlab("Generalization pattern") +
  ggtitle("Turk Replication #3") +
  themeML + 
  theme(legend.position=c(.85,.8)) + 
    scale_fill_brewer(name="Sampling\nCondition", palette="Set1")

Stats

t3_tab = table(t3.f$w.ans.cat, t3.f$Answer.sample)[c("sub", "basic"),]
chisq.test(t3_tab)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  t3_tab
## X-squared = 6.4203, df = 1, p-value = 0.01128

All experiments

Effect sizes

t1.f.es = d.fc(t1.f$w.ans.cat, t1.f$Answer.sample)
t2.f.es= d.fc(t2.f$w.ans.cat, t2.f$Answer.sample)
t3.f.es = d.fc(t3.f$w.ans.cat, t3.f$Answer.sample)
l1.f.es= d.fc(l1.f$w.ans.cat, l1.f$Answer.sample)
xt.a.es = d.fc(xt_data.adults$response, xt_data.adults$Answer.sample)
xt.c.es = d.fc(xt_data.children$response, xt_data.children$Answer.sample)

all.es = rbind(t1.f.es, t2.f.es, t3.f.es, l1.f.es, xt.a.es)
all.es$exp = c("Turk #1" ,"Turk #2", "Turk #3", "In Lab",
               "X&T (2007b)")
all.es$exp<- ordered(as.factor(all.es$exp), 
                     levels = c("X&T (2007b)", "Turk #1" ,"Turk #2", 
                                  "In Lab", "Turk #3"))

all.es$exp = revalue(all.es$exp, c("X&T (2007b)" = "X&T (2007b)", "Turk #1" = "Exp. 1",
                                "Turk #2" = "Exp. 2", "In Lab" = "Exp. 3", "Turk #3" = "Exp. 4"))

#pdf("../../writeup/figures/FIG_3.pdf", height = 4, width = 4)
ggplot(all.es, aes(x = exp, y = effect_size, ymin = cill, ymax = ciul)) + 
  geom_pointrange(position=position_dodge(width = 0.25), size = .8) + 
  ylab("Effect Size") + 
  xlab("Experiment") + 
  geom_hline(y = 0, linetype = "dotted") +
  theme(text = element_text(size=14)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#dev.off()

how to calculate errors bars – XT do it over responses, I’m doing it over participants