Replication of SPSS 2011 with 2 methodological modifications from exp 2 (following SPSS): Blocking and reordering of trials (sub first), and using different label for all 12 trials.

Read in and process data

EXPTNUM <- 4
a.data.filename <- paste0("exp", EXPTNUM, "_A.csv")
files = dir("../production-results/")
d = data.frame()
for (i in 1:length(files)[1]) {
    s <- fromJSON(paste("../production-results/", files[i], sep = ""))
    s$answers$asses = ifelse(is.null(s$answers$asses), "NA", s$answers$asses)
    d = bind_rows(d, data.frame(s))
}
names(d) <- str_replace(names(d), "answers.", "")
d.anonymized <- anonymize.sids(d, "WorkerId")

#write.csv(d.anonymized, a.data.filename)

Munge

d4 = read.csv(a.data.filename)

d4.long = d4 %>%
  gather(variable, value, contains("_")) %>%
  mutate(trial_num =  unlist(lapply(strsplit(as.character(variable),
                                      "_T"),function(x) x[2])),
         variable = unlist(lapply(strsplit(as.character(variable),
                                      "_"),function(x) x[1]))) %>%
  spread(variable, value) %>%
  mutate(trial_num = as.numeric(trial_num)) %>%
  mutate_if(is.character, funs(as.factor)) 

d4.munged = d4.long %>%
          select(subids, trial_num, category, condition, selected) %>%
          mutate(selected = lapply(str_split(selected, ","), 
                                   function(x) {str_sub(x, 4, 6)})) %>%
          mutate(prop_sub = unlist(lapply(selected, function(x){sum(x == "sub")/2})),
                 prop_bas = unlist(lapply(selected, function(x){sum(x == "bas")/2})),
                 prop_sup = unlist(lapply(selected, function(x){sum(x == "sup")/4}))) %>%
          select(-selected)
d4.munged$exp = EXPTNUM
#write.csv(d4.munged, paste0("../../../data/exp", EXPTNUM, "_data_munged.csv"), row.names = F)

Reproduce XT2007a Fig. 5

ms4 = d4.munged %>%
  gather(variable, value, c(prop_sub, prop_bas, prop_sup)) %>%
  group_by(condition,variable, subids) %>%
  mutate(value = as.numeric(value)) %>%
  summarize(value = mean(value)) %>%
  group_by(condition,variable) %>%
  multi_boot_standard(column = "value")  %>%
  mutate(variable = as.factor(variable))

ms.plot <- ms4
ms.plot$variable = factor(ms.plot$variable,levels(ms.plot$variable)[c(2,1,3)])
ms.plot$condition = factor(ms.plot$condition,levels(ms.plot$condition)[c(4,2,1,3)])
ms.plot$condition = plyr::mapvalues(ms.plot$condition,
                               from = c("one", "3bas", "3sub",
                                        "3sup"), 
                               to = c("1", "3 basic", "3 sub.", "3 super."))

ggplot(ms.plot, aes(x = condition, y = mean, group = variable, fill = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  geom_linerange(aes(ymin = ci_lower, 
                     ymax = ci_upper), 
                 position=position_dodge(width = .9)) +
  ylab("Proportion of \ntest objects chosen") +
  xlab("Examples") +
  theme_bw() +
  theme(legend.title = element_blank())

Reproduce SPSS Fig. 3

(plotting proportion basic level)

Read in all previous data and bind to current dataset

files = dir("../../../data/")
all.d = data.frame()
for (i in 1:length(files)[1]) {
    s <- read.csv(paste0("../../../data/", files[i]))
    all.d = bind_rows(all.d, data.frame(s))
}

Get means and plot

all.d$condition = plyr::mapvalues(all.d$condition,
                               from = c("one", "3bas", "3sub", "3sup"), 
                               to = c("one", "three_basic", 
                                        "three_subordinate",
                                        "three_superordinate"))
all.d$condition = as.factor(all.d$condition)

all.ms = all.d %>%
  gather(variable, value, c(prop_sub, prop_bas, prop_sup)) %>%
  group_by(condition,variable, exp, subids) %>%
  mutate(value = as.numeric(value)) %>%
  summarize(value = mean(value)) %>%
  group_by(condition,variable, exp) %>%
  multi_boot_standard(column = "value")  %>%
  ungroup() %>%
  mutate(variable = as.factor(variable)) %>%
  filter(condition == "one" | condition == "three_subordinate") %>%
  filter(variable == "prop_bas")

ggplot(all.ms, aes(x = exp, y = mean, group = condition, fill = condition)) +
  geom_bar(position = "dodge", stat = "identity") +
  geom_linerange(aes(ymin = ci_lower, 
                     ymax = ci_upper), 
                 position=position_dodge(width = .9)) +
  ylim(0,1)+
  ylab("Proportion basic-level choices ") +
  xlab("Experiment") +
  theme_bw() +
  theme(legend.title = element_blank())

Plot effect size

Get effect sizes of our experiments. Note that since we have the raw data available we can calculate the effect size taking into account the paired nature of the data, using effsize::cohen.d(paired = TRUE)).

all.ms.subj = all.d %>%
  gather(variable, value, c(prop_sub, prop_bas, prop_sup)) %>%
  group_by(condition,variable, exp, subids) %>%
  mutate(value = as.numeric(value)) %>%
  summarize(value = mean(value)) %>%
  filter(condition == "one" | condition == "three_subordinate", variable == 'prop_bas') %>%
  spread(condition, value) %>%
  ungroup () %>%
  select(-variable)

conf.ints = all.ms.subj %>%
  group_by(exp) %>%
  do(data.frame(conf.int = effsize::cohen.d(.$one, .$three_subordinate, paired = T)$conf.int))  %>%
  ungroup() %>%
  mutate(bound=rep(c("low", "high"), 
                   length.out = EXPTNUM*2)) %>%
  spread(bound, conf.int) 

effect.sizes.paired = all.ms.subj %>%
  group_by(exp) %>%
  do(data.frame(d = effsize::cohen.d(.$one, .$three_subordinate, paired = T)$estimate)) %>%
  left_join(conf.ints) %>%
  ungroup () %>%
  mutate(es.type = "paired",
         exp = as.numeric(exp))

effect.sizes.npaired  = all.ms.subj %>%
  group_by(exp) %>%
  summarize(m.one = mean(one),
            sd.one = sd(one),
            m.3sub = mean(three_subordinate),
            sd.3sub = sd(three_subordinate),
            n= n()) %>%
  do(data.frame(d = mes(.$m.one, .$m.3sub, .$sd.one,
       .$sd.3sub, .$n, .$n, verbose = F)$d,
       var.d = mes(.$m.one, .$m.3sub, .$sd.one,
       .$sd.3sub, .$n, .$n, verbose = F)$var.d))%>%
  mutate(high = d + (1.96*var.d),
         low = d - (1.96*var.d),
         es.type = "nonpaired",
         exp = c(1:4))  # fix this

effect.sizes = bind_rows(effect.sizes.paired,
                         effect.sizes.npaired) %>%
  mutate(exp = as.factor(exp),
         n = 50) # fix this %>%

# make experiment nums meaningful
effect.sizes$exp = plyr::mapvalues(effect.sizes$exp ,
                               from = c("1", "2", "3", "4"), 
                               to = c("E1 XTa replication", "E2 SPSS replication", 
                                      "E3 XTa replication, blocked/diff labels", 
                                      "E4 SPSS replication, blocked/diff labels"))

Calculate effect sizes from previous experiments. Here are the means and sd based on SPSS table 1.

original.effect.sizes = data.frame(exp = c("XT_adults_e1", "XT_children_e2",
                        "SPSS_e1",
                        "SPSS_e2",
                        "SPSS_e3",
                        "SPSS_eS1",
                        "SPSS_eS2"),
                        one_means = c(76, 40, 48.24, 30.83, 39.91, 24.56, 15.79), 
                        three_means = c(9, 6, 10.53, 53.33, 51.75, 16.67, 11.40),
                        one_sd = c(40.40, 40.40, 40.40,37.18,35.20,32.09,24.51),
                        three_sd = c(24.97, 24.97, 24.97,36.11,42.63,25.45,24.25),
                        n = c(22, 36, 19, 20, 19,19,19))
kable(original.effect.sizes)
exp one_means three_means one_sd three_sd n
XT_adults_e1 76.00 9.00 40.40 24.97 22
XT_children_e2 40.00 6.00 40.40 24.97 36
SPSS_e1 48.24 10.53 40.40 24.97 19
SPSS_e2 30.83 53.33 37.18 36.11 20
SPSS_e3 39.91 51.75 35.20 42.63 19
SPSS_eS1 24.56 16.67 32.09 25.45 19
SPSS_eS2 15.79 11.40 24.51 24.25 19

Calculate previous effect sizes

original.effect.sizes$d = mes(original.effect.sizes$one_means/100,
                              original.effect.sizes$three_means/100, 
                              original.effect.sizes$one_sd/100,
                              original.effect.sizes$three_sd/100,
                              original.effect.sizes$n,
                              original.effect.sizes$n, verbose = F)$d

original.effect.sizes$d_var = mes(original.effect.sizes$one_means/100,
                                  original.effect.sizes$three_means/100,
                                  original.effect.sizes$one_sd/100,
                                  original.effect.sizes$three_sd/100,
                                  original.effect.sizes$n, 
                                  original.effect.sizes$n,
                                  verbose = F)$var.d

original.effect.sizes = original.effect.sizes %>%
                            mutate(high = d + (1.96*d_var),
                                     low = d - (1.96*d_var)) %>%
                            select(-one_means, -three_means, -one_sd, -three_sd) %>%
                            mutate(es.type = "nonpaired")

Bind previous and current effect sizes together and plot

all.es = bind_rows(original.effect.sizes, effect.sizes)
all.es$design <- c("simult.","simult.","simult.", "seq.", "seq.", "simult.", "simult.", "simult.", "seq.", "simult.", "seq." ,"simult.", "seq.", "simult.", "seq." )

ggplot(all.es, aes(x = exp, y = d, color = es.type)) +
  geom_point(aes(size = n))+
  geom_linerange(aes(ymax =high, ymin=low),
                 position = position_dodge(width = 0)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  facet_grid(design~., drop = TRUE, scales = "free_y")+
  coord_flip() +
  geom_hline(yintercept = 0, linetype = 2) +
  ggtitle("Effect size") +
  xlab("Experiment")

Misc

By category

ms4 = d4.munged %>%
  gather(variable, value, c(prop_sub, prop_bas, prop_sup)) %>%
  mutate(variable = as.factor(variable)) %>%
  group_by(condition,variable, category, subids) %>%
  mutate(value = as.numeric(value)) %>%
  summarize(value = mean(value))%>%
  group_by(condition,variable,category) %>%
  multi_boot_standard(column = "value") 

ms4$variable = factor(ms4$variable,levels(ms4$variable)[c(2,1,3)])
ms4$condition = factor(ms4$condition,levels(ms4$condition)[c(4,2,1,3)])
ms4$condition = plyr::mapvalues(ms4$condition,
 from = c("one", "3bas", "3sub",
                                        "3sup"), 
                               to = c("1", "3 basic", "3 sub.", "3 super."))

ggplot(ms4, aes(x = condition, y = mean, group = variable, fill = variable)) +
  facet_grid(~category) +
  geom_bar(position = "dodge", stat = "identity") +
  geom_linerange(aes(ymin = ci_lower, 
                     ymax = ci_upper), 
                 position=position_dodge(width = .9)) +
  ylab("Proportion of \ntest objects chosen") +
  xlab("Examples") +
  theme_bw() +
  theme(legend.title = element_blank())

Post-task questions

d4 %>%
  group_by(education) %>%
  summarise(n = n()) %>%
  kable()
education n
1 10
2 15
3 21
4 4
d4 %>%
  group_by(enjoyment) %>%
  summarise(n = n()) %>%
  kable()
enjoyment n
0 1
1 18
2 31
d4 %>%
  mutate(language = tolower(language)) %>%
  group_by(language) %>%
  summarise(n = n()) %>%
  kable()
language n
english 50
d4 %>%
  group_by(gender) %>%
  summarise(n = n()) %>%
  kable()
gender n
19
Female 13
Male 18
d4 %>%
  group_by(asses) %>%
  summarise(n = n()) %>%
  kable()
asses n
Confused 2
No 2
Yes 27
NA 19
d4 %>%
  mutate(age = as.numeric(as.character(age))) %>%
  ggplot(aes(x= age)) +
  geom_histogram() +
  theme_bw() +
  ggtitle("Age distribution")

unique(d4$comments)
##  [1]                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
##  [2] that was fun thanks                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
##  [3] I have no comments. Thanks.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
##  [4] None.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
##  [5] I really enjoyed the HIT but it took me a moment to realize I should choose all of the like items (I think). Really fun regardless :)                                                                                                                                                                                                                                                                                                                                                                                            
##  [6] It ran smoothly.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 
##  [7] I second guessed myself at various points about how inclusive I should be when there were slight variations in design (ie, a flat front school bus versus a regular school bus), but I suppose that was the point of the experiment.                                                                                                                                                                                                                                                                                             
##  [8] A straightforward and enjoyable task.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
##  [9] It really made me think!                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         
## [10] Can I have a bonus :)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [11] Confusing                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        
## [12] This was an interesting and fun HIT! Some of the last few picks though did confuse me as they were different words corresponding to the same pictures/objects that earlier in the survey had different words attributed to them. I thought about not picking any of them for that reason, but didn't know if that was an option or would be seen as not completing it correctly... so I just went with what my intuition felt was correct in making my choices.\n\nThank you for the opportunity to participate. Have a nice day!
## [13] none ty                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          
## [14] none                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
## [15] This was a fun hit.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
## [16] Interesting!                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
## [17] thanks and God Bless!                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [18] <NA>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
## 17 Levels:  ... This was an interesting and fun HIT! Some of the last few picks though did confuse me as they were different words corresponding to the same pictures/objects that earlier in the survey had different words attributed to them. I thought about not picking any of them for that reason, but didn't know if that was an option or would be seen as not completing it correctly... so I just went with what my intuition felt was correct in making my choices.\n\nThank you for the opportunity to participate. Have a nice day!

Task time

As a sanity check look at total task time: Expect participants to take longer in the SPSS experiments since its sequential. The data look consistent with this.

d1 = read.csv("../../exp1/analysis/exp1_A.csv" ) %>%
      mutate(exp = "XT2007a replication (E1)")
d2 = read.csv("../../exp2/analysis/exp2_A.csv" ) %>%
      mutate(exp = "SPSS replication (E2)")
d3 = read.csv("../../exp3/analysis/exp3_A.csv" ) %>%
      mutate(exp = "XT2007a replication (E3), blocked/diff labels")
d4 = mutate(d4, exp = "SPSS replication (E4), blocked/diff labels")

all = rbind(d1, d2, d3, d4)

all$SubmitTime = gsub("T|Z","",all$SubmitTime)
all$AcceptTime = gsub("T|Z","",all$AcceptTime)
all$SubmitTime = strptime(all$SubmitTime, "%F%T")
all$AcceptTime = strptime(all$AcceptTime, "%F%T")
all$total_time = as.numeric(all$SubmitTime) - as.numeric(all$AcceptTime)
all$exp = as.factor(all$exp)
all$exp = factor(all$exp,levels(all$exp)[c(3,4,1,2)])

ggplot(all, aes(x = exp, y = total_time/60)) +
    ylab("Task time (min)") +
    geom_boxplot() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

all %>%
  select(-AcceptTime, -SubmitTime) %>%
  group_by(exp) %>%
  mutate(total_time = total_time/60) %>%
  multi_boot_standard(column = "total_time") %>%
  ggplot(aes(x = exp, y = mean, fill = exp)) +
  geom_bar(position = "dodge", stat = "identity") +
  geom_linerange(aes(ymin = ci_lower, 
                     ymax = ci_upper), 
                 position=position_dodge(width = .9)) +
  ylab("Task time (min)") +
  xlab("Examples") +
  theme_bw() +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))