Replication of Spencer, Perone, Smith, and Samuelson (2011), Expt. 2 (sequential presentation of training items).

Read in raw data and anonymize

# Read every raw per-participant JSON result file, stack the results into one
# data frame, replace worker IDs via anonymize.sids(), and cache the result as
# a CSV that the rest of the analysis reads back in.
EXPTNUM <- 2
a.data.filename <- paste0("exp", EXPTNUM, "_A.csv")
results.dir <- "../production-results"
files <- dir(results.dir)

# seq_along() (instead of 1:length(files)) yields no iterations for an empty
# directory rather than iterating over c(1, 0). Rows are collected in a list
# and bound once, instead of growing `d` on every pass through a loop.
rows <- lapply(seq_along(files), function(i) {
  s <- fromJSON(file.path(results.dir, files[i]))
  # When the "asses" answer is absent, store the string "NA" so that every
  # participant contributes the same set of columns.
  asses <- s$answers$asses
  s$answers$asses <- if (is.null(asses)) "NA" else asses[1]
  data.frame(s)
})
d <- bind_rows(rows)

# Strip the literal "answers." prefix; fixed() keeps "." from acting as a
# regex wildcard.
names(d) <- str_replace(names(d), fixed("answers."), "")
d.anonymized <- anonymize.sids(d, "WorkerId")

write.csv(d.anonymized, a.data.filename)

Munge

# Reload the anonymized Experiment 2 data and reshape it so that each row is
# one participant x trial.
d2 <- read.csv(a.data.filename)

# Every column whose name contains "_" is treated as a per-trial measure.
# The text after "_T" becomes the trial number and the text before the first
# "_" becomes the measure name (presumably columns look like
# "<measure>_T<trial>" -- confirm against the raw data).
# vapply() is used instead of unlist(lapply()) so the result is always a
# character vector of the right length, even with zero rows.
d2.long <- d2 %>%
  gather(variable, value, contains("_")) %>%
  mutate(
    trial_num = vapply(strsplit(as.character(variable), "_T"),
                       function(x) x[2], character(1)),
    variable = vapply(strsplit(as.character(variable), "_"),
                      function(x) x[1], character(1))
  ) %>%
  spread(variable, value) %>%
  mutate(trial_num = as.numeric(trial_num)) %>%
  # Pass the function directly; the funs() wrapper is deprecated in dplyr.
  mutate_if(is.character, as.factor)

# For each trial, compute the proportion of selected test items at each
# taxonomic level. `selected` is a comma-separated string; characters 4-6 of
# each entry are compared against "sub", "bas" and "sup".
# The divisors (2, 2, 4) are presumably the number of test items available at
# each level -- confirm against the experiment design.
# vapply() returns plain numeric columns rather than list-columns of scalars;
# downstream as.numeric() calls remain valid on them.
d2.munged <- d2.long %>%
  select(subids, trial_num, category, condition, selected) %>%
  mutate(selected = lapply(str_split(selected, ","),
                           function(x) str_sub(x, 4, 6))) %>%
  mutate(
    prop_sub = vapply(selected, function(x) sum(x == "sub") / 2, numeric(1)),
    prop_bas = vapply(selected, function(x) sum(x == "bas") / 2, numeric(1)),
    prop_sup = vapply(selected, function(x) sum(x == "sup") / 4, numeric(1))
  ) %>%
  select(-selected)

Reproduce XT2007a Figure 5.

# Experiment 2 summary: stack the three proportion columns into long form,
# then summarise `value` within each condition x test-level cell using
# multi_boot_standard() (presumably a bootstrap summary; the plots below read
# its `mean`, `ci_lower` and `ci_upper` columns).
ms2.long <- gather(d2.munged, variable, value,
                   c(prop_sub, prop_bas, prop_sup))
ms2 <- ms2.long %>%
  group_by(condition, variable) %>%
  mutate(value = as.numeric(value)) %>%
  multi_boot_standard(column = "value")
ms2 <- mutate(ms2, variable = as.factor(variable))

# Plotting copy of the Experiment 2 summary, with factor levels put in display
# order. Levels are named explicitly rather than picked by position
# (levels(x)[c(2, 1, 3)]), so the ordering no longer depends on the
# alphabetical order the levels happen to have.
ms.plot <- ms2
ms.plot$variable <- factor(ms.plot$variable,
                           levels = c("prop_sub", "prop_bas", "prop_sup"))
# `labels` renames each condition in the same step; it pairs element-wise
# with `levels`.
ms.plot$condition <- factor(ms.plot$condition,
                            levels = c("one",
                                       "three_subordinate",
                                       "three_basic",
                                       "three_superordinate"),
                            labels = c("1", "3 sub.", "3 basic", "3 super."))

# Dodged bar chart of mean proportion chosen per training condition, one bar
# per test level, with ci_lower..ci_upper drawn as line ranges on each bar.
ggplot(data = ms.plot,
       mapping = aes(x = condition, y = mean,
                     fill = variable, group = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(mapping = aes(ymin = ci_lower, ymax = ci_upper),
                 position = position_dodge(width = .9)) +
  labs(x = "Examples", y = "Proportion of \ntest objects chosen") +
  theme_bw() +
  theme(legend.title = element_blank())

Here’s the XT2007a data:

# Draw the image stored in xtfig5.png (the XT2007a figure, per the text above).
"xtfig5.png" %>%
  readPNG() %>%
  grid.raster()

XTMEM 1 and 2 plotted as in SPSS:

# Load the anonymized Experiment 1 data and reshape it so that each row is one
# participant x trial.
d1 <- read.csv("../../exp1/analysis/exp1_a.csv")

# Every column whose name contains "_" is treated as a per-trial measure.
# The text after "_T" becomes the trial number and the text before the first
# "_" becomes the measure name (presumably columns look like
# "<measure>_T<trial>" -- confirm against the raw data).
# vapply() is used instead of unlist(lapply()) so the result is always a
# character vector of the right length, even with zero rows.
d1.long <- d1 %>%
  gather(variable, value, contains("_")) %>%
  mutate(
    trial_num = vapply(strsplit(as.character(variable), "_T"),
                       function(x) x[2], character(1)),
    variable = vapply(strsplit(as.character(variable), "_"),
                      function(x) x[1], character(1))
  ) %>%
  spread(variable, value) %>%
  mutate(trial_num = as.numeric(trial_num)) %>%
  # Pass the function directly; the funs() wrapper is deprecated in dplyr.
  mutate_if(is.character, as.factor)

# For each Experiment 1 trial, compute the proportion of selected test items
# at each taxonomic level. `selected` is a comma-separated string; characters
# 4-6 of each entry are compared against "sub", "bas" and "sup".
# The divisors (2, 2, 4) are presumably the number of test items available at
# each level -- confirm against the experiment design.
# vapply() returns plain numeric columns rather than list-columns of scalars;
# downstream as.numeric() calls remain valid on them.
d1.munged <- d1.long %>%
  select(subids, trial_num, category, condition, selected) %>%
  mutate(selected = lapply(str_split(selected, ","),
                           function(x) str_sub(x, 4, 6))) %>%
  mutate(
    prop_sub = vapply(selected, function(x) sum(x == "sub") / 2, numeric(1)),
    prop_bas = vapply(selected, function(x) sum(x == "bas") / 2, numeric(1)),
    prop_sup = vapply(selected, function(x) sum(x == "sup") / 4, numeric(1))
  ) %>%
  select(-selected)

# Experiment 1 summary: stack the three proportion columns into long form,
# then summarise `value` within each condition x test-level cell using
# multi_boot_standard() (presumably a bootstrap summary; the plot below reads
# its `mean`, `ci_lower` and `ci_upper` columns).
ms1.long <- gather(d1.munged, variable, value,
                   c(prop_sub, prop_bas, prop_sup))
ms1 <- ms1.long %>%
  group_by(condition, variable) %>%
  mutate(value = as.numeric(value)) %>%
  multi_boot_standard(column = "value")
ms1 <- mutate(ms1, variable = as.factor(variable))

# Keep only the basic-level proportions for the "one" and "three_subordinate"
# conditions from each experiment, and tag each subset with its experiment.
crit.conds1 <- ms1 %>%
  filter(condition %in% c("one", "three_subordinate"),
         variable == "prop_bas") %>%
  mutate(exp = "XT2007a replication (E1)")

crit.conds2 <- ms2 %>%
  filter(condition %in% c("one", "three_subordinate"),
         variable == "prop_bas") %>%
  mutate(exp = "SPSS replication (E2)")

# Stack both experiments; fct_rev() reverses the level order of `exp`.
crit.conds <- mutate(rbind(crit.conds1, crit.conds2), exp = fct_rev(exp))

# Dodged bars of mean basic-level proportion for each experiment, split by
# training condition, with ci_lower..ci_upper line ranges; y axis fixed to 0-1.
ggplot(data = crit.conds,
       mapping = aes(x = exp, y = mean,
                     fill = condition, group = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(mapping = aes(ymin = ci_lower, ymax = ci_upper),
                 position = position_dodge(width = .9)) +
  ylim(0, 1) +
  labs(x = "Experiment", y = "Proportion basic-level choices ") +
  theme_bw() +
  theme(legend.title = element_blank())

And, the proportion basic for the 1 and sub conditions only in the SPSS data:

# Draw the image stored in spssfig3.png (the SPSS figure, per the text above).
"spssfig3.png" %>%
  readPNG() %>%
  grid.raster()

We don’t see the flip observed by SPSS; the pattern looks almost identical to XTMEM1/XT2007a.

By category

# Re-summarise Experiment 2, this time also splitting by stimulus category.
# Note that this overwrites the earlier `ms2`.
ms2.bycat.long <- d2.munged %>%
  gather(variable, value, c(prop_sub, prop_bas, prop_sup)) %>%
  mutate(variable = as.factor(variable))
ms2 <- ms2.bycat.long %>%
  group_by(condition, variable, category) %>%
  mutate(value = as.numeric(value)) %>%
  multi_boot_standard(column = "value")

# Put factor levels in display order for the by-category plot. Levels are
# named explicitly rather than picked by position (levels(x)[c(2, 1, 3)]), so
# the ordering no longer depends on the alphabetical order the levels happen
# to have.
ms2$variable <- factor(ms2$variable,
                       levels = c("prop_sub", "prop_bas", "prop_sup"))
# `labels` renames each condition in the same step; it pairs element-wise
# with `levels`.
ms2$condition <- factor(ms2$condition,
                        levels = c("one",
                                   "three_subordinate",
                                   "three_basic",
                                   "three_superordinate"),
                        labels = c("1", "3 sub.", "3 basic", "3 super."))

# Same dodged-bar layout as the overall figure, with one panel per category.
ggplot(data = ms2,
       mapping = aes(x = condition, y = mean,
                     fill = variable, group = variable)) +
  facet_grid(. ~ category) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(mapping = aes(ymin = ci_lower, ymax = ci_upper),
                 position = position_dodge(width = .9)) +
  labs(x = "Examples", y = "Proportion of \ntest objects chosen") +
  theme_bw() +
  theme(legend.title = element_blank())

Post-task questions

# Count participants at each value of `education` and render as a table.
kable(summarise(group_by(d2, education), n = n()))
education n
0 1
1 6
2 20
3 20
4 3
# Count participants at each value of `enjoyment` and render as a table.
kable(summarise(group_by(d2, enjoyment), n = n()))
enjoyment n
1 17
2 33
# Count participants by reported language. Responses are lower-cased and
# stripped of surrounding whitespace so that variants such as "English " and
# "english" are tallied together instead of appearing as separate rows.
d2 %>%
  mutate(language = trimws(tolower(language))) %>%
  group_by(language) %>%
  summarise(n = n()) %>%
  kable()
language n
english 49
english 1
# Count participants at each value of `gender` and render as a table.
kable(summarise(group_by(d2, gender), n = n()))
gender n
17
Female 15
Male 18
# Count participants at each value of `asses` and render as a table.
kable(summarise(group_by(d2, asses), n = n()))
asses n
Confused 1
Yes 32
NA 17
# Histogram of participant age. `age` is converted via character to numeric
# first, so non-numeric responses become NA.
ggplot(data = mutate(d2, age = as.numeric(as.character(age))),
       mapping = aes(x = age)) +
  geom_histogram() +
  ggtitle("Age distribution") +
  theme_bw()

# List each distinct free-text comment once.
unique(d2[["comments"]])
##  [1]                                                                                                                                                                                                               
##  [2] None                                                                                                                                                                                                          
##  [3] This was fun                                                                                                                                                                                                  
##  [4] interesting hit, thanks!                                                                                                                                                                                      
##  [5] Great study                                                                                                                                                                                                   
##  [6] thanks                                                                                                                                                                                                        
##  [7] I like these, but it sucks never having feedback on them; like, was the turtle right to select? :P                                                                                                            
##  [8] That was fun!                                                                                                                                                                                                 
##  [9] N/A                                                                                                                                                                                                           
## [10] I thought it had to do with they type, not the picture.                                                                                                                                                       
## [11] It was fun.                                                                                                                                                                                                   
## [12] none                                                                                                                                                                                                          
## [13] I enjoyed it!  Thanks for the HIT!  I did my best and hope the data is useful!                                                                                                                                
## [14] Good Luck!                                                                                                                                                                                                    
## [15] A bit long for the pay, if I'm honest                                                                                                                                                                         
## [16] I liked this hit - it was different and made me think about words generally vs specifically (i.e. a 'dog' vs a 'dalmatian'.  Thanks and good luck                                                             
## [17] Thank you, good luck with your research!                                                                                                                                                                      
## [18] Thanks!                                                                                                                                                                                                       
## [19] ty                                                                                                                                                                                                            
## [20]  I enjoyed helping you with your research.                                                                                                                                                                    
## [21] hi                                                                                                                                                                                                            
## [22] It was interesting trying to decipher what was being asked. That is, in the early levels I had to choose (as I had very little information to go on) whether the word was specific or more generic in meaning.
## [23] This was so fun, I would love to do more if you have them                                                                                                                                                     
## 23 Levels:  ... ty

Task time

Just as a sanity check, look at total task time: we expect participants to take longer in the SPSS experiment since it's sequential. The data look consistent with this.

# Combine both experiments and compute each participant's total task time in
# seconds (SubmitTime - AcceptTime).
d2 <- mutate(d2, exp = "SPSS replication (E2)")
d1 <- mutate(d1, exp = "XT2007a replication (E1)")
all <- rbind(d2, d1)

# Strip the "T" separator and "Z" suffix, then parse as date + time.
# The "Z" suffix suggests the timestamps are UTC (TODO confirm), so they are
# parsed with tz = "UTC" instead of the machine's local zone; this keeps the
# differences free of local daylight-saving jumps. as.POSIXct() stores the
# result as POSIXct rather than POSIXlt, which is the appropriate date-time
# class for data-frame columns.
all$SubmitTime <- as.POSIXct(gsub("T|Z", "", all$SubmitTime),
                             format = "%F%T", tz = "UTC")
all$AcceptTime <- as.POSIXct(gsub("T|Z", "", all$AcceptTime),
                             format = "%F%T", tz = "UTC")
all$total_time <- as.numeric(difftime(all$SubmitTime, all$AcceptTime,
                                      units = "secs"))
# fct_rev() reverses the level order of `exp`.
all$exp <- fct_rev(all$exp)

# Box plot of total task time, converted from seconds to minutes, by experiment.
ggplot(data = all, mapping = aes(x = exp, y = total_time / 60)) +
  geom_boxplot() +
  ylab("Task time (min)") +
  theme_bw()

# Mean task time in minutes per experiment, with ci_lower..ci_upper line
# ranges from multi_boot_standard().
# The x axis shows the experiment, so it is labelled "Experiment"; the
# previous label "Examples" was carried over from the earlier condition plots.
all %>%
  # Drop the timestamp columns before grouping.
  # NOTE(review): presumably because dplyr verbs reject date-time list
  # columns -- confirm.
  select(-AcceptTime, -SubmitTime) %>%
  group_by(exp) %>%
  mutate(total_time = total_time / 60) %>%
  multi_boot_standard(column = "total_time") %>%
  ggplot(aes(x = exp, y = mean, fill = exp)) +
  geom_bar(position = "dodge", stat = "identity") +
  geom_linerange(aes(ymin = ci_lower,
                     ymax = ci_upper),
                 position = position_dodge(width = .9)) +
  ylab("Task time (min)") +
  xlab("Experiment") +
  theme_bw() +
  theme(legend.title = element_blank())