load and define things

Read in data

# Read every JSON file found in production-results/ and stack the parsed
# results into a single data frame, `d`.
files <- dir("production-results/")

# lapply() over the file names handles an empty directory correctly (no
# iterations), unlike 1:length(files), which would count down through 1 and 0
# and try to read a non-existent file.
parsed <- lapply(files, function(f) {
  as.data.frame(fromJSON(paste0("production-results/", f)))
})

# Bind once at the end instead of growing `d` with rbind() on every iteration;
# fall back to an empty data frame when there was nothing to read.
d <- if (length(parsed) > 0) do.call(rbind, parsed) else data.frame()

# clean up names: strip the leading "answers." prefix from column names.
# The prefix is matched literally (escaped dot) and only at the start of a
# name, so a name that merely contains "rs" followed by another character is
# left intact and the result always has exactly one name per column.
names(d) <- sub("^answers\\.", "", names(d))

Do exclusions

# Count the rows in which selfEstimator equals self_estimator_recall (se) and
# in which partnerEstimator equals partner_estimator_recall (pe).
summarise(
  d,
  se = sum(selfEstimator == self_estimator_recall),
  pe = sum(partnerEstimator == partner_estimator_recall)
)
##    se  pe
## 1 295 272
# Exclusion: keep a row only when it passes both social manipulation checks,
# i.e. the self check and the partner check each match the recalled value.
d.f <- filter(
  d,
  selfEstimator == self_estimator_recall &
    partnerEstimator == partner_estimator_recall
)

# drop multi-word responses?
# drop bad guessers

Manipulation checks

# likability ratings
# Summarise the likability rating per condition (mean, ci_lower, ci_upper)
# with multi_boot(). The rating is converted via as.character() first so that
# a factor column yields its labelled values rather than its integer level
# codes; this matches how `likable` is converted for the correlation summary
# later in the file.
likable <- d.f %>%
  mutate(likable = as.numeric(as.character(likable))) %>%
  multi_boot(column = "likable",
             summary_groups = c("cond"),
             statistics_functions = c("mean", "ci_lower", "ci_upper"))

# Bar chart of the mean likability rating in each condition, with error bars
# spanning ci_lower to ci_upper, on a fixed 0-7 y axis.
ggplot(likable, aes(x = cond, y = mean, fill = cond)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ylim(0, 7) +
  ggtitle("Likeability") +
  themeML

# workability ratings
# Summarise the how_well rating per condition (mean, ci_lower, ci_upper) with
# multi_boot(). The rating is converted via as.character() first so that a
# factor column yields its labelled values rather than its integer level
# codes; this matches how `how_well` is converted for the correlation summary
# later in the file.
workability <- d.f %>%
  mutate(how_well = as.numeric(as.character(how_well))) %>%
  multi_boot(column = "how_well",
             summary_groups = c("cond"),
             statistics_functions = c("mean", "ci_lower", "ci_upper"))

# Bar chart of the mean how_well rating in each condition, with error bars
# spanning ci_lower to ci_upper, on a fixed 0-7 y axis.
ggplot(workability, aes(x = cond, y = mean, fill = cond)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ylim(0, 7) +
  ggtitle("Workability") +
  themeML

Munge data for by-trial analyses.

# Reshape the filtered data to one row per participant x trial and add
# per-trial edit-distance and accuracy measures.
gd <- d.f %>% 
  # Stack every column whose name starts with "T" into key/value pairs
  # (trial_q = original column name, resp = its value).
  gather(trial_q, resp, starts_with("T")) %>%
  # Discard the stacked columns whose name contains "_rt".
  filter(!(grepl("_rt",as.character(trial_q)))) %>%
  # Split the column name into a trial label and a measure name
  # (separate() splits on non-alphanumeric characters by default).
  separate(trial_q, c("trial","measure")) %>%
  # Characters 2-3 of the trial label are taken as the numeric trial index.
  mutate(trial = as.numeric(str_sub(trial, start=2, end=3))) %>%
  # Spread the measures back out so each measure is its own column.
  spread(measure, resp) %>%
  # Evaluate the following mutate one row at a time, so adist() compares a
  # single correctWord/guessedWord pair per call.
  rowwise %>%
  # lev2: generalized Levenshtein distance from correctWord to guessedWord.
  # ins/del/sub: elements 1-3 of adist()'s "counts" attribute, which are the
  #   numbers of insertions, deletions and substitutions respectively.
  # correct: exact string match between target and guess.
  # nchars: length of the guess.
  # allCond: selfEstimator and partnerEstimator pasted into one label.
  mutate(lev2 = adist(correctWord, guessedWord)[1],
         ins = drop(attr(adist(correctWord, guessedWord, counts = TRUE), "counts"))[1],
         del = drop(attr(adist(correctWord, guessedWord, counts = TRUE), "counts"))[2],
         sub = drop(attr(adist(correctWord, guessedWord, counts = TRUE), "counts"))[3], 
         correct = correctWord==guessedWord,
         nchars = nchar(guessedWord),
         allCond = paste(selfEstimator, partnerEstimator))
## Warning: attributes are not identical across measure variables; they will
## be dropped

Levenshtein distance by condition

# Average lev2 within each WorkerId x cond cell first, then summarise those
# per-worker means by condition with multi_boot_standard().
ms.lev <- multi_boot_standard(
  gd %>%
    group_by(WorkerId, cond) %>%
    summarise(lev2 = mean(lev2)) %>%
    group_by(cond),
  col = "lev2"
)
## Warning: Grouping rowwise data frame strips rowwise nature
## Joining by: "cond"
# Bar chart of mean edit distance per condition, with error bars spanning
# ci_lower to ci_upper, on a fixed 0-4 y axis.
ggplot(ms.lev, aes(x = cond, y = mean, fill = cond)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ggtitle("Levenshtein edit distance") +
  ylim(0, 4) +
  themeML

# Same summary as for cond, but grouped by allCond (the pasted
# selfEstimator/partnerEstimator label): per-worker mean lev2 first, then
# multi_boot_standard() across workers within each allCond level.
ms.lev.all <- multi_boot_standard(
  gd %>%
    group_by(WorkerId, allCond) %>%
    summarise(lev2 = mean(lev2)) %>%
    group_by(allCond),
  col = "lev2"
)
## Warning: Grouping rowwise data frame strips rowwise nature
## Joining by: "allCond"
# Bar chart of mean edit distance for each allCond level, with error bars
# spanning ci_lower to ci_upper, on a fixed 0-4 y axis.
ggplot(ms.lev.all, aes(x = allCond, y = mean, fill = allCond)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ggtitle("Levenshtein edit distance") +
  ylim(0, 4) +
  themeML

Substitutions by condition

# Average the substitution count within each WorkerId x cond cell, then
# summarise those per-worker means by condition with multi_boot_standard().
ms.sub <- multi_boot_standard(
  gd %>%
    group_by(WorkerId, cond) %>%
    summarise(sub = mean(sub)) %>%
    group_by(cond),
  col = "sub"
)
## Warning: Grouping rowwise data frame strips rowwise nature
## Joining by: "cond"
# Bar chart of mean substitutions per condition, with error bars spanning
# ci_lower to ci_upper, on a fixed 0-4 y axis.
ggplot(ms.sub, aes(x = cond, y = mean, fill = cond)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ggtitle("Substitutions") +
  ylim(0, 4) +
  themeML

Deletions by condition

# Average the deletion count within each WorkerId x cond cell, then summarise
# those per-worker means by condition with multi_boot_standard().
ms.del <- multi_boot_standard(
  gd %>%
    group_by(WorkerId, cond) %>%
    summarise(del = mean(del)) %>%
    group_by(cond),
  col = "del"
)
## Warning: Grouping rowwise data frame strips rowwise nature
## Joining by: "cond"
# Bar chart of mean deletions per condition, with error bars spanning
# ci_lower to ci_upper, on a fixed 0-1 y axis.
ggplot(ms.del, aes(x = cond, y = mean, fill = cond)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ggtitle("Deletions") +
  ylim(0, 1) +
  themeML

Insertions by condition

# Average the insertion count within each WorkerId x cond cell, then summarise
# those per-worker means by condition with multi_boot_standard().
ms.ins <- multi_boot_standard(
  gd %>%
    group_by(WorkerId, cond) %>%
    summarise(ins = mean(ins)) %>%
    group_by(cond),
  col = "ins"
)
## Warning: Grouping rowwise data frame strips rowwise nature
## Joining by: "cond"
# Bar chart of mean insertions per condition, with error bars spanning
# ci_lower to ci_upper, on a fixed 0-0.6 y axis.
ggplot(ms.ins, aes(x = cond, y = mean, fill = cond)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ggtitle("Insertions") +
  ylim(0, .6) +
  themeML

Accuracy by condition

# Proportion of exact matches within each WorkerId x cond cell, then summarise
# those per-worker proportions by condition with multi_boot_standard().
ms.correct <- multi_boot_standard(
  gd %>%
    group_by(WorkerId, cond) %>%
    summarise(correct = mean(correct)) %>%
    group_by(cond),
  col = "correct"
)
## Warning: Grouping rowwise data frame strips rowwise nature
## Joining by: "cond"
# Bar chart of mean accuracy per condition, with error bars spanning ci_lower
# to ci_upper, on a fixed 0-1 y axis.
ggplot(ms.correct, aes(x = cond, y = mean, fill = cond)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ggtitle("accuracy") +
  ylim(0, 1) +
  themeML

# Proportion of exact matches within each WorkerId x allCond cell, then
# summarise those per-worker proportions by allCond with
# multi_boot_standard().
ms.correct.all <- multi_boot_standard(
  gd %>%
    group_by(WorkerId, allCond) %>%
    summarise(correct = mean(correct)) %>%
    group_by(allCond),
  col = "correct"
)
## Warning: Grouping rowwise data frame strips rowwise nature
## Joining by: "allCond"
# Bar chart of mean accuracy for each allCond level, with error bars spanning
# ci_lower to ci_upper, on a fixed 0-1 y axis.
ggplot(ms.correct.all, aes(x = allCond, y = mean, fill = allCond)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ggtitle("accuracy") +
  ylim(0, 1) +
  themeML

Correlations between edit measures and likeability

# Summaries per WorkerId x cond used by the correlation plots below:
#   edits        - mean Levenshtein distance (NAs ignored)
#   likable      - mean likability rating, converted via as.character() so a
#                  factor gives its labelled values, not level codes
#   howWell      - mean how_well rating, converted the same way
#   prop_correct - number of exactly correct guesses divided by 6
#   sub          - mean substitution count (NAs ignored)
ms.corrs <- gd %>%
  group_by(WorkerId, cond) %>%
  summarise(edits = mean(lev2, na.rm = TRUE),
            likable = mean(as.numeric(as.character(likable))),
            howWell = mean(as.numeric(as.character(how_well))),
            # NOTE(review): the denominator 6 is hard-coded; presumably the
            # number of trials per participant - confirm against the design.
            prop_correct = sum(correct, na.rm = TRUE) / 6,
            sub = mean(sub, na.rm = TRUE))
## Warning: Grouping rowwise data frame strips rowwise nature
# Scatter plot of mean edit distance against likability, coloured by cond,
# with a linear fit per colour group and the overall correlation (cor() over
# complete observations) printed in red.
# Points use the default identity position: dodging is a bar-chart adjustment
# that, on a scatter plot, only triggers the "ymax not defined" message and
# can displace points horizontally.
ggplot(ms.corrs, aes(x = likable, y = edits, color = cond)) +
  geom_point() +
  ggtitle("likability vs. edits") +
  stat_smooth(method = "lm") +
  annotate("text", x = 2, y = .5, color = "red", size = 7,
           label = paste("r=", round(cor(ms.corrs$likable, ms.corrs$edits,
                                         use = "complete.obs"), 2))) +
  themeML
## ymax not defined: adjusting position using y instead

# Scatter plot of mean substitutions against likability, coloured by cond,
# with a linear fit per colour group and the overall correlation (cor() over
# complete observations) printed in red.
# Points use the default identity position: dodging is a bar-chart adjustment
# that, on a scatter plot, only triggers the "ymax not defined" message and
# can displace points horizontally.
ggplot(ms.corrs, aes(x = likable, y = sub, color = cond)) +
  geom_point() +
  ggtitle("likability vs. sub") +
  stat_smooth(method = "lm") +
  annotate("text", x = 2, y = .5, color = "red", size = 7,
           label = paste("r=", round(cor(ms.corrs$likable, ms.corrs$sub,
                                         use = "complete.obs"), 2))) +
  themeML
## ymax not defined: adjusting position using y instead

Correlations between edit measures and workability

# Scatter plot of mean edit distance against the how_well rating, coloured by
# cond, with a linear fit per colour group and the overall correlation (cor()
# over complete observations) printed in red.
# Points use the default identity position: dodging is a bar-chart adjustment
# that, on a scatter plot, only triggers the "ymax not defined" message and
# can displace points horizontally.
ggplot(ms.corrs, aes(x = howWell, y = edits, color = cond)) +
  geom_point() +
  ggtitle("workability vs. edits") +
  stat_smooth(method = "lm") +
  annotate("text", x = 2, y = .5, color = "red", size = 7,
           label = paste("r=", round(cor(ms.corrs$howWell, ms.corrs$edits,
                                         use = "complete.obs"), 2))) +
  themeML
## ymax not defined: adjusting position using y instead

# Scatter plot of mean substitutions against the how_well rating, coloured by
# cond, with a linear fit per colour group and the overall correlation (cor()
# over complete observations) printed in red.
# Points use the default identity position: dodging is a bar-chart adjustment
# that, on a scatter plot, only triggers the "ymax not defined" message and
# can displace points horizontally.
ggplot(ms.corrs, aes(x = howWell, y = sub, color = cond)) +
  geom_point() +
  ggtitle("workability vs. sub") +
  stat_smooth(method = "lm") +
  annotate("text", x = 2, y = .5, color = "red", size = 7,
           label = paste("r=", round(cor(ms.corrs$howWell, ms.corrs$sub,
                                         use = "complete.obs"), 2))) +
  themeML
## ymax not defined: adjusting position using y instead

Correlations between accuracy and likeability

# Scatter plot of prop_correct against likability, coloured by cond, with a
# linear fit per colour group and the overall correlation (cor() over complete
# observations) printed in red.
# Points use the default identity position: dodging is a bar-chart adjustment
# that, on a scatter plot, only triggers the "ymax not defined" message and
# can displace points horizontally.
ggplot(ms.corrs, aes(x = likable, y = prop_correct, color = cond)) +
  geom_point() +
  ggtitle("likability vs. accuracy") +
  stat_smooth(method = "lm") +
  annotate("text", x = 2, y = .5, color = "red", size = 7,
           label = paste("r=", round(cor(ms.corrs$likable,
                                         ms.corrs$prop_correct,
                                         use = "complete.obs"), 2))) +
  themeML
## ymax not defined: adjusting position using y instead

Correlations between accuracy and workability

# Scatter plot of prop_correct against the how_well rating, coloured by cond,
# with a linear fit per colour group and the overall correlation (cor() over
# complete observations) printed in red.
# Points use the default identity position: dodging is a bar-chart adjustment
# that, on a scatter plot, only triggers the "ymax not defined" message and
# can displace points horizontally.
ggplot(ms.corrs, aes(x = howWell, y = prop_correct, color = cond)) +
  geom_point() +
  ggtitle("workability vs. accuracy") +
  stat_smooth(method = "lm") +
  annotate("text", x = 2, y = .5, color = "red", size = 7,
           label = paste("r=", round(cor(ms.corrs$howWell,
                                         ms.corrs$prop_correct,
                                         use = "complete.obs"), 2))) +
  themeML
## ymax not defined: adjusting position using y instead