likeSpeak 6 Analysis

load and define things

preprocess

# Pre-process from database
# NOTE(review): setwd() makes the script machine-specific; it is kept here
# because the database is opened through a path relative to this directory.
setwd('/Users/Allison/Documents/likeSpeak/experiment/psiturk_likespeak/')
#setwd('/Documents/GRADUATE_SCHOOL/Projects/likeSpeak/experiment/psiturk_likespeak/')
db_name <- "MGEtest5.db"
table_name <- "RCI5"

sqlite    <- RSQLite::SQLite()
exampledb <- dbConnect(sqlite, db_name)
# Only rows with status = 4 are read; the commented variant below reads
# every row of the table instead.
db_query <- dbGetQuery(exampledb, paste0("SELECT datastring FROM ", table_name, " WHERE status = 4"))
#db_query = dbGetQuery(exampledb, paste("SELECT datastring FROM ", table_name, sep = ""))
# The query result is already in memory, so release the connection instead
# of leaving it open for the rest of the session.
dbDisconnect(exampledb)

# Collect one data frame of trials per participant, then bind them once
# (growing a data frame with rbind() inside the loop copies it every time).
participant_rows <- vector("list", nrow(db_query))
for (i in seq_len(nrow(db_query))) {
  if (!is.na(db_query$datastring[i])) {

    rthing <- fromJSON(db_query$datastring[i])  # get datastring to r object

    # get trial data, leaving out the instruction screens
    k <- rthing$data['trialdata']$trialdata
    k <- k[k$phase != "INSTRUCTIONS", ]

    # add participant info
    k$workerID <- rthing$workerId
    k$hitId <- rthing$hitId
    k$parentID <- rthing$questiondata$parentID
    k$gen <- rthing$questiondata$gen
    k$chain <- rthing$questiondata$chain
    participant_rows[[i]] <- k
  }
}
# Slots for rows with a missing datastring are still NULL; skip them.
participant_rows <- participant_rows[!vapply(participant_rows, is.null, logical(1))]
d <- if (length(participant_rows) > 0) {
  do.call(rbind, participant_rows)
} else {
  data.frame()
}

# drop weirdo columns
drops <- c("templates", "template", "action")
d <- d[, !(names(d) %in% drops), drop = FALSE]

# make stuff factors
# NOTE(review): columns are picked by position, so this silently breaks if
# the column order of the trial data changes -- confirm against the data.
# Columns 11-14 are left untouched (neither factor nor numeric).
factor_cols <- names(d)[c(-5, -11:-14, -17:-19)]
numeric_cols <- names(d)[c(5, 17:19)]
d[factor_cols] <- lapply(d[factor_cols], as.factor)
d[numeric_cols] <- lapply(d[numeric_cols], as.numeric)

Drop participants who missed either social manipulation check (self or partner)

# Copy each participant's manipulation-check answers onto all of their rows,
# then keep only participants who got both the self and the partner check
# right.
# NOTE(review): self_guess_accuracy is indexed with the partner column's NA
# mask; this is only correct if both answers are stored on the same row --
# confirm against the data.
d <- d %>%
  group_by(workerID) %>%
  mutate(self_guess_accuracy = self_guess_accuracy[!is.na(partner_guess_accuracy)],
         partner_guess_accuracy = partner_guess_accuracy[!is.na(partner_guess_accuracy)]) %>%
  filter(self_guess_accuracy != "incorrect" & partner_guess_accuracy != "incorrect") %>%
  ungroup()  # do not leak the per-worker grouping into later chunks

# get rid of multiword responses
# Count whitespace-separated tokens per response. gregexpr() returns a single
# -1 when nothing matches, so an empty response counts as one token.
d$guessed_label_Nwords <- vapply(gregexpr("\\S+", d$guessedLabel), length, integer(1))
# Keep single-token responses only; the previous `!= 2` test let responses
# of three or more words through.
d <- d[d$guessed_label_Nwords < 2, ]

Make all possible condition groups

# Two-letter code for the participant/partner estimator pairing: first letter
# is the participant, second the partner (O = "Overestimator",
# U = "Underestimator"). Any other non-missing combination falls through to
# "UO". Levels are ordered OO, UU, OU, UO.
d$groupCompare <- factor(
  with(d, ifelse(
    particEstimator == "Overestimator" & partnerEstimator == "Overestimator",
    "OO",
    ifelse(
      particEstimator == "Overestimator" & partnerEstimator == "Underestimator",
      "OU",
      ifelse(
        particEstimator == "Underestimator" & partnerEstimator == "Underestimator",
        "UU",
        "UO"
      )
    )
  )),
  levels = c("OO", "UU", "OU", "UO")
)

Likeability ratings

# Histogram of the raw likeability ratings, one dodged bar per condition.
# The y axis is a count of rows, not a rating, so no ylim(0, 7) is applied:
# with that limit every bar taller than 7 was dropped from the plot.
ggplot(d[!is.na(d$likableRating), ],
       aes(x = as.factor(likableRating), fill = condition)) +
  geom_bar(position = "dodge") +
  ggtitle("Likeability") +
  xlab("rating") +
  themeML

Likeability ratings by condition

# Likeability per condition: multi_boot is asked for the mean, ci_lower and
# ci_upper statistics of likableRating, using only questionnaire rows that
# carry a rating.
likable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(likableRating)) %>%
  multi_boot(
    column = "likableRating",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# One bar per condition with its interval as an error bar.
ggplot(likable, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                position = "dodge", width = 0.2) +
  ylim(0, 7) +
  ggtitle("Likeability") +
  themeML

Likeability ratings by group comparison and condition

# Same likeability summary, now split by estimator pairing (groupCompare)
# as well as condition.
likable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(likableRating)) %>%
  multi_boot(
    column = "likableRating",
    summary_groups = c("groupCompare", "condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Pairings on the x axis, conditions as dodged, colour-filled bars.
ggplot(likable, aes(x = groupCompare, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                position = "dodge", width = 0.2) +
  ylim(0, 7) +
  ggtitle("Likeability") +
  themeML

Workability ratings

# Histogram of the raw "how well did you work together" ratings, one dodged
# bar per condition. The y axis is a count of rows, so no ylim(0, 10) is
# applied: with that limit every bar taller than 10 was dropped from the plot.
ggplot(d[!is.na(d$howWellRating), ],
       aes(x = as.factor(howWellRating), fill = condition)) +
  geom_bar(position = "dodge") +
  ggtitle("How well worked together") +
  xlab("rating") +
  themeML

Workability ratings by condition

# Workability per condition: multi_boot is asked for the mean, ci_lower and
# ci_upper statistics of howWellRating, using only questionnaire rows that
# carry a rating.
workable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(howWellRating)) %>%
  multi_boot(
    column = "howWellRating",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# One bar per condition with its interval as an error bar.
ggplot(workable, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                position = "dodge", width = 0.2) +
  ylim(0, 7) +
  ggtitle("How well worked together") +
  themeML

Workability ratings by group comparison and condition

# Same workability summary, now split by estimator pairing (groupCompare)
# as well as condition.
workable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(howWellRating)) %>%
  multi_boot(
    column = "howWellRating",
    summary_groups = c("groupCompare", "condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Pairings on the x axis, conditions as dodged, colour-filled bars.
ggplot(workable, aes(x = groupCompare, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                position = "dodge", width = 0.2) +
  ylim(0, 7) +
  ggtitle("How well worked together") +
  themeML

Edit distances

# Edit distance between the target word and the guessed label, row by row.
# adist() is called once per row with counts = TRUE; its "counts" attribute
# holds the insertions, deletions and substitutions, in that order.
# seq_len() keeps this safe when d has no rows.
target_words <- as.character(d$word)
guessed_words <- as.character(d$guessedLabel)
edit_stats <- vapply(
  seq_len(nrow(d)),
  function(i) {
    res <- adist(target_words[i], guessed_words[i], counts = TRUE)
    c(res[1, 1], drop(attr(res, "counts")))
  },
  numeric(4)
)
d$lev2 <- edit_stats[1, ]  # total edit distance
d$ins <- edit_stats[2, ]   # insertions
d$del <- edit_stats[3, ]   # deletions
d$sub <- edit_stats[4, ]   # substitutions

# Counts of rows at each edit distance, dodged by condition. geom_bar()
# already counts rows per distinct x value and has no binwidth parameter,
# so that argument is not passed.
ggplot(d, aes(x = lev2, fill = condition)) +
  geom_bar(position = "dodge") +
  ggtitle("Edit distance") +
  xlab("edit distance") +
  themeML

# Same count plot for the number of substitutions.
ggplot(d, aes(x = sub, fill = condition)) +
  geom_bar(position = "dodge") +
  ggtitle("substitutions") +
  xlab("substitutions") +
  themeML

# Edit distance per condition: multi_boot is asked for the mean, ci_lower
# and ci_upper statistics of lev2 over TEST-phase rows that have a distance.
led <- d %>%
  group_by(condition) %>%
  filter(phase == "TEST", !is.na(lev2)) %>%
  multi_boot(
    column = "lev2",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# One bar per condition with its interval as an error bar.
ggplot(led, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                position = "dodge", width = 0.2) +
  ggtitle("Levenshtein edit distance") +
  ylim(0, 6) +
  themeML

# Edit distance summary split by estimator pairing (groupCompare) and
# condition, over TEST-phase rows that have a distance.
led <- d %>%
  filter(phase == "TEST", !is.na(lev2)) %>%
  multi_boot(
    column = "lev2",
    summary_groups = c("groupCompare", "condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Pairings on the x axis, conditions as dodged, colour-filled bars.
ggplot(led, aes(x = groupCompare, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                position = "dodge", width = 0.2) +
  ggtitle("Levenshtein edit distance") +
  ylim(0, 4) +
  themeML

# Substitution-count summary split by estimator pairing (groupCompare) and
# condition. Rows are selected on a non-missing lev2, as in the edit
# distance summaries.
subs <- d %>%
  filter(phase == "TEST", !is.na(lev2)) %>%
  multi_boot(
    column = "sub",
    summary_groups = c("groupCompare", "condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Pairings on the x axis, conditions as dodged, colour-filled bars.
ggplot(subs, aes(x = groupCompare, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                position = "dodge", width = 0.2) +
  ggtitle("substitutions") +
  ylim(0, 4) +
  themeML

RTs

# Log-transform response times.
# NOTE(review): log() yields -Inf for rt == 0 and NaN for negative rt; the
# NA filter below removes NaN but not -Inf -- confirm rt is always positive.
d$rt.log <- log(d$rt)

# Log RT per estimator pairing and condition: multi_boot is asked for the
# mean, ci_lower and ci_upper statistics of rt.log.
rts <- d %>%
  filter(!is.na(rt.log)) %>%
  multi_boot(
    column = "rt.log",
    summary_groups = c("groupCompare", "condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# fill is mapped to the condition column (it was the constant string
# "condition", overridden by a fixed red fill), so the two conditions within
# each pairing are dodged side by side instead of drawn on top of each other.
ggplot(rts, aes(y = mean, x = groupCompare, fill = condition)) +
  geom_bar(position = "dodge", stat = "identity") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                width = 0.2, position = "dodge") +
  ggtitle("RTs") +
  ylim(0, 10) +
  themeML

Accuracy by condition

# Per-participant proportion correct (number of "correct" rows divided by a
# fixed 8), then multi_boot is asked for the mean, ci_lower and ci_upper
# statistics of that proportion per condition.
acc <- d %>%
  group_by(workerID, condition) %>%
  filter(!is.na(accuracy)) %>%
  summarise(prop_correct = sum(accuracy == "correct") / 8) %>%
  multi_boot(
    column = "prop_correct",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# One bar per condition with its interval as an error bar.
ggplot(acc, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                position = "dodge", width = 0.2) +
  ggtitle("Accuracy") +
  ylim(0, 1) +
  themeML

Accuracy by group comparison and condition

# Per-participant proportion correct (number of "correct" rows divided by a
# fixed 8), summarised by estimator pairing (groupCompare) and condition.
acc <- d %>%
  group_by(workerID, groupCompare, condition) %>%
  filter(!is.na(accuracy)) %>%
  summarise(prop_correct = sum(accuracy == "correct") / 8) %>%
  multi_boot(
    column = "prop_correct",
    summary_groups = c("groupCompare", "condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Pairings on the x axis, conditions as dodged, colour-filled bars.
ggplot(acc, aes(x = groupCompare, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                position = "dodge", width = 0.2) +
  ggtitle("Accuracy") +
  ylim(0, 1) +
  themeML

#write.csv(d[d$phase == "TEST",c("workerID", "word", "guessedLabel")], "wordfilter.csv")

Correlation between likability and number of edits

# Keep participants who gave both questionnaire ratings and copy each rating
# onto all of that participant's rows, so it is available next to the trial
# rows.
p <- d %>%
  group_by(workerID) %>%
  filter(any(!is.na(likableRating)), any(!is.na(howWellRating))) %>%
  mutate(likableRating = likableRating[!is.na(likableRating)],
         howWellRating = howWellRating[!is.na(howWellRating)]) %>%
  ungroup()

# One row per participant (within condition and estimator pairing): mean
# edit distance, the two ratings, mean log RT and proportion correct.
# NOTE(review): 8 is presumably the number of test trials per participant --
# confirm; rows removed earlier still count in the denominator.
k <- p %>%
  group_by(workerID, condition, groupCompare) %>%
  summarise(edits = mean(lev2, na.rm = TRUE),
            likable = mean(likableRating),
            howWell = mean(howWellRating),
            rt = mean(rt.log, na.rm = TRUE),
            prop_correct = sum(accuracy == "correct", na.rm = TRUE) / 8) %>%
  ungroup() %>%  # leave k ungrouped for the plots and models below
  filter(!is.na(groupCompare))

# Layers shared by every scatter plot below: the raw points plus a linear
# fit. geom_point() is used with its defaults; the position = "dodge" and
# stat = "identity" arguments it used to get have no meaning for points.
scatter_layers <- list(geom_point(), stat_smooth(method = "lm"))

# Text label with the correlation of x and y, rounded to two decimals.
# Incomplete pairs are skipped so one missing participant mean does not
# turn the whole label into "r= NA".
cor_label <- function(x, y) {
  paste("r=", round(cor(x, y, use = "pairwise.complete.obs"), 2))
}

# --- edits against the two ratings ---
ggplot(k, aes(y = edits, x = likable)) +
  scatter_layers +
  ggtitle("likability vs. edits") +
  annotate("text", x = 2, y = .5, color = "red", size = 7,
           label = cor_label(k$likable, k$edits)) +
  themeML

ggplot(k, aes(y = edits, x = likable)) +
  scatter_layers +
  ggtitle("likability vs. edits") +
  facet_grid(. ~ groupCompare) +
  themeML

ggplot(k, aes(y = edits, x = howWell)) +
  scatter_layers +
  ggtitle("workability vs. edits") +
  annotate("text", x = 2, y = .5, color = "red", size = 7,
           label = cor_label(k$howWell, k$edits)) +
  themeML

ggplot(k, aes(y = edits, x = howWell)) +
  scatter_layers +
  ggtitle("workability vs. edits") +
  facet_grid(. ~ groupCompare) +
  themeML

# --- mean log RT against the two ratings ---
ggplot(k, aes(y = rt, x = likable)) +
  scatter_layers +
  ggtitle("likability vs. rt") +
  facet_grid(. ~ groupCompare) +
  themeML

ggplot(k, aes(y = rt, x = howWell)) +
  scatter_layers +
  ggtitle("workability vs. rt") +
  facet_grid(. ~ groupCompare) +
  themeML

# --- proportion correct against the two ratings ---
ggplot(k, aes(y = prop_correct, x = likable)) +
  scatter_layers +
  ggtitle("likability vs. prop_correct") +
  annotate("text", x = 2, y = .5, color = "red", size = 7,
           label = cor_label(k$likable, k$prop_correct)) +
  themeML

ggplot(k, aes(y = prop_correct, x = likable)) +
  scatter_layers +
  ggtitle("likability vs. prop_correct") +
  facet_grid(. ~ groupCompare) +
  themeML

ggplot(k, aes(y = prop_correct, x = howWell)) +
  scatter_layers +
  ggtitle("workability vs. prop_correct") +
  annotate("text", x = 2, y = .5, color = "red", size = 7,
           label = cor_label(k$howWell, k$prop_correct)) +
  themeML

ggplot(k, aes(y = prop_correct, x = howWell)) +
  scatter_layers +
  ggtitle("workability vs. prop_correct") +
  facet_grid(. ~ groupCompare) +
  themeML

Do social variables predict edits, controlling for RT?

# Linear model of edits on rt and the workability rating (howWell), fit on k
summary(lm(edits ~ rt + howWell, data = k))

## 
## Call:
## lm(formula = edits ~ rt + howWell, data = k)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0122 -0.6597  0.0335  0.6523  2.6088 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 11.18270    0.77035  14.516   <2e-16 ***
## rt          -1.09367    0.09768 -11.196   <2e-16 ***
## howWell     -0.07301    0.03538  -2.064   0.0402 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9292 on 234 degrees of freedom
## Multiple R-squared:  0.3665, Adjusted R-squared:  0.3611 
## F-statistic: 67.69 on 2 and 234 DF,  p-value: < 2.2e-16

# Linear model of edits on rt and the likeability rating (likable), fit on k
summary(lm(edits ~ rt + likable, data = k))

## 
## Call:
## lm(formula = edits ~ rt + likable, data = k)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.02300 -0.66914  0.01895  0.66504  2.62469 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.98862    0.78331   14.03   <2e-16 ***
## rt          -1.11780    0.09822  -11.38   <2e-16 ***
## likable      0.02719    0.03942    0.69    0.491    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9367 on 234 degrees of freedom
## Multiple R-squared:  0.3563, Adjusted R-squared:  0.3508 
## F-statistic: 64.76 on 2 and 234 DF,  p-value: < 2.2e-16

# Linear model of prop_correct on rt and the likeability rating, fit on k
summary(lm(prop_correct ~ rt + likable, data = k))

## 
## Call:
## lm(formula = prop_correct ~ rt + likable, data = k)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41662 -0.16020 -0.04344  0.12762  0.69704 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.521011   0.183437  -8.292 8.78e-15 ***
## rt           0.227099   0.023001   9.873  < 2e-16 ***
## likable     -0.004907   0.009232  -0.532    0.596    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2194 on 234 degrees of freedom
## Multiple R-squared:  0.2941, Adjusted R-squared:  0.2881 
## F-statistic: 48.75 on 2 and 234 DF,  p-value: < 2.2e-16

likeSpeak 6 Analysis

Allison Durkin and Molly Lewis