Load packages and define shared helpers

Preprocess

# Pre-process from database ----
# Opens the SQLite file `db_name`, reads the `datastring` column of
# `table_name` for rows with status = 4 into `db_query`, and then closes the
# connection.
#
# NOTE(review): setwd() with an absolute, machine-specific path makes this
# script non-portable; kept as-is because the relative `db_name` below depends
# on it. Consider building the path with file.path() from a project root.
setwd('/Users/Allison/Documents/likeSpeak/experiment/psiturk_likespeak/')
#setwd('/Documents/GRADUATE_SCHOOL/Projects/likeSpeak/experiment/psiturk_likespeak/')
db_name <- "MGEtest2.db"
table_name <- "RCI5"

sqlite <- RSQLite::SQLite()
exampledb <- dbConnect(sqlite, db_name)

# status = 4 is presumably psiTurk's "submitted" status -- TODO confirm.
# Alternative without the status filter:
#   paste0("SELECT datastring FROM ", table_name)
# `finally` guarantees the connection is released even if the query fails;
# nothing later in this script reads from `exampledb`.
db_query <- tryCatch(
  dbGetQuery(
    exampledb,
    paste0("SELECT datastring FROM ", table_name, " WHERE status = 4")
  ),
  finally = dbDisconnect(exampledb)
)

# Build `d`: one row per non-instruction trial, across all participants.
#
# Each non-NA `datastring` is a JSON document; its trial data is extracted,
# INSTRUCTIONS-phase rows are removed, and participant-level fields are
# attached to every remaining row.

# Start at row 12 because data were added to the previous pilot's database.
first_row <- 12
n_rows <- nrow(db_query)
# `12:n_rows` would count DOWN (12, 11, ..., n) when fewer than 12 rows exist
# and silently pull in pilot rows, so build the index range explicitly.
row_ids <- if (n_rows >= first_row) seq(first_row, n_rows) else integer(0)

# Collect per-participant frames in a list and bind once at the end instead of
# growing `d` with rbind() on every iteration.
pieces <- vector("list", length(row_ids))
for (j in seq_along(row_ids)) {
  i <- row_ids[j]
  if (!is.na(db_query$datastring[i])) {

    rthing <- fromJSON(db_query$datastring[i])  # get datastring to r object

    # get trial data
    k <- rthing$data['trialdata']$trialdata
    k <- k[k$phase != "INSTRUCTIONS", ]

    # add participant info (recycled across all of this participant's rows)
    k$workerID <- rthing$workerId
    k$hitId <- rthing$hitId
    k$parentID <- rthing$questiondata$parentID
    k$gen <- rthing$questiondata$gen
    k$chain <- rthing$questiondata$chain
    pieces[[j]] <- k
  }
}

# Slots for skipped (NA) rows are still NULL; drop them before binding.
pieces <- Filter(Negate(is.null), pieces)
d <- if (length(pieces) > 0) do.call(rbind, pieces) else data.frame()

# Remove columns that are not used in the analyses below.
drops <- c("templates", "template", "action")
d <- d[, is.na(match(names(d), drops))]

# Coerce column types by position (positions refer to `d` AFTER the drop):
#   - columns 18 and 19 become numeric;
#   - columns 9-13 are left untouched;
#   - every other column becomes a factor.
# NOTE(review): positional indexing depends on the exact column order of the
# trial data -- verify if the experiment's output format changes.
numeric_cols <- names(d)[c(18, 19)]
factor_cols <- names(d)[-c(9:13, 18:19)]
d[factor_cols] <- lapply(d[factor_cols], as.factor)
d[numeric_cols] <- lapply(d[numeric_cols], as.numeric)
## Warning in lapply(d[numeric_cols], as.numeric): NAs introduced by coercion
## Warning in lapply(d[numeric_cols], as.numeric): NAs introduced by coercion

Make all possible condition groups

# Two-letter label per row: first letter is the participant's estimator type,
# second is the partner's (O = "Overestimator", U = "Underestimator").
# "UO" is the fall-through label for any non-NA combination that is not
# OO, OU or UU. Labels are resolved innermost-first so the outer ifelse()
# calls take precedence, matching a nested ifelse() chain.
d$groupCompare <- local({
  partic_over <- d$particEstimator == "Overestimator"
  partic_under <- d$particEstimator == "Underestimator"
  partner_over <- d$partnerEstimator == "Overestimator"
  partner_under <- d$partnerEstimator == "Underestimator"

  label <- ifelse(partic_under & partner_under, "UU", "UO")
  label <- ifelse(partic_over & partner_under, "OU", label)
  label <- ifelse(partic_over & partner_over, "OO", label)
  as.factor(label)
})

Accuracy by condition

# Per-worker accuracy within each condition, then condition-level summary.
# For every (workerID, condition) pair, count the non-missing trials whose
# `accuracy` is "correct" and divide by a fixed 8 (presumably the number of
# trials per worker per condition -- confirm). multi_boot() is then asked
# for the mean, ci_lower and ci_upper of those proportions per condition.
acc <- d %>%
  filter(!is.na(accuracy)) %>%
  group_by(workerID, condition) %>%
  summarise(prop_correct = sum(accuracy == "correct", na.rm = TRUE) / 8) %>%
  group_by(condition) %>%
  multi_boot(
    column = "prop_correct",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Bar per condition at the mean, with ci_lower/ci_upper error bars.
ggplot(acc, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ylim(0, 1) +
  ggtitle("Accuracy") +
  themeML

Levenshtein edit distance by condition

# Per-worker Levenshtein distance within each condition, then
# condition-level summary.
# Only TEST-phase rows with a non-missing `lev` are used. For every
# (workerID, condition) pair the `lev` values are summed and divided by a
# fixed 8 (presumably the number of test trials -- confirm). multi_boot()
# is then asked for the mean, ci_lower and ci_upper per condition.
led <- d %>%
  filter(phase == "TEST", !is.na(lev)) %>%
  group_by(workerID, condition) %>%
  summarise(mean_worker_lev = sum(lev) / 8) %>%
  group_by(condition) %>%
  multi_boot(
    column = "mean_worker_lev",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Bar per condition at the mean, with ci_lower/ci_upper error bars.
ggplot(led, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ylim(0, 6) +
  ggtitle("Levenshtein edit distance") +
  themeML

Likeability ratings by condition

# Condition-level summary of `likableRating`.
# Uses QUESTIONNAIRE-phase rows with a non-missing rating; multi_boot() is
# asked for the mean, ci_lower and ci_upper per condition.
likable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(likableRating)) %>%
  group_by(condition) %>%
  multi_boot(
    column = "likableRating",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Bar per condition at the mean, with ci_lower/ci_upper error bars.
ggplot(likable, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ggtitle("Likeability") +
  ylim(0, 10) +
  themeML

Workability ratings by condition

# Condition-level summary of `howWellRating`.
# Uses QUESTIONNAIRE-phase rows with a non-missing rating; multi_boot() is
# asked for the mean, ci_lower and ci_upper per condition.
workable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(howWellRating)) %>%
  group_by(condition) %>%
  multi_boot(
    column = "howWellRating",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Bar per condition at the mean, with ci_lower/ci_upper error bars.
ggplot(workable, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ggtitle("How well worked together") +
  ylim(0, 10) +
  themeML