Load data and define variables
Preprocess
# Pre-process from database ----
# Connects to the SQLite database, pulls the `datastring` column for all
# rows with status = 4, parses each JSON datastring, and stacks the
# non-instruction trial rows of every participant into one data frame `d`.
# NOTE(review): status = 4 is presumably psiTurk's "submitted" code -- confirm.
setwd('/Users/Allison/Documents/likeSpeak/experiment/psiturk_likespeak/')
#setwd('/Documents/GRADUATE_SCHOOL/Projects/likeSpeak/experiment/psiturk_likespeak/')
db_name <- "MGEtest2.db"
table_name <- "RCI5"
sqlite <- RSQLite::SQLite()
# NOTE(review): `exampledb` is never closed in the visible code; call
# dbDisconnect(exampledb) once nothing further needs the connection.
exampledb <- dbConnect(sqlite, db_name)
db_query <- dbGetQuery(
  exampledb,
  paste0("SELECT datastring FROM ", table_name, " WHERE status = 4")
)
#db_query = dbGetQuery(exampledb, paste("SELECT datastring FROM ", table_name, sep = ""))

# Start at row 12 because this data was added to a previous pilot database.
first_row <- 12
n_rows <- nrow(db_query)
# Guard the range: `12:n_rows` counts DOWN when n_rows < 12, which would walk
# back into the pilot rows instead of producing an empty loop.
row_ids <- if (n_rows >= first_row) first_row:n_rows else integer(0)

# Collect one data frame per participant and bind once at the end, rather
# than growing `d` with rbind() on every iteration.
participant_trials <- vector("list", length(row_ids))
for (j in seq_along(row_ids)) {
  i <- row_ids[j]
  if (!is.na(db_query$datastring[i])) {
    rthing <- fromJSON(db_query$datastring[i]) # get datastring to r object
    # get trial data
    k <- rthing$data['trialdata']$trialdata
    k <- k[k$phase != "INSTRUCTIONS", ]
    # add participant info
    k$workerID <- rthing$workerId
    k$hitId <- rthing$hitId
    k$parentID <- rthing$questiondata$parentID
    k$gen <- rthing$questiondata$gen
    k$chain <- rthing$questiondata$chain
    participant_trials[[j]] <- k
  }
}
# Rows with a missing datastring leave NULL slots; drop them before binding.
participant_trials <- Filter(Negate(is.null), participant_trials)
d <- if (length(participant_trials) > 0) {
  do.call(rbind, participant_trials)
} else {
  data.frame()
}
# Remove columns that are not needed downstream.
drops <- c("templates", "template", "action")
d <- d[, !is.element(names(d), drops)]
# Set column types by position (positions refer to `d` after the drop above):
# columns 18-19 are coerced to numeric, columns 9-13 are left as they are,
# and every remaining column is converted to a factor.
numeric_cols <- names(d)[18:19]
factor_cols <- names(d)[-c(9:13, 18:19)]
d[factor_cols] <- lapply(d[factor_cols], as.factor)
d[numeric_cols] <- lapply(d[numeric_cols], as.numeric)
## Warning in lapply(d[numeric_cols], as.numeric): NAs introduced by coercion
## Warning in lapply(d[numeric_cols], as.numeric): NAs introduced by coercion
Make all possible condition groups
# Two-letter group label: first letter from the participant's estimator
# type, second from the partner's. Any non-missing combination that is not
# OO, OU or UU falls through to "UO".
partic_over <- d$particEstimator == "Overestimator"
partic_under <- d$particEstimator == "Underestimator"
partner_over <- d$partnerEstimator == "Overestimator"
partner_under <- d$partnerEstimator == "Underestimator"
group_label <- ifelse(
  partic_over & partner_over, "OO",
  ifelse(
    partic_over & partner_under, "OU",
    ifelse(partic_under & partner_under, "UU", "UO")
  )
)
d$groupCompare <- as.factor(group_label)
Accuracy by condition
# Per worker and condition: number of non-missing trials marked "correct",
# divided by a fixed 8. The per-worker proportions are then handed to
# multi_boot() for the mean, ci_lower and ci_upper of each condition.
acc <- d %>%
  group_by(workerID, condition) %>%
  filter(!is.na(accuracy)) %>%
  summarise(prop_correct = sum(accuracy == "correct") / 8) %>%
  group_by(condition) %>%
  multi_boot(
    column = "prop_correct",
    summary_groups = c("condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Bar chart of the condition means with error bars from ci_lower to ci_upper.
ggplot(acc, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge", width = 0.2
  ) +
  ylim(0, 1) +
  labs(title = "Accuracy") +
  themeML
Levenshtein edit distance by condition
# Per worker and condition: sum of `lev` over TEST-phase trials with a
# non-missing value, divided by a fixed 8. The per-worker values are then
# handed to multi_boot() for the mean, ci_lower and ci_upper of each condition.
led <- d %>%
  group_by(workerID, condition) %>%
  filter(phase == "TEST" & !is.na(lev)) %>%
  summarise(mean_worker_lev = sum(lev) / 8) %>%
  group_by(condition) %>%
  multi_boot(
    column = "mean_worker_lev",
    summary_groups = c("condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Bar chart of the condition means with error bars from ci_lower to ci_upper.
ggplot(led, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge", width = 0.2
  ) +
  ylim(0, 6) +
  labs(title = "Levenshtein edit distance") +
  themeML
Likeability ratings by condition
# Questionnaire rows with a non-missing likableRating, grouped by condition
# and handed to multi_boot() for the mean, ci_lower and ci_upper.
likable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(likableRating)) %>%
  group_by(condition) %>%
  multi_boot(
    column = "likableRating",
    summary_groups = c("condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Bar chart of the condition means with error bars from ci_lower to ci_upper.
ggplot(likable, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge", width = 0.2
  ) +
  labs(title = "Likeability") +
  ylim(0, 10) +
  themeML
Workability ratings by condition
# Questionnaire rows with a non-missing howWellRating, grouped by condition
# and handed to multi_boot() for the mean, ci_lower and ci_upper.
workable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(howWellRating)) %>%
  group_by(condition) %>%
  multi_boot(
    column = "howWellRating",
    summary_groups = c("condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Bar chart of the condition means with error bars from ci_lower to ci_upper.
ggplot(workable, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge", width = 0.2
  ) +
  labs(title = "How well worked together") +
  ylim(0, 10) +
  themeML