load and define things
preprocess
# Pre-process from database ----
# Pulls the `datastring` JSON of every row in `table_name` with status = 4,
# flattens each participant's trial data into one data frame `d`, then drops
# unused columns and coerces column types.
# NOTE(review): setwd() to a machine-specific path makes the script
# non-portable; it is kept because `db_name` is resolved relative to it.
setwd('/Users/Allison/Documents/likeSpeak/experiment/psiturk_likespeak/')
#setwd('/Documents/GRADUATE_SCHOOL/Projects/likeSpeak/experiment/psiturk_likespeak/')
db_name <- "MGEtest5.db"
table_name <- "RCI5"
sqlite <- RSQLite::SQLite()
exampledb <- dbConnect(sqlite, db_name)
db_query <- dbGetQuery(
  exampledb,
  paste0("SELECT datastring FROM ", table_name, " WHERE status = 4")
)
#db_query = dbGetQuery(exampledb, paste("SELECT datastring FROM ", table_name, sep = ""))
# The rows are in memory now, so release the connection instead of leaking it.
dbDisconnect(exampledb)

# Build one data frame per participant and bind them once at the end.
# seq_len() keeps the loop from running on index 0 when the query is empty.
n_participants <- nrow(db_query)
participant_trials <- vector("list", n_participants)
for (i in seq_len(n_participants)) {
  if (!is.na(db_query$datastring[i])) {
    rthing <- fromJSON(db_query$datastring[i]) # get datastring to r object

    # get trial data
    k <- rthing$data['trialdata']$trialdata
    k <- k[k$phase != "INSTRUCTIONS", ]

    # add participant info
    k$workerID <- rthing$workerId
    k$hitId <- rthing$hitId
    k$parentID <- rthing$questiondata$parentID
    k$gen <- rthing$questiondata$gen
    k$chain <- rthing$questiondata$chain

    # list(k) so that a NULL value cannot silently shorten the list
    participant_trials[i] <- list(k)
  }
}
# Leading empty data frame guarantees a data frame result even with no rows;
# rbind() ignores the NULL slots left by skipped participants.
d <- do.call(rbind, c(list(data.frame()), participant_trials))
if (nrow(d) == 0) {
  stop("No usable datastring rows found in table ", table_name, call. = FALSE)
}

# drop weirdo columns
drops <- c("templates", "template", "action")
d <- d[, !(names(d) %in% drops), drop = FALSE]

# make stuff factors
# NOTE(review): columns are chosen by position, so this depends on the exact
# column order produced above — confirm the indices if the trial data change.
factor_cols <- names(d)[c(-5, -11:-14, -17:-19)]
numeric_cols <- names(d)[c(5, 17:19)]
d[factor_cols] <- lapply(d[factor_cols], as.factor)
d[numeric_cols] <- lapply(d[numeric_cols], as.numeric)
Drop participants who missed social manipulation check (both self and partner)
# Spread each worker's manipulation-check answers across all of that worker's
# rows, then keep only workers for whom neither answer is "incorrect".
# Each column is now filled from its OWN first non-missing value (previously
# the self column was indexed with the partner column's NA mask), and `[1]`
# keeps mutate() from failing when a worker has zero or several answers:
# a worker with no answer gets NA and is dropped by the filter.
d <- d %>%
  group_by(workerID) %>%
  mutate(
    self_guess_accuracy =
      self_guess_accuracy[!is.na(self_guess_accuracy)][1],
    partner_guess_accuracy =
      partner_guess_accuracy[!is.na(partner_guess_accuracy)][1]
  ) %>%
  filter(
    self_guess_accuracy != "incorrect" &
      partner_guess_accuracy != "incorrect"
  ) %>%
  # do not leak the per-worker grouping into later steps
  ungroup()
# get rid of multiword responses
# Count whitespace-delimited tokens per response. gregexpr() returns a
# length-1 result when nothing matches (or the input is NA), so empty and
# missing responses count as 1 and are retained.
d$guessed_label_Nwords <- vapply(
  gregexpr("\\S+", d$guessedLabel),
  length,
  integer(1)
)
# Keep single-token rows only; the earlier `!= 2` test removed two-word
# responses but let responses of three or more words through.
d <- d[d$guessed_label_Nwords < 2, ]
Make all possible condition groups
# Two-letter code for the participant/partner estimator pairing:
# first letter = participant, second letter = partner (O = Overestimator,
# U = Underestimator). Anything that matches none of the first three
# pairings falls through to "UO".
partic_over <- d$particEstimator == "Overestimator"
partic_under <- d$particEstimator == "Underestimator"
partner_over <- d$partnerEstimator == "Overestimator"
partner_under <- d$partnerEstimator == "Underestimator"

pairing_code <- ifelse(
  partic_over & partner_over, "OO",
  ifelse(
    partic_over & partner_under, "OU",
    ifelse(partic_under & partner_under, "UU", "UO")
  )
)

# Fix the level order used on plot axes and facets
d$groupCompare <- factor(pairing_code, levels = c("OO", "UU", "OU", "UO"))
# Bar chart of how often each likeability rating was given, split by condition.
# NOTE(review): the y axis here is a count; limits of 0-7 make ggplot2 drop
# any bar taller than 7 — confirm that this cap is intended.
ggplot(
  data = d[!is.na(d$likableRating), ],
  mapping = aes(x = as.factor(likableRating), fill = condition)
) +
  geom_bar(position = "dodge") +
  labs(title = "Likeability", x = "rating") +
  ylim(0, 7) +
  themeML
Likeability ratings by condition
# Bootstrapped mean and CI bounds of the likeability rating per condition,
# computed on QUESTIONNAIRE rows that have a rating.
likable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(likableRating)) %>%
  multi_boot(
    column = "likableRating",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# One bar per condition with its confidence interval
ggplot(likable, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  labs(title = "Likeability") +
  ylim(0, 7) +
  themeML
Likeability ratings by estimator pairing (groupCompare) and condition
# Same likeability summary, now broken down by estimator pairing
# (groupCompare) as well as condition.
likable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(likableRating)) %>%
  multi_boot(
    column = "likableRating",
    summary_groups = c("groupCompare", "condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Conditions sit side by side within each pairing
ggplot(likable, aes(x = groupCompare, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  labs(title = "Likeability") +
  ylim(0, 7) +
  themeML
Workability ratings
# Bar chart of how often each "how well worked together" rating was given,
# split by condition.
# NOTE(review): the y axis is a count; limits of 0-10 drop any bar taller
# than 10 — confirm that this cap is intended.
ggplot(
  data = d[!is.na(d$howWellRating), ],
  mapping = aes(x = as.factor(howWellRating), fill = condition)
) +
  geom_bar(position = "dodge") +
  labs(title = "How well worked together", x = "rating") +
  ylim(0, 10) +
  themeML
Workability ratings by condition
# Bootstrapped mean and CI bounds of the "how well" rating per condition,
# computed on QUESTIONNAIRE rows that have a rating.
workable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(howWellRating)) %>%
  multi_boot(
    column = "howWellRating",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# One bar per condition with its confidence interval
ggplot(workable, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  labs(title = "How well worked together") +
  ylim(0, 7) +
  themeML
Workability ratings by estimator pairing (groupCompare) and condition
# Same "how well" summary, now broken down by estimator pairing
# (groupCompare) as well as condition.
workable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(howWellRating)) %>%
  multi_boot(
    column = "howWellRating",
    summary_groups = c("groupCompare", "condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Conditions sit side by side within each pairing
ggplot(workable, aes(x = groupCompare, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  labs(title = "How well worked together") +
  ylim(0, 7) +
  themeML
Edit distances
# Row-wise edit distance between the target `word` and the `guessedLabel`.
# adist() on two vectors returns the full cross matrix, so rows are compared
# one pair at a time. Each pair is computed once (previously four times) and
# results go into preallocated vectors that are attached to `d` at the end.
# seq_len() makes the loop a no-op when `d` has no rows.
n_rows <- nrow(d)
lev2 <- rep(NA_real_, n_rows)
n_ins <- rep(NA_integer_, n_rows)
n_del <- rep(NA_integer_, n_rows)
n_sub <- rep(NA_integer_, n_rows)

for (i in seq_len(n_rows)) {
  pair_dist <- adist(d$word[i], d$guessedLabel[i], counts = TRUE)
  # "counts" holds the insertions, deletions, substitutions, in that order
  op_counts <- drop(attr(pair_dist, "counts"))

  lev2[i] <- pair_dist[1, 1]
  n_ins[i] <- op_counts[1]
  n_del[i] <- op_counts[2]
  n_sub[i] <- op_counts[3]
}

d$lev2 <- lev2
d$ins <- n_ins
d$del <- n_del
d$sub <- n_sub
# Histograms (bin width 1) of total edit distance and of substitution counts,
# split by condition. `binwidth` is a histogram parameter, not a geom_bar()
# one, so geom_histogram() is used to get the intended binning.
ggplot(d, aes(x = lev2, fill = condition)) +
  geom_histogram(position = "dodge", binwidth = 1) +
  ggtitle("Edit distance") +
  xlab("edit distance") +
  themeML

ggplot(d, aes(x = sub, fill = condition)) +
  geom_histogram(position = "dodge", binwidth = 1) +
  ggtitle("substitutions") +
  xlab("subsitutions") +
  themeML
# Bootstrapped mean and CI bounds of the edit distance per condition,
# computed on TEST rows that have a distance.
led <- d %>%
  group_by(condition) %>%
  filter(phase == "TEST", !is.na(lev2)) %>%
  multi_boot(
    column = "lev2",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# One bar per condition with its confidence interval
ggplot(led, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ylim(0, 6) +
  labs(title = "Levenshtein edit distance") +
  themeML
# Edit-distance summary broken down by estimator pairing (groupCompare)
# and condition, computed on TEST rows that have a distance.
led <- d %>%
  filter(phase == "TEST", !is.na(lev2)) %>%
  multi_boot(
    column = "lev2",
    summary_groups = c("groupCompare", "condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Conditions sit side by side within each pairing
ggplot(led, aes(x = groupCompare, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ylim(0, 4) +
  labs(title = "Levenshtein edit distance") +
  themeML
# Substitution-count summary by estimator pairing (groupCompare) and
# condition. Rows are selected on `lev2` being present, as in the
# edit-distance summaries.
subs <- d %>%
  filter(phase == "TEST", !is.na(lev2)) %>%
  multi_boot(
    column = "sub",
    summary_groups = c("groupCompare", "condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Conditions sit side by side within each pairing
ggplot(subs, aes(x = groupCompare, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ylim(0, 4) +
  labs(title = "substitutions") +
  themeML
RTs
# Log-transformed response times, summarised by estimator pairing
# (groupCompare) and condition.
d$rt.log <- log(d$rt)

rts <- d %>%
  filter(!is.na(rt.log)) %>%
  multi_boot(
    column = "rt.log",
    summary_groups = c("groupCompare", "condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# `fill` must map the condition COLUMN. The quoted string "condition" was a
# constant, and the fixed red fill hid it, so the per-condition bars within a
# pairing were neither dodged apart nor distinguishable. This now matches the
# other groupCompare x condition plots.
ggplot(rts, aes(y = mean, x = groupCompare, fill = condition)) +
  geom_bar(position = "dodge", stat = "identity") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    width = 0.2,
    position = "dodge"
  ) +
  ggtitle("RTs") +
  ylim(0, 10) +
  themeML
Accuracy by condition
# Per-worker proportion correct, then bootstrapped mean and CI bounds per
# condition.
# NOTE(review): the denominator is a fixed 8 rather than the number of rows
# the worker actually has — presumably 8 test trials per worker; confirm.
acc <- d %>%
  group_by(workerID, condition) %>%
  filter(!is.na(accuracy)) %>%
  summarise(
    prop_correct = sum(accuracy == "correct", na.rm = TRUE) / 8
  ) %>%
  multi_boot(
    column = "prop_correct",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# One bar per condition with its confidence interval
ggplot(acc, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ylim(0, 1) +
  labs(title = "Accuracy") +
  themeML
Accuracy by estimator pairing (groupCompare) and condition
# Per-worker proportion correct, then bootstrapped mean and CI bounds per
# estimator pairing (groupCompare) and condition.
# NOTE(review): the denominator is a fixed 8 rather than the number of rows
# the worker actually has — presumably 8 test trials per worker; confirm.
acc <- d %>%
  group_by(workerID, groupCompare, condition) %>%
  filter(!is.na(accuracy)) %>%
  summarise(
    prop_correct = sum(accuracy == "correct", na.rm = TRUE) / 8
  ) %>%
  multi_boot(
    column = "prop_correct",
    summary_groups = c("groupCompare", "condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Conditions sit side by side within each pairing
ggplot(acc, aes(x = groupCompare, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ylim(0, 1) +
  labs(title = "Accuracy") +
  themeML
#write.csv(d[d$phase == "TEST",c("workerID", "word", "guessedLabel")], "wordfilter.csv")
Correlation between likability and number of edits
# `p`: keep workers that have at least one likeability rating and at least
# one "how well" rating, and copy that rating onto every row of the worker.
p <- d %>%
  group_by(workerID) %>%
  filter(any(!is.na(likableRating))) %>%
  filter(any(!is.na(howWellRating))) %>%
  mutate(
    likableRating = likableRating[!is.na(likableRating)],
    howWellRating = howWellRating[!is.na(howWellRating)]
  )

# `k`: one row per worker x condition x pairing with mean edit distance,
# the two ratings, mean log RT and proportion correct (out of a fixed 8).
# Rows without a pairing are discarded.
k <- p %>%
  group_by(workerID, condition, groupCompare) %>%
  mutate(correct = sum(accuracy == "correct", na.rm = TRUE)) %>%
  summarise(
    edits = mean(lev2, na.rm = TRUE),
    likable = mean(likableRating),
    howWell = mean(howWellRating),
    rt = mean(rt.log, na.rm = TRUE),
    prop_correct = correct[1] / 8
  ) %>%
  filter(!is.na(groupCompare))
# Per-participant mean edit distance against each social rating, with a
# linear fit; shown overall (annotated with Pearson r) and faceted by
# estimator pairing.
# Points are drawn with the default identity position: dodging is meant for
# grouped bars and only produced warnings here. The correlations skip
# incomplete pairs so that one participant without edits no longer turns the
# label into "r= NA".
ggplot(k, aes(y = edits, x = likable)) +
  geom_point() +
  ggtitle("likability vs. edits") +
  stat_smooth(method = "lm") +
  annotate(
    "text", x = 2, y = .5, color = "red", size = 7,
    label = paste(
      "r=",
      round(cor(k$likable, k$edits, use = "pairwise.complete.obs"), 2)
    )
  ) +
  themeML

ggplot(k, aes(y = edits, x = likable)) +
  geom_point() +
  ggtitle("likability vs. edits") +
  stat_smooth(method = "lm") +
  facet_grid(. ~ groupCompare) +
  themeML

ggplot(k, aes(y = edits, x = howWell)) +
  geom_point() +
  ggtitle("workability vs. edits") +
  stat_smooth(method = "lm") +
  annotate(
    "text", x = 2, y = .5, color = "red", size = 7,
    label = paste(
      "r=",
      round(cor(k$howWell, k$edits, use = "pairwise.complete.obs"), 2)
    )
  ) +
  themeML

ggplot(k, aes(y = edits, x = howWell)) +
  geom_point() +
  ggtitle("workability vs. edits") +
  stat_smooth(method = "lm") +
  facet_grid(. ~ groupCompare) +
  themeML
# Per-participant mean log RT against each social rating, with a linear fit,
# faceted by estimator pairing.
# Points use the default identity position: dodging is meant for grouped
# bars and only produced warnings here.
ggplot(k, aes(y = rt, x = likable)) +
  geom_point() +
  ggtitle("likability vs. rt") +
  stat_smooth(method = "lm") +
  facet_grid(. ~ groupCompare) +
  themeML

ggplot(k, aes(y = rt, x = howWell)) +
  geom_point() +
  ggtitle("workability vs. rt") +
  stat_smooth(method = "lm") +
  facet_grid(. ~ groupCompare) +
  themeML
# Per-participant proportion correct against each social rating, with a
# linear fit; shown overall (annotated with Pearson r) and faceted by
# estimator pairing.
# Points use the default identity position: dodging is meant for grouped
# bars and only produced warnings here. The correlations skip incomplete
# pairs so that a missing value cannot turn the label into "r= NA".
ggplot(k, aes(y = prop_correct, x = likable)) +
  geom_point() +
  ggtitle("likability vs. prop_correct") +
  stat_smooth(method = "lm") +
  annotate(
    "text", x = 2, y = .5, color = "red", size = 7,
    label = paste(
      "r=",
      round(cor(k$likable, k$prop_correct, use = "pairwise.complete.obs"), 2)
    )
  ) +
  themeML

ggplot(k, aes(y = prop_correct, x = likable)) +
  geom_point() +
  ggtitle("likability vs. prop_correct") +
  stat_smooth(method = "lm") +
  facet_grid(. ~ groupCompare) +
  themeML

ggplot(k, aes(y = prop_correct, x = howWell)) +
  geom_point() +
  ggtitle("workability vs. prop_correct") +
  stat_smooth(method = "lm") +
  annotate(
    "text", x = 2, y = .5, color = "red", size = 7,
    label = paste(
      "r=",
      round(cor(k$howWell, k$prop_correct, use = "pairwise.complete.obs"), 2)
    )
  ) +
  themeML

ggplot(k, aes(y = prop_correct, x = howWell)) +
  geom_point() +
  ggtitle("workability vs. prop_correct") +
  stat_smooth(method = "lm") +
  facet_grid(. ~ groupCompare) +
  themeML
Do social variables predict edits, controlling for RT?
# Linear model: mean edit distance predicted by mean log RT and the
# "how well" rating, one row of `k` per observation.
summary(lm(formula = edits ~ rt + howWell, data = k))
##
## Call:
## lm(formula = edits ~ rt + howWell, data = k)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0122 -0.6597 0.0335 0.6523 2.6088
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.18270 0.77035 14.516 <2e-16 ***
## rt -1.09367 0.09768 -11.196 <2e-16 ***
## howWell -0.07301 0.03538 -2.064 0.0402 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9292 on 234 degrees of freedom
## Multiple R-squared: 0.3665, Adjusted R-squared: 0.3611
## F-statistic: 67.69 on 2 and 234 DF, p-value: < 2.2e-16
# Linear model: mean edit distance predicted by mean log RT and the
# likeability rating, one row of `k` per observation.
summary(lm(formula = edits ~ rt + likable, data = k))
##
## Call:
## lm(formula = edits ~ rt + likable, data = k)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.02300 -0.66914 0.01895 0.66504 2.62469
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.98862 0.78331 14.03 <2e-16 ***
## rt -1.11780 0.09822 -11.38 <2e-16 ***
## likable 0.02719 0.03942 0.69 0.491
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9367 on 234 degrees of freedom
## Multiple R-squared: 0.3563, Adjusted R-squared: 0.3508
## F-statistic: 64.76 on 2 and 234 DF, p-value: < 2.2e-16
# Linear model: proportion correct predicted by mean log RT and the
# likeability rating, one row of `k` per observation.
summary(lm(formula = prop_correct ~ rt + likable, data = k))
##
## Call:
## lm(formula = prop_correct ~ rt + likable, data = k)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41662 -0.16020 -0.04344 0.12762 0.69704
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.521011 0.183437 -8.292 8.78e-15 ***
## rt 0.227099 0.023001 9.873 < 2e-16 ***
## likable -0.004907 0.009232 -0.532 0.596
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2194 on 234 degrees of freedom
## Multiple R-squared: 0.2941, Adjusted R-squared: 0.2881
## F-statistic: 48.75 on 2 and 234 DF, p-value: < 2.2e-16