load and define things
preprocess
#Pre-process from database
setwd('/Users/Allison/Documents/likeSpeak/experiment/psiturk_likespeak/')
#setwd('/Documents/GRADUATE_SCHOOL/Projects/likeSpeak/experiment/psiturk_likespeak/')
db_name = "MGEtest4.db"
table_name = "RCI5"
sqlite <- RSQLite::SQLite()
exampledb <- dbConnect(sqlite, db_name)
db_query = dbGetQuery(exampledb, paste("SELECT datastring FROM ", table_name, " WHERE status = 4", sep = ""))
#db_query = dbGetQuery(exampledb, paste("SELECT datastring FROM ", table_name, sep = ""))
d = data.frame()
for (i in 103:dim(db_query)[1]){
if (!is.na(db_query$datastring[i])) {
rthing = fromJSON(db_query$datastring[i]) # get datastring to r object
# get trial data
k = rthing$data['trialdata']$trialdata
k = k[k$phase != "INSTRUCTIONS",]
# add participant info
k$workerID = rthing$workerId
k$hitId = rthing$hitId
k$parentID = rthing$questiondata$parentID
k$gen = rthing$questiondata$gen
k$chain = rthing$questiondata$chain
d = rbind(d, k)
}
}
# drop weirdo columns
drops <- c("templates","template", "action")
d = d[,!(names(d) %in% drops)]
# make stuff factors
factor_cols <- names(d)[c(-5,-11:-14, -17:-19)]
numeric_cols <- names(d)[c(5,17:19)]
d[factor_cols] <- lapply(d[factor_cols], as.factor)
d[numeric_cols] <- lapply(d[numeric_cols], as.numeric)
Drop participants who missed social manipulation check (both self and partner)
d = d %>%
group_by(workerID) %>%
mutate(self_guess_accuracy = self_guess_accuracy[!is.na(partner_guess_accuracy)],
partner_guess_accuracy = partner_guess_accuracy[!is.na(partner_guess_accuracy)]) %>%
filter(self_guess_accuracy != "incorrect" & partner_guess_accuracy != "incorrect")
# get rid of multiword responses
d$guessed_label_Nwords = sapply(gregexpr("\\S+", d$guessedLabel), length)
d = d[d$guessed_label_Nwords != 2,]
Make all possible condition groups
d$groupCompare = as.factor(ifelse(d$particEstimator == "Overestimator" &
d$partnerEstimator == "Overestimator", "OO",
ifelse(d$particEstimator == "Overestimator" &
d$partnerEstimator == "Underestimator", "OU",
ifelse(d$particEstimator == "Underestimator" &
d$partnerEstimator == "Underestimator", "UU","UO"))))
d$groupCompare <- factor(d$groupCompare, levels=c("OO", "UU", "OU", "UO"))
ggplot(d[!is.na(d$likableRating),], aes(x=as.factor(likableRating),
fill = condition)) +
geom_bar(position="dodge") +
ggtitle("Likeability") +
xlab("rating") +
ylim(0,7) +
themeML
Likeability ratings by condition
likable = d %>%
filter(phase == "QUESTIONNAIRE" & !is.na(likableRating)) %>%
multi_boot(column="likableRating",
summary_groups = c("condition"),
statistics_functions = c("mean", "ci_lower","ci_upper"))
ggplot(likable, aes(y=mean, x = condition)) +
geom_bar(position="dodge", stat="identity", fill = "red") +
geom_errorbar(aes(ymin = ci_lower, ymax= ci_upper),
width=0.2, position="dodge") +
ggtitle("Likeability") +
ylim(0,7) +
themeML
Likeability ratings by condition
likable = d %>%
filter(phase == "QUESTIONNAIRE" & !is.na(likableRating)) %>%
multi_boot(column="likableRating",
summary_groups = c("groupCompare", "condition"),
statistics_functions = c("mean", "ci_lower","ci_upper"))
ggplot(likable, aes(y=mean, x = groupCompare, fill=condition)) +
geom_bar(position="dodge", stat="identity") +
geom_errorbar(aes(ymin = ci_lower, ymax= ci_upper),
width=0.2, position="dodge") +
ggtitle("Likeability") +
ylim(0,7) +
themeML
Workability ratings
ggplot(d[!is.na(d$howWellRating),], aes(x=as.factor(howWellRating), fill = condition)) +
geom_bar(position="dodge") +
ggtitle("How well worked together") +
xlab("rating") +
ylim(0,10) +
themeML
Workability ratings by condition
workable = d %>%
filter(phase == "QUESTIONNAIRE" & !is.na(howWellRating)) %>%
multi_boot(column="howWellRating",
summary_groups = c("condition"),
statistics_functions = c("mean", "ci_lower","ci_upper"))
ggplot(workable, aes(y=mean, x = condition)) +
geom_bar(position="dodge", stat="identity", fill = "red") +
geom_errorbar(aes(ymin = ci_lower, ymax= ci_upper),
width=0.2, position="dodge") +
ggtitle("How well worked together") +
ylim(0,7) +
themeML
Workability ratings by condition
workable = d %>%
filter(phase == "QUESTIONNAIRE" & !is.na(howWellRating)) %>%
multi_boot(column="howWellRating",
summary_groups = c("groupCompare", "condition"),
statistics_functions = c("mean", "ci_lower","ci_upper"))
ggplot(workable, aes(y=mean, x = groupCompare, fill = condition)) +
geom_bar(position="dodge", stat="identity") +
geom_errorbar(aes(ymin = ci_lower, ymax= ci_upper),
width=0.2, position="dodge") +
ggtitle("How well worked together") +
ylim(0,7) +
themeML
Edit distances
for (i in 1:dim(d)[1]){
d$lev2[i] = adist(d$word[i], d$guessedLabel[i])
d$ins[i] = drop(attr(adist(d$word[i], d$guessedLabel[i], counts = T), "counts"))[1]
d$del[i] = drop(attr(adist(d$word[i], d$guessedLabel[i], counts = T), "counts"))[2]
d$sub[i] = drop(attr(adist(d$word[i], d$guessedLabel[i], counts = T), "counts"))[3]
}
ggplot(d, aes(x=lev2, fill = condition)) +
geom_bar(position="dodge", binwidth = 1) +
ggtitle("Edit distance") +
xlab("edit distance") +
themeML
ggplot(d, aes(x=sub, fill = condition)) +
geom_bar(position="dodge", binwidth = 1) +
ggtitle("substitutions") +
xlab("subsitutions") +
themeML
led = d %>%
group_by(condition) %>%
filter(phase == "TEST" & !is.na(lev2)) %>%
multi_boot(column="lev2",
summary_groups = c("condition"),
statistics_functions = c("mean", "ci_lower","ci_upper"))
ggplot(led, aes(y=mean, x = condition)) +
geom_bar(position="dodge", stat="identity", fill = "red") +
geom_errorbar(aes(ymin = ci_lower, ymax= ci_upper),
width=0.2, position="dodge") +
ylim(0,6) +
ggtitle("Levenshtein edit distance") +
themeML
led = d %>%
filter(phase == "TEST" & !is.na(lev2)) %>%
multi_boot(column="lev2",
summary_groups = c("groupCompare", "condition"),
statistics_functions = c("mean", "ci_lower","ci_upper"))
ggplot(led, aes(y=mean, x = groupCompare, fill = condition)) +
geom_bar(position="dodge", stat="identity") +
geom_errorbar(aes(ymin = ci_lower, ymax= ci_upper),
width=0.2, position="dodge") +
ylim(0,4) +
ggtitle("Levenshtein edit distance") +
themeML
subs = d %>%
filter(phase == "TEST" & !is.na(lev2)) %>%
multi_boot(column="sub",
summary_groups = c("groupCompare", "condition"),
statistics_functions = c("mean", "ci_lower","ci_upper"))
ggplot(subs, aes(y=mean, x = groupCompare, fill = condition)) +
geom_bar(position="dodge", stat="identity") +
geom_errorbar(aes(ymin = ci_lower, ymax= ci_upper),
width=0.2, position="dodge") +
ylim(0,4) +
ggtitle("substitutions") +
themeML
RTs
d$rt.log = log(d$rt)
rts = d %>%
filter(!is.na(rt.log)) %>%
multi_boot(column="rt.log",
summary_groups = c("groupCompare", "condition"),
statistics_functions = c("mean", "ci_lower","ci_upper"))
ggplot(rts, aes(y=mean, x = groupCompare, fill = "condition")) +
geom_bar(position="dodge", stat="identity", fill = "red") +
geom_errorbar(aes(ymin = ci_lower, ymax= ci_upper),
width=0.2, position="dodge") +
ggtitle("RTs") +
ylim(0,10) +
themeML
Accuracy by condition
acc = d %>%
group_by(workerID, condition) %>%
filter(!is.na(accuracy)) %>%
mutate(correct = length(which(accuracy == "correct"))) %>%
summarise(prop_correct = correct[1]/8) %>%
multi_boot(column="prop_correct",
summary_groups = c("condition"),
statistics_functions = c("mean", "ci_lower","ci_upper"))
ggplot(acc, aes(y=mean, x = condition)) +
geom_bar(position="dodge", stat="identity", fill = "red") +
geom_errorbar(aes(ymin = ci_lower, ymax= ci_upper),
width=0.2, position="dodge") +
ylim(0,1) +
ggtitle("Accuracy") +
themeML
Accuracy by condition
acc = d %>%
group_by(workerID, groupCompare, condition) %>%
filter(!is.na(accuracy)) %>%
mutate(correct = length(which(accuracy == "correct"))) %>%
summarise(prop_correct = correct[1]/8) %>%
multi_boot(column="prop_correct",
summary_groups = c("groupCompare", "condition"),
statistics_functions = c("mean", "ci_lower","ci_upper"))
ggplot(acc, aes(y=mean, x = groupCompare, fill = condition)) +
geom_bar(position="dodge", stat="identity") +
geom_errorbar(aes(ymin = ci_lower, ymax= ci_upper),
width=0.2, position="dodge") +
ylim(0,1) +
ggtitle("Accuracy") +
themeML
#write.csv(d[d$phase == "TEST",c("workerID", "word", "guessedLabel")], "wordfilter.csv")
Correlation between likability and number of edits
p = d %>%
group_by(workerID) %>%
filter(length(likableRating[!is.na(likableRating)]) > 0) %>%
filter(length(howWellRating[!is.na(howWellRating)]) > 0) %>%
mutate(likableRating = likableRating[!is.na(likableRating)],
howWellRating = howWellRating[!is.na(howWellRating)])
k = p %>%
group_by(workerID, condition, groupCompare) %>%
mutate(correct = length(which(accuracy == "correct"))) %>%
summarise(edits = mean(lev2, na.rm = T),
likable = mean(likableRating),
howWell = mean(howWellRating),
rt = mean(rt.log, na.rm = T),
prop_correct = correct[1]/8) %>%
filter(!is.na(groupCompare))
ggplot(k, aes(y=edits, x = likable)) +
geom_point(position="dodge", stat="identity") +
ggtitle("likability vs. edits") +
stat_smooth(method = "lm") +
annotate("text", x=2, y=.5, color = "red", size = 7,
label=paste("r=", round(cor(k$likable, k$edits), 2))) +
themeML
ggplot(k, aes(y=edits, x = likable)) +
geom_point(position="dodge", stat="identity") +
ggtitle("likability vs. edits") +
stat_smooth(method = "lm") +
facet_grid(.~groupCompare) +
themeML
ggplot(k, aes(y=edits, x = howWell)) +
geom_point(position="dodge", stat="identity") +
ggtitle("workability vs. edits") +
stat_smooth(method = "lm") +
annotate("text", x=2, y=.5, color = "red", size = 7,
label=paste("r=", round(cor(k$howWell, k$edits), 2))) +
themeML
ggplot(k, aes(y=edits, x = howWell)) +
geom_point(position="dodge", stat="identity") +
ggtitle("workability vs. edits") +
stat_smooth(method = "lm") +
facet_grid(.~groupCompare) +
themeML
ggplot(k, aes(y=rt, x = likable)) +
geom_point(position="dodge", stat="identity") +
ggtitle("likability vs. rt") +
stat_smooth(method = "lm") +
facet_grid(.~groupCompare) +
themeML
ggplot(k, aes(y=rt, x = howWell)) +
geom_point(position="dodge", stat="identity") +
ggtitle("workability vs. rt") +
stat_smooth(method = "lm") +
facet_grid(.~groupCompare) +
themeML
ggplot(k, aes(y=prop_correct, x = likable)) +
geom_point(position="dodge", stat="identity") +
ggtitle("likability vs. prop_correct") +
stat_smooth(method = "lm") +
annotate("text", x=2, y=.5, color = "red", size = 7,
label=paste("r=", round(cor(k$likable, k$prop_correct), 2))) +
themeML
ggplot(k, aes(y=prop_correct, x = likable)) +
geom_point(position="dodge", stat="identity") +
ggtitle("likability vs. prop_correct") +
stat_smooth(method = "lm") +
facet_grid(.~groupCompare) +
themeML
ggplot(k, aes(y=prop_correct, x = howWell)) +
geom_point(position="dodge", stat="identity") +
ggtitle("workability vs. prop_correct") +
stat_smooth(method = "lm") +
annotate("text", x=2, y=.5, color = "red", size = 7,
label=paste("r=", round(cor(k$howWell, k$prop_correct), 2))) +
themeML
ggplot(k, aes(y=prop_correct, x = howWell)) +
geom_point(position="dodge", stat="identity") +
ggtitle("workability vs. prop_correct") +
stat_smooth(method = "lm") +
facet_grid(.~groupCompare) +
themeML
social variables predict edits controling for rt
summary(lm(edits~rt + howWell, k))
##
## Call:
## lm(formula = edits ~ rt + howWell, data = k)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4998 -0.5730 -0.1807 0.5509 5.1903
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.18168 1.37550 8.129 2.52e-12 ***
## rt -0.99361 0.17620 -5.639 2.04e-07 ***
## howWell -0.26764 0.06737 -3.972 0.000145 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.14 on 88 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3899, Adjusted R-squared: 0.376
## F-statistic: 28.12 on 2 and 88 DF, p-value: 3.61e-10
summary(lm(edits~rt + likable, k))
##
## Call:
## lm(formula = edits ~ rt + likable, data = k)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3334 -0.7081 -0.1808 0.5999 5.3688
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.68094 1.48172 7.883 8.01e-12 ***
## rt -1.09486 0.18413 -5.946 5.43e-08 ***
## likable -0.15824 0.07338 -2.156 0.0338 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.206 on 88 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3166, Adjusted R-squared: 0.3011
## F-statistic: 20.39 on 2 and 88 DF, p-value: 5.313e-08
summary(lm(prop_correct~rt + likable, k))
##
## Call:
## lm(formula = prop_correct ~ rt + likable, data = k)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35162 -0.17901 -0.03374 0.10872 0.73278
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.52598 0.27764 -5.496 3.66e-07 ***
## rt 0.20454 0.03455 5.920 5.93e-08 ***
## likable 0.04129 0.01394 2.963 0.00391 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2291 on 89 degrees of freedom
## Multiple R-squared: 0.3356, Adjusted R-squared: 0.3207
## F-statistic: 22.48 on 2 and 89 DF, p-value: 1.25e-08