Load and define things
Preprocess
# Pre-process from database: read every participant's JSON datastring from the
# psiturk SQLite table and stack their trial data into one data frame `d`.
#setwd('/Users/Allison/Documents/likeSpeak/experiment/psiturk_likespeak/')
setwd('/Documents/GRADUATE_SCHOOL/Projects/likeSpeak/experiment/psiturk_likespeak/')
db_name <- "MGEtest2.db"
table_name <- "RCI5"
# Index of the first query row to use; earlier rows are skipped.
# NOTE(review): presumably rows 1-11 are pilot/test sessions -- confirm.
first_row <- 12

sqlite <- RSQLite::SQLite()
exampledb <- dbConnect(sqlite, db_name)
# NOTE(review): status = 4 is presumably psiturk's "submitted" status -- confirm.
db_query <- dbGetQuery(
  exampledb,
  paste0("SELECT datastring FROM ", table_name, " WHERE status = 4")
)
#db_query = dbGetQuery(exampledb, paste("SELECT datastring FROM ", table_name, sep = ""))
# Everything needed is now in memory, so release the connection.
dbDisconnect(exampledb)

# Guard against `12:n` counting backwards when fewer than `first_row` rows
# are returned.
n_rows <- nrow(db_query)
rows <- if (n_rows >= first_row) first_row:n_rows else integer(0)

# Collect one data frame per participant and bind once at the end instead of
# growing `d` with rbind() on every iteration.
participants <- vector("list", length(rows))
for (j in seq_along(rows)) {
  datastring <- db_query$datastring[rows[j]]
  if (is.na(datastring)) {
    next
  }
  rthing <- fromJSON(datastring) # get datastring to r object
  # get trial data, without the instruction screens
  k <- rthing$data[["trialdata"]]
  k <- k[k$phase != "INSTRUCTIONS", ]
  # add participant info
  k$workerID <- rthing$workerId
  k$hitId <- rthing$hitId
  k$parentID <- rthing$questiondata$parentID
  k$gen <- rthing$questiondata$gen
  k$chain <- rthing$questiondata$chain
  participants[[j]] <- k
}
participants <- Filter(Negate(is.null), participants)
d <- if (length(participants) > 0) do.call(rbind, participants) else data.frame()
# Discard the columns that are not used in the analysis.
drops <- c("templates","template", "action")
d <- d[, is.na(match(names(d), drops))]
# Columns 16-17 become numeric; everything except columns 9-12 and 16-17
# becomes a factor. The positions refer to `d` after the drop above.
numeric_cols <- names(d)[16:17]
factor_cols <- names(d)[-c(9:12, 16:17)]
d[factor_cols] <- lapply(factor_cols, function(nm) as.factor(d[[nm]]))
d[numeric_cols] <- lapply(numeric_cols, function(nm) as.numeric(d[[nm]]))
Make all possible condition groups
# Two-letter code for the (participant, partner) estimator pairing:
# first letter = participant, second = partner; O = Overestimator,
# U = Underestimator. Any combination not matched explicitly falls to "UO".
d$groupCompare <- with(d, {
  self_over <- particEstimator == "Overestimator"
  self_under <- particEstimator == "Underestimator"
  partner_over <- partnerEstimator == "Overestimator"
  partner_under <- partnerEstimator == "Underestimator"
  as.factor(
    ifelse(self_over & partner_over, "OO",
      ifelse(self_over & partner_under, "OU",
        ifelse(self_under & partner_under, "UU", "UO")
      )
    )
  )
})
Drop participants who failed the social manipulation check for either themselves or their partner
# Copy each worker's manipulation-check answers onto all of that worker's rows,
# then keep only workers who got neither answer "incorrect".
# Each column is subset by its OWN missing-value mask (the self column used to
# be indexed by the partner column's mask). `[1]` takes the first recorded
# answer, so a worker with no answer gets NA and is dropped by the filter
# rather than aborting the pipeline.
d <- d %>%
  group_by(workerID) %>%
  mutate(
    self_guess_accuracy =
      self_guess_accuracy[!is.na(self_guess_accuracy)][1],
    partner_guess_accuracy =
      partner_guess_accuracy[!is.na(partner_guess_accuracy)][1]
  ) %>%
  ungroup() %>%
  filter(self_guess_accuracy != "incorrect" & partner_guess_accuracy != "incorrect")
Accuracy by condition
# Per-worker proportion correct within each condition (out of 8 scored
# trials), then bootstrapped mean and confidence interval per condition.
acc <- d %>%
  filter(!is.na(accuracy)) %>%
  group_by(workerID, condition) %>%
  summarise(prop_correct = sum(accuracy == "correct") / 8) %>%
  group_by(condition) %>%
  multi_boot(
    column = "prop_correct",
    summary_groups = c("condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Bar per condition with bootstrapped CI whiskers.
ggplot(acc, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(title = "Accuracy") +
  themeML
Likeability ratings
# Count of each likeability rating, split by condition (rows without a
# rating are excluded).
d %>%
  filter(!is.na(likableRating)) %>%
  ggplot(aes(x = as.factor(likableRating), fill = condition)) +
  geom_bar(position = "dodge") +
  labs(title = "Likeability", x = "rating") +
  scale_y_continuous(limits = c(0, 10)) +
  themeML
Likeability ratings by condition
# Bootstrapped mean likeability rating (with CI) per condition, using only
# questionnaire rows that carry a rating.
likable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(likableRating)) %>%
  group_by(condition) %>%
  multi_boot(
    column = "likableRating",
    summary_groups = c("condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

ggplot(likable, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  labs(title = "Likeability") +
  scale_y_continuous(limits = c(0, 10)) +
  themeML
Workability ratings
# Count of each "how well did you work together" rating, split by condition
# (rows without a rating are excluded).
d %>%
  filter(!is.na(howWellRating)) %>%
  ggplot(aes(x = as.factor(howWellRating), fill = condition)) +
  geom_bar(position = "dodge") +
  labs(title = "How well worked together", x = "rating") +
  scale_y_continuous(limits = c(0, 10)) +
  themeML
Workability ratings by condition
# Bootstrapped mean "worked together" rating (with CI) per condition, using
# only questionnaire rows that carry a rating.
workable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(howWellRating)) %>%
  group_by(condition) %>%
  multi_boot(
    column = "howWellRating",
    summary_groups = c("condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

ggplot(workable, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  labs(title = "How well worked together") +
  scale_y_continuous(limits = c(0, 10)) +
  themeML
Edit distances
# Row-wise edit distance between the target `word` and the participant's
# `guessedLabel`, plus its breakdown into insertions, deletions and
# substitutions (columns lev2, ins, del, sub).
# adist() is called once per row with counts = TRUE: the returned value is the
# distance and its "counts" attribute holds the three operation counts, so the
# extra calls are unnecessary. Results are collected in preallocated vectors
# and attached at the end; seq_len() keeps the loop safe when `d` has no rows.
n_words <- nrow(d)
lev_dist <- rep(NA_real_, n_words)
n_ins <- rep(NA_integer_, n_words)
n_del <- rep(NA_integer_, n_words)
n_sub <- rep(NA_integer_, n_words)
for (i in seq_len(n_words)) {
  pair_dist <- adist(d$word[i], d$guessedLabel[i], counts = TRUE)
  # counts come in the order: insertions, deletions, substitutions
  pair_counts <- drop(attr(pair_dist, "counts"))
  lev_dist[i] <- pair_dist[1, 1]
  n_ins[i] <- pair_counts[1]
  n_del[i] <- pair_counts[2]
  n_sub[i] <- pair_counts[3]
}
d$lev2 <- lev_dist
d$ins <- n_ins
d$del <- n_del
d$sub <- n_sub
# Distribution of total edit distance by condition, one bin per integer value.
# geom_histogram() is used because geom_bar() no longer accepts `binwidth`
# (removed in ggplot2 2.0); in older versions the two are equivalent here.
ggplot(d, aes(x = lev2, fill = condition)) +
  geom_histogram(position = "dodge", binwidth = 1) +
  ggtitle("Edit distance") +
  xlab("edit distance") +
  themeML
# Distribution of insertion counts by condition, one bin per integer value.
# binwidth = 1 is set explicitly, matching the other edit-operation plots,
# instead of relying on the default binning (range/30 in old ggplot2).
ggplot(d, aes(x = ins, fill = condition)) +
  geom_histogram(position = "dodge", binwidth = 1) +
  ggtitle("insertions") +
  xlab("insertions") +
  themeML
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Distribution of deletion counts by condition, one bin per integer value.
# geom_histogram() is used because geom_bar() no longer accepts `binwidth`
# (removed in ggplot2 2.0); in older versions the two are equivalent here.
ggplot(d, aes(x = del, fill = condition)) +
  geom_histogram(position = "dodge", binwidth = 1) +
  ggtitle("deletions") +
  xlab("deletions") +
  themeML
# Distribution of substitution counts by condition, one bin per integer value.
# geom_histogram() is used because geom_bar() no longer accepts `binwidth`
# (removed in ggplot2 2.0); in older versions the two are equivalent here.
# The axis label typo ("subsitutions") is corrected.
ggplot(d, aes(x = sub, fill = condition)) +
  geom_histogram(position = "dodge", binwidth = 1) +
  ggtitle("substitutions") +
  xlab("substitutions") +
  themeML
# Bootstrapped mean Levenshtein distance (with CI) per condition, using only
# test-phase rows that have a computed distance.
led <- d %>%
  filter(phase == "TEST", !is.na(lev2)) %>%
  group_by(condition) %>%
  multi_boot(
    column = "lev2",
    summary_groups = c("condition"),
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

ggplot(led, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  scale_y_continuous(limits = c(0, 6)) +
  labs(title = "Levenshtein edit distance") +
  themeML
Look at all words
# List every test-phase target word next to the label the worker produced.
d[d$phase == "TEST", c("workerID", "word", "guessedLabel")] %>%
  print.data.frame()
## workerID word guessedLabel
## 1 A34DW7VIU89YW8 gabip bapim
## 2 A34DW7VIU89YW8 panur banur
## 3 A34DW7VIU89YW8 bipim mogup
## 4 A34DW7VIU89YW8 banop banur
## 5 A34DW7VIU89YW8 dabax agax
## 6 A34DW7VIU89YW8 gipim banir
## 7 A34DW7VIU89YW8 mogup mogup
## 8 A34DW7VIU89YW8 dapag agax
## 9 A251BVRUXN0QRW godax gogax
## 10 A251BVRUXN0QRW gumig gigup
## 11 A251BVRUXN0QRW tidut tidax
## 12 A251BVRUXN0QRW motud mutox
## 13 A251BVRUXN0QRW mumog gogax
## 14 A251BVRUXN0QRW ginop dix
## 15 A251BVRUXN0QRW godop gugong
## 16 A251BVRUXN0QRW gadut didong
## 17 A38HODAQUKJKKS nipag nitud
## 18 A38HODAQUKJKKS putud gobup
## 19 A38HODAQUKJKKS gidop nitud
## 20 A38HODAQUKJKKS gopag prodog
## 21 A38HODAQUKJKKS mubup gobup
## 22 A38HODAQUKJKKS nitud nitad
## 23 A38HODAQUKJKKS gobup bepo
## 24 A38HODAQUKJKKS nidut nidug
## 25 AKLV0WIZZ356X topum dunop
## 26 AKLV0WIZZ356X bidop kimag
## 27 AKLV0WIZZ356X kidut dunno
## 28 AKLV0WIZZ356X dunid tallyhup
## 29 AKLV0WIZZ356X nupag zortac
## 30 AKLV0WIZZ356X kimig kimdo
## 31 AKLV0WIZZ356X gamog whokno
## 32 AKLV0WIZZ356X gotob scallywag
## 33 AWVIOLZUKBNVU pimup pimup
## 34 AWVIOLZUKBNVU momup momog
## 35 AWVIOLZUKBNVU kunad kunad
## 36 AWVIOLZUKBNVU tukug babip
## 37 AWVIOLZUKBNVU pigup limup
## 38 AWVIOLZUKBNVU donad donad
## 39 AWVIOLZUKBNVU dabip dabip
## 40 AWVIOLZUKBNVU dapag cabip
## 41 AOPG07J95DDJT bapag pabig
## 42 AOPG07J95DDJT gigog gutok
## 43 AOPG07J95DDJT bagir pibu
## 44 AOPG07J95DDJT gagir gitub
## 45 AOPG07J95DDJT datib datuk
## 46 AOPG07J95DDJT gutud pepbu
## 47 AOPG07J95DDJT tibax tebig
## 48 AOPG07J95DDJT bubip sobig
## 49 A1KNXUNWWOK553 putib dapud
## 50 A1KNXUNWWOK553 dabup kupag
## 51 A1KNXUNWWOK553 nikug nikur
## 52 A1KNXUNWWOK553 pabax dutad
## 53 A1KNXUNWWOK553 kupag kutog
## 54 A1KNXUNWWOK553 kinur dinur
## 55 A1KNXUNWWOK553 dutud kuplag
## 56 A1KNXUNWWOK553 dagog yugog
## 57 A3L1VB6K50WQ44 kunid kitob
## 58 A3L1VB6K50WQ44 gotob gotob
## 59 A3L1VB6K50WQ44 budop mobit
## 60 A3L1VB6K50WQ44 migog mogog
## 61 A3L1VB6K50WQ44 mipim mitig
## 62 A3L1VB6K50WQ44 bipag kotob
## 63 A3L1VB6K50WQ44 kigup urbit
## 64 A3L1VB6K50WQ44 nigup kotib
## 65 ADDIGX3PJ5CA8 pupim pamig
## 66 ADDIGX3PJ5CA8 monur pumac
## 67 ADDIGX3PJ5CA8 kidut ligf
## 68 ADDIGX3PJ5CA8 pamig pummy
## 69 ADDIGX3PJ5CA8 pigup patuc
## 70 ADDIGX3PJ5CA8 pubax pumig
## 71 ADDIGX3PJ5CA8 gutib puion
## 72 ADDIGX3PJ5CA8 nitob pujj