load and define things

preprocess

# Read the stored participant records from the experiment's SQLite database.
# Alternative working directory used on another machine:
#setwd('/Users/Allison/Documents/likeSpeak/experiment/psiturk_likespeak/')
setwd('/Documents/GRADUATE_SCHOOL/Projects/likeSpeak/experiment/psiturk_likespeak/')
db_name <- "MGEtest2.db"
table_name <- "RCI5"

# Open the database file and fetch the `datastring` column of every row whose
# `status` equals 4.
# NOTE(review): the meaning of status code 4 is not shown here -- confirm.
sqlite <- RSQLite::SQLite()
exampledb <- dbConnect(sqlite, db_name)
db_query <- dbGetQuery(
  exampledb,
  sprintf("SELECT datastring FROM %s WHERE status = 4", table_name)
)
# Variant without the status restriction:
#db_query = dbGetQuery(exampledb, paste("SELECT datastring FROM ", table_name, sep = ""))

# Assemble `d`: the trial records of all participants stacked into one data
# frame. Each non-missing `datastring` is parsed from JSON, its trial records
# are taken from `data$trialdata`, rows whose phase is "INSTRUCTIONS" are
# removed, and participant-level fields are attached to every remaining row.

# Processing starts at row 12 of the query result.
# NOTE(review): presumably the earlier rows are pilot/test submissions -- confirm.
first_row <- 12
row_ids <- seq_len(nrow(db_query))
# Filter the index instead of writing `12:n`, which counts backwards (12, 11,
# ...) when the query returns fewer than 12 rows.
row_ids <- row_ids[row_ids >= first_row]

# Collect one data frame per participant and bind them once at the end,
# rather than growing `d` with rbind() on every iteration.
participant_frames <- vector("list", length(row_ids))
for (j in seq_along(row_ids)) {
  i <- row_ids[j]
  if (!is.na(db_query$datastring[i])) {

    rthing <- fromJSON(db_query$datastring[i])  # JSON string -> R object

    # get trial data
    k <- rthing$data[["trialdata"]]
    k <- k[k$phase != "INSTRUCTIONS", ]

    # add participant info
    k$workerID <- rthing$workerId
    k$hitId <- rthing$hitId
    k$parentID <- rthing$questiondata$parentID
    k$gen <- rthing$questiondata$gen
    k$chain <- rthing$questiondata$chain

    participant_frames[[j]] <- k
  }
}

# Rows with a missing datastring leave NULL slots; drop them before binding.
participant_frames <- Filter(Negate(is.null), participant_frames)
d <- if (length(participant_frames) > 0) {
  do.call(rbind, participant_frames)
} else {
  data.frame()  # nothing to process: same empty result as the original loop
}

# Remove the columns named in `drops`.
drops <- c("templates", "template", "action")
d <- d[, !is.element(names(d), drops)]

# Set column types by position: columns 16-17 are converted to numeric,
# columns 9-12 are left as they are, and every other column becomes a factor.
# NOTE(review): this depends on the column order of the parsed trial data.
factor_cols <- names(d)[-c(9:12, 16:17)]
numeric_cols <- names(d)[c(16, 17)]
d[factor_cols] <- lapply(d[factor_cols], as.factor)
d[numeric_cols] <- lapply(d[numeric_cols], as.numeric)

Make all possible condition groups

# Two-letter code for the participant/partner estimator pairing:
# first letter = participant, second = partner (O = Overestimator,
# U = Underestimator). Any row not matching OO, OU or UU falls through to "UO".
d$groupCompare <- local({
  self_over <- d$particEstimator == "Overestimator"
  self_under <- d$particEstimator == "Underestimator"
  partner_over <- d$partnerEstimator == "Overestimator"
  partner_under <- d$partnerEstimator == "Underestimator"

  as.factor(
    ifelse(self_over & partner_over, "OO",
      ifelse(self_over & partner_under, "OU",
        ifelse(self_under & partner_under, "UU", "UO")
      )
    )
  )
})

Drop participants who missed either social manipulation check (self or partner)

# Within each worker, take the manipulation-check answers from the row(s) where
# partner_guess_accuracy is recorded and spread them over all of that worker's
# rows; then keep only rows where neither answer is "incorrect".
# NOTE(review): the result stays grouped by workerID (no ungroup()).
d <- d %>%
  group_by(workerID) %>%
  mutate(
    self_guess_accuracy = self_guess_accuracy[!is.na(partner_guess_accuracy)],
    partner_guess_accuracy = partner_guess_accuracy[!is.na(partner_guess_accuracy)]
  ) %>%
  filter(
    self_guess_accuracy != "incorrect",
    partner_guess_accuracy != "incorrect"
  )

Accuracy by condition

# Proportion correct per worker and condition, then a bootstrapped mean and
# confidence interval per condition.
# NOTE(review): the denominator 8 is presumably the number of scored trials per
# worker -- confirm.
acc <- d %>%
  group_by(workerID, condition) %>%
  filter(!is.na(accuracy)) %>%
  summarise(prop_correct = sum(accuracy == "correct") / 8) %>%
  group_by(condition) %>%
  multi_boot(
    column = "prop_correct",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )


# Bar per condition with the bootstrapped interval as error bars.
ggplot(acc, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ylim(0, 1) +
  ggtitle("Accuracy") +
  themeML

Likeability ratings

# Count of each likeability rating, dodged by condition; rows without a
# rating are excluded.
ggplot(
  d[!is.na(d$likableRating), ],
  aes(x = as.factor(likableRating), fill = condition)
) +
  geom_bar(position = "dodge") +
  labs(title = "Likeability", x = "rating") +
  ylim(0, 10) +
  themeML

Likeability ratings by condition

# Bootstrapped mean and confidence interval of the likeability rating per
# condition, using questionnaire rows that have a rating.
likable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(likableRating)) %>%
  group_by(condition) %>%
  multi_boot(
    column = "likableRating",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Bar per condition with the bootstrapped interval as error bars.
ggplot(likable, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ggtitle("Likeability") +
  ylim(0, 10) +
  themeML

Workability ratings

# Count of each "how well did you work together" rating, dodged by condition;
# rows without a rating are excluded.
ggplot(
  d[!is.na(d$howWellRating), ],
  aes(x = as.factor(howWellRating), fill = condition)
) +
  geom_bar(position = "dodge") +
  labs(title = "How well worked together", x = "rating") +
  ylim(0, 10) +
  themeML

Workability ratings by condition

# Bootstrapped mean and confidence interval of the "how well worked together"
# rating per condition, using questionnaire rows that have a rating.
workable <- d %>%
  filter(phase == "QUESTIONNAIRE", !is.na(howWellRating)) %>%
  group_by(condition) %>%
  multi_boot(
    column = "howWellRating",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Bar per condition with the bootstrapped interval as error bars.
ggplot(workable, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ggtitle("How well worked together") +
  ylim(0, 10) +
  themeML

Edit distances

# Per-row edit distance between the target `word` and the participant's
# `guessedLabel`, together with its breakdown into insertions, deletions and
# substitutions. Adds the columns lev2, ins, del and sub to `d`; rows where
# adist() yields NA keep NA in these columns.
edit_targets <- as.character(d$word)
edit_guesses <- as.character(d$guessedLabel)

# Row 1 of the result is the distance, rows 2-4 are the ins/del/sub counts;
# there is one column per row of `d`. seq_len() makes this a no-op on an
# empty `d`.
edit_stats <- vapply(
  seq_len(nrow(d)),
  function(i) {
    # counts = TRUE returns the distance and attaches the per-operation counts
    # as the "counts" attribute, so a single adist() call per row suffices.
    dist <- adist(edit_targets[i], edit_guesses[i], counts = TRUE)
    c(dist[1, 1], drop(attr(dist, "counts"))[1:3])
  },
  numeric(4)
)

d$lev2 <- unname(edit_stats[1, ])
d$ins <- unname(edit_stats[2, ])
d$del <- unname(edit_stats[3, ])
d$sub <- unname(edit_stats[4, ])

# Distributions of the edit-distance measures, dodged by condition.
# The binned plots use geom_histogram(binwidth = 1): current ggplot2 no longer
# accepts a `binwidth` argument in geom_bar(), and geom_histogram() is the
# binned equivalent.

# Total edit distance.
ggplot(d, aes(x = lev2, fill = condition)) +
  geom_histogram(position = "dodge", binwidth = 1) +
  ggtitle("Edit distance") +
  xlab("edit distance") +
  themeML

# Insertions (no bin width was specified for this plot).
ggplot(d, aes(x = ins, fill = condition)) +
  geom_bar(position = "dodge") +
  ggtitle("insertions") +
  xlab("insertions") +
  themeML
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Deletions.
ggplot(d, aes(x = del, fill = condition)) +
  geom_histogram(position = "dodge", binwidth = 1) +
  ggtitle("deletions") +
  xlab("deletions") +
  themeML

# Substitutions (axis label typo "subsitutions" corrected).
ggplot(d, aes(x = sub, fill = condition)) +
  geom_histogram(position = "dodge", binwidth = 1) +
  ggtitle("substitutions") +
  xlab("substitutions") +
  themeML

# Bootstrapped mean and confidence interval of the edit distance per
# condition, using TEST-phase rows that have a distance.
led <- d %>%
  group_by(condition) %>%
  filter(phase == "TEST", !is.na(lev2)) %>%
  multi_boot(
    column = "lev2",
    summary_groups = "condition",
    statistics_functions = c("mean", "ci_lower", "ci_upper")
  )

# Bar per condition with the bootstrapped interval as error bars.
ggplot(led, aes(x = condition, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", fill = "red") +
  geom_errorbar(
    aes(ymin = ci_lower, ymax = ci_upper),
    position = "dodge",
    width = 0.2
  ) +
  ylim(0, 6) +
  ggtitle("Levenshtein edit distance") +
  themeML

Look at all words

# List worker, target word and guessed label for every TEST-phase row.
print.data.frame(
  d[d[["phase"]] == "TEST", c("workerID", "word", "guessedLabel")]
)
##          workerID  word guessedLabel
## 1  A34DW7VIU89YW8 gabip        bapim
## 2  A34DW7VIU89YW8 panur        banur
## 3  A34DW7VIU89YW8 bipim        mogup
## 4  A34DW7VIU89YW8 banop        banur
## 5  A34DW7VIU89YW8 dabax         agax
## 6  A34DW7VIU89YW8 gipim        banir
## 7  A34DW7VIU89YW8 mogup        mogup
## 8  A34DW7VIU89YW8 dapag         agax
## 9  A251BVRUXN0QRW godax        gogax
## 10 A251BVRUXN0QRW gumig        gigup
## 11 A251BVRUXN0QRW tidut        tidax
## 12 A251BVRUXN0QRW motud        mutox
## 13 A251BVRUXN0QRW mumog        gogax
## 14 A251BVRUXN0QRW ginop          dix
## 15 A251BVRUXN0QRW godop       gugong
## 16 A251BVRUXN0QRW gadut       didong
## 17 A38HODAQUKJKKS nipag        nitud
## 18 A38HODAQUKJKKS putud        gobup
## 19 A38HODAQUKJKKS gidop        nitud
## 20 A38HODAQUKJKKS gopag       prodog
## 21 A38HODAQUKJKKS mubup        gobup
## 22 A38HODAQUKJKKS nitud        nitad
## 23 A38HODAQUKJKKS gobup         bepo
## 24 A38HODAQUKJKKS nidut        nidug
## 25  AKLV0WIZZ356X topum        dunop
## 26  AKLV0WIZZ356X bidop        kimag
## 27  AKLV0WIZZ356X kidut        dunno
## 28  AKLV0WIZZ356X dunid     tallyhup
## 29  AKLV0WIZZ356X nupag       zortac
## 30  AKLV0WIZZ356X kimig        kimdo
## 31  AKLV0WIZZ356X gamog       whokno
## 32  AKLV0WIZZ356X gotob    scallywag
## 33  AWVIOLZUKBNVU pimup        pimup
## 34  AWVIOLZUKBNVU momup        momog
## 35  AWVIOLZUKBNVU kunad        kunad
## 36  AWVIOLZUKBNVU tukug        babip
## 37  AWVIOLZUKBNVU pigup        limup
## 38  AWVIOLZUKBNVU donad        donad
## 39  AWVIOLZUKBNVU dabip        dabip
## 40  AWVIOLZUKBNVU dapag        cabip
## 41  AOPG07J95DDJT bapag        pabig
## 42  AOPG07J95DDJT gigog        gutok
## 43  AOPG07J95DDJT bagir         pibu
## 44  AOPG07J95DDJT gagir        gitub
## 45  AOPG07J95DDJT datib        datuk
## 46  AOPG07J95DDJT gutud        pepbu
## 47  AOPG07J95DDJT tibax        tebig
## 48  AOPG07J95DDJT bubip        sobig
## 49 A1KNXUNWWOK553 putib        dapud
## 50 A1KNXUNWWOK553 dabup        kupag
## 51 A1KNXUNWWOK553 nikug        nikur
## 52 A1KNXUNWWOK553 pabax        dutad
## 53 A1KNXUNWWOK553 kupag        kutog
## 54 A1KNXUNWWOK553 kinur        dinur
## 55 A1KNXUNWWOK553 dutud       kuplag
## 56 A1KNXUNWWOK553 dagog        yugog
## 57 A3L1VB6K50WQ44 kunid        kitob
## 58 A3L1VB6K50WQ44 gotob        gotob
## 59 A3L1VB6K50WQ44 budop        mobit
## 60 A3L1VB6K50WQ44 migog        mogog
## 61 A3L1VB6K50WQ44 mipim        mitig
## 62 A3L1VB6K50WQ44 bipag        kotob
## 63 A3L1VB6K50WQ44 kigup        urbit
## 64 A3L1VB6K50WQ44 nigup        kotib
## 65  ADDIGX3PJ5CA8 pupim        pamig
## 66  ADDIGX3PJ5CA8 monur        pumac
## 67  ADDIGX3PJ5CA8 kidut         ligf
## 68  ADDIGX3PJ5CA8 pamig        pummy
## 69  ADDIGX3PJ5CA8 pigup        patuc
## 70  ADDIGX3PJ5CA8 pubax        pumig
## 71  ADDIGX3PJ5CA8 gutib        puion
## 72  ADDIGX3PJ5CA8 nitob         pujj