# trupol_ana_preprocess
# NOTE(review): the next line removes every object that ls() lists from the
# global workspace before the analysis starts; anything created earlier in the
# same session is lost.
rm(list=ls())
# ggplot2 supplies the ggplot()/geom_*() plotting calls used further down.
library(ggplot2)
# dplyr supplies the data verbs (select, filter, mutate, group_by, summarise)
# and the %>% pipe used further down.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(langcog)
## 
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
## 
##     scale
#source("/Users/ericang/Documents/Research/trupol/data/version 1/helper/useful.R")

# Directory that holds the per-participant JSON result files.
raw.data.path <- "/Users/ericang/Documents/Research/trupol_GIT/experiment/adult/production-results/"

## READ IN FILES
# `pattern` is a regular expression, not a shell glob: anchor on the ".json"
# extension so that names merely containing "json" are not picked up.
files <- dir(raw.data.path, pattern = "\\.json$")

# Parse one result file into a data frame and tag it with its subject id.
#
# file.name: name of a file inside `raw.data.path`.
# Returns the parsed JSON flattened by data.frame(), plus a `subid` column
# holding the first six characters of the file name.
read_subject_file <- function(file.name) {
  json_lines <- readLines(paste0(raw.data.path, file.name))
  json_file_str <- paste(json_lines, collapse = "")
  # remove a comma that directly precedes a closing brace before parsing
  json_file_str <- gsub(",}", "}", json_file_str, fixed = TRUE)
  jso <- jsonlite::fromJSON(json_file_str)
  # the `people` entry is dropped before the list is turned into a data frame
  jso$answers$data$people <- NULL
  jso1 <- data.frame(jso)
  jso1$subid <- substring(file.name, 1, 6)
  jso1
}

## now here's where data get bound together
# Bind once instead of growing the data frame inside a loop; the leading empty
# data frame keeps `all.data` a data frame even when no files are found.
all.data <- do.call(
  rbind,
  c(list(data.frame()), lapply(files, read_subject_file))
)

# Keep only rows whose six practice answers equal the values listed in the
# filter, then carry the analysis variables forward as factors under short
# names (the "answers.data." prefix is dropped).
d <- all.data %>%
  filter(
    answers.data.practice1_mean == "0",
    answers.data.practice1_nice == "1",
    answers.data.practice2_mean == "1",
    answers.data.practice2_nice == "0",
    answers.data.practice3_truth == "1",
    answers.data.practice4_truth == "0"
  ) %>%
  # transmute() keeps exactly the columns listed here, in this order
  transmute(
    subid = as.factor(subid),
    age = as.factor(answers.data.age),
    cond = as.factor(answers.data.cond),
    order = as.factor(answers.data.order),
    trial1_2_play = as.factor(answers.data.trial1_2_play),
    trial3_4_play = as.factor(answers.data.trial3_4_play),
    trial1_nice = as.factor(answers.data.trial1_nice),
    trial2_nice = as.factor(answers.data.trial2_nice),
    trial3_nice = as.factor(answers.data.trial3_nice),
    trial4_nice = as.factor(answers.data.trial4_nice),
    trial1_mean = as.factor(answers.data.trial1_mean),
    trial2_mean = as.factor(answers.data.trial2_mean),
    trial3_mean = as.factor(answers.data.trial3_mean),
    trial4_mean = as.factor(answers.data.trial4_mean),
    trial1_truth = as.factor(answers.data.trial1_truth),
    trial2_truth = as.factor(answers.data.trial2_truth),
    trial3_truth = as.factor(answers.data.trial3_truth),
    trial4_truth = as.factor(answers.data.trial4_truth)
  )

# reshape data: wide -> long, one row per participant and question.
# The gathered range trial1_2_play:trial4_truth is columns 5 to 18 of `d`;
# question names go to `q`, responses to `answer`.
d <- gather(d, key = "q", value = "answer", trial1_2_play:trial4_truth)

# add columns to categorize vars
#levels(d[d$cond == 2,]$q) <- c("trial3_4_play", "trial1_2_play", "trial3_nice",   "trial4_nice",   "trial1_nice",  
#"trial2_nice",   "trial3_mean",   "trial4_mean",   "trial1_mean",   "trial2_mean",  
#"trial3_truth",  "trial4_truth",  "trial1_truth",  "trial2_truth" )

# First eight characters of each question name, in the order the labels
# below are matched against.
q_prefixes <- c("trial1_2", "trial3_4",
                "trial1_n", "trial2_n", "trial3_n", "trial4_n",
                "trial1_m", "trial2_m", "trial3_m", "trial4_m",
                "trial1_t", "trial2_t", "trial3_t", "trial4_t")

# Speaker label assigned to trials 1-4 for each value of `order`.
order1_speakers <- c("honest", "polite", "polite", "honest")
order2_speakers <- c("polite", "honest", "honest", "polite")

# Add `polite` (speaker label of the trial) and `q_kind` (question type)
# to long-format rows.
#
# rows:     long-format data with a `q` column of question names.
# speakers: labels for trials 1-4; reused for the nice, mean and truth
#           questions. The two play questions get the string label "NA".
label_questions <- function(rows, speakers) {
  mutate(
    rows,
    polite = factor(substring(q, 1, 8),
                    levels = q_prefixes,
                    labels = c("NA", "NA", rep(speakers, times = 3))),
    # characters 8-10 of the question name identify the question type
    q_kind = factor(substring(q, 8, 10),
                    levels = c("2_p", "4_p",
                               "nic", "mea", "tru"),
                    labels = c("play", "play",
                               "niceness", "meanness", "truth-telling"))
  )
}

d1_0 <- label_questions(filter(d, order == "1"), order1_speakers)
d1_1 <- label_questions(filter(d, order == "2"), order2_speakers)

# order-1 rows first, then order-2 rows
d1 <- rbind(d1_0, d1_1)

# Rebuild these columns as factors from their character values so the level
# sets are recomputed from the combined data.
for (col in c("age", "polite", "q_kind", "answer")) {
  d1[[col]] <- as.factor(as.character(d1[[col]]))
}
# Positional relabel of the four answer levels (in their sorted order);
# levels that receive the same label are merged.
levels(d1$answer) <- c("0", "1", "1", "0") # honest coded as '1'
# convert the merged labels to numbers through the level lookup
d1$answer <- as.numeric(levels(d1$answer))[d1$answer]

Correct responses on evaluation and play questions

Play question: “Who do you want to play with?”

# plot: eval and play
# Step 1: mean answer per participant on the play questions, by condition.
play_by_subject <- d1 %>%
  filter(q_kind == "play") %>%
  group_by(cond, subid) %>%
  summarise(answer = mean(answer))

# Step 2: summarise across participants within each condition;
# multi_boot_standard() provides the `mean` column copied into `answer`
# (and the ci_lower / ci_upper columns used when plotting).
ms <- play_by_subject %>%
  group_by(cond) %>%
  multi_boot_standard(column = "answer") %>%
  mutate(answer = mean)
## Joining by: "cond"

Bar plot:

x-axis: cond; y-axis: answer (0 = incorrect, 1 = correct)

# Rename the two condition levels (positionally) and put "control" first.
levels(ms$cond) <- c("experimental", "control")
ms$cond <- relevel(ms$cond, ref = "control")

# One bar per condition, with the bootstrapped interval drawn on top.
p <- ggplot(data = ms, mapping = aes(x = cond, y = answer, fill = cond))
play_bars <- geom_bar(stat = "identity", position = "dodge")
play_ci <- geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper, width = .1))
p +
  play_bars +
  play_ci +
  ggtitle("Proportion saying 'honest speaker' on play questions")

Niceness, meanness, and truth-telling ratings

# plot: niceness
# Step 1: mean answer per participant for each rating question type,
# split by condition and speaker label.
rating_kinds <- c("niceness", "meanness", "truth-telling")
ratings_by_subject <- d1 %>%
  filter(q_kind %in% rating_kinds) %>%
  group_by(cond, polite, q_kind, subid) %>%
  summarize(answer = mean(answer))

# Step 2: summarise across participants within each cell;
# multi_boot_standard() provides the `mean` column copied into `answer`.
ms <- ratings_by_subject %>%
  group_by(cond, polite, q_kind) %>%
  multi_boot_standard(column = "answer") %>%
  mutate(answer = mean)
## Joining by: c("cond", "polite", "q_kind")

Bar plot

# Rename the two condition levels (positionally) and put "control" first.
levels(ms$cond) <- c("experimental", "control")
ms$cond <- relevel(ms$cond, ref = "control")

# Drop cells with a missing mean before plotting. `answer` is numeric, so
# test for missingness with is.na() rather than comparing to the string "NA".
p <- ggplot(subset(ms, !is.na(answer)),
            aes(x = cond, y = answer, fill = polite))

# Dodged bars per speaker label, one facet per question type; the error bars
# use a dodge width of .9 so they sit on top of the dodged bars.
p +
  geom_bar(position = position_dodge(), stat = "identity") +
  facet_grid(. ~ q_kind) +
  geom_errorbar(position = position_dodge(.9),
                aes(ymin = ci_lower, ymax = ci_upper, width = .1)) +
  ggtitle("Proportion \"yes\"")