Load libraries

library(dplyr)
library(langcog)
library(tidyr)
library(magrittr)
library(lme4)
library(lmerTest)
library(jsonlite)
library(dplyr)
library(tidyr)
library(ggplot2)
library(readr)
library(jsonlite)

# Mean of `x` with missing values removed before averaging.
na.mean <- function(x) {
  mean(x, na.rm = TRUE)
}

Load child data

#######################################################################
############################ LOADING FILES ############################
#######################################################################
# Names of the result files found in the child data directory.
# NOTE(review): `pattern` is a regular expression, not a glob, so this
# matches any file name containing "results" — confirm that is intended.
child.files <- list.files(
  path = "experiment_nicolette/data/",
  pattern = "results_*",
  all.files = FALSE
)

# Demographics table, keeping only the rows where `Data` is TRUE.
child.demos <- filter(
  read_csv("experiment_nicolette/data/noisy_replication_demos.csv"),
  Data == TRUE
)

# Table of responses to overwrite, keyed by subj and trial.
child.overwrite <- read_csv("experiment_nicolette/data/overwrite.csv")

# function for reading one child's data
#
# Reads "experiment_nicolette/data/<filename>", splits its contents on the
# '<li>' tag, strips the remaining markup, and returns a long data frame
# with one row per "measure: value" entry.
#
# Args:
#   filename: name of a file inside experiment_nicolette/data/.
#
# Returns:
#   A data frame with columns measure, value, subj (the file name),
#   trial (1..16) and order (the value found in the second row).
read.child <- function(filename) {
  #read raw data and break up by html lists
  data <- readLines(paste0("experiment_nicolette/data/",filename),warn=FALSE)
  data <- unlist(strsplit(data,'<li>'))
  # keep entries 3 through 194: 192 entries, i.e. 16 trials x 12 entries,
  # matching the trial numbering assigned below
  data<- data[3:194]
  
  # clean up the data, getting rid of html escape characters, etc.
  # backslash-escaped double quotes
  data <- gsub("\\\\\"","",data)
  # closing list-item tags
  data <- gsub("</li>","",data)
  # the "</ul>},{<ul>" separator
  data <- gsub("</ul>\\},\\{<ul>","",data)
  # any remaining double quotes
  data <- gsub("\"","",data)
  # the "</ul>}]</ul>}" closing sequence
  data <- gsub("</ul>}]</ul>}","",data)
  # the literal "{<ul>" opener (fixed = TRUE, so not treated as a regex)
  data <- gsub("{<ul>","",data,fixed=TRUE)
    
  # munge
  # split each entry into its two parts around ": "
  # NOTE(review): assumes every entry contains exactly one ": " — confirm
  split.data <- strsplit(data,": ")
  split.frame <- data.frame(t(sapply(split.data,unlist)))
  names(split.frame) <- c("measure", "value")
  split.frame$subj <- filename
  # 12 consecutive rows per trial, trials numbered 1 to 16
  split.frame$trial <- sort(rep(1:16,12))
  # the second row's value is copied to every row as the order code
  split.frame$order = split.frame$value[2]
return(split.frame)
}  

# Code each response as literal (1) or not (0).
#
# A response is coded 1 when any of the following holds:
#   * it is a "Training" trial answered "Y";
#   * the condition is "Control" and the answer is "Y";
#   * the condition is not "Control", the trial is a "Test" trial, and the
#     answer is anything other than "Y".
#
# Args:
#   value:      vector of responses.
#   trial.type: vector of trial types ("Training" / "Test").
#   condition:  vector of condition names.
#
# Returns:
#   A numeric vector of 0s and 1s, one entry per element of `value`.
code.literal <- function(value, trial.type, condition) {
  said.yes <- value == "Y"
  is.control <- condition == "Control"

  is.literal <- (trial.type == "Training" & said.yes) |
    (is.control & said.yes) |
    (!is.control & trial.type == "Test" & !said.yes)

  literal <- rep(0, length(value))
  literal[is.literal] <- 1
  literal
}


# Read every child's file, keep only the "response" rows, and label each
# row with the condition name that corresponds to its `order` code.
child.long.data <- child.files %>%
  lapply(read.child) %>%
  bind_rows() %>%
  filter(measure == "response") %>%
  rowwise() %>%
  mutate(condition = if (order == "0") {
    "Normal"
  } else if (order == "1") {
    "Implausible"
  } else if (order == "2") {
    "Control"
  } else if (order == "3") {
    "No Noise Normal"
  } else {
    "No Noise Implausible"
  })

# Attach the file names to the demographics rows by position.
# NOTE(review): this relies on child.demos having one row per file, in the
# same order as the subjects appear in child.long.data — confirm.
child.demos$subj <- unique(child.long.data$subj)

# No `by` is given, so the join uses every column the two tables share.
child.overwrite.data <- child.long.data %>%
  left_join(child.demos)

# Apply every row of child.overwrite: for each (subj, trial) pair, replace
# the recorded value with the response given in the overwrite table.
# seq_len() visits all rows; `for (row in nrow(...))` ran only once, for the
# last row, so earlier overwrites were silently skipped.
for (row in seq_len(nrow(child.overwrite))) {
  # which() drops NA comparisons and turns an overwrite with no matching
  # row into a no-op.
  child.overwrite.data$value[
    which(child.overwrite.data$subj == child.overwrite$subj[row] &
            child.overwrite.data$trial == child.overwrite$trial[row])
  ] <- child.overwrite$response[row]
}

# Finalize the replication data: set factor levels, derive the trial type
# from the trial number (trials 1-8 are "Training", the rest "Test"), keep
# ages 4 through 6 that are not marked for exclusion, code Literal, and
# reduce to the shared analysis columns.
child.overwrite.data <- child.overwrite.data %>%
  ungroup() %>%
  mutate(
    condition = factor(
      condition,
      levels = c("Normal", "Implausible", "Control",
                 "No Noise Normal", "No Noise Implausible")
    ),
    trial.type = factor(ifelse(trial <= 8, "Training", "Test"))
  ) %>%
  filter(age >= 4, age <= 6, Exclude == FALSE) %>%
  mutate(
    Literal = code.literal(value, trial.type, condition),
    experiment = "replication"
  ) %>%
  select(experiment, subj, age, condition, trial.type, Literal)

Load original child data

# Raw data from the original study, one row per child and one column per item.
original.child.data <- read.csv("sarah_honors_data_updated_5-11.csv")

# Reshape to one row per child x item and code it like the replication data.
# Every step here is vectorised, so no grouping is needed: the previous
# group_by() was immediately overridden by rowwise(), and rowwise() only made
# ifelse() run one row at a time. The resulting values are unchanged.
original.child.long.data <- original.child.data %>%
  gather(item, response, cat_kittens:pen) %>%
  rename(condition = cond, subj = subj_id) %>%
  mutate(
    condition = factor(condition,
                       levels = c("normal", "implausible"),
                       labels = c("Normal", "Implausible")),
    # the eight items listed here are the training items; all others are test
    trial.type = ifelse(item %in% c("cat_kittens", "book_table", "shark_fish",
                                    "wooden_blocks", "flowers_basket",
                                    "house_door", "bread_peanutbutter",
                                    "knife_fork"),
                        "Training", "Test"),
    # on training trials the response is flipped (1 - response)
    Literal = ifelse(trial.type == "Training", 1 - response, response),
    experiment = "original"
  ) %>%
  filter(age >= 4, age <= 6) %>%
  select(experiment, subj, age, condition, trial.type, Literal)

# Stack the replication and original child data, order the trial types as
# Training then Test, and group by experiment, condition and trial type.
all.child.long.data <- child.overwrite.data %>%
  bind_rows(original.child.long.data) %>%
  ungroup() %>%
  mutate(trial.type = factor(trial.type, levels = c("Training", "Test"))) %>%
  group_by(experiment, condition, trial.type)

Load Adult data

# Paths of the adult result files.
# NOTE(review): `pattern` is a regular expression, not a glob; '*.json' is
# kept as written — confirm it selects exactly the intended files.
all.adult.files <- paste0("adult_data/",
                          list.files(path = "adult_data/", pattern = '*.json',
                                     all.files = FALSE))

# Parse each file, pull out the worker id and the per-file answer table,
# and stack the answer tables into one data frame.
jsons <- lapply(all.adult.files, fromJSON)
workers <- sapply(jsons, function(x) x$WorkerId)
adult.data <- bind_rows(lapply(jsons, function(x) x$answers$data))

# Label every row with the worker it came from. Each id is repeated once per
# row of that worker's own table rather than a hard-coded 15 times, so files
# with a different number of rows stay aligned.
adult.data$worker <- rep(
  workers,
  times = vapply(jsons, function(x) NROW(x$answers$data), integer(1))
)

Munge adult data

# Exclude multiple hits from a single participant
# NOTE(review): firstHits names its column `timeStamp`, while the column read
# from adult.data is `timestamp`. With no `by`, the join below uses only the
# columns the two tables share, which appears to be `worker` alone — in that
# case no rows are dropped. Confirm whether filtering to the first hit was
# intended, and whether `timestamp` is per hit or per trial, before changing.
firstHits <- summarise(group_by(adult.data, worker),
                       timeStamp = min(timestamp))

# Recode the adult data into the same columns as the child data.
adult.long.data <- adult.data %>%
  inner_join(firstHits) %>%
  ungroup() %>%
  rename(condition = order, subj = worker) %>%
  mutate(
    response = response == "Y",
    condition = factor(ifelse(condition == 0, "Normal", "Implausible"),
                       levels = c("Normal", "Implausible")),
    trial.type = ifelse(trialnum > 8, "Test", "Training"),
    experiment = "adult",
    age = NA
  ) %>%
  rename(Literal = response) %>%
  select(experiment, subj, age, condition, trial.type, Literal)

Analyze child and adult data

# Combine child and adult data, fix the level order of trial.type and
# condition, and group by experiment, condition and trial type.
all.long.data <- all.child.long.data %>%
  bind_rows(adult.long.data) %>%
  mutate(trial.type = factor(trial.type, levels = c("Training", "Test"))) %>%
  mutate(
    condition = factor(
      condition,
      levels = c("Control", "Normal", "Implausible",
                 "No Noise Normal", "No Noise Implausible")
    )
  ) %>%
  group_by(experiment, condition, trial.type)

# Number of distinct subjects in each experiment x condition x trial type.
ns <- all.long.data %>%
  ungroup() %>%
  select(experiment, trial.type, condition, subj) %>%
  distinct() %>%
  group_by(experiment, condition, trial.type) %>%
  summarise(n = n())

# Show the counts for the test trials only.
ns %>%
  filter(trial.type == "Test") %>%
  kable()
experiment condition trial.type n
adult Normal Test 27
adult Implausible Test 23
original Normal Test 23
original Implausible Test 20
replication Control Test 20
replication Normal Test 26
replication Implausible Test 24
replication No Noise Normal Test 7
replication No Noise Implausible Test 5
# Run langcog's multi_boot on Literal (summary "na.mean", statistics
# "ci_lower" and "ci_upper"), attach the group means and subject counts,
# then relabel the conditions with line breaks for the plot axis.
all.results <- all.long.data %>%
  multi_boot(column = "Literal",
             summary_function = "na.mean",
             statistics_functions = c("ci_lower", "ci_upper")) %>%
  left_join(summarise(all.long.data, Literal = na.mean(Literal))) %>%
  left_join(ns) %>%
  ungroup() %>%
  mutate(
    condition = factor(
      condition,
      levels = c("Control", "Normal", "No Noise Normal",
                 "Implausible", "No Noise Implausible"),
      labels = c("Control", "Normal", "No Noise\nNormal",
                 "Implausible", "No Noise\nImplausible")
    )
  )

Plot

# Bar chart of the proportion of literal choices per condition, faceted by
# experiment (rows) and trial type (columns), with the ci_lower/ci_upper
# range drawn on each bar and a dashed reference line at 0.5.
ggplot(all.results, aes(x = condition, y = Literal, fill = trial.type)) +
  geom_bar(stat = "identity", position = position_dodge(1)) +
  facet_grid(experiment ~ trial.type) +
  # `show.legend` replaces the deprecated `show_guide` argument
  geom_linerange(aes(ymin = ci_lower,
                     ymax = ci_upper),
                 size = .8,
                 show.legend = FALSE,
                 position = position_dodge(1)) +
  scale_fill_brewer(palette = "Set1") +
  geom_hline(aes(yintercept = .5), lty = 2) +
  theme_bw(base_size = 14) +
  theme(legend.position = "none", panel.grid = element_blank()) +
  scale_x_discrete(name = "Condition") +
  scale_y_continuous(name = "Proportion choosing Literal",
                     limits = c(0, 1))