Noisy Kids Replication Analysis

Load libraries

library(dplyr)
library(langcog)
library(tidyr)
library(magrittr)
library(lme4)
library(lmerTest)
library(jsonlite)
library(dplyr)
library(tidyr)
library(ggplot2)
library(readr)
library(jsonlite)

# Mean of `x` with missing values (NA) removed before averaging.
na.mean <- function(x) {
  mean(x, na.rm = TRUE)
}

Load child data

#######################################################################
############################ LOADING FILES ############################
#######################################################################
# Grab the names of all per-child result files in the data directory.
# `pattern` is a regular expression, not a shell glob: 'results_*' means
# "results" followed by zero or more underscores and therefore matches any
# file name that merely contains "results". Anchor the pattern so that only
# names starting with "results_" are picked up.
child.files <- list.files(path = "experiment_nicolette/data/",
                          pattern = "^results_", all.files = FALSE)

# Demographics sheet; keep only the rows whose `Data` column is TRUE.
child.demos <- read_csv('experiment_nicolette/data/noisy_replication_demos.csv') %>%
  filter(Data==TRUE)

# Table of per-subject, per-trial responses used to overwrite recorded values.
child.overwrite <- read_csv('experiment_nicolette/data/overwrite.csv')

# function for reading one child's data
# Read one child's raw results file and return it as a long data frame with
# one row per recorded measure.
#
# filename: name of a file inside "experiment_nicolette/data/".
#
# Returns a data frame with the columns
#   measure, value : the two halves of each "measure: value" entry
#   subj           : the file name
#   trial          : 1 to 16, twelve consecutive rows per trial
#   order          : the value found in the second row, repeated on every row
read.child <- function(filename) {
  # The file is a dump of nested html lists: read it, cut it at every <li>
  # and keep pieces 3 through 194 (192 entries = 16 trials x 12 measures).
  raw.lines <- readLines(paste0("experiment_nicolette/data/", filename),
                         warn = FALSE)
  entries <- unlist(strsplit(raw.lines, '<li>'))[3:194]

  # Strip escape characters and list markup. The steps run in this order;
  # only the last pattern is matched as a fixed string.
  cleanup.steps <- list(
    list(pattern = "\\\\\"", fixed = FALSE),
    list(pattern = "</li>", fixed = FALSE),
    list(pattern = "</ul>\\},\\{<ul>", fixed = FALSE),
    list(pattern = "\"", fixed = FALSE),
    list(pattern = "</ul>}]</ul>}", fixed = FALSE),
    list(pattern = "{<ul>", fixed = TRUE)
  )
  for (step in cleanup.steps) {
    entries <- gsub(step$pattern, "", entries, fixed = step$fixed)
  }

  # Split every "measure: value" entry and stack the pairs into two columns.
  pieces <- strsplit(entries, ": ")
  split.frame <- data.frame(t(sapply(pieces, unlist)))
  names(split.frame) <- c("measure", "value")

  split.frame$subj <- filename
  split.frame$trial <- rep(1:16, each = 12)
  split.frame$order <- split.frame$value[2]
  split.frame
}

# Score each response as literal (1) or not literal (0).
#
# value:      responses, compared against "Y"
# trial.type: trial labels, compared against "Training" and "Test"
# condition:  condition labels, compared against "Control"
#
# A response is scored 1 when
#   * it is "Y" on a Training trial,
#   * it is "Y" in the Control condition, or
#   * it is not "Y" on a Test trial outside the Control condition.
# Everything else is scored 0. Returns a numeric vector as long as `value`.
code.literal <- function(value,trial.type,condition) {
  said.yes <- value == "Y"
  in.control <- condition == "Control"

  is.literal <- (trial.type == "Training" & said.yes) |
    (in.control & said.yes) |
    (!in.control & trial.type == "Test" & !said.yes)

  literal <- numeric(length(value))
  # which() skips positions where the rule evaluates to NA; they stay 0
  literal[which(is.literal)] <- 1
  literal
}


# Read every child's file, stack the results and keep only the response rows.
# `order` holds the condition code taken from each file; translate it into a
# condition label. The translation is one vectorised case_when() instead of
# rowwise() + scalar if/else, so the result is an ordinary (non-rowwise) data
# frame and later steps are not evaluated one row at a time.
# A missing code stays missing rather than aborting the whole pipeline; any
# other code outside "0"-"3" falls through to "No Noise Implausible".
child.long.data <- bind_rows(lapply(child.files, read.child)) %>%
  filter(measure == "response") %>%
  mutate(condition = case_when(is.na(order) ~ NA_character_,
                               order == "0" ~ "Normal",
                               order == "1" ~ "Implausible",
                               order == "2" ~ "Control",
                               order == "3" ~ "No Noise Normal",
                               TRUE ~ "No Noise Implausible"))

# Give every demographics row a subject id (the distinct `subj` values of the
# trial data), then merge the demographics into the trial data on whatever
# columns the two tables share.
# NOTE(review): the ids are assigned by position, so this assumes the rows of
# child.demos are in the same order as the distinct subjects — confirm.
child.demos[["subj"]] <- unique(child.long.data[["subj"]])

child.overwrite.data <- child.long.data %>%
  left_join(child.demos)

# Apply the overwrite table: for every row of child.overwrite, replace the
# recorded value of the matching subject/trial with the listed response.
# The loop has to run over seq_len(nrow(...)); iterating over nrow(...)
# itself visits only the last row and silently skips every other entry.
for (row in seq_len(nrow(child.overwrite))) {
  is.target <- child.overwrite.data$subj == child.overwrite$subj[row] &
    child.overwrite.data$trial == child.overwrite$trial[row]
  # which() ignores NA comparisons, and an entry that matches no row simply
  # changes nothing
  child.overwrite.data$value[which(is.target)] <- child.overwrite$response[row]
}

# Finalise the replication data: fix the order of the condition levels, label
# trials 1-8 as Training and later trials as Test, keep rows with age between
# 4 and 6 (inclusive) whose Exclude flag is FALSE, score every response with
# code.literal(), and keep only the analysis columns.
child.overwrite.data <- child.overwrite.data %>%
  ungroup() %>%
  mutate(
    condition = factor(condition,
                       levels = c("Normal", "Implausible",
                                  "Control", "No Noise Normal",
                                  "No Noise Implausible")),
    # factor() sorts the labels, so the level order here is Test, Training
    trial.type = factor(ifelse(trial <= 8, "Training", "Test"))
  ) %>%
  filter(age >= 4 & age <= 6 & Exclude == FALSE) %>%
  mutate(Literal = code.literal(value, trial.type, condition),
         experiment = "replication") %>%
  select(experiment, subj, age, condition, trial.type, Literal)

Load original child data

# Read the original study's data, which has one column per item.
original.child.data <- read.csv('sarah_honors_data_updated_5-11.csv')

# Reshape to one row per subject and item (columns cat_kittens through pen),
# rename and relabel the columns to match the replication data, mark the
# eight listed items as Training and all others as Test, and score Literal as
# 1 - response on Training items and as response itself on Test items.
# Only rows with age between 4 and 6 (inclusive) are kept.
original.child.long.data <- original.child.data %>%
  gather(item, response, cat_kittens:pen) %>%
  rename(condition = cond, subj = subj_id) %>%
  mutate(condition = factor(condition,
                            levels = c("normal", "implausible"),
                            labels = c("Normal", "Implausible"))) %>%
  group_by(condition, item, subj) %>%
  mutate(trial.type = ifelse(
    item %in% c("cat_kittens", "book_table", "shark_fish",
                "wooden_blocks", "flowers_basket",
                "house_door", "bread_peanutbutter",
                "knife_fork"),
    "Training",
    "Test"
  )) %>%
  rowwise() %>%
  mutate(Literal = ifelse(trial.type == "Training", 1 - response, response),
         experiment = "original") %>%
  filter(age >= 4, age <= 6) %>%
  ungroup() %>%
  select(experiment, subj, age, condition, trial.type, Literal)

# Stack the replication and original child data, put the trial types in
# Training-then-Test order, and group by experiment, condition and trial type.
all.child.long.data <- child.overwrite.data %>%
  bind_rows(original.child.long.data) %>%
  ungroup() %>%
  mutate(trial.type = factor(trial.type, levels = c("Training", "Test"))) %>%
  group_by(experiment, condition, trial.type)

Load Adult data

# Locate every adult JSON file. `pattern` is a regular expression, not a
# shell glob, so match a literal ".json" at the end of the file name.
all.adult.files <- paste0("adult_data/",
                          list.files(path = "adult_data/",
                                     pattern = "\\.json$",
                                     all.files = FALSE))
jsons <- lapply(all.adult.files, fromJSON)
workers <- vapply(jsons, function(x) x$WorkerId, character(1))
adult.data <- bind_rows(lapply(jsons, function(x) x$answers$data))

# Label every trial row with its worker. Each id is repeated once per row
# that worker actually contributed instead of assuming exactly 15 trials per
# file, so a shorter or longer submission cannot shift the labels of the
# workers that follow it.
# Assumes x$answers$data is a table with one row per trial; the check below
# stops the script if the row counts do not add up.
trials.per.worker <- vapply(jsons, function(x) NROW(x$answers$data),
                            integer(1))
stopifnot(sum(trials.per.worker) == nrow(adult.data))
adult.data$worker <- rep(workers, times = trials.per.worker)

Munge adult data

#Exclude multiple hits from a single participant
# For each worker, take the smallest timestamp among that worker's rows.
firstHits <- adult.data %>%
  group_by(worker) %>%
  summarise(timeStamp = min(timestamp))

# NOTE(review): firstHits names its column `timeStamp` (capital S) while the
# column it was computed from is `timestamp`. Unless adult.data also has a
# `timeStamp` column, the natural join below matches on `worker` alone and
# keeps every row, so repeat hits would not actually be dropped. Whether the
# join should also match on the timestamp depends on whether `timestamp` is
# recorded per hit or per trial — confirm before changing.
#
# After the join the adult data is reshaped to the same columns as the child
# data: `order` becomes condition (0 -> "Normal", anything else ->
# "Implausible"), `worker` becomes subj, response "Y" becomes TRUE (renamed to
# Literal), trials numbered above 8 are Test and the rest Training, and age is
# set to NA.
adult.long.data <- inner_join(adult.data,firstHits) %>%
  ungroup() %>%
  rename(condition = order, subj = worker) %>%
  mutate(response = ifelse(response == "Y",TRUE,FALSE),
         condition = ifelse(condition == 0,"Normal","Implausible"),
         trial.type = ifelse(trialnum > 8, "Test", "Training"),
         experiment = "adult", age = NA) %>%
  mutate(condition = factor(condition,levels = c("Normal", "Implausible"))) %>%
  rename(Literal = response) %>%
  select(experiment, subj, age, condition, trial.type, Literal)

Analyze child and adult data

# Combine the child and adult data into one long table.
# all.child.long.data arrives grouped by experiment, condition and trial.type.
# Depending on the dplyr version, bind_rows() may hand that grouping on to its
# result, and a grouped mutate() may refuse to modify its own grouping
# columns. Drop any grouping before re-levelling the two factors, then group
# by experiment, condition and trial type again.
all.long.data <- bind_rows(all.child.long.data, adult.long.data) %>%
  ungroup() %>%
  mutate(trial.type = factor(trial.type, levels = c("Training", "Test")),
         condition = factor(condition,
                            levels = c("Control", "Normal",
                                       "Implausible", "No Noise Normal",
                                       "No Noise Implausible"))) %>%
  group_by(experiment, condition, trial.type)

# Count the distinct subjects in every experiment x condition x trial-type
# cell, then print the counts for the Test trials as a table.
ns <- all.long.data %>%
  ungroup() %>%
  distinct(experiment, trial.type, condition, subj) %>%
  group_by(experiment, condition, trial.type) %>%
  summarise(n = n())

ns %>%
  filter(trial.type == "Test") %>%
  kable()

experiment	condition	trial.type	n
adult	Normal	Test	27
adult	Implausible	Test	23
original	Normal	Test	23
original	Implausible	Test	20
replication	Control	Test	20
replication	Normal	Test	26
replication	Implausible	Test	24
replication	No Noise Normal	Test	7
replication	No Noise Implausible	Test	5

# Per-cell summary used for plotting: bootstrap the ci_lower / ci_upper
# statistics of na.mean(Literal) with multi_boot(), then add the cell means
# and the subject counts. all.long.data is grouped, so the summarise() below
# yields one mean per group. Finally the conditions are re-ordered and given
# two-line labels.
all.results <- all.long.data %>%
  multi_boot(column = "Literal",
             summary_function = "na.mean",
             statistics_functions = c("ci_lower", "ci_upper")) %>%
  left_join(all.long.data %>% summarise(Literal = na.mean(Literal))) %>%
  left_join(ns) %>%
  ungroup() %>%
  mutate(condition = factor(
    condition,
    levels = c("Control", "Normal",
               "No Noise Normal", "Implausible",
               "No Noise Implausible"),
    labels = c("Control", "Normal", "No Noise\nNormal",
               "Implausible",
               "No Noise\nImplausible")
  ))

Plot

# Bar chart of the proportion of literal choices per condition, with one
# panel per experiment (rows) and trial type (columns), the ci_lower/ci_upper
# range drawn on every bar, and a dashed reference line at 0.5.
ggplot(all.results, aes(x = condition, y = Literal, fill = trial.type)) +
  geom_bar(stat = "identity", position = position_dodge(1)) +
  facet_grid(experiment ~ trial.type) +
  # `show_guide` was renamed `show.legend` in ggplot2 2.0.0.
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
  # line geoms; `size` is kept here for compatibility with older versions.
  geom_linerange(aes(ymin = ci_lower,
                     ymax = ci_upper),
                 size = .8,
                 show.legend = FALSE,
                 position = position_dodge(1)) +
  scale_fill_brewer(palette = "Set1") +
  geom_hline(aes(yintercept = .5), lty = 2) +
  theme_bw(base_size = 14) +
  theme(legend.position = "none", panel.grid = element_blank()) +
  scale_x_discrete(name = "Condition") +
  scale_y_continuous(name = "Proportion choosing Literal",
                     limits = c(0, 1))

Noisy Kids Replication Analysis

Dan Yurovsky

August 03, 2015