Load libraries
library(dplyr)
library(langcog)
library(tidyr)
library(magrittr)
library(lme4)
library(lmerTest)
library(jsonlite)
library(dplyr)
library(tidyr)
library(ggplot2)
library(readr)
library(jsonlite)
# Mean of `x` with missing values removed before averaging.
# x: a vector accepted by mean(). Returns a single value (NaN when nothing
# is left after dropping NAs from a numeric vector).
na.mean <- function(x) {
  mean(x, na.rm = TRUE)
}
Load child data
#######################################################################
############################ LOADING FILES ############################
#######################################################################
# Grab the per-child result file names from the data dir.
# `pattern` is a regular expression, not a shell glob: the bare string
# 'results_*' would match any file name containing "results". glob2rx()
# turns the glob into the regex "^results_" so only names that start with
# "results_" are returned.
child.files <- list.files(path = "experiment_nicolette/data/",
                          pattern = glob2rx("results_*"),
                          all.files = FALSE)

# Demographics sheet, keeping only the rows whose `Data` column is TRUE.
child.demos <- read_csv('experiment_nicolette/data/noisy_replication_demos.csv') %>%
  filter(Data == TRUE)

# Table of manual corrections to individual responses.
child.overwrite <- read_csv('experiment_nicolette/data/overwrite.csv')
# Read one child's results file and return it as a long data frame.
#
# filename: name of a file inside "experiment_nicolette/data/".
# Returns a data frame with one row per kept "<li>" entry (entries 3-194 of
# the file, i.e. 16 trials x 12 entries) and the columns
#   measure, value - the two sides of each "measure: value" entry
#   subj           - the file name
#   trial          - trial number 1-16, 12 consecutive rows per trial
#   order          - the value of the second kept entry, copied to every row
read.child <- function(filename) {
  # read the raw text and break it up at the html list items
  raw <- readLines(paste0("experiment_nicolette/data/", filename), warn = FALSE)
  entries <- unlist(strsplit(raw, '<li>'))[3:194]

  # strip escaped quotes, html tags and plain quotes; the substitutions are
  # applied in this exact order
  entries <- gsub("\\\\\"", "", entries)
  entries <- gsub("</li>", "", entries)
  entries <- gsub("</ul>\\},\\{<ul>", "", entries)
  entries <- gsub("\"", "", entries)
  entries <- gsub("</ul>}]</ul>}", "", entries)
  entries <- gsub("{<ul>", "", entries, fixed = TRUE)

  # munge: each entry is "measure: value"; turn the pairs into two columns
  pieces <- strsplit(entries, ": ")
  child.frame <- data.frame(t(sapply(pieces, unlist)))
  names(child.frame) <- c("measure", "value")

  child.frame$subj <- filename
  child.frame$trial <- rep(1:16, each = 12)
  child.frame$order <- child.frame$value[2]
  child.frame
}
# Code each response as 1 or 0 in the `Literal` scheme used by this script.
# A response is coded 1 when any of the following holds, otherwise 0:
#   - the trial is a "Training" trial and the answer is "Y"
#   - the condition is "Control" and the answer is "Y"
#   - the trial is a "Test" trial in a non-Control condition and the answer
#     is anything other than "Y"
# value, trial.type, condition: parallel vectors.
# Returns a numeric vector with one element per element of `value`.
code.literal <- function(value, trial.type, condition) {
  said.yes <- value == "Y"
  is.control <- condition == "Control"

  training.hit <- trial.type == "Training" & said.yes
  control.hit <- is.control & said.yes
  test.hit <- !is.control & trial.type == "Test" & !said.yes

  literal <- numeric(length(value))
  # which() drops NA comparisons, so rows with missing inputs stay at 0
  literal[which(training.hit | control.hit | test.hit)] <- 1
  literal
}
# Parse every child file, stack the results, keep only the "response" rows,
# and label each row with a condition name derived from its `order` code.
# The frame is left row-wise, as the per-row switch() below requires.
child.long.data <- lapply(child.files, read.child) %>%
  bind_rows() %>%
  filter(measure == "response") %>%
  rowwise() %>%
  # codes "0"-"3" are named explicitly; any other code gets the last label
  mutate(condition = switch(as.character(order),
                            "0" = "Normal",
                            "1" = "Implausible",
                            "2" = "Control",
                            "3" = "No Noise Normal",
                            "No Noise Implausible"))
# Attach participant ids (the result file names) to the demographics sheet,
# then join the demographics onto the response data.
# NOTE(review): the ids are assigned by position, which assumes the rows of
# child.demos are in the same order as the result files — confirm.
# The guard catches a count mismatch before ids are silently recycled.
stopifnot(length(unique(child.long.data$subj)) == nrow(child.demos))
child.demos$subj <- unique(child.long.data$subj)
child.overwrite.data <- left_join(child.long.data, child.demos)

# Apply every manual correction: for each row of child.overwrite, replace the
# recorded value of the matching subj/trial with the corrected response.
# The loop must run over seq_len(nrow(...)); `for (row in nrow(...))` visits
# a single index (the last row) and skips all other corrections.
for (row in seq_len(nrow(child.overwrite))) {
  overwrite.rows <- child.overwrite.data$subj == child.overwrite$subj[row] &
    child.overwrite.data$trial == child.overwrite$trial[row]
  child.overwrite.data$value[overwrite.rows] <- child.overwrite$response[row]
}
# Finalise the replication data: drop the row-wise grouping, turn condition
# and trial type into factors, apply the age and exclusion filters, code each
# response with code.literal(), and keep only the analysis columns.
child.overwrite.data <- child.overwrite.data %>%
  ungroup() %>%
  mutate(
    condition = factor(condition,
                       levels = c("Normal", "Implausible", "Control",
                                  "No Noise Normal", "No Noise Implausible")),
    # trials 1-8 are labelled Training, the remaining trials Test
    trial.type = factor(ifelse(trial <= 8, "Training", "Test"))
  ) %>%
  filter(age >= 4, age <= 6, Exclude == FALSE) %>%
  mutate(Literal = code.literal(value, trial.type, condition),
         experiment = "replication") %>%
  select(experiment, subj, age, condition, trial.type, Literal)
Load original child data
# Read the original study's child data (wide format: one column per item).
original.child.data <- read.csv('sarah_honors_data_updated_5-11.csv')

# Reshape to one row per subject x item and bring it into the same column
# layout as the replication data.
#   - the item columns cat_kittens:pen become item/response pairs
#   - the eight items listed below are labelled Training, all others Test
#   - Literal is 1 - response on Training items and response on Test items
#   - only ages 4 to 6 (inclusive) are kept
# Every step is vectorised, so no group_by()/rowwise() is needed.
original.child.long.data <- original.child.data %>%
  gather(item, response, cat_kittens:pen) %>%
  rename(condition = cond, subj = subj_id) %>%
  mutate(
    condition = factor(condition, levels = c("normal", "implausible"),
                       labels = c("Normal", "Implausible")),
    trial.type = ifelse(item %in% c("cat_kittens", "book_table", "shark_fish",
                                    "wooden_blocks", "flowers_basket",
                                    "house_door", "bread_peanutbutter",
                                    "knife_fork"),
                        "Training", "Test"),
    Literal = ifelse(trial.type == "Training", 1 - response, response),
    experiment = "original"
  ) %>%
  filter(age >= 4, age <= 6) %>%
  select(experiment, subj, age, condition, trial.type, Literal)
# Stack the replication and original child data, put trial.type on a common
# Training/Test factor, and leave the result grouped by
# experiment x condition x trial.type.
all.child.long.data <- child.overwrite.data %>%
  bind_rows(original.child.long.data) %>%
  mutate(trial.type = factor(trial.type, levels = c("Training", "Test"))) %>%
  group_by(experiment, condition, trial.type)
Load Adult data
# Collect the adult result files. `pattern` is a regular expression, not a
# glob, so glob2rx() is used to match exactly the names ending in ".json".
all.adult.files <- paste0("adult_data/",
                          list.files(path = "adult_data/",
                                     pattern = glob2rx("*.json"),
                                     all.files = FALSE))
jsons <- lapply(all.adult.files, fromJSON)

# One worker id per file. vapply() fails loudly if a file does not carry a
# single character WorkerId, instead of silently returning a list.
workers <- vapply(jsons, function(x) x$WorkerId, character(1))

# Stack each file's trial records and tag every row with its worker. Each id
# is repeated once per record of its own file rather than a fixed 15 times,
# so a file with a different number of records cannot misalign the ids.
adult.trials <- lapply(jsons, function(x) x$answers$data)
adult.data <- bind_rows(adult.trials)
adult.data$worker <- rep(workers, times = vapply(adult.trials, NROW, integer(1)))
Munge adult data
# Exclude multiple hits from a single participant.
# NOTE(review): the summary column is named `timeStamp` while the column read
# from the data is `timestamp`, so the join below presumably matches on
# `worker` alone and drops no rows — confirm whether later hits were meant to
# be removed here.
firstHits <- adult.data %>%
  group_by(worker) %>%
  summarise(timeStamp = min(timestamp))

# Bring the adult data into the same column layout as the child data:
# Literal is TRUE for a "Y" response, order 0 is the Normal condition and any
# other order Implausible, trials after the 8th are Test trials, and age is NA.
adult.long.data <- adult.data %>%
  inner_join(firstHits) %>%
  ungroup() %>%
  rename(condition = order, subj = worker) %>%
  mutate(
    Literal = response == "Y",
    condition = factor(ifelse(condition == 0, "Normal", "Implausible"),
                       levels = c("Normal", "Implausible")),
    trial.type = ifelse(trialnum > 8, "Test", "Training"),
    experiment = "adult",
    age = NA
  ) %>%
  select(experiment, subj, age, condition, trial.type, Literal)
Analyze child and adult data
# Combine the child and adult data and put trial.type and condition on fixed
# factor levels. The frame is ungrouped first because both mutated columns
# are grouping columns of all.child.long.data; it is regrouped afterwards by
# experiment x condition x trial.type.
all.long.data <- bind_rows(all.child.long.data, adult.long.data) %>%
  ungroup() %>%
  mutate(trial.type = factor(trial.type, levels = c("Training", "Test")),
         condition = factor(condition,
                            levels = c("Control", "Normal", "Implausible",
                                       "No Noise Normal",
                                       "No Noise Implausible"))) %>%
  group_by(experiment, condition, trial.type)

# Number of distinct participants in each experiment x condition x trial.type
# cell.
ns <- all.long.data %>%
  ungroup() %>%
  select(experiment, trial.type, condition, subj) %>%
  distinct() %>%
  group_by(experiment, condition, trial.type) %>%
  summarise(n = n())

# Print the Test-trial counts as a table. kable() is called through its
# namespace because knitr is not among the packages attached in the library
# block of this script.
knitr::kable(filter(ns, trial.type == "Test"))
| experiment | condition | trial.type | n |
|---|---|---|---|
| adult | Normal | Test | 27 |
| adult | Implausible | Test | 23 |
| original | Normal | Test | 23 |
| original | Implausible | Test | 20 |
| replication | Control | Test | 20 |
| replication | Normal | Test | 26 |
| replication | Implausible | Test | 24 |
| replication | No Noise Normal | Test | 7 |
| replication | No Noise Implausible | Test | 5 |
# Per-cell summary used for plotting: the multi_boot() output for Literal
# (requested statistics: ci_lower and ci_upper, summary function na.mean),
# joined with the per-cell mean of Literal and the participant counts in ns.
# NOTE(review): presumably multi_boot() returns bootstrap interval bounds per
# group — confirm against the langcog documentation.
# condition is then re-levelled for plotting, with line breaks in the two
# "No Noise" labels.
all.results <- all.long.data %>%
  multi_boot(column = "Literal",
             summary_function = "na.mean",
             statistics_functions = c("ci_lower", "ci_upper")) %>%
  left_join(summarise(all.long.data, Literal = na.mean(Literal))) %>%
  left_join(ns) %>%
  ungroup() %>%
  mutate(condition = factor(condition,
                            levels = c("Control", "Normal",
                                       "No Noise Normal", "Implausible",
                                       "No Noise Implausible"),
                            labels = c("Control", "Normal",
                                       "No Noise\nNormal", "Implausible",
                                       "No Noise\nImplausible")))
Plot
# Bar plot of the proportion of Literal responses per condition, faceted by
# experiment (rows) and trial type (columns), with ci_lower/ci_upper drawn as
# line ranges and a dashed reference line at 0.5.
ggplot(all.results, aes(x = condition, y = Literal, fill = trial.type)) +
  geom_bar(stat = "identity", position = position_dodge(1)) +
  facet_grid(experiment ~ trial.type) +
  # show.legend replaces the deprecated show_guide argument
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
                 size = .8,
                 show.legend = FALSE,
                 position = position_dodge(1)) +
  scale_fill_brewer(palette = "Set1") +
  # constant reference line: set as a parameter rather than mapped via aes()
  geom_hline(yintercept = .5, lty = 2) +
  theme_bw(base_size = 14) +
  theme(legend.position = "none", panel.grid = element_blank()) +
  scale_x_discrete(name = "Condition") +
  scale_y_continuous(name = "Proportion choosing Literal",
                     limits = c(0, 1))