Script to munge iPad data for the soc-xsit project.
Load libraries for data manipulation
library(plyr)
library(dplyr)
library(directlabels)
library(stringr)
Read in the data, which are stored in separate .txt files for each participant.
# Placeholder for the combined data; it is overwritten once the per-child
# files have been read and bound together.
all.data <- data.frame()
# Collect the names of all raw results files in the child data directory.
# These are all of the kids for whom we have iPad data.
# `pattern` is a regular expression, not a shell glob: 'results_*' means
# "results" followed by zero or more underscores, so it also matches names
# without the underscore. "results_" requires the underscore to be present.
all_results <- list.files(
  path = "/Users/kmacdonald/Documents/Projects/SOC_XSIT/raw_data/child/",
  pattern = "results_",
  all.files = FALSE
)
# Munge one child's raw results file into a trial-level data frame.
#
# Reads the file, splits its contents on "<li>", takes the condition label
# from the third field, strips quote characters and HTML list markup from
# fields 11-150, and reshapes those 140 values into 14 rows of 10 columns.
#
# Arguments:
#   filename  Name of the results file. Characters 1-2 are used as the child
#             id (any "-" removed) and characters 20-36 as the session date
#             (any "_" and "." removed).
#   data_dir  Directory that contains `filename`. Defaults to the project's
#             raw child-data directory.
#
# Returns a data frame with 14 rows and character columns V1-V13: the ten
# trial fields, then date (V11), id (V12) and condition (V13).
bing_clean <- function(
    filename,
    data_dir = "/Users/kmacdonald/Documents/Projects/SOC_XSIT/raw_data/child") {
  fields <- readLines(file.path(data_dir, filename), warn = FALSE)
  fields <- unlist(strsplit(fields, "<li>"))

  # A short file would otherwise be padded with NA without any notice.
  if (length(fields) < 150) {
    warning("Expected at least 150 '<li>' fields in ", filename,
            " but found ", length(fields),
            "; missing trial values will be NA.", call. = FALSE)
  }

  # Condition label lives in the third field. The match is case-sensitive,
  # so "No-social" does not trigger the "Social" branch.
  condition_field <- fields[3]
  if (isTRUE(grepl("Social", condition_field))) {
    condition <- "Social"
  } else if (isTRUE(grepl("No-social", condition_field))) {
    condition <- "No-social"
  } else {
    condition <- NA_character_
  }

  # Trial information: 14 trials x 10 fields.
  fields <- fields[11:150]

  # Strip escaped quotes (backslash + quote) BEFORE bare quotes; doing it the
  # other way round removes the quote first and leaves the backslash behind.
  fields <- gsub("\\\"", "", fields, fixed = TRUE)
  fields <- gsub("\"", "", fields, fixed = TRUE)
  # Strip the list markup that closes each field and separates trials.
  fields <- gsub("</li>", "", fields, fixed = TRUE)
  fields <- gsub("</ul>},{<ul>", "", fields, fixed = TRUE)
  last <- length(fields)
  fields[last] <- gsub("</ul>}]", "", fields[last], fixed = TRUE)
  # Drop the "key: " prefix (non-greedy, up to the first ": ").
  fields <- gsub("^.*?: ", "", fields)

  # Session date and child id are encoded at fixed positions in the filename.
  date <- gsub("[_.]", "", substr(filename, 20, 36))
  id <- gsub("-", "", substr(filename, 1, 2), fixed = TRUE)

  # One row per trial, one column per field.
  trials <- as.data.frame(
    matrix(fields, nrow = 14, ncol = 10, byrow = TRUE),
    stringsAsFactors = FALSE
  )
  trials$V11 <- date
  trials$V12 <- id
  trials$V13 <- condition
  trials
}
# Run the munging function over every results file and stack the resulting
# per-child data frames into a single data frame.
all.data <- ldply(all_results, bing_clean)
# Label the columns: the ten trial fields, then date, id and condition.
colnames(all.data) <- c(
  "itemNum", "trialType", "samePos", "chosen", "chosen_idx",
  "kept", "kept_idx", "rt", "faceVid", "faceIdx",
  "date", "id", "condition"
)
Tag the different trial types:

* example
* exposure
* test

Arrange the data file with sensible column order.
# Using dplyr verbs: number the 14 rows within each `date` group, then flag
# each row as an example (trials 1-2), exposure (odd trials 3-13) or test
# (even trials 4-14) trial with a 1/0 indicator. Rows end up sorted by date
# and trial number.
# NOTE(review): assumes every `date` value identifies exactly one results
# file (14 rows) - confirm no two sessions share a timestamp.
all.data <- all.data %>%
  group_by(date) %>%
  mutate(
    trial = 1:14,
    example_trial = as.numeric(trial <= 2),
    exposure_trial = as.numeric(trial > 2 & trial %% 2 == 1),
    test_trial = as.numeric(trial > 2 & trial %% 2 == 0)
  ) %>%
  arrange(date, trial)
Clean up the data:

* figure out whether the child answered correctly on each trial
* relabel variables and change variable types for analysis
# Score whether the child's choice was correct, working within each child:
#  - exposure trials: the chosen index must equal the eye-gaze (face) index;
#  - example trials: both example rows get the same value, TRUE only when the
#    child's first row chose "flower" and second row chose "truck";
#  - test trials: the chosen image must equal the kept image.
# The grouping is dropped afterwards so later joins and column assignments
# operate on a plain (ungrouped) table, and the variable types needed for
# analysis are set: rt as integer, condition as factor.
all.data <- all.data %>%
  group_by(id) %>%
  mutate(correct = ifelse(
    exposure_trial == 1,
    chosen_idx == faceIdx,
    ifelse(
      example_trial == 1,
      chosen[1] == "flower" & chosen[2] == "truck",
      chosen == kept
    )
  )) %>%
  ungroup() %>%
  mutate(
    rt = as.integer(rt),
    condition = as.factor(condition)
  )
Add demographic data for each child (age and gender).
# Read the demographics sheet and attach it to every trial row.
demo <- read.csv("/Users/kmacdonald/Documents/Projects/SOC_XSIT/raw_data/child/soc-xsit-ipad-demo.csv")
# Left join (plyr's default) on the child id. The key is stated explicitly
# rather than inferred from shared column names, so adding a column to the
# demographics sheet that also exists in all.data cannot change the join.
# (The implicit join previously reported "Joining by: id".)
all.data <- join(all.data, demo, by = "id", type = "left")
# Reorder columns so the id and demographic variables come first.
all.data <- all.data[c(
  "id", "birthday", "gender", "age", "age.group", "date", "condition",
  "trial", "itemNum", "trialType", "samePos", "chosen", "chosen_idx", "kept",
  "kept_idx", "rt", "faceVid", "faceIdx", "example_trial",
  "exposure_trial", "test_trial", "correct"
)]
Fix the trial-type labels, which were not tracked correctly during the experiment.
# Recompute the trial type from the exposure trials: code 1 when the child's
# chosen image equals the kept image, code 2 otherwise. Keep only the columns
# needed to carry that information over to the matching test trials.
exposure.trials <- all.data %>%
  filter(exposure_trial == 1) %>%
  mutate(trial_type_redo = ifelse(chosen == kept, 1, 2)) %>%
  select(id, itemNum, trial_type_redo,
         chosen_exposure = chosen,
         kept_exposure = kept,
         correct_exposure = correct)
# Test trials, each paired with its exposure trial by child and item number.
test.trials <- all.data %>%
  filter(test_trial == 1)
test.trials <- join(test.trials, exposure.trials, by = c("id", "itemNum"))
# Give the levels explicitly: without `levels`, factor() derives them from
# the values present and fails with two labels when only one code occurs.
test.trials$trial_type_redo <- factor(
  test.trials$trial_type_redo,
  levels = c(1, 2),
  labels = c("Same", "Switch")
)
Save tidy data files.
# Write the full trial-level table to the processed-data directory.
write.csv(
  x = all.data,
  file = "/Users/kmacdonald/Documents/Projects/SOC_XSIT/processed_data/child/soc-xsit-ipad-alldata.csv"
)
# Write the test trials (with their recomputed trial type) alongside it.
write.csv(
  x = test.trials,
  file = "/Users/kmacdonald/Documents/Projects/SOC_XSIT/processed_data/child/soc-xsit-ipad-testdata.csv"
)