Do preprocessing on data or just read in data?
preprocess.data = FALSE
setwd("/Documents/GRADUATE_SCHOOL/Projects/sorterator/sorterator_2/processed_data/")
if (preprocess.data) {
# loop to read in files
all.data <- data.frame()
files <- dir(raw.data.path_2pt,pattern="*.txt")
for (file.name in files) {
print(file.name)
d <- read.smi.idf(paste(raw.data.path_2pt,file.name,sep=""),header.rows=35)
d <- preprocess.data(d)
d$subid <- file.name
## now here's where data get bound together
all.data <- rbind(all.data, d)
}
files <- dir(raw.data.path_5pt,pattern="*.txt")
for (file.name in files) {
print(file.name)
d <- read.smi.idf(paste(raw.data.path_5pt,file.name,sep=""),header.rows=38)
d <- preprocess.data(d)
d$subid <- file.name
## now here's where data get bound together
all.data <- rbind(all.data, d)
}
write.csv(all.data,paste(processed.data.path,
"all_data.csv",sep=""),
row.names=FALSE)
}
#d <- read.csv("processed_data/all_data.csv")
d <- read.csv("all_data.csv")
d$y[d$y == "1050"] <- NA
d$x[d$x == "0"] <- NA
d$count.na <- is.na(d$x) | is.na(d$y) # make a column that shows whether NA (true) or not (false)
setwd("/Documents/GRADUATE_SCHOOL/Projects/sorterator/sorterator_2/analysis/")
#order <- read.csv("analysis/stim_order.csv")
order <- read.csv("stim_order.csv")
d <- left_join(d, order)
d$subid = as.factor(sapply(as.character(d$subid),
function(a) strsplit(a,"-")[[1]][1]))
demo <- read.csv("demographics.csv")
d <- left_join(d, demo)
m = d[d$keep_drop == "keep",] # drop bad subjects
d = d[!is.na(d$shape),]
d = get.trial.order(d)
d$trial.set = as.factor(ifelse(grepl("training",d$stimulus), "training", "testing"))
d$block <- factor (d$block, levels = c("training","testing"))
rois <- list()
rois[[1]] <- c(0,400,650,650)
rois[[2]] <- c(850,400,650,650)
names(rois) <- c("L","R")
#roi.image(rois)
d$roi <- roi.check(d,rois)
d$roi_left <- d$roi == "L"
summary(d$roi_left)
## Mode FALSE TRUE NA's
## logical 72408 63497 212537
# in one of the test videos (YC) the object emerges earlier than the others - fix this
d$t.stim = ifelse(d$trial.type == "yellow_cross", d$t.stim + .33, d$t.stim)
subsample.hz <- 10
d$t.stim.binned <- round(d$t.stim*subsample.hz)/subsample.hz
allS <- d # save dataframe before exclusions
t.reject.threshold = .5
# get proportion nas for each trial
trial.nas <- ddply(d,c("subid", "stimulus"),
function (temp) {"prop.nas" = length(which(is.na(temp$x)))/length(temp$x)})
names(trial.nas)[3] = "prop.nas"
reject.trials <- trial.nas[trial.nas$prop.nas > t.reject.threshold,]
# add a column in data that combines subid and stim (trial)
reject.trials$subtrial <- paste(reject.trials$subid,"_",reject.trials$stimulus,sep="")
d$subtrial <- paste(d$subid,"_",d$stimulus,sep="")
# reject trials
d <- d[!(d$subtrial %in% reject.trials$subtrial),]
print(paste("Dropped", round(dim(reject.trials)[1]/dim(trial.nas)[1],2), "trials", sep = " "))
## [1] "Dropped 0.23 trials"
s.reject.threshold = .7
# proportion nas for each subject
subject.nas <- ddply(trial.nas, "subid", function (temp){mean(temp$prop.nas)})
# reject subjects
reject.subjects <- subject.nas[subject.nas$prop.nas > s.reject.threshold,]
d <- d[!(d$subid %in% reject.subjects$subid),]
# print rejects
print(paste("Dropped",
round(dim(reject.subjects)[1]/
length(which(summary(d$subid)>0)),2),
"subjects", sep = " "))
## [1] "Dropped 0 subjects"
n.trials.threshold = 10
total_n = length(unique(d$subid))
# reject subjects
d = d %>%
group_by(subid) %>%
summarise(n.trials = length(unique(stimulus))) %>% # get number of trials/subj
left_join(d) %>% # merge num trials back into giant data frame
filter(n.trials > n.trials.threshold) # then exclude subjects based on criteria
# print rejects
print(paste("Dropped", total_n -length(unique(d$subid)) ,
"subjects", sep = " "))
## [1] "Dropped 5 subjects"
# print rejects
print(paste("TOTAL:", length(unique(d$subid)) ,
"subjects", sep = " "))
## [1] "TOTAL: 17 subjects"
d$age.binned <- cut(d$age_group,
seq(floor(min(d$age_group)),ceiling(max(d$age_group)),.5),
include.lowest = TRUE)
sub.by.age = d %>%
group_by(age.binned) %>%
summarise(num = length(unique(subid))) %>%
separate(age.binned, c("min", "max"), sep = ",") %>%
mutate(min = gsub("[(]","",min))
qplot(x=min,y=num, data=sub.by.age, stat="identity",
geom="bar",
xlab="Age Group",
ylab="Number of Participants")+
scale_y_continuous(expand=c(0,0)) +
themeML
includes.by.order <- d %>%
group_by(subid, trial.order) %>%
summarise(x = na.mean(x)) %>%
filter(!is.na(x)) %>%
group_by(trial.order) %>%
summarise(n = n())
ggplot(includes.by.order, aes(x = trial.order, y = n)) +
geom_bar(stat="identity") +
ylim(0,15) +
ylab("number of participants")
includes.by.stimulus <- d %>%
group_by(subid, stimulus) %>%
summarise(x = na.mean(x)) %>%
filter(!is.na(x)) %>%
group_by(stimulus) %>%
summarise(n = n())
ggplot(includes.by.stimulus, aes(x = stimulus, y = n))+
geom_bar(stat="identity") +
ylim(0,15) +
ylab("number of participants") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
includes.by.each.subj = d %>%
group_by(subid, trial.order, age_group) %>%
summarise(x = na.mean(x)) %>%
filter(!is.na(x)) %>%
group_by(subid, age_group) %>%
summarise(n = n()) %>%
ungroup() %>%
arrange(n)
mean_trials = mean(includes.by.each.subj$n)
includes.by.each.subj$subid <- reorder(includes.by.each.subj$subid,
includes.by.each.subj$n)
ggplot(includes.by.each.subj, aes(y=n, x=subid, fill = age_group, order = n)) +
geom_bar(position="dodge", stat="identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
scale_fill_continuous(low="beige", high="red") +
geom_hline(yintercept=mean_trials) +
themeML
#creates new data frame with only one unique observation for each subject
#newd <- d
#newd <- newd[!duplicated(newd[,"subid",]),]
#qplot(x=subid,y=n.trials, data = newd , stat="identity",
# geom="bar",
# xlab="subject id",
# ylab="Number of Trials")+
#scale_y_continuous(expand=c(0,0)) +
# themeML
includes.by.subj <- d %>%
group_by(block, subid, trial.order, age.binned) %>%
summarise(x = na.mean(x)) %>%
filter(!is.na(x)) %>%
group_by(block, subid, age.binned) %>%
summarise(n = n())
#aged <- d
#aged <- aged[!duplicated(aged[,"subid",]),]
#aged <- aged[,c("subid","n.trials","age.binned")]
trials.by.age = d %>%
group_by(age.binned) %>%
summarise(avg.trials = mean(n.trials))
#histogram of average number of trials completed by age group
qplot(x=age.binned,y=avg.trials, data = trials.by.age , stat="identity",
geom="bar",
xlab="age",
ylab="Number of Trials")+
scale_y_continuous(expand=c(0,0)) +
themeML
ggplot(includes.by.subj, aes(y=n, x=block) ) +
geom_bar(position="dodge", stat="identity", fill = "red") +
facet_grid( ~ age.binned) +
themeML +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
includes.by.subj.block = includes.by.subj %>%
group_by(block) %>%
summarise(cih = ci.high(n),
cil = ci.low(n),
n = na.mean(n))
ggplot(includes.by.subj.block, aes(y=n, x=block), ) +
geom_bar(position="dodge", stat="identity", fill = "red") +
geom_errorbar(aes(ymin = n - cil, ymax= n + cih), width=0.2, position="dodge") +
themeML
window<-c(1.8,6)
qplot(x,y,geom="density2d",
data=d,
xlim=c(0,1680),ylim=c(0,1050), facets=~subid) +
annotate("rect", 0,0,0,650,400,1000, alpha=0.3) +
annotate("rect", 0,0,850,1500,400,1000, alpha=0.3)
time_enter = 2.28
time_emerge = 3.08
ms <- d %>%
group_by(t.stim.binned, trial.type, block) %>%
summarise(cih = ci.high(x),
cil = ci.low(x),
x = mean(x, na.rm = T))
qplot(t.stim.binned,x,colour= trial.type,
geom="line", lty=trial.type, facets= block~.,
data=ms) +
geom_ribbon(aes(ymin=x-cil, ymax=x+cih, fill=trial.type),
alpha=.2, colour=NA) +
geom_hline(yintercept=740, lty=4)+
annotate("rect", 0, 0, time_emerge, time_enter, -Inf, Inf, alpha=0.3)+
ylim(c(300, 1150)) +
scale_fill_manual(values=c("yellow_sphere"= "blue",
"red_cross"= "red",
"red_sphere" = "darkred",
"yellow_cross"= "cyan")) +
scale_color_manual(values=c("red", "red", "blue",
"blue", "red", "blue")) +
scale_linetype_manual(values = c("solid", "dashed", "solid",
"dashed","solid", "dashed")) +
xlab("Time (s)") +
ylab("X-Position of Gaze") +
themeML
training.by.block <- d %>%
mutate(first.half = trial.order < 10) %>%
filter(block == "training") %>%
filter(!is.na(trial.order)) %>%
group_by(t.stim.binned, trial.type, first.half) %>%
summarise(x = na.mean(x),
n = n())
training.by.block$first.half <- factor(training.by.block$first.half,
labels = c("second", "first"))
qplot(t.stim.binned,x,colour= trial.type,
geom="line", lty=trial.type, facets= first.half~.,
data=training.by.block) +
geom_hline(yintercept=740, lty=4)+
annotate("rect", 0, 0, time_emerge, time_enter, -Inf, Inf, alpha=0.3)+
ylim(c(300, 1150)) +
scale_fill_manual(values=c("yellow_sphere"= "blue",
"red_cross"= "red",
"red_sphere" = "darkred",
"yellow_cross"= "cyan")) +
scale_color_manual(values=c("red", "red", "blue",
"blue", "red", "blue")) +
scale_linetype_manual(values = c("solid", "dashed", "solid",
"dashed","solid", "dashed")) +
xlab("Time (s)") +
ylab("X-Position of Gaze") +
themeML
ms4 <- d %>%
mutate(first.half = trial.order < 10) %>%
filter(block == "training") %>%
group_by(t.stim.binned, trial.type, first.half, subid) %>%
summarise(x = na.mean(x),
n = n())
qplot(t.stim.binned,x,colour= trial.type,
geom="line", lty=trial.type, facets= subid ~ first.half,
data=ms4) +
geom_hline(yintercept=740, lty=4)+
annotate("rect", 0,0, time_emerge, time_enter,-Inf,Inf, alpha=0.3)+
ylim(c(300, 1150)) +
scale_fill_manual(values=c("yellow_sphere"= "blue",
"red_cross"= "red",
"red_sphere" = "darkred",
"yellow_cross"= "cyan")) +
scale_color_manual(values=c("red", "red", "blue",
"blue", "red", "blue"))+
scale_linetype_manual(values = c("solid", "dashed", "solid",
"dashed","solid", "dashed")) +
xlab("Time (s)") +
ylab("X-Position of Gaze")
allS$age.binned <- cut(allS$age_group,
seq(floor(min(allS$age_group)),ceiling(max(allS$age_group)),.5),
include.lowest = TRUE)
sub.by.age = allS %>%
group_by(age.binned) %>%
summarise(num = length(unique(subid))) %>%
separate(age.binned, c("min", "max"), sep = ",") %>%
mutate(min = gsub("[(]","",min))
qplot(x=min,y=num, data=sub.by.age, stat="identity",
geom="bar",
xlab="Age Group",
ylab="Number of Participants")+
scale_y_continuous(expand=c(0,0)) +
themeML
includes.by.order <- allS %>%
group_by(subid, trial.order) %>%
summarise(x = na.mean(x)) %>%
filter(!is.na(x)) %>%
group_by(trial.order) %>%
summarise(n = n())
ggplot(includes.by.order, aes(x = trial.order, y = n)) +
geom_bar(stat="identity") +
ylim(0,25) +
ylab("number of participants")
includes.by.stimulus <- allS %>%
group_by(subid, stimulus) %>%
summarise(x = na.mean(x)) %>%
filter(!is.na(x)) %>%
group_by(stimulus) %>%
summarise(n = n())
ggplot(includes.by.stimulus, aes(x = stimulus, y = n))+
geom_bar(stat="identity") +
ylim(0,25) +
ylab("number of participants") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
includes.by.each.subj = allS %>%
group_by(subid, trial.order, age_group) %>%
summarise(x = na.mean(x)) %>%
filter(!is.na(x)) %>%
group_by(subid, age_group) %>%
summarise(n = n()) %>%
ungroup() %>%
arrange(n)
mean_trials = mean(includes.by.each.subj$n)
includes.by.each.subj$subid <- reorder(includes.by.each.subj$subid,
includes.by.each.subj$n)
ggplot(includes.by.each.subj, aes(y=n, x=subid, fill = age_group, order = n)) +
geom_bar(position="dodge", stat="identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
scale_fill_continuous(low="beige", high="red") +
geom_hline(yintercept=mean_trials) +
themeML
#creates new data frame with only one unique observation for each subject
#newd <- d
#newd <- newd[!duplicated(newd[,"subid",]),]
#qplot(x=subid,y=n.trials, data = newd , stat="identity",
# geom="bar",
# xlab="subject id",
# ylab="Number of Trials")+
#scale_y_continuous(expand=c(0,0)) +
# themeML
includes.by.subj <- allS %>%
group_by(block, subid, trial.order, age.binned) %>%
summarise(x = na.mean(x)) %>%
filter(!is.na(x)) %>%
group_by(block, subid, age.binned) %>%
summarise(n = n())
#aged <- allS
#aged <- aged[!duplicated(aged[,"subid",]),]
#aged <- aged[,c("subid","n.trials","age.binned")]
#trials.by.age = aged %>%
# group_by(age.binned) %>%
# summarise(avg.trials = mean(n.trials))
#histogram of average number of trials completed by age group
#qplot(x=age.binned,y=avg.trials, data = trials.by.age , stat="identity",
# geom="bar",
# xlab="age",
# ylab="Number of Trials")+
## themeML
ggplot(includes.by.subj, aes(y=n, x=block) ) +
geom_bar(position="dodge", stat="identity", fill = "red") +
facet_grid( ~ age.binned) +
themeML +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
includes.by.subj.block = includes.by.subj %>%
group_by(block) %>%
summarise(cih = ci.high(n),
cil = ci.low(n),
n = na.mean(n))
ggplot(includes.by.subj.block, aes(y=n, x=block), ) +
geom_bar(position="dodge", stat="identity", fill = "red") +
geom_errorbar(aes(ymin = n - cil, ymax= n + cih), width=0.2, position="dodge") +
themeML
window<-c(1.8,6)
qplot(x,y,geom="density2d",
data=allS,
xlim=c(0,1680),ylim=c(0,1050), facets=~subid) +
annotate("rect", 0,0,0,650,400,1000, alpha=0.3) +
annotate("rect", 0,0,850,1500,400,1000, alpha=0.3)
time_enter = 2.28
time_emerge = 3.08
ms <- allS %>%
group_by(t.stim.binned, trial.type, block) %>%
summarise(cih = ci.high(x),
cil = ci.low(x),
x = mean(x, na.rm = T))
qplot(t.stim.binned,x,colour= trial.type,
geom="line", lty=trial.type, facets= block~.,
data=ms) +
geom_ribbon(aes(ymin=x-cil, ymax=x+cih, fill=trial.type),
alpha=.2, colour=NA) +
geom_hline(yintercept=740, lty=4)+
annotate("rect", 0, 0, time_emerge, time_enter, -Inf, Inf, alpha=0.3)+
ylim(c(300, 1150)) +
scale_fill_manual(values=c("yellow_sphere"= "blue",
"red_cross"= "red",
"red_sphere" = "darkred",
"yellow_cross"= "cyan")) +
scale_color_manual(values=c("red", "red", "blue",
"blue", "red", "blue")) +
scale_linetype_manual(values = c("solid", "dashed", "solid",
"dashed","solid", "dashed")) +
xlab("Time (s)") +
ylab("X-Position of Gaze") +
themeML
training.by.block <- allS %>%
mutate(first.half = trial.order < 10) %>%
filter(block == "training") %>%
filter(!is.na(trial.order)) %>%
group_by(t.stim.binned, trial.type, first.half) %>%
summarise(x = na.mean(x),
n = n())
training.by.block$first.half <- factor(training.by.block$first.half,
labels = c("second", "first"))
qplot(t.stim.binned,x,colour= trial.type,
geom="line", lty=trial.type, facets= first.half~.,
data=training.by.block) +
geom_hline(yintercept=740, lty=4)+
annotate("rect", 0, 0, time_emerge, time_enter, -Inf, Inf, alpha=0.3)+
ylim(c(300, 1150)) +
scale_fill_manual(values=c("yellow_sphere"= "blue",
"red_cross"= "red",
"red_sphere" = "darkred",
"yellow_cross"= "cyan")) +
scale_color_manual(values=c("red", "red", "blue",
"blue", "red", "blue")) +
scale_linetype_manual(values = c("solid", "dashed", "solid",
"dashed","solid", "dashed")) +
xlab("Time (s)") +
ylab("X-Position of Gaze") +
themeML
ms4 <- allS %>%
mutate(first.half = trial.order < 10) %>%
filter(block == "training") %>%
group_by(t.stim.binned, trial.type, first.half, subid) %>%
summarise(x = na.mean(x),
n = n())
qplot(t.stim.binned,x,colour= trial.type,
geom="line", lty=trial.type, facets= subid ~ first.half,
data=ms4) +
geom_hline(yintercept=740, lty=4)+
annotate("rect", 0,0, time_emerge, time_enter,-Inf,Inf, alpha=0.3)+
ylim(c(300, 1150)) +
scale_fill_manual(values=c("yellow_sphere"= "blue",
"red_cross"= "red",
"red_sphere" = "darkred",
"yellow_cross"= "cyan")) +
scale_color_manual(values=c("red", "red", "blue",
"blue", "red", "blue"))+
scale_linetype_manual(values = c("solid", "dashed", "solid",
"dashed","solid", "dashed")) +
xlab("Time (s)") +
ylab("X-Position of Gaze")