# Do preprocessing on the raw data, or just read in the already-processed data?
# Toggle: TRUE re-runs preprocessing on every raw file and rewrites
# all_data.csv; FALSE skips this step so the script goes straight to reading
# the previously written CSV.
# NOTE: this flag shares its name with the preprocess.data() function called
# below. The call still resolves to the function because R skips non-function
# bindings when looking up a name in call position.
preprocess.data <- FALSE
if (preprocess.data) {
  # dir() takes a regular expression, not a shell glob: anchor on a literal
  # ".txt" suffix so only raw text exports are picked up.
  files <- dir(raw.data.path, pattern = "\\.txt$")

  # Read and preprocess each raw file, tagging its rows with the file name.
  per.file <- lapply(files, function(file.name) {
    print(file.name)
    d <- read.smi.idf(paste0(raw.data.path, file.name), header.rows = 35)
    d <- preprocess.data(d)
    d$subid <- file.name
    d
  })

  # Bind everything in one call instead of growing a data frame inside the
  # loop. The leading empty data frame keeps the zero-file case identical to
  # the original (an empty data frame is written out).
  all.data <- do.call(rbind, c(list(data.frame()), per.file))

  write.csv(all.data,
            paste0(processed.data.path, "all_data.csv"),
            row.names = FALSE)
}
# Load the processed samples.
d <- read.csv("/Documents/GRADUATE_SCHOOL/Projects/Sorterator/Analysis/processed_data/all_data.csv")

# Recode placeholder coordinates as missing. The mask is computed once, before
# y is overwritten, because the x recode depends on y still holding "1050".
# NOTE(review): y == 1050 (with x == 0) is presumably the tracker's "no gaze"
# marker -- confirm.
y.is.1050 <- d$y == "1050"
d$x[y.is.1050 & d$x == "0"] <- NA
d$y[y.is.1050] <- NA

# TRUE where the x sample is missing, FALSE otherwise.
d$count.na <- is.na(d$x)

# Attach the two lookup tables. join() is used rather than merge() because it
# doesn't sort the rows.
order <- read.csv("/Documents/GRADUATE_SCHOOL/Projects/Sorterator/Analysis/info/order.csv")
d <- join(d, order)
## Joining by: stimulus
order2 <- read.csv("/Documents/GRADUATE_SCHOOL/Projects/Sorterator/Analysis/info/order2.csv")
d <- join(d, order2)
## Joining by: subid

# Drop bad subjects first, then rows without a shape value. The two filters
# stay separate and in this order so NA handling is unchanged.
is.kept <- d$keep_drop == "keep"
d <- d[is.kept, ]
has.shape <- !is.na(d$shape)
d <- d[has.shape, ]

d <- get.trial.order(d)

# Label each row by trial position: orders below 11 are "training1", below 21
# are "training2", everything else is "testing".
in.first.set <- d$trial.order < 11
in.second.set <- d$trial.order < 21
d$trial.set <- as.factor(
  ifelse(in.first.set, "training1",
         ifelse(in.second.set, "training2", "testing"))
)
# Reject any trial in which more than this proportion of x samples is missing.
t.reject.threshold <- .5

# Proportion of missing x samples for each subject x stimulus pair (a trial).
trial.nas <- ddply(d, c("subid", "stimulus"),
                   function(temp) {
                     sum(is.na(temp$x)) / length(temp$x)
                   })
names(trial.nas)[3] <- "prop.nas"
reject.trials <- trial.nas[trial.nas$prop.nas > t.reject.threshold, ]

# Trial key combining subject id and stimulus.
d$subtrial <- paste0(d$subid, "_", d$stimulus)

# Build the same key for the rejected trials. paste() on zero-length inputs
# returns "_" (length 1), which cannot be assigned to a 0-row data frame, so
# the "nothing rejected" case is handled explicitly.
reject.trials$subtrial <- if (nrow(reject.trials) > 0) {
  paste0(reject.trials$subid, "_", reject.trials$stimulus)
} else {
  character(0)
}

# Drop all rejected trials in one pass rather than re-subsetting the full
# data frame once per rejected trial.
d <- d[!(d$subtrial %in% reject.trials$subtrial), ]

# The reported number is the proportion of trials dropped, not a count.
print(paste("Dropped", round(nrow(reject.trials) / nrow(trial.nas), 2), "trials", sep = " "))
## [1] "Dropped 0.13 trials"
# Reject any subject whose mean per-trial proportion of missing samples
# exceeds this threshold.
s.reject.threshold <- .7

# Mean proportion of missing samples per subject, averaged over that
# subject's trials in trial.nas.
subject.nas <- ddply(trial.nas, "subid",
                     function(temp) {
                       mean(temp$prop.nas)
                     })
# ddply() returns the unnamed summary as column "V1". Without this rename,
# subject.nas$prop.nas is NULL, the comparison below yields logical(0), and no
# subject is ever rejected.
names(subject.nas)[2] <- "prop.nas"

# Reject subjects.
reject.subjects <- subject.nas[subject.nas$prop.nas > s.reject.threshold, ]
d <- d[!(d$subid %in% reject.subjects$subid), ]
#print(paste("Dropped", nrow(reject.subjects), "Subjects", sep = " "))
# Two regions of interest keyed by side, handed to the project helpers
# roi.image() and roi.check().
# NOTE(review): each vector is presumably (x, y, width, height) in pixels --
# confirm against roi.check().
rois <- list(
  L = c(0, 400, 650, 650),
  R = c(850, 400, 650, 650)
)
roi.image(rois)

# Tag each sample with its ROI, then flag the samples that fall in the left one.
d$roi <- roi.check(d, rois)
d$roi_left <- d$roi == "L"
summary(d$roi_left)
## Mode FALSE TRUE NA's
## logical 39142 36842 112950

# Bin stimulus time to the nearest 1 / subsample.hz.
subsample.hz <- 10
scaled.time <- d$t.stim * subsample.hz
d$t.stim.binned <- round(scaled.time) / subsample.hz
# Subset the data frame into child and adult participants
#d = d[d$y > 400,] #restrict to top half?

# Split the samples by participant status. Rows whose status is NA are
# excluded from both subsets, exactly as subset() would do.
is.child <- !is.na(d$status) & d$status == "Child"
is.adult <- !is.na(d$status) & d$status == "Adult"
d.kids <- d[is.child, , drop = FALSE]
d.adults <- d[is.adult, , drop = FALSE]

# Two-element vector: 1.8 and 6.
window <- c(1.8, 6)
# Gaze density for each child, one facet per subject, with two shaded
# rectangles overlaid. The annotate() arguments are spelled out by name in the
# order the original positional call supplied them.
qplot(x, y,
      data = d.kids,
      geom = "density2d",
      facets = ~subid,
      xlim = c(0, 1680),
      ylim = c(0, 1050)) +
  annotate("rect", x = 0, y = 0,
           xmin = 0, xmax = 650, ymin = 400, ymax = 1000,
           alpha = 0.3) +
  annotate("rect", x = 0, y = 0,
           xmin = 850, xmax = 1500, ymin = 400, ymax = 1000,
           alpha = 0.3)
## Warning: Removed 2537 rows containing non-finite values (stat_density2d).
## Warning: Removed 3454 rows containing non-finite values (stat_density2d).
## Warning: Removed 3506 rows containing non-finite values (stat_density2d).
## Warning: Removed 1789 rows containing non-finite values (stat_density2d).
## Warning: Removed 2028 rows containing non-finite values (stat_density2d).
## Warning: Removed 765 rows containing non-finite values (stat_density2d).
## Warning: Removed 3720 rows containing non-finite values (stat_density2d).
## Warning: Removed 3612 rows containing non-finite values (stat_density2d).
## Warning: Removed 3155 rows containing non-finite values (stat_density2d).

# Shared theme tweak: rotate the x-axis labels by 90 degrees.
rotate.x.labels <- theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Sample counts per stimulus and per trial position.
qplot(d.kids$stimulus) + rotate.x.labels
qplot(d.kids$trial.order) + rotate.x.labels
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Counts of left-ROI vs. other samples, one facet per subject.
qplot(roi_left,
      data = d.kids,
      facets = . ~ subid,
      main = "Prelimary CDM Data, Side Bias")
# Mean x gaze position of the child data for every combination of time bin,
# trial type, block and trial set.
ms3 <- aggregate(x ~ t.stim.binned + trial.type + block + trial.set,
                 data = d.kids,
                 FUN = mean)

# Relabel the six trial.type levels with descriptive names. Repeated labels
# merge the corresponding levels.
trial.type.labels <- c("yellow sphere", "red cross",
                       "yellow sphere", "red cross",
                       "red sphere", "yellow cross")
ms3$trial.type <- factor(ms3$trial.type)
levels(ms3$trial.type) <- trial.type.labels

# Relabel the two block levels.
block.labels <- c("training", "testing")
ms3$block <- factor(ms3$block)
levels(ms3$block) <- block.labels

#ms3$cih <- aggregate(x ~ t.stim.binned + trial.type + block + trial.set, d.kids, ci.high)$x
#ms3$cil <- aggregate(x ~ t.stim.binned + trial.type + block + trial.set,d.kids, ci.low)$x
# Main time-course plot: mean x gaze position (ms3) against binned stimulus
# time, one line per trial type, faceted by trial.set (rows) and block
# (columns).
# 900 x 600 plots
qplot(t.stim.binned,x,colour= trial.type,
geom="line", lty=trial.type,size=block,facets= trial.set ~block, main= "CDM Data",
data=ms3) +
xlab("Time (s)") + ylab("X-Position of Gaze") +
# Vertical markers at 1.6 s and 2.8 s; these are also the bounds of the shaded
# rectangle added further down.
geom_vline(xintercept = 1.6) +
geom_vline(xintercept = 2.8) +
# NOTE(review): six colours / linetypes are supplied, but the relabelled
# trial.type factor appears to have only four distinct levels -- confirm that
# the values line up with the intended levels.
scale_color_manual(values=c("blue", "red", "red", "blue", "red", "blue"))+
scale_linetype_manual(values = c("solid", "dashed","solid", "dashed","solid", "dashed"))+
# NOTE(review): no layer in this chain maps a fill aesthetic (the geom_ribbon
# call at the bottom is commented out), so this scale looks unused.
scale_fill_manual(values=c("yellow sphere"= "blue", "red cross"= "red", "red sphere" = "darkred" , "yellow cross"= "cyan")) +
# Line widths for the two block levels.
scale_size_manual(values=c(3,1))+
# Horizontal reference line at x-position 840, drawn with linetype 4.
geom_hline(yintercept=840, lty=4)+
# Black-and-white base theme with the grid, panel border and plot background
# removed and explicit black axis lines.
theme_bw(base_size=25) +
theme(text = element_text(size=20),
axis.text.x = element_text(vjust=.7),
axis.ticks = element_line(size = 1),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line = element_line(color = 'black', size = 1.1))+
# Shaded band over the full panel height. The arguments are positional;
# assuming ggplot2's annotate(geom, x, y, xmin, xmax, ymin, ymax, ...) order
# they are x = 0, y = 0, xmin = 2.8, xmax = 1.6, ymin = -Inf, ymax = Inf
# -- TODO confirm.
annotate("rect", 0,0,2.8,1.6,-Inf,Inf, alpha=0.3)+
# Period labels placed at y = 1.06.
# NOTE(review): coord_cartesian() below restricts y to 400-1150, so these
# labels presumably fall outside the visible range; with data=ms3 each label
# is also drawn once per row of ms3 -- confirm whether this is intended.
geom_text(colour="black", cex=3, aes(0.75,1.06, label="Pre-occlusion"), data=ms3)+
geom_text(colour="black", cex=3, aes(2.20,1.06, label="Occlusion"), data=ms3)+
geom_text(colour="black", cex=3, aes(3.75,1.06, label="Post-occlusion"), data=ms3)+
# Zoom the y axis without dropping data outside the range.
coord_cartesian(ylim=c(400, 1150))
# Disabled confidence ribbon; it relies on the cih/cil columns whose
# computation is also commented out.
#geom_ribbon(aes(ymin=x-cil, ymax=x+cih, colour=trial.type, fill=trial.type), alpha=.2, colour=NA) +