Sorterator AEM - Data Analysis

Ben Morris and Molly Lewis

Summer 2014

(1) SCRIPT SETUP

Do preprocessing on data or just read in data?

# Switch: TRUE re-runs preprocessing of the raw files, FALSE only reads the
# previously written all_data.csv.
preprocess.data <- FALSE

(2) PREPROCESSING

if (preprocess.data) {
  # Read every raw .txt export, preprocess it, tag it with its source file
  # name and write the stacked result to all_data.csv.
  #
  # NOTE(review): the logical flag `preprocess.data` has the same name as the
  # function called below. R skips non-functions when resolving a call, but if
  # that function lives in the global environment, assigning the flag replaces
  # it -- confirm where the function is defined.

  # dir() takes a regular expression, not a glob: "*.txt" would also match
  # names that merely contain "txt", so anchor on the extension.
  files <- dir(raw.data.path, pattern = "\\.txt$")

  # Build one data frame per file and bind once at the end, instead of
  # growing all.data with rbind() on every iteration.
  per.file <- lapply(files, function(file.name) {
    print(file.name)
    d <- read.smi.idf(paste0(raw.data.path, file.name), header.rows = 35)
    d <- preprocess.data(d)
    d$subid <- file.name
    d
  })

  ## now here's where data get bound together
  all.data <- if (length(per.file) > 0) {
    do.call(rbind, per.file)
  } else {
    data.frame() # no raw files found: same empty frame the loop started from
  }

  write.csv(all.data,
            paste0(processed.data.path, "all_data.csv"),
            row.names = FALSE)
}

# Load the combined, preprocessed data written by the preprocessing step.
# NOTE(review): absolute path rooted at "/Documents" -- confirm it resolves on
# the machine running this script.
d <- read.csv(
  file = "/Documents/GRADUATE_SCHOOL/Projects/Sorterator/Analysis/processed_data/all_data.csv"
)

– Change no-data coordinates to “NA” –

# Samples recorded at x == 0, y == 1050 are treated as "no data": blank x
# first (its test still needs the original y), then blank y.
d$x[which(d$x == "0" & d$y == "1050")] <- NA
d$y[which(d$y == "1050")] <- NA
# TRUE where the sample has no x coordinate, FALSE otherwise
d$count.na <- is.na(d$x)

(3) DATAFRAME SETUP

–Add item information–

# Item information, joined onto the gaze data through the columns the two
# tables share.
# NOTE: the variable `order` shadows base::order as a name; calls to order()
# still resolve to the function.
order <- read.csv(
  file = "/Documents/GRADUATE_SCHOOL/Projects/Sorterator/Analysis/info/order.csv"
)
# join() is used instead of merge() because it keeps the row order of d
d <- join(x = d, y = order)

## Joining by: stimulus

–Add subject drop information and drop bad subjects–

# Subject-level information (including the keep/drop flag used below), joined
# onto the gaze data through the columns the two tables share.
order2 <- read.csv(
  file = "/Documents/GRADUATE_SCHOOL/Projects/Sorterator/Analysis/info/order2.csv"
)
# join() is used instead of merge() because it keeps the row order of d
d <- join(x = d, y = order2)

## Joining by: subid

# Keep only rows whose subject is flagged "keep".
# which() discards NA comparisons: a bare logical index containing NA (e.g. a
# subject with no match in the joined table) would add all-NA rows instead of
# dropping them.
d <- d[which(d$keep_drop == "keep"), ]

– Drop Fillers –

# Filler trials have no shape value; keep only rows where shape is present.
d <- d[which(!is.na(d$shape)), ]

– Get trial order –

# Add each row's trial.order, then label it by position:
# below 11 -> "training1", below 21 -> "training2", otherwise "testing".
d <- get.trial.order(d)
d$trial.set <- factor(
  ifelse(d$trial.order < 11,
         "training1",
         ifelse(d$trial.order < 21, "training2", "testing"))
)

– Drop trials based on number of NAs –

# A trial (subject x stimulus) is rejected when more than this proportion of
# its samples has a missing x coordinate.
t.reject.threshold <- .5

# get proportion nas for each trial
trial.nas <- ddply(d, c("subid", "stimulus"),
                   function(temp) mean(is.na(temp$x)))
names(trial.nas)[3] <- "prop.nas"
# which() keeps an NA/NaN proportion from producing an all-NA row
reject.trials <- trial.nas[which(trial.nas$prop.nas > t.reject.threshold), ]

# add a column in data that combines subid and stim (trial)
d$subtrial <- paste0(d$subid, "_", d$stimulus)

# reject trials
# With zero rejected trials paste() would return "_" (length 1), which cannot
# be assigned to a zero-row data frame, so build the key only when needed.
reject.trials$subtrial <- if (nrow(reject.trials) > 0) {
  paste0(reject.trials$subid, "_", reject.trials$stimulus)
} else {
  character(0)
}
# One vectorised filter instead of re-subsetting d once per rejected trial
d <- d[!(d$subtrial %in% reject.trials$subtrial), ]

# The reported number is the PROPORTION of trials dropped, not a count
print(paste("Dropped", round(nrow(reject.trials) / nrow(trial.nas), 2), "trials", sep = " "))

## [1] "Dropped 0.13 trials"

– Drop subjects based on number of NAs–

# A subject is rejected when the mean proportion of missing samples across
# their trials exceeds this threshold.
s.reject.threshold <- .7

subject.nas <- ddply(trial.nas, "subid", function(temp) mean(temp$prop.nas))
# ddply names the result column "V1"; without this rename
# subject.nas$prop.nas is NULL and no subject is ever rejected.
names(subject.nas)[2] <- "prop.nas"

#reject subjects
reject.subjects <- subject.nas[which(subject.nas$prop.nas > s.reject.threshold), ]
# One vectorised filter instead of re-subsetting d once per rejected subject
d <- d[!(d$subid %in% reject.subjects$subid), ]
#print(paste("Dropped", nrow(reject.subjects), "Subjects", sep = " "))

– Define regions of interest (roi) –

# Two regions of interest, named "L" and "R", each given as four numbers.
# NOTE(review): the numbers look like (x, y, width, height) -- confirm against
# roi.image() / roi.check().
rois <- list(
  L = c(0, 400, 650, 650),
  R = c(850, 400, 650, 650)
)
# Draw the regions as a visual check
roi.image(rois)

plot of chunk unnamed-chunk-11

# Label every sample with the ROI returned by roi.check(), then flag the
# samples labelled "L".
d[["roi"]] <- roi.check(d, rois)
d[["roi_left"]] <- d[["roi"]] == "L"
# Counts of FALSE / TRUE / NA for the left-ROI flag
summary(d[["roi_left"]])

##    Mode   FALSE    TRUE    NA's 
## logical   39142   36842  112950

– Define sampling rate –

# Bins per second used when down-sampling the time axis
subsample.hz <- 10
# Round stimulus time to the nearest 1/subsample.hz
d$t.stim.binned <- round(subsample.hz * d$t.stim) / subsample.hz

(4) PLOT

Subset dataframe

#d = d[d$y > 400,] #restrict to top half?
# Split the data by participant group; rows with a missing status are dropped
d.kids <- d[which(d$status == "Child"), ]
d.adults <- d[which(d$status == "Adult"), ]

(1) Figure showing the distribution of gazes across all trials.

# Time window; not referenced by the plot below
window <- c(1.8, 6)
# 2-D density of children's gaze samples, one panel per subject, with two
# shaded rectangles drawn over the plot.
# NOTE(review): the rectangles end at y = 1000 -- confirm they are meant to
# match the ROI definitions.
qplot(x, y,
      data = d.kids,
      geom = "density2d",
      facets = ~subid,
      xlim = c(0, 1680),
      ylim = c(0, 1050)) +
  annotate("rect", x = 0, y = 0,
           xmin = 0, xmax = 650, ymin = 400, ymax = 1000,
           alpha = 0.3) +
  annotate("rect", x = 0, y = 0,
           xmin = 850, xmax = 1500, ymin = 400, ymax = 1000,
           alpha = 0.3)

## Warning: Removed 2537 rows containing non-finite values (stat_density2d).
## Warning: Removed 3454 rows containing non-finite values (stat_density2d).
## Warning: Removed 3506 rows containing non-finite values (stat_density2d).
## Warning: Removed 1789 rows containing non-finite values (stat_density2d).
## Warning: Removed 2028 rows containing non-finite values (stat_density2d).
## Warning: Removed 765 rows containing non-finite values (stat_density2d).
## Warning: Removed 3720 rows containing non-finite values (stat_density2d).
## Warning: Removed 3612 rows containing non-finite values (stat_density2d).
## Warning: Removed 3155 rows containing non-finite values (stat_density2d).

plot of chunk unnamed-chunk-15

(2) Histograms of trials by stimulus and by trial number

# Sample counts per stimulus (children); x labels rotated to stay readable
qplot(x = d.kids$stimulus) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

plot of chunk unnamed-chunk-16

# Sample counts by trial order (children); x labels rotated to stay readable
qplot(x = d.kids$trial.order) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-16

(3) Histogram of left-ROI looks by subject (side bias)

# Counts of left-ROI vs. not-left samples, one panel per child, to check for
# a side bias. Title typo fixed ("Prelimary" -> "Preliminary").
qplot(roi_left,
      data = d.kids,
      facets = . ~ subid,
      main = "Preliminary CDM Data, Side Bias")

plot of chunk unnamed-chunk-17

(4) PLOT OF X-POSITION OF GAZE

# Mean horizontal gaze position (children only) for every combination of
# time bin, trial type, block and trial set.
ms3 <- aggregate(x ~ t.stim.binned + trial.type + block + trial.set, d.kids, mean)
ms3$trial.type <- factor(ms3$trial.type)
# Relabel the trial types; repeated labels merge the corresponding levels.
# NOTE(review): assumes six levels in exactly this order -- confirm.
levels(ms3$trial.type) <- c("yellow sphere", "red cross", "yellow sphere", "red cross", "red sphere", "yellow cross")
ms3$block <- factor(ms3$block)
# NOTE(review): assumes two block levels, ordered training then testing
levels(ms3$block) <- c("training", "testing")
#ms3$cih <- aggregate(x ~ t.stim.binned + trial.type + block + trial.set, d.kids, ci.high)$x
#ms3$cil <- aggregate(x ~ t.stim.binned + trial.type + block + trial.set,d.kids, ci.low)$x

# 900 x 600 plots
qplot(t.stim.binned, x, colour = trial.type,
      geom = "line", lty = trial.type, size = block, facets = trial.set ~ block, main = "CDM Data",
      data = ms3) +
  xlab("Time (s)") + ylab("X-Position of Gaze") +
  # Vertical lines and the shaded band mark the 1.6 s - 2.8 s interval
  geom_vline(xintercept = 1.6) +
  geom_vline(xintercept = 2.8) +
  scale_color_manual(values = c("blue", "red", "red", "blue", "red", "blue")) +
  scale_linetype_manual(values = c("solid", "dashed", "solid", "dashed", "solid", "dashed")) +
  scale_fill_manual(values = c("yellow sphere" = "blue", "red cross" = "red", "red sphere" = "darkred", "yellow cross" = "cyan")) +
  scale_size_manual(values = c(3, 1)) +
  geom_hline(yintercept = 840, lty = 4) +
  theme_bw(base_size = 25) +
  theme(text = element_text(size = 20),
        axis.text.x = element_text(vjust = .7),
        axis.ticks = element_line(size = 1),
        plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(color = 'black', size = 1.1)) +
  annotate("rect", x = 0, y = 0, xmin = 2.8, xmax = 1.6, ymin = -Inf, ymax = Inf, alpha = 0.3) +
  # Phase labels. They used to sit at y = 1.06, outside the zoomed range set
  # by coord_cartesian() below, so they were never visible; y = 1100 puts them
  # inside it. annotate() draws each label once per panel, where
  # geom_text(data = ms3) drew one copy per row of ms3.
  annotate("text", x = 0.75, y = 1100, label = "Pre-occlusion", colour = "black", size = 3) +
  annotate("text", x = 2.20, y = 1100, label = "Occlusion", colour = "black", size = 3) +
  annotate("text", x = 3.75, y = 1100, label = "Post-occlusion", colour = "black", size = 3) +
  coord_cartesian(ylim = c(400, 1150))

plot of chunk unnamed-chunk-18

  #geom_ribbon(aes(ymin=x-cil, ymax=x+cih, colour=trial.type, fill=trial.type), alpha=.2, colour=NA) +