Sorterator_3 Data Analysis

Summer 2015

(1) SETUP

Do preprocessing on data or just read in data?

preprocess_data = F

(2) PREPROCESSING

a_files = list.files('../data/raw_data/version_a/')
b_files = list.files('../data/raw_data/version_b/')
files = c(a_files, b_files)
exp_version = c()
exp_version[(1:length(a_files))] = "a"
exp_version[(length(a_files)+1):(length(a_files) + length(b_files))] = "b"

if (preprocess_data) {
  
  # loop to read in files
  all.data <- data.frame()
  for (i in 1:length(files)) {
    file.name = files[i]
    print(file.name)
    if (exp_version[i] == "a"){
      d <- read.smi.idf(paste("../data/raw_data/version_a/",file.name,sep=""),
                        header.rows=37)
    } else {
      d <- read.smi.idf(paste("../data/raw_data/version_b/",file.name,sep=""),
                        header.rows=37)
    }
    d <- preprocess.data(d)
    d$subid <- file.name
    d$list  <- exp_version[i]
    
    
    ## now here's where data get bound together
    all.data <- rbind(all.data, d)
  }
  
  write.csv(all.data,"../data/processed_data/all_data.csv",
            row.names=FALSE) 
}

d <- read.csv("../data/processed_data/all_data.csv")

Change no-data coordinates to “NA”

d$y[d$y == "1050"] <- NA 
d$x[d$x == "0"] <- NA
d$count.na <- is.na(d$x) && is.na(d$y) # make a column that shows whether NA (true) or not (false)

Drop trials based on number of NAs

t.reject.threshold = .4

# get proportion nas for each trial
trial.nas <- ddply(d,c("subid", "stimulus"), 
                   function (temp) {"prop.nas" = length(which(is.na(temp$x)))/length(temp$x)})
names(trial.nas)[3] = "prop.nas"
reject.trials <- trial.nas[trial.nas$prop.nas > t.reject.threshold,]

# add a column in data that combines subid and stim (trial)
reject.trials$subtrial <- paste(reject.trials$subid,"_",reject.trials$stimulus,sep="")
d$subtrial <- paste(d$subid,"_",d$stimulus,sep="")

# reject trials
d <- d[!(d$subtrial %in% reject.trials$subtrial),]

print(paste("Dropped", round(dim(reject.trials)[1]/dim(trial.nas)[1],2), "trials", sep = " "))

## [1] "Dropped 0.07 trials"

(3) DATAFRAME SETUP

add stimulus information based on time

stim_info <- read.csv("stim_info.csv")
d["stimulus"] <- NA

# get trial intervals
d$trial =  cut(d$t, c(stim_info$time, 300),
               include.lowest = T, labels = 1:40)
d$trial = as.numeric(as.character(d$trial))
d = d %>% left_join(stim_info, by="trial")

d$t.trial = d$t - d$time

# get subsampled t
subsample.hz <- 10 
d$t.trial.binned <- round(d$t.trial*subsample.hz)/subsample.hz

# reorder block variable
d$block= factor(d$block,levels(d$block)[c(2,1)])

Define times and regions of interest (roi)

# times
time_enter = 2.28
time_emerge = 3.08
time_notice = time_emerge + .2

# regions
height = 1080
width = 1920
mid_point = width/2

rois <- list()
rois[[1]] <- c(mid_point-500,550,440,450) #x, y, +x, +y
rois[[2]] <- c(mid_point+60,550,440,450)
names(rois) <- c("L","R")
#roi.image(rois)
d$roi <- roi.check(d,rois)

(4) PLOTS

Distribution of gazes across all trials by subjects and lists

qplot(x,y,geom="density2d",
      data=d,
      xlim=c(0,1920),ylim=c(0,1080))  +
  annotate("rect", 0, 0, mid_point-500,  #0, 0, x1, x2, y1, y2
           mid_point-60, 550, 1000, alpha=0.3) +
  annotate("rect", 0, 0, mid_point+500,
           mid_point+60,550,1000, alpha=0.3) +
  geom_vline(xintercept = mid_point)

Plots by lists

raw x-position of gaze across all time

ms <- d %>% 
  group_by(t, trial.type, list, trial.type) %>% 
  summarise(x = mean(x, na.rm = T))

qplot(t,x, 
      geom="point", color = trial.type, facets= list~.,
      data=ms) + 
  geom_hline(yintercept=mid_point, lty=4) +
  geom_vline(xintercept = c(0,stim_info$time)) +
  xlab("Time (s)") + 
  ylab("X-Position of Gaze") + 
  themeML

x-position across trials

ms <- d %>% 
  group_by(t.trial.binned, trial.type,list) %>% 
  summarise(x = mean(x, na.rm = T))

qplot(t.trial.binned,x,colour= trial.type, 
      geom="line", lty=trial.type, facets= list~.,
      data=ms) + 
  geom_hline(yintercept = mid_point, lty=4) +
  annotate("rect", 0, 0, time_notice, time_enter, -Inf, Inf, alpha=0.3)+
  ylim(0,width) +
  xlab("Time (s)") + 
  ylab("X-Position of Gaze") + 
  themeML

x-position by proportion across trials

props <- d %>%
  filter(!is.na(roi))%>%
  group_by(t.trial.binned, trial.type, block, list) %>% 
  summarise(p.right = length(which(roi == "R"))/ 
              (length(which(roi == "L")) + length(which(roi == "R"))))


qplot(t.trial.binned,p.right,colour= trial.type, 
      geom="line", lty=trial.type, facets= list + block~.,
      data=props) + 
  annotate("rect", 0, 0, time_notice, time_enter, -Inf, Inf, alpha=0.3)+
  ylim(c(0, 1)) +
  xlab("Time (s)") + 
  ylab("Proportion looking to right") + 
  themeML

Collapsed across two lists

# flip coordinates for b list so can collapse
d$xc <- ifelse(d$list=="b", abs(d$x-width), d$x)
d$roic <- as.factor(ifelse(d$list=="b", 
                           ifelse(d$roi == "L", "R", "L"), 
                           as.character(d$roi)))

x-position across trials

ms <- d %>% 
  group_by(t.trial.binned, trial.type) %>% 
  summarise(cih = ci.high(xc),
            cil = ci.low(xc),
            x = mean(xc, na.rm = T))

qplot(t.trial.binned,x,colour= trial.type, 
      geom="line", lty=trial.type, 
      data=ms) + 
  geom_ribbon(aes(ymin=x-cil, ymax=x+cih, fill=trial.type), 
              alpha=.2, colour=NA) +
  geom_hline(yintercept=mid_point, lty=4) +
  annotate("rect", 0, 0, time_notice, time_enter, -Inf, Inf, alpha=0.3)+
  ylim(c(0, width)) +
  xlab("Time (s)") + 
  ylab("X-Position of Gaze") + 
  themeML

x-position by proportion across trials

props <- d %>%
  filter(!is.na(roic))%>%
  group_by(t.trial.binned, trial.type, block) %>% 
  summarise(p.right = length(which(roic == "R"))/ 
              (length(which(roic == "L")) + length(which(roic == "R"))))

qplot(t.trial.binned,p.right,colour= trial.type, 
      geom="line", lty=trial.type, facets= block~.,
      data=props) + 
  annotate("rect", 0, 0, time_notice, time_enter, -Inf, Inf, alpha=0.3)+
  ylim(c(0, 1)) +
  xlab("Time (s)") + 
  ylab("Proportion looking to right") + 
  themeML

occlusion plot by proportion

occlusion.props <- d %>%
  filter(t.trial.binned > time_enter & t.trial.binned < time_notice) %>%
  filter(!is.na(roic))%>%
  group_by(trial.type, block) %>%
  summarise(p.right = length(which(roic == "R"))/ 
              (length(which(roic == "L")) + length(which(roic == "R"))),
            ciwl = boot.ci(boot(roic, function(u,i)
              table(u[i])["R"]/length(u), R = 1000) , type =  "basic")$basic[4],
            ciul = boot.ci(boot(roic, function(u,i)
              table(u[i])["R"]/length(u), R = 1000) , type =  "basic")$basic[5])

ggplot(occlusion.props, aes(y=p.right, x=trial.type, fill = trial.type)) +
  geom_bar(position="dodge", stat="identity") +  
  geom_linerange(aes(ymin=ciwl,ymax=ciul), position=position_dodge(.9)) +
  facet_grid(~ block ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  themeML

by subject plots

mss <- d %>%
  group_by(t.trial.binned, trial.type, subid, block) %>%
  summarise(x = na.mean(xc),
            n = n()) 

qplot(t.trial.binned,x,colour= trial.type, 
      geom="line", lty=trial.type, facets= subid ~ block,
      data=mss) + 
  geom_hline(yintercept= mid_point, lty=4)+
  annotate("rect", 0,0, time_notice, time_enter,-Inf,Inf, alpha=0.3)+
  ylim(c(500, 1500)) +
  scale_fill_manual(values=c("yellow_sphere"= "blue", 
                             "red_cross"= "red", 
                             "red_sphere" = "darkred", 
                             "yellow_cross"= "cyan")) +
  scale_color_manual(values=c("red", "red", "blue", 
                              "blue", "red", "blue"))+
  scale_linetype_manual(values = c("solid", "dashed", "solid", 
                                   "dashed","solid", "dashed")) + 
  xlab("Time (s)") + 
  ylab("X-Position of Gaze")