Read in data

# Empty the global environment so objects left over from an earlier session
# cannot leak into this analysis.
rm(list = ls())

# Switch to the project folder, then read the assignment CSV: the first line
# is the header and at most 40000 data rows are loaded.
setwd('/Users/Allison/Documents/Jobs')
NYUdata <- read.csv(
  "/Users/Allison/Documents/Jobs/R_Assignment_data.csv",
  header = TRUE,
  nrows = 40000
)

Load packages and set columns

library(ggplot2)
library(plyr)
library(knitr)

# attach() is kept so that any later code relying on bare column names still
# resolves them; the assignments below no longer depend on it.
attach(NYUdata)

# Copy the subject identifier into its own column. The original note says the
# bare name `subject` was masked when looked up through the search path.
NYUdata$subjectNum <- NYUdata$subject

# Convert looking time from milliseconds to seconds. The column is read from
# the data frame itself: a bare `looking_time` would be resolved through the
# search path, where a global object of the same name silently takes priority
# over the attached copy.
NYUdata$looking_time_s <- NYUdata$looking_time / 1000

Two-way within subjects ANOVA

# Model looking time (s) on outcome, vocalization and their interaction.
# The Error() term adds error strata for subjectNum and for outcome within
# subjectNum, so effects are tested against the matching residuals.
looking_anova <- aov(
  looking_time_s ~ outcome * vocalization + Error(subjectNum/(outcome)),
  data = NYUdata
)

# Print the ANOVA table for each error stratum.
summary(looking_anova)
## 
## Error: subjectNum
##              Df Sum Sq Mean Sq F value Pr(>F)
## vocalization  1   55.7   55.74   1.225  0.277
## Residuals    30 1364.7   45.49               
## 
## Error: subjectNum:outcome
##                      Df Sum Sq Mean Sq F value Pr(>F)  
## outcome               1   19.0   18.98   1.794 0.1905  
## outcome:vocalization  1   44.8   44.79   4.234 0.0484 *
## Residuals            30  317.4   10.58                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Function for graphs

# Summarise one measured column within groups: count, mean, standard
# deviation, standard error of the mean, and confidence-interval half-width.
#
# Arguments:
#   data          data frame holding the measurements
#   measurevar    name (string) of the column to summarise
#   groupvars     character vector naming the grouping columns
#   na.rm         if TRUE, NAs are left out of the count, the mean and the sd
#   conf.interval confidence level used for the `ci` column (default .95)
#   .drop         forwarded to ddply's .drop argument
#
# Returns a data frame with one row per group: the grouping columns, N, the
# group mean (stored under the name given in measurevar), sd, se and ci.
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {

    # Statistics for a single group's rows. When na.rm is set, missing values
    # are excluded from the count as well as from the mean and sd.
    describe_group <- function(group_rows) {
        values <- group_rows[[measurevar]]
        n_obs <- if (na.rm) sum(!is.na(values)) else length(values)
        c(N    = n_obs,
          mean = mean(values, na.rm=na.rm),
          sd   = sd(values, na.rm=na.rm))
    }

    # One row of N / mean / sd per combination of the grouping variables
    stats <- ddply(data, groupvars, .fun = describe_group, .drop = .drop)

    # The mean column takes the name of the measured variable
    stats <- rename(stats, c("mean" = measurevar))

    # Standard error of the mean
    stats$se <- stats$sd / sqrt(stats$N)

    # Confidence-interval half-width: se times the t quantile at
    # conf.interval/2 + .5 (e.g. .975 for a 95% interval) with N-1 df
    t_quantile <- qt(conf.interval/2 + .5, stats$N-1)
    stats$ci <- stats$se * t_quantile

    stats
}

Bar graph of mean looking time

# Per-cell summary of looking time (N, mean, sd, se, ci) for every
# combination of vocalization and outcome.
looking <- summarySE(NYUdata,
                     measurevar = 'looking_time_s',
                     groupvars = c('vocalization', 'outcome'))

# Dodged bars of the cell means, grouped by vocalization on the x axis and
# filled by outcome, with error bars of +/- one standard error.
ggplot(looking, aes(x = vocalization, y = looking_time_s, fill = outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = looking_time_s - se, ymax = looking_time_s + se),
                width = .2,                      # width of the error-bar caps
                position = position_dodge(.9)) + # line the bars up with the dodged columns
  scale_x_discrete(labels = c("Cough", "Speech")) +
  scale_fill_manual(name = "Outcome",
                    values = c("#99CCFF", "#003399"),
                    labels = c("Non-target", "Target")) +
  labs(x = "Condition",
       y = "Mean looking time (s)",
       title = "Mean looking time by outcome and condition")