Pilot Data Analysis- AN

This is a markdown document for taking a look at the data from the Pilot of our Crossmodality Toolkit study, which is part of the September Tutorial in Empricism, the summer school attached to the MMIEL workshop.

First lets load in the data and do some sanity checks, to see how many participants we have, make sure we have the right amount of data (96 trials per participant), etc.

Note that in this data frame, “0” means that left matched with left, right with right - so we need to recode that for correctness and for other ways of looking at the data

library(tidyr)
library(plyr)
library(doBy)
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.4.1

pilotdata <- read.csv("F:/Google Drive/GitHub Repos/Crossmodality-Toolkit/data/pilotData.csv")

length(unique(pilotdata$subject))

## [1] 61

nrow(pilotdata) / length(unique(pilotdata$subject))

## [1] 96

pilotdata <- separate(data = pilotdata, col = condition, into = c('Focal1', 'Focal2', "Focal3", "ParticipantNum"), sep = "-", remove = FALSE)

pilotdata$condition <- paste(pilotdata$Focal1, pilotdata$Focal2, pilotdata$Focal3, sep='-')

table(pilotdata$condition)

## 
##         Amp-Size-Speed  Brightness-Amp-Affect Noise-Brightness-Color 
##                    960                    960                    960 
##      Noise-Shape-Speed     Pitch-Shape-Affect       Pitch-Size-Color 
##                    960                   1056                    960

pilotdata <- separate(data = pilotdata, col = InducerL, into = c('IndL', 'IndTokenL', "IndVL"), sep = "-", remove = FALSE)
pilotdata <- separate(data = pilotdata, col = InducerR, into = c('IndR', 'IndTokenR', "IndVR"), sep = "-", remove = FALSE)
pilotdata <- separate(data = pilotdata, col = ConcurrentL, into = c('ConL', 'ConTokenL', "ConVL"), sep = "-", remove = FALSE)
pilotdata <- separate(data = pilotdata, col = ConcurrentR, into = c('ConR', 'ConTokenR', "ConVR"), sep = "-", remove = FALSE)

pilotdata$LeftPair <- paste(pilotdata$IndVL, pilotdata$ConVL)
pilotdata$Resp <- ifelse(pilotdata$choice == 0, paste(pilotdata$IndVL, pilotdata$ConVL), paste(pilotdata$IndVL, pilotdata$ConVR))

pilotdata$RespCorr <- as.numeric(mapvalues(pilotdata$Resp,
                                        from= c("H H", "L L", "H L", "L H"),
                                        to= c(1, 1, 0, 0)))

table(pilotdata$RespCorr)

## 
##    0    1 
## 2214 3642

pilotdata <- subset(pilotdata, select = -c(X))

So, participants are responding “correctly” a pretty substantial portion of the time - that’s good, but obviously we can take a much better look at that with some stats and some much pretty graphs.

pilotdataagg <- summaryBy(RespCorr ~ subject + condition + IndL + ConL , data= pilotdata, Fun = mean, na.rm=TRUE)
pilotdataagg$Block <- as.factor(1)

ggplot(data=pilotdataagg, aes(x = Block, y = RespCorr.mean)) + 
  geom_boxplot(aes(),size = 0.4, alpha =0.4) +
  ggtitle("Pilot Data - Proportion Correct by Comparison by Condition") +
  scale_y_continuous("Proportion Correct", breaks = c(0, 0.5, 1), labels = c("0", "0.5", "1"), limits = c(0, 1)) +
  theme(axis.text = element_text(size=8)) +
  theme(plot.title = element_text(size=16, face="bold", hjust=0, color="#666666")) +
  facet_grid(IndL ~ ConL) +
  theme(strip.text.x = element_text(size = 8, colour = "black"),
        strip.text.y = element_text(size = 8, colour = "black", angle = 0),
        strip.background = element_rect(fill= "#FFFFFF")) +
  theme(legend.position="none")

ggplot(data=pilotdataagg, aes(x = Block, y = RespCorr.mean)) + 
  geom_jitter(aes(colour= subject),size = 0.4, alpha =0.4) +
  ggtitle("Pilot Data - Proportion Correct by Comparison by Condition") +
  scale_y_continuous("Proportion Correct", breaks = c(0, 0.5, 1), labels = c("0", "0.5", "1"), limits = c(0, 1)) +
  theme(axis.text = element_text(size=8)) +
  theme(plot.title = element_text(size=16, face="bold", hjust=0, color="#666666")) +
  facet_grid(IndL ~ ConL) +
  theme(strip.text.x = element_text(size = 8, colour = "black"),
        strip.text.y = element_text(size = 8, colour = "black", angle = 0),
        strip.background = element_rect(fill= "#FFFFFF")) +
  theme(legend.position="none")

## Warning: Removed 893 rows containing missing values (geom_point).

So these are pretty ugly - but every participant only makes each comparison a few times, so their scores can only be 0. 0.25, 0.5, 0.75, or 1.0

Hence the bins for a boxplot are pretty damn wide

What happens if we collapse across participants?

pilotdataagg2 <- summaryBy(RespCorr ~ condition + IndL + ConL , data= pilotdata, Fun = mean, na.rm=TRUE)
pilotdataagg2$Block <- as.factor(1)


ggplot(data=pilotdataagg2, aes(x = Block, y = RespCorr.mean)) + 
  geom_boxplot(aes(colour=condition),size = 0.4, alpha =0.4) +
  ggtitle("Pilot Data - Proportion Correct by Comparison by Condition") +
  scale_y_continuous("Proportion Correct", breaks = c(0, 0.5, 1), labels = c("0", "0.5", "1"), limits = c(0, 1)) +
  theme(axis.text = element_text(size=8)) +
  theme(plot.title = element_text(size=16, face="bold", hjust=0, color="#666666")) +
  facet_grid(IndL ~ ConL) +
  theme(strip.text.x = element_text(size = 8, colour = "black"),
        strip.text.y = element_text(size = 8, colour = "black", angle = 0),
        strip.background = element_rect(fill= "#FFFFFF"))

ggplot(data=pilotdataagg2, aes(x = Block, y = RespCorr.mean)) + 
  geom_jitter(aes(colour= condition),size = 1) +
  ggtitle("Pilot Data - Proportion Correct by Comparison by Condition") +
  scale_y_continuous("Proportion Correct", breaks = c(0, 0.5, 1), labels = c("0", "0.5", "1"), limits = c(0, 1)) +
  theme(axis.text = element_text(size=8)) +
  theme(plot.title = element_text(size=16, face="bold", hjust=0, color="#666666")) +
  facet_grid(IndL ~ ConL) +
  theme(strip.text.x = element_text(size = 8, colour = "black"),
        strip.text.y = element_text(size = 8, colour = "black", angle = 0),
        strip.background = element_rect(fill= "#FFFFFF"))

## Warning: Removed 4 rows containing missing values (geom_point).

One thing we were worried about is that maybe the data would be weird for auditory-auditory trials specifically- as a subject those trials definitley felt super strange

So lets further aggregate things and see what that looks like

pilotdataagg2$IndLType <- mapvalues(pilotdataagg2$IndL,
                                        from= c("Affect", "Amp", "Brightness", "Color", "Noise", "Pitch", "Shape", "Size", "Speed"),
                                        to= c("Affect", "Audio", "Visual", "Visual", "Audio", "Audio", "Visual", "Visual", "Visual"))

pilotdataagg2$ConLType <- mapvalues(pilotdataagg2$ConL,
                                        from= c("Affect", "Amp", "Brightness", "Color", "Noise", "Pitch", "Shape", "Size", "Speed"),
                                        to= c("Affect", "Audio", "Visual", "Visual", "Audio", "Audio", "Visual", "Visual", "Visual"))


ggplot(data=pilotdataagg2, aes(x = Block, y = RespCorr.mean)) + 
  geom_boxplot(aes(),size = 0.4, alpha =0.4) +
  ggtitle("Pilot Data - Proportion Correct by Comparison by Condition") +
  scale_y_continuous("Proportion Correct", breaks = c(0, 0.5, 1), labels = c("0", "0.5", "1"), limits = c(0, 1)) +
  theme(axis.text = element_text(size=8)) +
  theme(plot.title = element_text(size=16, face="bold", hjust=0, color="#666666")) +
  facet_grid(IndLType ~ ConLType) +
  theme(strip.text.x = element_text(size = 8, colour = "black"),
        strip.text.y = element_text(size = 8, colour = "black", angle = 0),
        strip.background = element_rect(fill= "#FFFFFF"))

Pilot Data Analysis- AN

Alan Nielsen

September 10, 2017