analysis.R

msbernst — Aug 12, 2013, 4:59 PM

library(plyr)
library(ggplot2)
library(MASS)
library(car)
Loading required package: nnet

# Analyze the cognitive load study results
results = read.csv("~/Documents/projects/twitch/working-memory/sample.csv")
results$isCorrect = (results$correctAnswer==results$userGuess)
backtasks = subset(results, task=='2back' & !is.na(correctAnswer) & !is.na(userGuess))
head(backtasks)
   X_id  phoneID number  task  previousTask duration userGuess
4     4 db95fbf3      3 2back Census/People     3057         1
5     5 db95fbf3      4 2back         2back     2884         0
7     7 db95fbf3      6 2back Census/People     2630         0
8     8 db95fbf3      7 2back         2back     4060         0
10   10 db95fbf3      9 2back SlideToUnlock     2207         0
11   11 db95fbf3     10 2back         2back     1164         0
   correctAnswer isCorrect
4              1      TRUE
5              0      TRUE
7              0      TRUE
8              0      TRUE
10             0      TRUE
11             0      TRUE

# Analysis of reaction time data: http://opensiuc.lib.siu.edu/cgi/viewcontent.cgi?article=1077&context=tpr

# Test for normality --- this should be a straight line and not significant
# if it's normally distributed
qqnorm(y=backtasks$duration)
qqline(y=backtasks$duration)

plot of chunk unnamed-chunk-1

shapiro.test(backtasks$duration)

    Shapiro-Wilk normality test

data:  backtasks$duration
W = 0.7208, p-value < 2.2e-16

# Box-Cox transformation to make data normal (varsity maneuver!)
boxcox <- powerTransform(backtasks$duration)
backtasks$transformed <- bcPower(backtasks$duration, boxcox$lambda)
qqnorm(y=backtasks$transformed)
qqline(y=backtasks$transformed)

plot of chunk unnamed-chunk-1

shapiro.test(backtasks$transformed)

    Shapiro-Wilk normality test

data:  backtasks$transformed
W = 0.9921, p-value = 0.1185

# Alternate: drop outliers >= 2 s.d. from mean
filtered <- subset(backtasks, duration <= mean(duration) + 2*sd(duration))
qqnorm(y=filtered$duration)
qqline(y=filtered$duration)

plot of chunk unnamed-chunk-1

shapiro.test(filtered$duration)

    Shapiro-Wilk normality test

data:  filtered$duration
W = 0.8721, p-value = 1.725e-14

# Conclusion: dropping outliers doesn't help; let's use Box-Cox

# --------------------------------
# What's the mean/median delay?
delay <- ddply(backtasks,
  c('previousTask'), summarise,
  p.05=quantile(duration, prob=0.05),
  p.25=quantile(duration, prob=0.25),
  p.50=quantile(duration, prob=0.50),
  p.75=quantile(duration, prob=0.75),
  p.95=quantile(duration, prob=0.95),
  meanDelay=mean(duration),
  meanAccuracy=mean((correctAnswer == userGuess))
)
delay
   previousTask p.05 p.25 p.50 p.75 p.95 meanDelay meanAccuracy
1         2back 1257 1597 2063 2932 5901      2625        0.992
2 Census/People 1714 2108 2760 3761 7166      3435        0.950
3 SlideToUnlock 1385 1829 2477 3404 5133      2979        1.000

# Plot the delays
qplot(duration, data=backtasks, geom="histogram")
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1

qplot(previousTask, duration, data=backtasks, geom=c('boxplot', 'jitter'))

plot of chunk unnamed-chunk-1


transformedDelay <- ddply(backtasks,
                          c('previousTask'), summarise,
                          p.05=quantile(transformed, prob=0.05),
                          p.25=quantile(transformed, prob=0.25),
                          p.50=quantile(transformed, prob=0.50),
                          p.75=quantile(transformed, prob=0.75),
                          p.95=quantile(transformed, prob=0.95),
                          meanDelay=mean(transformed),
                          meanAccuracy=mean((correctAnswer == userGuess))
)
transformedDelay
   previousTask  p.05  p.25  p.50  p.75  p.95 meanDelay meanAccuracy
1         2back 1.435 1.436 1.438 1.439 1.442     1.438        0.992
2 Census/People 1.437 1.438 1.439 1.440 1.442     1.439        0.950
3 SlideToUnlock 1.436 1.437 1.439 1.440 1.441     1.439        1.000

qplot(transformed, data=backtasks, geom="histogram")
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
this.

plot of chunk unnamed-chunk-1

qplot(previousTask, transformed, data=backtasks, geom=c('boxplot', 'jitter'))

plot of chunk unnamed-chunk-1



###########
# Tests

# ANOVA predicting transformed time using the previous task
model <- aov(transformed ~ previousTask, data=backtasks)
untransformedModel <- aov(duration ~ previousTask, data=backtasks)

# Original model is conservative, not significant
anova(untransformedModel)
Analysis of Variance Table

Response: duration
              Df   Sum Sq Mean Sq F value Pr(>F)
previousTask   2 1.41e+07 7038750    2.26   0.11
Residuals    290 9.03e+08 3114100               

# Transformed model is significant
anova(model)
Analysis of Variance Table

Response: transformed
              Df   Sum Sq  Mean Sq F value Pr(>F)  
previousTask   2 0.000037 1.83e-05    4.17  0.016 *
Residuals    290 0.001274 4.39e-06                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Which pairs of factor levels are significantly different from each other?
TukeyHSD(model)
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = transformed ~ previousTask, data = backtasks)

$previousTask
                                  diff        lwr       upr  p adj
Census/People-2back          0.0012405  9.313e-05 0.0023879 0.0305
SlideToUnlock-2back          0.0006985 -3.567e-04 0.0017537 0.2650
SlideToUnlock-Census/People -0.0005420 -2.037e-03 0.0009527 0.6695



#########
# Accuracy investigation
accuracyLogit <- glm(isCorrect ~ factor(previousTask), data=backtasks, family='binomial')
summary(accuracyLogit)

Call:
glm(formula = isCorrect ~ factor(previousTask), family = "binomial", 
    data = backtasks)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-3.106   0.127   0.127   0.127   0.320  

Coefficients:
                                  Estimate Std. Error z value Pr(>|z|)    
(Intercept)                           4.82       0.71    6.78  1.2e-11 ***
factor(previousTask)Census/People    -1.87       1.25   -1.50     0.13    
factor(previousTask)SlideToUnlock    14.75    2195.15    0.01     0.99    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 33.459  on 292  degrees of freedom
Residual deviance: 31.222  on 290  degrees of freedom
AIC: 37.22

Number of Fisher Scoring iterations: 18
# None of these are significant either