Script for visualizing and analyzing soc-xsit-ipad data.

Load libraries and helper functions.

library(plyr)
library(dplyr)
library(bootstrap)
library(lme4)
library(ggplot2)
source("/Users/kmacdonald/Documents/Projects/SOC_XSIT/XSIT-MIN/analysis/Ranalysis/useful.R")

Read in the data.

# Directory holding the processed child data. It defaults to the original
# hard-coded location; set the SOC_XSIT_DATA_DIR environment variable to run
# the script against a copy of the data elsewhere without editing this file.
data_dir <- Sys.getenv(
  "SOC_XSIT_DATA_DIR",
  unset = "/Users/kmacdonald/Documents/Projects/SOC_XSIT/processed_data/child"
)

# Read the two processed CSV exports (all data / test data, per the file
# names) from the same directory.
data.all_df <- read.csv(file.path(data_dir, "soc-xsit-ipad-alldata.csv"))
data.test_df <- read.csv(file.path(data_dir, "soc-xsit-ipad-testdata.csv"))

Summarize data set.

Get the number of subjects

# Number of distinct participant ids in the test data.
print(
  summarise(data.test_df, n_subjects = n_distinct(id))
)
##   n_subjects
## 1         25

Get condition breakdown

# Number of distinct participant ids within each condition.
print(
  summarise(
    group_by(data.test_df, condition),
    n_subjects = n_distinct(id)
  )
)
## Source: local data frame [2 x 2]
## 
##   condition n_subjects
## 1 No-social         10
## 2    Social         15

Get age by condition breakdown

# Number of distinct participant ids in each age group x condition cell.
print(
  summarise(
    group_by(data.test_df, age_group, condition),
    n_subject = n_distinct(id)
  )
)
## Source: local data frame [5 x 3]
## Groups: age_group
## 
##   age_group condition n_subject
## 1         3    Social         3
## 2         4 No-social         6
## 3         4    Social         8
## 4         5 No-social         4
## 5         5    Social         4

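Get the number of trials of each type for each kid. We will use this to decide which kids to keep in the analysis: a kid is flagged for inclusion only if they have at least 2 trials of both trial types.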

# For each kid, count the trials of each type, mark a trial type as usable
# when the kid has at least 2 trials of it, and total the usable types.
# The first summarise() drops the trial_type_redo grouping level, so the
# second one collapses to a single row per id.
include_df <- data.test_df %>%
  group_by(id, trial_type_redo) %>%
  summarise(n_trial = n()) %>%
  mutate(include_trial = ifelse(n_trial >= 2, 1, 0)) %>%
  summarise(n_types_ok = sum(include_trial))

# include_kid is 1 only when both trial types (same and switch) reached the
# 2-trial minimum. The count column is referenced by name so the flag is
# computed from the rows mutate() is working on, rather than by positional
# indexing into the outer include_df object.
include_df <- include_df %>%
  mutate(include_kid = ifelse(n_types_ok == 2, 1, 0)) %>%
  select(id, include_kid)

# Attach the per-kid flag to every test trial. plyr's join() defaults to a
# left join, so no rows are dropped here; filter on include_kid == 1 to
# actually exclude kids.
# NOTE(review): the summaries later in this script filter on `include`, not
# `include_kid` -- confirm which column is intended.
data.test_df <- join(data.test_df, include_df, by = "id")

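Keep only the first two test trials for each trial type. (Not yet implemented: `flag_trials` is still an empty stub, so the code below only tabulates how many trials of each type each kid has.)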

## this is tricky!
# TODO: unimplemented stub.
# Planned behaviour, per the original notes: take a data frame, iterate over
# its trial_type_redo column while building up an array for the same trials
# and one for the switch trials, and filter out a trial once the array for
# its type is longer than 2.
flag_trials <- function(df) {
  # Nothing is computed yet, so every call returns NULL.
  NULL
}

# Show the first few rows of the trial count per kid and trial type.
head(
  summarise(
    group_by(data.test_df, id, trial_type_redo),
    n_trial = n()
  )
)
## Source: local data frame [6 x 3]
## Groups: id
## 
##   id trial_type_redo n_trial
## 1  1            Same       3
## 2  1          Switch       3
## 3  2            Same       2
## 4  2          Switch       4
## 5  3            Same       3
## 6  3          Switch       3

Compute means: not collapsing by kid

# Mean of `correct` in each condition x trial type cell, computed over the
# rows with include == 1 (trials are pooled, not averaged within kid first).
# The summary column is left unnamed, so it is called `mean(correct)`.
means <- data.test_df %>%
  filter(include == 1) %>%
  group_by(condition, trial_type_redo) %>%
  summarise(mean(correct))

Compute means: keeping only age groups 4 and 5 and, in the Social condition, only trials where the exposure trial was answered correctly.

# Accuracy per condition x trial type on the restricted sample:
#   - rows with include == 1,
#   - age groups 4 and 5 only,
#   - every No-social row, plus Social rows with a correct exposure trial.
# ci.high() / ci.low() are not defined in this file (presumably they come
# from the sourced useful.R -- confirm).
means.filtered <- data.test_df %>%
  filter(include == 1) %>%
  filter(age_group %in% c(4, 5)) %>%
  filter(condition == "No-social" | correct_exposure == TRUE) %>%
  group_by(condition, trial_type_redo) %>%
  summarise(
    mean_correct = mean(correct),
    ci_high = ci.high(correct),
    ci_low = ci.low(correct),
    n_trial = n()
  )

# Exposure-trial accuracy within the Social condition, summarised separately
# for each value of `include`.
acc.expo <- data.test_df %>%
  filter(condition == "Social") %>%
  group_by(include) %>%
  summarise(
    mean_correct = mean(correct_exposure),
    ci_high = ci.high(correct_exposure),
    ci_low = ci.low(correct_exposure)
  )

# Convert `include` to a factor with display labels for its two levels.
# NOTE(review): the labels 'One' / 'This' do not obviously describe an
# include flag -- confirm they are intended for this data set.
acc.expo$include <- factor(
  acc.expo$include,
  labels = c('One','This')
)

# Print the summary table.
acc.expo
## Source: local data frame [2 x 4]
## 
##   include mean_correct ci_high  ci_low
## 1     One       0.5000 0.20833 0.20833
## 2    This       0.9545 0.04545 0.04545

Plot the data.

# Bar chart of exposure-trial accuracy per include level, with error bars
# built from the ci_low / ci_high offsets and a dashed reference line at 0.5.
# Note: the assignment reuses the name acc.expo, so the summary table is
# replaced by the plot object from here on.
acc.expo <- ggplot(
  acc.expo,
  aes(x = include, y = mean_correct, fill = include)
) +
  geom_bar(stat = "identity") +
  geom_errorbar(
    aes(ymin = mean_correct - ci_low, ymax = mean_correct + ci_high),
    width = 0.1
  ) +
  geom_hline(yintercept = 0.5, linetype = "dashed")

# Render the plot.
acc.expo

plot of chunk plots

# Test accuracy by condition, one coloured line per trial type, with error
# bars built from the ci_low / ci_high offsets, a dashed reference line at
# 0.5, and the y axis limited to [0, 1].
acc.test <- ggplot(
  means.filtered,
  aes(
    x = condition,
    y = mean_correct,
    group = trial_type_redo,
    colour = trial_type_redo
  )
) +
  geom_errorbar(
    aes(ymin = mean_correct - ci_low, ymax = mean_correct + ci_high),
    width = 0.05
  ) +
  geom_point() +
  geom_line() +
  geom_hline(yintercept = 0.5, linetype = "dashed") +
  scale_y_continuous(limits = c(0, 1))

# Render the plot.
acc.test

plot of chunk plots

Run stats.

Mixed models.