Data processing, plotting, and analysis script for SOL PEEK data

rm(list=ls())

Read in data

## [1] "loading library"

Demographics table

sol_demo <- read.csv("../../raw_data/sol_demo_all.csv", stringsAsFactors = F)
sol_demo$Sub.Num <- as.character(sol_demo$Sub.Num)

Eye movement data

iChart <- read.csv("processed_data/sol-ichart-merged.csv", 
                   check.names=F, stringsAsFactors=F)

iChart$Sub.Num <- as.character(iChart$Sub.Num)

Filter dataset

Filter out participants that should not go into analyses based on exclusionary criteria: a) age, b) didn’t know signs in the task, c) not enough ASL exposure.

exclude <- sol_demo %>% 
    filter(include == "no") %>% 
    select(Sub.Num, reason_excluded, stimuli, age_peek_months)

include <- sol_demo %>%
    filter(include == "yes") %>% 
    select(Sub.Num, include,
           reason_excluded, stimuli,
           age_code, signs_produced)

iChart <- left_join(iChart, include, by = c("Sub.Num", "stimuli"))

iChart <- filter(iChart, include == "yes")

Filter unknown signs

Create a clean target image variable.

targets <- c("juice", "cookie", "cup", "ball", "shoe", "kitty",
             "doll", "teddy", "book", "birdy", "car", "sock")

make_clean_target <- function (target_image, targets) {
    target_img_clean <- targets[str_detect(target_image, targets)]
    return(target_img_clean)
}

iChart$clean_target_img <- unlist(sapply(iChart$Target.Image, 
                                  function (x) make_clean_target(x, targets)))

Add unknown signs variable. Taken from sol_demo data frame.

ss_unknown_signs <- sol_demo %>% 
    filter(include == "yes") %>% 
    select(Sub.Num, parent_report_unknown_signs)
    
iChart <- left_join(iChart, ss_unknown_signs, by = "Sub.Num")

Now we can filter the iChart, removing the “unknown” signs.

ss_trials_df <- iChart %>% 
    group_by(Sub.Num, parent_report_unknown_signs) %>% 
    summarise(Trials = n()) 

# flag unknown trials
ss_unknown <- iChart %>% 
    rowwise() %>% 
    mutate(unknown_trial = ifelse(clean_target_img %in% parent_report_unknown_signs,
                                  "unknown_sign", "known_sign")) %>%
    select(Sub.Num, Tr.Num, unknown_trial)

# join this info with the original iChart
iChart$unknown_trial <- ss_unknown$unknown_trial
    
# now filter
iChart <- filter(iChart, unknown_trial == "known_sign")

# check to make sure our filtering worked correctly
post_filter_n <- iChart %>% 
    group_by(Sub.Num) %>% 
    summarise(Trials = n()) %>% 
    select(Trials)

ss_trials_df$post_filter <- post_filter_n$Trials

ss_trials_df <- ss_trials_df %>% 
    mutate(trials_removed = as.integer(Trials) - as.integer(post_filter))

Get total number of trials removed because of known signs

sum(ss_trials_df$trials_removed)

## [1] 22

Remove prescreened out trials

ss_prescreened <- iChart %>% 
    group_by(Sub.Num) %>% 
    filter(Prescreen.Notes != "") %>% 
    summarise(num_prescreened = n())

ss_trials_df <- left_join(ss_trials_df, ss_prescreened, by = "Sub.Num")

ss_trials_df <- ss_trials_df %>% 
    mutate(good_trials = Trials - sum(trials_removed, num_prescreened, na.rm=T))

iChart <- filter(iChart, Prescreen.Notes == "")

Remove participants for having too few trials

We define too few trials as less than or equal to 25% of the total number of trials in the task.

total_trials <- 32
trials_cut_point <- total_trials * .25

trials_filter <- ss_trials_df %>% 
    mutate(exclude_few_trials = ifelse(good_trials <= trials_cut_point, 
                                       "exclude", "include")) %>% 
    select(Sub.Num, good_trials, exclude_few_trials)

# get the number of participants removed by filter
trials_filter %>% group_by(exclude_few_trials) %>% summarise(n())

## Source: local data frame [2 x 2]
## 
##   exclude_few_trials n()
## 1            exclude   5
## 2            include  45

# merge filtering information with iChart
iChart <- left_join(iChart, trials_filter, by = "Sub.Num")

# now filter
iChart <- filter(iChart, exclude_few_trials == "include")

Get median split by age

kids_age_descriptives <- iChart %>%
    filter(age_code == "child") %>%
    select(Sub.Num, Months) %>% 
    distinct() %>% 
    summarise(median(Months), 
              max(Months),
              min(Months), 
              sd(Months),
              n()) %>% 
    print()

##   median(Months) max(Months) min(Months) sd(Months) n()
## 1             27          53          16   9.140259  29

# add median split variable to iChart
iChart$age_group <- ifelse(iChart$Months < kids_age_descriptives$`median(Months)`, 
                           "< 26.5 Months", 
                           ifelse(iChart$Months >= kids_age_descriptives$`median(Months)` & 
                                      iChart$Months <= kids_age_descriptives$`max(Months)`, 
                                  "> 26.5 Months",
                                  "Adults"))

gender_df <- iChart %>%
    filter(age_code == "child") %>%
    distinct(Sub.Num) %>% 
    group_by(Sex, age_group) %>% 
    summarise(count = unique(n())) %>% 
    print()

## Source: local data frame [4 x 3]
## Groups: Sex
## 
##   Sex     age_group count
## 1   F < 26.5 Months     8
## 2   F > 26.5 Months     9
## 3   M < 26.5 Months     6
## 4   M > 26.5 Months     6

iChart %>% group_by(age_group) %>% 
    summarise(n_distinct(Sub.Num),
              mean(Months), 
              min(Months), 
              max(Months))

## Source: local data frame [3 x 5]
## 
##       age_group n_distinct(Sub.Num) mean(Months) min(Months) max(Months)
## 1 < 26.5 Months                  14     20.71308          16          26
## 2 > 26.5 Months                  15     36.18715          27          53
## 3        Adults                  16    430.57634         246         695

Process iChart

First, we need to process the data, keeping only those trials on which the child was looking at the signer at F0.

C: Center
D: Distractor
T: Target
A: Away

includeOffCenter == FALSE -> only include trials child was looking at center at F0

includeOffCenter == TRUE -> include trials child was looking at center, target, or distractor at F0

iChart %>% group_by("0", Response) %>% summarise(Trials = n())

## Source: local data frame [4 x 3]
## Groups: "0"
## 
##   "0" Response Trials
## 1   0        A     28
## 2   0        C   1015
## 3   0        D     34
## 4   0        T     42

# change all trials to "Vanilla" 
iChart$Condition <- "Vanilla"

## define critical onset, change Cs to Ds and everything else to As
iChart <- defineOnsetSOL(iChart, critonset=0, end_critonset=300, 
                         includeOffCenter=FALSE, includeWindow = FALSE)

iChart %>% group_by("0", Response) %>% summarise(Trials = n())

## Source: local data frame [2 x 3]
## Groups: "0"
## 
##   "0" Response Trials
## 1   0        A    104
## 2   0        D   1015

Flag C_T and C_D Trials

Datawiz does not tell us which shifts land on a target vs. a disctractor. So we need to use a function that flags each trial as one of the following:

C_T: center to target
C_D: center to distractor
C-C: center to center (child leaves the signer, goes away, and comes back to signer)
no_shift
off_signer

# apply it to each row in our datase
trial_types <- apply(iChart, 1, trial_type_fun) 

# merge this information back with the iChart
iChart <- cbind(iChart, trial_types)

iChart %>% group_by(trial_types) %>% summarise(Trials = n())

## Source: local data frame [4 x 2]
## 
##   trial_types Trials
## 1         C_D    131
## 2         C_T    822
## 3    no_shift     62
## 4  off_signer    104

Next, we compute statistics over long window 0-5000 ms. This will allow us to see a distribution of RTs, which we will use to determine our analysis window.

iChart <- computeStatistics(iChart, startWindow=0, endWindow=5000)

## [1] "### Trials left ###"
## [1] 1100
## [1] 1050
## [1] 1000
## [1] 950
## [1] 900
## [1] 850
## [1] 800
## [1] 750
## [1] 700
## [1] 650
## [1] 600
## [1] 550
## [1] 500
## [1] 450
## [1] 400
## [1] 350
## [1] 300
## [1] 250
## [1] 200
## [1] 150
## [1] 100
## [1] 50
## [1] 0

# get analyisis window where 90% of RTs occur for kids, include all shifts
rts <- filter(iChart, trial_types %in% c("C_T"), age_group != "Adults")

analysis.window <- quantile(rts$RT, probs=c(0.05, 0.95), na.rm=T)

qplot(rts$RT) +
    geom_vline(x=analysis.window[1], col="red", lwd=1.5) +
    geom_vline(x=analysis.window[2], col="red", lwd=1.5) +
    annotate("text", x = 2500, y = 30, 
              label = "Analysis Window \n (95% RTs)")

Compute statistics over analysis window: 0-2600ms. We use 2600 ms because it is 500 ms longer than the end of our analysis window (2100ms). This allows us to include trials in which the participant to initiates and completes a shift at the very end of the analysis window.

iChart <- computeStatistics(iChart, startWindow=0, endWindow=2500)

## [1] "### Trials left ###"
## [1] 1100
## [1] 1050
## [1] 1000
## [1] 950
## [1] 900
## [1] 850
## [1] 800
## [1] 750
## [1] 700
## [1] 650
## [1] 600
## [1] 550
## [1] 500
## [1] 450
## [1] 400
## [1] 350
## [1] 300
## [1] 250
## [1] 200
## [1] 150
## [1] 100
## [1] 50
## [1] 0

Reject trials with really long RTs and with long gaps. Gaps are defined as a sequence of frames when the child is not looking at either picture or at the signer.

iChart <- filteriChart(iChart, minRT = analysis.window[1], maxRT = analysis.window[2], maxfirstgap=15, maxlonggap=15)

Get mean Accuracy and RT for each participant

acc_ss <- poolData(meanAccuracy(iChart, startWindowAcc=500, endWindowAcc=2000), 
                RejectFirstGap=TRUE,RejectLongestGap=TRUE, 
                RejectRT=FALSE, color=TRUE, dependent="Accuracy", 
                group="", facet="", dodge="", 
                xlab="", ylab= "Proportion\n  Looking\n  to target", 
                paired=TRUE, miny = 0.2, maxy = 0.80, 
                size=13, legend.direction="horizontal", 
                legend.position="bottom", 
                breaks=c(0.25, 0.50, 0.75))

## Compute reaction time for each age group only on C_T trials
rt_ss <- poolData(filter(iChart, trial_types %in% c("C_T")), 
               RejectFirstGap=TRUE, RejectLongestGap=TRUE,
               RejectRT=TRUE, color=FALSE, dependent="RT", group="", 
               facet="", dodge="Response",
               xlab="", ylab="mean RT (ms)", 
               paired=TRUE, 
               miny = 400, maxy=1300, 
               size=13, 
               legend.direction = "horizontal", 
               legend.position="bottom", 
               breaks=c(400, 800, 1200))

Get mean Acc and RT for each participant by condition

acc <- poolData(meanAccuracy(iChart, startWindowAcc=500, endWindowAcc=2000), 
                RejectFirstGap=TRUE,RejectLongestGap=TRUE, 
                RejectRT=FALSE, color=TRUE, dependent="Accuracy", 
                group="age_group", facet="", dodge="", 
                xlab="", ylab= "Proportion\n  Looking\n  to target", 
                paired=TRUE, miny = 0.2, maxy = 0.80, 
                size=13, legend.direction="horizontal", 
                legend.position="bottom", 
                breaks=c(0.25, 0.50, 0.75))

## Compute reaction time for each age group only on C_T trials
rt <- poolData(filter(iChart, trial_types %in% c("C_T")), 
               RejectFirstGap=TRUE, RejectLongestGap=TRUE,
               RejectRT=TRUE, color=FALSE, dependent="RT", group="age_group", 
               facet="", dodge="Response",
               xlab="", ylab="mean RT (ms)", 
               paired=TRUE, 
               miny = 400, maxy=1300, 
               size=13, 
               legend.direction = "horizontal", 
               legend.position="bottom", 
               breaks=c(400, 800, 1200))

Statistics

Get mean accuracy and rt for each participant

Some munging to get data frame for analysis. Variables needed for each subject:

Mean acc
Mean rt
Signs produced
Age
Age condition

# merge acc/rt van1
ss <- merge(acc_ss, rt_ss, by="Sub.Num")

# merge with demo info
ss <- merge(ss, filter(sol_demo, include=="yes"), by="Sub.Num")

# get age bins
ss_age_bins <- iChart %>%
    select(Sub.Num, age_group, Months) %>%
    distinct() 
    
ss <- merge(ss, ss_age_bins, by="Sub.Num")

# clean up variable names in data frame
names(ss)[names(ss)=="Vanilla"] <- "mean_accuracy"
names(ss)[names(ss)=="Vanilla_D"] <- "mean_rt"

First shifts

ss_first_shifts <- iChart %>%
    filter(trial_types %in% c("C_T", "C_D")) %>% 
    group_by(Sub.Num, trial_types, age_group) %>% 
    summarise(count = n())

ss_first_shifts_prop <- ss_first_shifts %>% 
    group_by(Sub.Num) %>% 
    summarise(total_trials_shifting = sum(count)) %>% 
    left_join(ss_first_shifts, by = "Sub.Num") %>% 
    mutate(shift_prop = round(count / total_trials_shifting, 2))

# add to ss full data frame
ss <- ss_first_shifts %>% 
    select(Sub.Num, count, trial_types) %>% 
    spread(trial_types, count) %>% 
    transmute(Sub.Num = Sub.Num, C_D_count = C_D, C_T_count = C_T) %>% 
    left_join(ss, by = "Sub.Num")

# add to ss full data frame
ss <- ss_first_shifts_prop %>% 
    select(Sub.Num, shift_prop, trial_types, 
           total_trials_shifting) %>% 
    spread(trial_types, shift_prop) %>% 
    transmute(Sub.Num = Sub.Num, C_D_prop = C_D, C_T_prop = C_T) %>% 
    left_join(ss, by = "Sub.Num")

# flag chance first shifters -- need to find a principled way to do this
ss <- ss %>% 
    mutate(C_D_prop = ifelse(C_T_prop == 1, 0, C_D_prop),
           exclude_chance_shifter = ifelse(Sub.Num %in% c("30050", "30018", "30051", "30086", "30088"), "exclude", "include"))

Set up filter to just include kids for correlation analyses.

ss_kids <- filter(ss, age_group != "Adults")
ss_rt <- filter(ss, age_group != "Adults", exclude_chance_shifter == "include")

write.csv(ss_kids, "../bayesian-data-analysis/sol_ss_kids.csv", row.names = F)

Look at all correlations

Hmisc::rcorr(as.matrix(select(ss_kids, mean_accuracy, C_T_prop, mean_rt, signs_produced, Months)))

##                mean_accuracy C_T_prop mean_rt signs_produced Months
## mean_accuracy           1.00     0.55   -0.53           0.46   0.64
## C_T_prop                0.55     1.00   -0.12           0.32   0.35
## mean_rt                -0.53    -0.12    1.00          -0.49  -0.34
## signs_produced          0.46     0.32   -0.49           1.00   0.76
## Months                  0.64     0.35   -0.34           0.76   1.00
## 
## n
##                mean_accuracy C_T_prop mean_rt signs_produced Months
## mean_accuracy             29       29      29             28     29
## C_T_prop                  29       29      29             28     29
## mean_rt                   29       29      29             28     29
## signs_produced            28       28      28             28     28
## Months                    29       29      29             28     29
## 
## P
##                mean_accuracy C_T_prop mean_rt signs_produced Months
## mean_accuracy                0.0020   0.0030  0.0142         0.0002
## C_T_prop       0.0020                 0.5351  0.0964         0.0628
## mean_rt        0.0030        0.5351           0.0079         0.0747
## signs_produced 0.0142        0.0964   0.0079                 0.0000
## Months         0.0002        0.0628   0.0747  0.0000

ggpairs(data = select(ss_kids, mean_accuracy, C_T_prop, mean_rt, signs_produced, Months),
        upper = list(continuous = "smooth", combo = "cor", 
                     params = c(method = "lm", color="darkblue")),
        lower = list(continuous = "cor"),
        diag = list(continuous = "density")) +
    theme_bw()

Just RT.

Hmisc::rcorr(as.matrix(select(ss_rt, mean_rt, signs_produced, Months)))

##                mean_rt signs_produced Months
## mean_rt           1.00          -0.51  -0.38
## signs_produced   -0.51           1.00   0.72
## Months           -0.38           0.72   1.00
## 
## n
##                mean_rt signs_produced Months
## mean_rt             24             23     24
## signs_produced      23             23     23
## Months              24             23     24
## 
## P
##                mean_rt signs_produced Months
## mean_rt                0.0132         0.0663
## signs_produced 0.0132                 0.0001
## Months         0.0663  0.0001

ggpairs(data = select(ss_rt, mean_rt, signs_produced, Months),
        upper = list(continuous = "smooth", combo = "cor", 
                     params = c(method = "lm", color="darkblue")),
        lower = list(continuous = "cor"),
        diag = list(continuous = "density")) +
    theme_bw()

Test difference in Acc/RT between age groups

t.test(mean_accuracy ~ age_group, data=ss_kids)

## 
##  Welch Two Sample t-test
## 
## data:  mean_accuracy by age_group
## t = -3.3317, df = 26.887, p-value = 0.00252
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.11969437 -0.02844552
## sample estimates:
## mean in group < 26.5 Months mean in group > 26.5 Months 
##                   0.5935824                   0.6676523

t.test(mean_rt ~ age_group, data=ss_kids)

## 
##  Welch Two Sample t-test
## 
## data:  mean_rt by age_group
## t = 1.8546, df = 18.91, p-value = 0.07931
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -16.59593 274.07901
## sample estimates:
## mean in group < 26.5 Months mean in group > 26.5 Months 
##                    1242.685                    1113.943

Plot individual participants performance

createPlots(iChart, startWindow=0, endWindow=2000, RejectLongestGap=FALSE, 
            RejectFirstGap=FALSE, RejectRT=FALSE, color=TRUE, smooth=400, 
            targetEnd=800, carrier="", targets=c(""), 
            group="age_group",  plotStats="PP", miny = 0.4, maxy=0.95, size=15, 
            legend.direction = "vertical", legend.position=c(0.85, 0.9), 
            breaks=c(0.25, 0.50, 0.75), x.target=0.33)

ss.et <- read.table("sol-ichart-v1-processed_PP_graphValues_by_subs_0_2000_minRT_33_maxRT_2433_lg_59_fg_34_n_45.txt", header = T)
ss.et$Sub.Num <- as.character(ss.et$Sub.Num)

Some munging to format data for plotting

detach("package:reshape", unload=TRUE)

#strip X from header
names(ss.et) <- gsub("X", "", names(ss.et))

ss.et <- left_join(ss.et, ss_kids, by="Sub.Num")

#melt to long form for plotting
ss.et.long <- reshape2::melt(ss.et, 
                   id.vars=c("Sub.Num", "Condition", 
                             "Months", "signs_produced", 
                             "groupping", "stimuli"),
                   variable.name = "Time.ms",
                   value.name = "accuracy")

#convert sub.num to a factor
ss.et.long <- ss.et.long %>% 
    mutate(Sub.Num = as.factor(Sub.Num), 
           Time.ms = as.numeric(as.character(Time.ms)),
           Months = as.factor(Months),
           accuracy = as.numeric(accuracy))

Exploratory Plots

# set breaks for x-axis
breaks <- seq(0,2000, by=500)

qplot(data = ss.et.long, x = Time.ms, y = accuracy,
      color=groupping, geom="smooth", method = "loess",
      ylim=c(0.2,1), xlab=c("Time (ms) from noun onset"), 
      ylab=c("Proportion Looking to target")) + 
    geom_hline(yintercept = 0.5, linetype="dashed") +
    scale_x_discrete(breaks = breaks) +
    facet_wrap(Months+signs_produced~Sub.Num, ncol=8) +
    #guides(color=FALSE) +
    geom_vline(xintercept=1200, linetype = "dashed") +
    theme(axis.text.x = element_text(angle = 75, hjust = 1)) +
    theme_bw()

Get confidence interval for graph values

ms_graph_values <- ss.et.long %>% 
    group_by(groupping, Time.ms) %>% 
    summarise(mean_accuracy = mean(accuracy),
              ci.high = ci.high(accuracy),
              ci.low = ci.low(accuracy))

breaks <- seq(0,2000, by=500)
y_breaks <- seq(0.25,1.0, by = 0.25)
points <- seq(0,1900, by = 100)

qplot(data = ms_graph_values, x = Time.ms, y = mean_accuracy,
      color=groupping, geom = "blank", xlab=c("Time in msec from onset of noun"), 
      ylab=c("ACCURACY\nProportion\nlooking\nto target")) + 
    geom_point(data = filter(ms_graph_values, Time.ms %in% points), size=3) +
    geom_line(size=0.7) +
    geom_linerange(data = filter(ms_graph_values, Time.ms %in% points),
                   aes(ymin=mean_accuracy - ci.low, 
                       ymax=mean_accuracy + ci.high), 
                   width = .03, size=0.3, alpha = 1) +
    scale_color_brewer(type = "qual", palette = "Set1") +
    scale_x_discrete(breaks = breaks) +
    scale_y_continuous(limits=c(0.25, 1.0), breaks = y_breaks) +
    geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_vline(xintercept=1150, linetype = "dashed") +
    guides(color=F) +
    theme_bw() +
    theme(axis.title.x = element_text(colour="grey40",size=22,
                                      angle=0,hjust=0.5,vjust=0,face="plain"),
          axis.title.y = element_text(colour="grey40",size=22,
                                      angle=0,hjust=0.5,vjust=0.5,face="plain"),
          axis.text.x = element_text(colour="grey20",size=18,
                                     angle=0,hjust=0.5,vjust=0,face="plain"),
          axis.text.y = element_text(colour="grey20",size=18,
                                     angle=0,hjust=0.5,vjust=0,face="plain"),
         plot.margin = unit(c(0.5,2,1,1), "cm"))

SOL-analysis

Kyle MacDonald

June 9, 2015