Data analysis of toddler uncertainty monitoring study.

1 Data preprocessing

Preliminaries.

Read in trial info, demographics, and word familiarity.

# Participant-level demographics.
demo <- read.csv("demographics.csv")

# Trial-level lookup tables. `trial` is converted to character in each one so
# that it can be matched against character trial labels.
trial_info <- read.csv("conditions.csv") %>%
  mutate(trial = as.character(trial))

word_fam_key <- read.csv("word_fam_key.csv") %>%
  mutate(trial = as.character(trial))

# Word familiarity is read wide; columns 2 through 41 are stacked into
# target/familiarity pairs.
word_fam <- gather(read.csv("word_familiarity.csv"),
                   "target", "familiarity", 2:41)

response_key <- read.csv("response_key.csv") %>%
  mutate(trial = as.character(trial))

Read in TS data file.

# Raw touchscreen (TS) export; the first line of the file holds column names.
d.raw_ts <- read.table(file = "AllSubs_TS.txt", header = TRUE)

Make TS data tidy.

# Accuracy columns acc1..acc20, stacked to one row per participant and trial.
d_acc_ts <- d.raw_ts %>%
  select(Subject_ID,
         acc1, acc2, acc3, acc4, acc5, acc6, acc7, acc8, acc9, acc10,
         acc11, acc12, acc13, acc14, acc15, acc16, acc17, acc18, acc19,
         acc20) %>%
  gather(trial, acc, acc1:acc20)

# Response-time columns RT1..RT20, stacked the same way.
d_rt <- d.raw_ts %>%
  select(Subject_ID,
         RT1, RT2, RT3, RT4, RT5, RT6, RT7, RT8, RT9, RT10,
         RT11, RT12, RT13, RT14, RT15, RT16, RT17, RT18, RT19,
         RT20) %>%
  gather(trial, RT, RT1:RT20)

# Strip the column-name prefixes so `trial` holds just the trial number
# (as a character string) in both tables.
d_acc_ts$trial <- str_replace(d_acc_ts$trial, "acc", "")
d_rt$trial <- str_replace(d_rt$trial, "RT", "")

# Both tables contain exactly Subject_ID, trial, and one measure, so the join
# keys are stated explicitly instead of being inferred from shared names.
d_ts <- d_acc_ts %>%
  left_join(d_rt, by = c("Subject_ID", "trial")) %>%
  mutate(ID = Subject_ID) %>%
  select(ID, RT, acc, trial)

# An accuracy value of 999 is recoded as missing.
# NOTE(review): RT is not recoded here -- confirm RT never uses 999 as a
# missing-data code.
d_ts$acc[d_ts$acc == 999] <- NA

Join trial info and demographics with raw TS data.

# Attach trial info, demographics, word familiarity, and the response key to
# the tidy TS data, drop excluded rows, and recode similarity to a label.
# Join keys are whatever column names each pair of tables shares.
# Note: filter(drop != 1) also removes rows where `drop` is NA.
d_ts <- d_ts %>%
  left_join(trial_info) %>%
  left_join(demo) %>%
  select(-participation_date, -birthdate) %>%
  left_join(word_fam_key) %>%
  left_join(word_fam) %>%
  left_join(response_key) %>%
  filter(drop != 1) %>%
  mutate(similar = ifelse(similarity == 1, "similar", "dissimilar")) %>%
  select(-similarity) %>%
  mutate(ID = as.factor(as.character(ID)))

There are 73 touchscreen participants.

Read in ET data.

# Raw eyetracker (ET) accuracy data and per-trial perceptual transitions.
d.raw_et <- read.csv("AllSubs_ET.csv")
d.raw_sw <- read.csv("perceptual_transitions.csv")

# Copy the participant column to `ID`, the name used for participants
# throughout the rest of the analysis.
d.raw_sw[["ID"]] <- d.raw_sw[["Subject"]]

Make ET data tidy.

# Stack the per-trial transition columns Trial1..Trial20 into long format and
# drop the original Subject column (its values were copied to ID earlier).
d.raw_sw <- d.raw_sw %>%
  gather(trial, trans, Trial1:Trial20) %>%
  select(-Subject)

# Trial labels as character strings in both ET tables.
d.raw_sw$trial <- str_replace(d.raw_sw$trial, "Trial", "")
d.raw_et$trial <- as.character(d.raw_et$trial)

# Participant IDs as character in every table used in the ET joins. An
# earlier intermediate conversion to factor was immediately undone by these
# lines, so it has been removed.
d.raw_sw$ID <- as.character(d.raw_sw$ID)
d.raw_et$ID <- as.character(d.raw_et$ID)
demo$ID <- as.character(demo$ID)
word_fam$ID <- as.character(word_fam$ID)

Join ET transitions and accuracy data and drop excluded participants (including pilots).

# Keep only participant, trial, and accuracy from the raw ET export.
d.raw_et <- select(d.raw_et, ID, trial, acc)

# Combine ET accuracy with transitions (inner join: only rows present in both
# tables), attach the lookup tables, drop excluded rows, and recode
# similarity from 1/0 to a string label. Accuracy is carried as `acc_et`.
# Note: filter(drop != 1) also removes rows where `drop` is NA.
d_et <- d.raw_et %>%
  inner_join(d.raw_sw) %>%
  mutate(acc_et = acc) %>%
  left_join(demo) %>%
  left_join(trial_info) %>%
  select(-participation_date, -birthdate) %>%
  left_join(word_fam_key) %>%
  left_join(word_fam) %>%
  left_join(response_key) %>%
  filter(drop != 1) %>%
  select(-acc) %>%
  mutate(similar = ifelse(similarity == 1, "similar", "dissimilar")) %>%
  select(-similarity) %>%
  mutate(ID = as.factor(as.character(ID)))

There are 71 eyetracker participants.

Merge TS and ET data and drop excluded participants (including pilots).

# Full join keeps every participant/trial row that appears in either task;
# ID is then rebuilt as a factor over the combined set of participants.
d <- full_join(d_ts, d_et) %>%
  filter(drop != 1) %>%
  mutate(ID = as.factor(as.character(ID)))

There are 80 participants with either eyetracker or touchscreen data.

2 Demographics

Summaries of income, race, and gender

##                     <15,000 15,000-25,000 25,000-40,000 40,000-60,000 
##             3             1             4             7            16 
## 60,000-90,000       >90,000 
##            16            33
##                                        
##                                     12 
##                       African American 
##                                      1 
##             African American/Caucasian 
##                                      3 
## American Indian or Alaskan / Caucasian 
##                                      1 
##                                  Asian 
##                                      1 
##                      Asian & Caucasian 
##                                      2 
##                    Asian and Caucasian 
##                                      1 
##                       Asian, Caucasian 
##                                      1 
##                        Asian/Caucasian 
##                                      1 
##                              Caucasian 
##                                     46 
##                             Caucasian  
##                                      3 
##            Native Hawaiian & Caucasian 
##                                      3 
##                       Pacific Islander 
##                                      0 
##                                SKIPPED 
##                                      0 
##                                   NA's 
##                                      5
##        Female   Male 
##      1     38     41

Age in months

##   vars  n  mean   sd median trimmed  mad   min   max range skew kurtosis
## 1    1 80 28.84 1.79  28.62   28.73 1.96 25.68 34.16  8.48 0.48    -0.34
##    se
## 1 0.2

3 Eyetracker task

Filter missing data and unfamiliar trials.

# Rename acc_et back to acc, then keep only trials that have a transition
# count, an accuracy score, and a non-zero familiarity value.
d_et_filt <- d_et %>%
  mutate(acc = acc_et) %>%
  select(-acc_et) %>%
  filter(!is.na(trans) & !is.na(acc) & familiarity != 0)

3.1 Descriptives

Transitions for accurate and inaccurate

## group: 0
##   vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## 1    1 253 1.32 1.48      1    1.08 1.48   0   9     9  1.7      4.1 0.09
## -------------------------------------------------------- 
## group: 1
##   vars    n mean   sd median trimmed  mad min max range skew kurtosis   se
## 1    1 1086 1.15 1.23      1    0.96 1.48   0   9     9 1.61      4.5 0.04

Transitions for similar and dissimilar

## group: dissimilar
##   vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## 1    1 668  1.1 1.16      1    0.96 1.48   0   9     9 1.67      5.9 0.04
## -------------------------------------------------------- 
## group: similar
##   vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## 1    1 671 1.26 1.39      1    1.04 1.48   0   9     9 1.61     3.61 0.05

Accuracy for similar and dissimilar

## group: dissimilar
##   vars   n mean   sd median trimmed mad min max range  skew kurtosis   se
## 1    1 668 0.86 0.34      1    0.95   0   0   1     1 -2.12     2.48 0.01
## -------------------------------------------------------- 
## group: similar
##   vars   n mean   sd median trimmed mad min max range  skew kurtosis   se
## 1    1 671 0.76 0.43      1    0.82   0   0   1     1 -1.21    -0.55 0.02

Plot accuracy.

Plot distribution (density) of switches by accuracy and similarity.

3.2 Analyses

3.2.1 Repeated measures ANOVA

We want to collapse across inaccurate trials because there are few of them. Is there an effect of similarity among inaccurate trials?

# Mean number of transitions for each participant x accuracy x similarity cell.
mss_et <- summarise(group_by(d_et_filt, ID, acc, similar),
                    trans = mean(trans))

# Welch (unpaired) t-test on the inaccurate-trial cell means:
# dissimilar vs. similar.
t.test(subset(mss_et, acc==0 & similar == "dissimilar")$trans,
       subset(mss_et, acc==0 & similar == "similar")$trans)
## 
##  Welch Two Sample t-test
## 
## data:  subset(mss_et, acc == 0 & similar == "dissimilar")$trans and subset(mss_et, acc == 0 & similar == "similar")$trans
## t = -0.28357, df = 108.99, p-value = 0.7773
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5517763  0.4136482
## sample estimates:
## mean of x mean of y 
##  1.429592  1.498656

No difference, makes sense to collapse across inaccurate trials.

Repeated-measures ANOVA with trial type (similar and accurate, dissimilar and accurate, and inaccurate) as the independent variable.

# Copy of the filtered ET data with accuracy recoded from 0/1 to the labels
# "inacc"/"acc", so the labels become column names when spread.
mss_et_acc <- d_et_filt

mss_et_acc$acc <- str_replace(mss_et_acc$acc, "0", "inacc")
mss_et_acc$acc <- str_replace(mss_et_acc$acc, "1", "acc")

# Per-participant mean transitions on inaccurate trials (the "acc" column
# produced by spread() is dropped, leaving ID and inacc).
mss_et_inacc <- mss_et_acc %>%
  group_by(ID, acc) %>%
  summarise(trans = mean(trans)) %>%
  spread(acc, trans) %>%
  select(-acc)

# Per-participant mean transitions, one column each for acc and inacc.
mss_et_acc <- mss_et_acc %>%
  group_by(ID, acc) %>%
  summarise(trans = mean(trans)) %>%
  spread(acc, trans)

# Per-participant mean transitions on accurate trials, split by similarity.
# After summarise() the data are still grouped by ID and acc, and dplyr keeps
# grouping variables through select(). ungroup() makes select(-acc) actually
# remove acc, so it cannot end up between inacc and dissimilar_acc and be
# gathered as an extra trial type below.
mss_et_sim <- d_et_filt %>%
  group_by(ID, acc, similar) %>%
  summarise(trans = mean(trans)) %>%
  ungroup() %>%
  filter(acc == 1) %>%
  spread(similar, trans) %>%
  mutate(similar_acc = similar) %>%
  mutate(dissimilar_acc = dissimilar) %>%
  select(-similar, -dissimilar, -acc)

# Keep participants with a mean in all three cells, then stack the three
# cells into long format (trial_type, trans) for the ANOVA.
mss_et_aov <- mss_et_inacc %>%
  left_join(mss_et_sim) %>%
  filter(!is.na(similar_acc),
         !is.na(dissimilar_acc),
         !is.na(inacc)) %>%
  gather("trial_type", "trans", inacc:dissimilar_acc)

# Repeated-measures ANOVA: trial_type as the within-participant factor.
ET.aov <- with(mss_et_aov,
                   aov(trans ~ trial_type +
                       Error(ID /trial_type)))
summary(ET.aov)
## 
## Error: ID
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 65  79.64   1.225               
## 
## Error: ID:trial_type
##             Df Sum Sq Mean Sq F value Pr(>F)  
## trial_type   2    4.0  2.0019   3.635 0.0291 *
## Residuals  130   71.6  0.5508                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.2.2 T-tests

# Paired t-test on per-participant mean transitions: inaccurate trials vs.
# accurate similar trials. Pairing is by row position, so this assumes both
# subsets list participants in the same order.
t.test(subset(mss_et_aov, trial_type=="inacc")$trans, 
       subset(mss_et_aov, trial_type=="similar_acc")$trans, paired = TRUE)
## 
##  Paired t-test
## 
## data:  subset(mss_et_aov, trial_type == "inacc")$trans and subset(mss_et_aov, trial_type == "similar_acc")$trans
## t = 0.42357, df = 65, p-value = 0.6733
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2362104  0.3633749
## sample estimates:
## mean of the differences 
##              0.06358225
# Paired t-test on per-participant mean transitions: inaccurate trials vs.
# accurate dissimilar trials. Pairing is by row position, so this assumes
# both subsets list participants in the same order.
t.test(subset(mss_et_aov, trial_type=="inacc")$trans, 
       subset(mss_et_aov, trial_type=="dissimilar_acc")$trans, paired = TRUE)
## 
##  Paired t-test
## 
## data:  subset(mss_et_aov, trial_type == "inacc")$trans and subset(mss_et_aov, trial_type == "dissimilar_acc")$trans
## t = 2.3814, df = 65, p-value = 0.02018
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.05298939 0.60376866
## sample estimates:
## mean of the differences 
##                0.328379
# Paired t-test on per-participant mean transitions: accurate similar trials
# vs. accurate dissimilar trials. Pairing is by row position, so this assumes
# both subsets list participants in the same order.
t.test(subset(mss_et_aov, trial_type=="similar_acc")$trans, 
       subset(mss_et_aov, trial_type=="dissimilar_acc")$trans, paired = TRUE)
## 
##  Paired t-test
## 
## data:  subset(mss_et_aov, trial_type == "similar_acc")$trans and subset(mss_et_aov, trial_type == "dissimilar_acc")$trans
## t = 2.868, df = 65, p-value = 0.005564
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0804042 0.4491894
## sample estimates:
## mean of the differences 
##               0.2647968

3.2.3 Individual differences

Do individual differences in the transitions differential (for similar vs. dissimilar) correlate with accuracy?

# Make sure accuracy is numeric so it can be averaged.
d_et_filt$acc <- as.numeric(as.character(d_et_filt$acc))

# Per-participant summaries attached to every trial row (mutate, not
# summarise, so the result keeps one row per trial and stays grouped by ID):
# mean transitions by accuracy and by similarity, the two difference scores,
# and mean accuracy.
d_et_filt_id <- d_et_filt %>%
  group_by(ID) %>%
  mutate(
    trans_acc = mean(trans[acc == 1]),
    trans_inacc = mean(trans[acc == 0]),
    trans_sim = mean(trans[similar == "similar"]),
    trans_dissim = mean(trans[similar == "dissimilar"]),
    trans_acc_diff = trans_inacc - trans_acc,
    trans_sim_diff = trans_sim - trans_dissim,
    avg_acc = mean(acc)
  )

Correlate differentials with accuracy.

##                trans_acc_diff trans_sim_diff
## trans_acc_diff                              
## trans_sim_diff         0.26*                
## avg_acc                 0.10          -0.05

3.3 Summary of Eyetracker Results

Accuracy was higher for dissimilar items.

Repeated-measures ANOVA shows that there is a difference between means for similar-accurate, dissimilar-accurate, and inaccurate.

T-test summary:

inaccurate - similar-accurate

inaccurate - dissimilar-accurate*

similar-accurate - dissimilar-accurate**

Individual differences in transition differentials (i.e., transitions for similar - dissimilar; transitions for inacc - acc) were correlated with each other but not with accuracy. This is expected as accuracy and similarity are correlated.

4 Touchscreen task

Filter fast and slow RTs, missing data, unfamiliar trials.

# Outlier bounds: mean of log RT plus/minus 3 standard deviations, computed
# over all rows of d_ts. na.rm = TRUE keeps the bounds defined if any RT is
# missing; without it a single NA would make both bounds NA and the filter
# below would drop every row.
# NOTE(review): accuracy uses 999 as a missing-data code -- confirm RT does
# not, since such values would enter these bounds as real response times.
log_rt_all <- log(d_ts$RT)
log_rt_mean <- mean(log_rt_all, na.rm = TRUE)
log_rt_sd <- sd(log_rt_all, na.rm = TRUE)

top_bound <- log_rt_mean + 3 * log_rt_sd
bottom_bound <- log_rt_mean - 3 * log_rt_sd

# Keep trials whose log RT lies strictly inside the bounds, that have an
# accuracy score, and whose familiarity value is non-zero. Rows with a
# missing RT are removed by the bound comparisons.
d_filt_rt <- d_ts %>%
  filter(log(RT) < top_bound,
         log(RT) > bottom_bound,
         !is.na(acc),
         familiarity != 0)

# Same trials with RT on the log scale.
d_filt_rt_log <- d_filt_rt %>%
  mutate(RT = log(RT))

4.1 Descriptives

RTs for accurate and inaccurate

## group: 0
##   vars   n mean      sd median trimmed     mad min   max range skew
## 1    1 312 4459 3536.01   3308 3817.99 2257.26 698 22075 21377 2.15
##   kurtosis     se
## 1     5.77 200.19
## -------------------------------------------------------- 
## group: 1
##   vars    n    mean      sd median trimmed    mad min   max range skew
## 1    1 1071 3702.75 2404.54   3105 3296.17 1405.5 577 22183 21606 2.99
##   kurtosis    se
## 1    13.07 73.47

RTs for similar and dissimilar

## group: dissimilar
##   vars   n    mean      sd median trimmed    mad min   max range skew
## 1    1 691 3783.56 2732.25   3052 3274.82 1482.6 577 20608 20031 2.83
##   kurtosis     se
## 1    10.24 103.94
## -------------------------------------------------------- 
## group: similar
##   vars   n    mean      sd median trimmed    mad min   max range skew
## 1    1 692 3963.03 2703.68 3229.5  3502.1 1674.6 718 22183 21465 2.79
##   kurtosis     se
## 1    11.65 102.78

Accuracy for similar and dissimilar

## group: dissimilar
##   vars   n mean   sd median trimmed mad min max range  skew kurtosis   se
## 1    1 691 0.84 0.36      1    0.93   0   0   1     1 -1.87     1.51 0.01
## -------------------------------------------------------- 
## group: similar
##   vars   n mean   sd median trimmed mad min max range  skew kurtosis   se
## 1    1 692 0.71 0.46      1    0.76   0   0   1     1 -0.91    -1.18 0.02

Plot accuracy for touchscreen.

4.2 Analyses

We want to collapse across inaccurate trials because there are few of them. Is there an effect of similarity among inaccurate trials?

# Mean RT for each participant x accuracy x similarity cell.
mss_ts <- summarise(group_by(d_filt_rt, ID, acc, similar),
                    RT = mean(RT))

# Welch (unpaired) t-test on the inaccurate-trial cell means:
# dissimilar vs. similar.
t.test(subset(mss_ts, acc==0 & similar == "dissimilar")$RT,
       subset(mss_ts, acc==0 & similar == "similar")$RT)
## 
##  Welch Two Sample t-test
## 
## data:  subset(mss_ts, acc == 0 & similar == "dissimilar")$RT and subset(mss_ts, acc == 0 & similar == "similar")$RT
## t = -1.0556, df = 105.12, p-value = 0.2936
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1470.1487   448.6413
## sample estimates:
## mean of x mean of y 
##  4253.841  4764.595

No difference, makes sense to collapse across inaccurate trials.

4.2.1 Repeated-measures ANOVA

Repeated-measures ANOVA with trial type (similar and accurate, dissimilar and accurate, and inaccurate) as the independent variable.

# Copy of the filtered TS data with accuracy recoded from 0/1 to the labels
# "inacc"/"acc", so the labels become column names when spread.
mss_ts_acc <- d_filt_rt

mss_ts_acc$acc <- str_replace(mss_ts_acc$acc, "0", "inacc")
mss_ts_acc$acc <- str_replace(mss_ts_acc$acc, "1", "acc")

# Per-participant mean RT on inaccurate trials (the "acc" column produced by
# spread() is dropped, leaving ID and inacc).
mss_ts_inacc <- mss_ts_acc %>%
  group_by(ID, acc) %>%
  summarise(RT = mean(RT)) %>%
  spread(acc, RT) %>%
  select(-acc)

# Per-participant mean RT, one column each for acc and inacc.
mss_ts_acc <- mss_ts_acc %>%
  group_by(ID, acc) %>%
  summarise(RT = mean(RT)) %>%
  spread(acc, RT)

# Per-participant mean RT on accurate trials, split by similarity.
# After summarise() the data are still grouped by ID and acc, and dplyr keeps
# grouping variables through select(). ungroup() makes select(-acc) actually
# remove acc, so it cannot end up between inacc and dissimilar_acc and be
# gathered as an extra trial type below.
mss_ts_sim <- d_filt_rt %>%
  group_by(ID, acc, similar) %>%
  summarise(RT = mean(RT)) %>%
  ungroup() %>%
  filter(acc == 1) %>%
  spread(similar, RT) %>%
  mutate(similar_acc = similar) %>%
  mutate(dissimilar_acc = dissimilar) %>%
  select(-similar, -dissimilar, -acc)

# Keep participants with a mean in all three cells, then stack the three
# cells into long format (trial_type, RT) for the ANOVA.
mss_ts_aov <- mss_ts_inacc %>%
  left_join(mss_ts_sim) %>%
  filter(!is.na(similar_acc),
         !is.na(dissimilar_acc),
         !is.na(inacc)) %>%
  gather("trial_type", "RT", inacc:dissimilar_acc)

# Repeated-measures ANOVA: trial_type as the within-participant factor.
TS.aov <- with(mss_ts_aov,
                   aov(RT ~ trial_type +
                       Error(ID /trial_type)))
summary(TS.aov)
## 
## Error: ID
##           Df    Sum Sq Mean Sq F value Pr(>F)
## Residuals 72 429285964 5962305               
## 
## Error: ID:trial_type
##             Df    Sum Sq  Mean Sq F value   Pr(>F)    
## trial_type   2  50080294 25040147   16.36 3.97e-07 ***
## Residuals  144 220461786  1530985                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.2.2 T-tests

# Paired t-test on per-participant mean RT: inaccurate trials vs. accurate
# similar trials. Pairing is by row position, so this assumes both subsets
# list participants in the same order.
t.test(subset(mss_ts_aov, trial_type=="inacc")$RT, 
       subset(mss_ts_aov, trial_type=="similar_acc")$RT, paired = TRUE)
## 
##  Paired t-test
## 
## data:  subset(mss_ts_aov, trial_type == "inacc")$RT and subset(mss_ts_aov, trial_type == "similar_acc")$RT
## t = 4.9725, df = 72, p-value = 4.332e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   643.2736 1504.1750
## sample estimates:
## mean of the differences 
##                1073.724
# Paired t-test on per-participant mean RT: inaccurate trials vs. accurate
# dissimilar trials. Pairing is by row position, so this assumes both subsets
# list participants in the same order.
t.test(subset(mss_ts_aov, trial_type=="inacc")$RT, 
       subset(mss_ts_aov, trial_type=="dissimilar_acc")$RT, paired = TRUE)
## 
##  Paired t-test
## 
## data:  subset(mss_ts_aov, trial_type == "inacc")$RT and subset(mss_ts_aov, trial_type == "dissimilar_acc")$RT
## t = 3.976, df = 72, p-value = 0.0001649
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   469.8532 1414.7576
## sample estimates:
## mean of the differences 
##                942.3054
# Paired t-test on per-participant mean RT: accurate similar trials vs.
# accurate dissimilar trials. Pairing is by row position, so this assumes
# both subsets list participants in the same order.
t.test(subset(mss_ts_aov, trial_type=="similar_acc")$RT, 
       subset(mss_ts_aov, trial_type=="dissimilar_acc")$RT, paired = TRUE)
## 
##  Paired t-test
## 
## data:  subset(mss_ts_aov, trial_type == "similar_acc")$RT and subset(mss_ts_aov, trial_type == "dissimilar_acc")$RT
## t = -0.86582, df = 72, p-value = 0.3895
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -433.9976  171.1598
## sample estimates:
## mean of the differences 
##               -131.4189

4.2.3 Individual differences

Do individual differences in RT differential (for similar vs. dissimilar) correlate with accuracy?

# Make sure accuracy is numeric so it can be averaged.
d_filt_rt$acc <- as.numeric(as.character(d_filt_rt$acc))

# Per-participant summaries attached to every trial row (mutate, not
# summarise, so the result keeps one row per trial and stays grouped by ID):
# mean RT by accuracy and by similarity, the two difference scores, and mean
# accuracy.
d_filt_rt_id <- d_filt_rt %>%
  group_by(ID) %>%
  mutate(
    rt_acc = mean(RT[acc == 1]),
    rt_inacc = mean(RT[acc == 0]),
    rt_sim = mean(RT[similar == "similar"]),
    rt_dissim = mean(RT[similar == "dissimilar"]),
    rt_acc_diff = rt_inacc - rt_acc,
    rt_sim_diff = rt_sim - rt_dissim,
    avg_acc = mean(acc)
  )

Correlate differentials with accuracy.

##             rt_acc_diff rt_sim_diff
## rt_acc_diff                        
## rt_sim_diff      0.29*             
## avg_acc          -0.06        0.15

Individual differences in RT differentials (i.e., RT for similar - dissimilar; RT for inacc - acc) were correlated with each other but not with accuracy. This is expected as accuracy and similarity are correlated.

4.3 Summary of Touchscreen Results

Accuracy was higher for dissimilar items.

Repeated-measures ANOVA shows that there is a difference between means for similar-accurate, dissimilar-accurate, and inaccurate.

T-test summary:

inaccurate - similar-accurate**

inaccurate - dissimilar-accurate**

similar-accurate - dissimilar-accurate

Individual differences in RT differentials (i.e., RT for similar - dissimilar; RT for inacc - acc) were correlated with each other but not with accuracy. This is expected as accuracy and similarity are correlated.