Data analysis of toddler uncertainty monitoring study.
Preliminaries.
Read in trial info, demographics, and word familiarity.
# Participant demographics; joined onto the trial-level data further down.
demo <- read.csv("demographics.csv")

# Trial-level lookup tables. `trial` is cast to character in each of them so
# that it has the same type as the `trial` column built from the raw data
# files (where it is produced by string replacement).
trial_info <- read.csv("conditions.csv") %>%
  mutate(trial = as.character(trial))
word_fam_key <- read.csv("word_fam_key.csv") %>%
  mutate(trial = as.character(trial))
response_key <- read.csv("response_key.csv") %>%
  mutate(trial = as.character(trial))

# Word familiarity arrives in wide form; columns 2 through 41 are stacked
# into (target, familiarity) pairs.
word_fam <- read.csv("word_familiarity.csv") %>%
  gather("target", "familiarity", 2:41)
Read in TS data file.
# Raw touchscreen (TS) export. header = TRUE: the first line of the file holds
# the column names (Subject_ID plus the per-trial acc*/RT* columns used below).
d.raw_ts <- read.table(file = "AllSubs_TS.txt", header = TRUE)
Make TS data tidy.
# Reshape the wide touchscreen file into one row per participant x trial.

# Accuracy: keep Subject_ID plus acc1..acc20, then stack into (trial, acc).
d_acc_ts <- d.raw_ts %>%
  select(
    Subject_ID,
    acc1, acc2, acc3, acc4, acc5, acc6, acc7, acc8, acc9, acc10,
    acc11, acc12, acc13, acc14, acc15, acc16, acc17, acc18, acc19, acc20
  ) %>%
  gather(trial, acc, acc1:acc20)

# Response time: same reshaping for RT1..RT20.
d_rt <- d.raw_ts %>%
  select(
    Subject_ID,
    RT1, RT2, RT3, RT4, RT5, RT6, RT7, RT8, RT9, RT10,
    RT11, RT12, RT13, RT14, RT15, RT16, RT17, RT18, RT19, RT20
  ) %>%
  gather(trial, RT, RT1:RT20)

# Strip the column-name prefixes so `trial` is just the trial number (as text)
# and the two long tables can be matched on it.
d_acc_ts$trial <- str_replace(d_acc_ts$trial, "acc", "")
d_rt$trial <- str_replace(d_rt$trial, "RT", "")

# Combine accuracy and RT. The join keys are spelled out rather than inferred
# from shared column names, so an unexpected extra shared column cannot
# silently change the match. Subject_ID is renamed to ID while selecting.
d_ts <- d_acc_ts %>%
  left_join(d_rt, by = c("Subject_ID", "trial")) %>%
  select(ID = Subject_ID, RT, acc, trial)

# An accuracy value of 999 is recoded to NA so it is treated as missing.
d_ts$acc[d_ts$acc == 999] <- NA
Join trial info and demographics with raw TS data.
# Attach trial info, demographics, word-familiarity and response keys to the
# touchscreen trials. Every join matches on the column names the two tables
# share. Rows are kept only when `drop` is present and not equal to 1
# (a missing `drop` also removes the row).
d_ts <- d_ts %>%
  left_join(trial_info) %>%
  left_join(demo) %>%
  select(-participation_date, -birthdate) %>%
  left_join(word_fam_key) %>%
  left_join(word_fam) %>%
  left_join(response_key) %>%
  filter(drop != 1) %>%
  # Replace the numeric similarity flag with a readable label and rebuild ID
  # as a factor whose levels are the IDs that remain after filtering.
  mutate(
    similar = ifelse(similarity == 1, "similar", "dissimilar"),
    ID = as.factor(as.character(ID))
  ) %>%
  select(-similarity)
There are 73 touchscreen participants.
Read in ET data.
# Raw eyetracker (ET) accuracy data.
d.raw_et <- read.csv("AllSubs_ET.csv")

# Per-trial transitions file (wide, Trial1..Trial20). `Subject` is copied into
# `ID` so this table uses the same participant key as the other tables.
d.raw_sw <- read.csv("perceptual_transitions.csv") %>%
  mutate(ID = Subject)
Make ET data tidy.
# Reshape the transitions file to long form: one row per participant x trial,
# with the transition count in `trans`. `Subject` is dropped because `ID`
# already carries the same values.
d.raw_sw <- d.raw_sw %>%
  gather(trial, trans, Trial1:Trial20) %>%
  select(-Subject)

# Make `trial` a bare trial number stored as text in both ET tables.
d.raw_sw$trial <- str_replace(d.raw_sw$trial, "Trial", "")
d.raw_et$trial <- as.character(d.raw_et$trial)

# Store ID as character in every table that is joined below so the join keys
# share one type. (The earlier factor conversions of these two ID columns were
# immediately overwritten by these casts and have been removed.)
d.raw_sw$ID <- as.character(d.raw_sw$ID)
d.raw_et$ID <- as.character(d.raw_et$ID)
demo$ID <- as.character(demo$ID)
word_fam$ID <- as.character(word_fam$ID)
Join ET transitions and accuracy data and drop excluded participants (including pilots).
# Only the participant/trial keys and the accuracy score are needed from the
# raw ET file.
d.raw_et <- select(d.raw_et, ID, trial, acc)

# inner_join keeps only rows that have a match in both ET tables; the lookup
# tables are then attached by their shared column names. ET accuracy is copied
# to `acc_et` and the original `acc` is dropped at the very end, presumably so
# it stays distinct from touchscreen `acc` when the data sets are merged.
# Rows are kept only when `drop` is present and not equal to 1.
d_et <- inner_join(d.raw_et, d.raw_sw) %>%
  mutate(acc_et = acc) %>%
  left_join(demo) %>%
  left_join(trial_info) %>%
  select(-participation_date, -birthdate) %>%
  left_join(word_fam_key) %>%
  left_join(word_fam) %>%
  left_join(response_key) %>%
  filter(drop != 1) %>%
  select(-acc)
# Recode similarity from 1s and 0s to the labels "similar"/"dissimilar", and
# rebuild ID as a factor whose levels are the IDs still present in d_et.
d_et <- d_et %>%
  mutate(
    similar = ifelse(similarity == 1, "similar", "dissimilar"),
    ID = as.factor(as.character(ID))
  ) %>%
  select(-similarity)
There are 71 eyetracker participants.
Merge TS and ET data and drop excluded participants (including pilots).
# Combine the two modalities. full_join keeps rows found in either data set,
# matching on every column name the two tables share; rows are then kept only
# when `drop` is present and not equal to 1.
d <- full_join(d_ts, d_et) %>%
  filter(drop != 1)

# Rebuild the factor so its levels are exactly the IDs present in `d`.
d$ID <- factor(as.character(d$ID))
There are 80 participants with either eyetracker or touchscreen data.
Summaries of income, race, and gender
## <15,000 15,000-25,000 25,000-40,000 40,000-60,000
## 3 1 4 7 16
## 60,000-90,000 >90,000
## 16 33
##
## 12
## African American
## 1
## African American/Caucasian
## 3
## American Indian or Alaskan / Caucasian
## 1
## Asian
## 1
## Asian & Caucasian
## 2
## Asian and Caucasian
## 1
## Asian, Caucasian
## 1
## Asian/Caucasian
## 1
## Caucasian
## 46
## Caucasian
## 3
## Native Hawaiian & Caucasian
## 3
## Pacific Islander
## 0
## SKIPPED
## 0
## NA's
## 5
## Female Male
## 1 38 41
Age in months
## vars n mean sd median trimmed mad min max range skew kurtosis
## 1 1 80 28.84 1.79 28.62 28.73 1.96 25.68 34.16 8.48 0.48 -0.34
## se
## 1 0.2
Filter missing data and unfamiliar trials.
# Analysis set for the eyetracker data: expose ET accuracy under the plain
# name `acc`, then drop trials with a missing transition count or accuracy
# score and trials whose familiarity is 0 (a missing familiarity value also
# removes the row, because the comparison is then NA).
d_et_filt <- d_et %>%
  mutate(acc = acc_et) %>%
  select(-acc_et) %>%
  filter(!(is.na(trans) | is.na(acc)), familiarity != 0)
Transitions for accurate and inaccurate
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 253 1.32 1.48 1 1.08 1.48 0 9 9 1.7 4.1 0.09
## --------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 1086 1.15 1.23 1 0.96 1.48 0 9 9 1.61 4.5 0.04
Transitions for similar and dissimilar
## group: dissimilar
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 668 1.1 1.16 1 0.96 1.48 0 9 9 1.67 5.9 0.04
## --------------------------------------------------------
## group: similar
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 671 1.26 1.39 1 1.04 1.48 0 9 9 1.61 3.61 0.05
Accuracy for similar and dissimilar
## group: dissimilar
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 668 0.86 0.34 1 0.95 0 0 1 1 -2.12 2.48 0.01
## --------------------------------------------------------
## group: similar
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 671 0.76 0.43 1 0.82 0 0 1 1 -1.21 -0.55 0.02
Plot accuracy.
Plot distribution (density) of switches by accuracy and similarity.
We want to collapse across inaccurate trials because there are few of them. Is there an effect of similarity among inaccurate trials?
# Per-participant mean number of transitions in each accuracy x similarity cell.
mss_et <- d_et_filt %>%
group_by(ID, acc, similar)%>%
summarise(trans = mean(trans))
# Welch (unpaired) two-sample t-test on those participant means, restricted to
# inaccurate trials (acc == 0): dissimilar vs. similar.
t.test(subset(mss_et, acc==0 & similar == "dissimilar")$trans,
subset(mss_et, acc==0 & similar == "similar")$trans)
##
## Welch Two Sample t-test
##
## data: subset(mss_et, acc == 0 & similar == "dissimilar")$trans and subset(mss_et, acc == 0 & similar == "similar")$trans
## t = -0.28357, df = 108.99, p-value = 0.7773
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.5517763 0.4136482
## sample estimates:
## mean of x mean of y
## 1.429592 1.498656
No difference, makes sense to collapse across inaccurate trials.
repeated-measures ANOVA with trial type (similar and accurate, dissimilar and accurate, and inaccurate) as the independent variable.
# Build the three-level within-participant factor for the ANOVA:
# inaccurate (collapsed over similarity), similar-accurate, dissimilar-accurate.

# Label accuracy as text so that spread() below produces columns named `acc`
# and `inacc` (str_replace also coerces the accuracy codes to character).
mss_et_acc <- d_et_filt
mss_et_acc$acc <- str_replace(mss_et_acc$acc, "0", "inacc")
mss_et_acc$acc <- str_replace(mss_et_acc$acc, "1", "acc")

# Per-participant mean transitions on inaccurate trials. After spread() the
# wide columns are `acc` and `inacc`; only `inacc` is kept here.
mss_et_inacc <- mss_et_acc %>%
  group_by(ID, acc) %>%
  summarise(trans = mean(trans)) %>%
  ungroup() %>%
  spread(acc, trans) %>%
  select(-acc)

# Wide table of per-participant means for accurate and inaccurate trials.
mss_et_acc <- mss_et_acc %>%
  group_by(ID, acc) %>%
  summarise(trans = mean(trans)) %>%
  ungroup() %>%
  spread(acc, trans)

# Per-participant means on accurate trials, split by similarity.
# ungroup() matters here: summarise() leaves the result grouped by ID and acc,
# and select() on a grouped table re-adds dropped grouping columns. Without
# ungroup(), `acc` can survive the final select() and then be gathered below
# as a spurious fourth trial type.
mss_et_sim <- d_et_filt %>%
  group_by(ID, acc, similar) %>%
  summarise(trans = mean(trans)) %>%
  ungroup() %>%
  filter(acc == 1) %>%
  spread(similar, trans) %>%
  mutate(similar_acc = similar) %>%
  mutate(dissimilar_acc = dissimilar) %>%
  select(-similar, -dissimilar, -acc)

# Keep participants that have a mean in all three cells, then stack to long
# form. gather() stacks whole columns, so within each trial type the rows stay
# in the same participant order; the paired t-tests that follow rely on that.
mss_et_aov <- mss_et_inacc %>%
  left_join(mss_et_sim, by = "ID") %>%
  filter(
    !is.na(similar_acc),
    !is.na(dissimilar_acc),
    !is.na(inacc)
  ) %>%
  gather("trial_type", "trans", inacc:dissimilar_acc)

# Repeated-measures ANOVA with trial_type as the within-participant factor.
ET.aov <- with(
  mss_et_aov,
  aov(trans ~ trial_type + Error(ID / trial_type))
)
summary(ET.aov)
##
## Error: ID
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 65 79.64 1.225
##
## Error: ID:trial_type
## Df Sum Sq Mean Sq F value Pr(>F)
## trial_type 2 4.0 2.0019 3.635 0.0291 *
## Residuals 130 71.6 0.5508
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Paired t-test on per-participant mean transitions: inaccurate vs.
# accurate-similar trials. paired = TRUE matches values by position, so both
# subsets must list participants in the same order.
t.test(subset(mss_et_aov, trial_type=="inacc")$trans,
subset(mss_et_aov, trial_type=="similar_acc")$trans, paired = TRUE)
##
## Paired t-test
##
## data: subset(mss_et_aov, trial_type == "inacc")$trans and subset(mss_et_aov, trial_type == "similar_acc")$trans
## t = 0.42357, df = 65, p-value = 0.6733
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2362104 0.3633749
## sample estimates:
## mean of the differences
## 0.06358225
# Paired t-test on per-participant mean transitions: inaccurate vs.
# accurate-dissimilar trials (values are paired by row position).
t.test(subset(mss_et_aov, trial_type=="inacc")$trans,
subset(mss_et_aov, trial_type=="dissimilar_acc")$trans, paired = TRUE)
##
## Paired t-test
##
## data: subset(mss_et_aov, trial_type == "inacc")$trans and subset(mss_et_aov, trial_type == "dissimilar_acc")$trans
## t = 2.3814, df = 65, p-value = 0.02018
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.05298939 0.60376866
## sample estimates:
## mean of the differences
## 0.328379
# Paired t-test on per-participant mean transitions: accurate-similar vs.
# accurate-dissimilar trials (values are paired by row position).
t.test(subset(mss_et_aov, trial_type=="similar_acc")$trans,
subset(mss_et_aov, trial_type=="dissimilar_acc")$trans, paired = TRUE)
##
## Paired t-test
##
## data: subset(mss_et_aov, trial_type == "similar_acc")$trans and subset(mss_et_aov, trial_type == "dissimilar_acc")$trans
## t = 2.868, df = 65, p-value = 0.005564
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.0804042 0.4491894
## sample estimates:
## mean of the differences
## 0.2647968
Do individual differences in the transition differential (for similar vs. dissimilar) correlate with accuracy?
# Make sure accuracy is numeric 0/1 so it can be averaged.
d_et_filt[["acc"]] <- as.numeric(as.character(d_et_filt[["acc"]]))

# Per-participant summaries, computed with mutate() so every trial row carries
# its participant's values (the table stays at trial level and grouped by ID).
# A mean over an empty selection is NaN, e.g. for a participant with no
# inaccurate trials.
d_et_filt_id <- d_et_filt %>%
  group_by(ID) %>%
  mutate(
    trans_acc = mean(trans[acc == 1]),
    trans_inacc = mean(trans[acc == 0]),
    trans_sim = mean(trans[similar == "similar"]),
    trans_dissim = mean(trans[similar == "dissimilar"]),
    # Differentials: inaccurate minus accurate, and similar minus dissimilar.
    trans_acc_diff = trans_inacc - trans_acc,
    trans_sim_diff = trans_sim - trans_dissim,
    # Proportion of accurate trials for the participant.
    avg_acc = mean(acc)
  )
Correlate differentials with accuracy.
## trans_acc_diff trans_sim_diff
## trans_acc_diff
## trans_sim_diff 0.26*
## avg_acc 0.10 -0.05
Accuracy was higher for dissimilar items.
Repeated-measures ANOVA shows that there is a difference between means for similar-accurate, dissimilar-accurate, and inaccurate.
T-test summary:
inaccurate - similar-accurate
inaccurate - dissimilar-accurate*
similar-accurate - dissimilar-accurate**
Individual differences in transition differentials (i.e., transitions for similar - dissimilar; transitions for inacc - acc) were correlated with each other but not with accuracy. This is expected as accuracy and similarity are correlated.
Filter fast and slow RTs, missing data, unfamiliar trials.
# Trimming bounds: mean +/- 3 SD of log RT over all touchscreen trials in d_ts.
# The log RTs are computed once. na.rm = TRUE keeps the bounds defined if any
# RT is missing; without it a single NA would turn both bounds into NA and the
# filter below would drop every row.
log_rt_all <- log(d_ts$RT)
log_rt_mean <- mean(log_rt_all, na.rm = TRUE)
log_rt_sd <- sd(log_rt_all, na.rm = TRUE)
top_bound <- log_rt_mean + 3 * log_rt_sd
bottom_bound <- log_rt_mean - 3 * log_rt_sd

# Analysis set for the touchscreen data: keep trials whose log RT lies strictly
# inside the bounds, with a non-missing accuracy score and a familiarity other
# than 0 (rows where any of these comparisons is NA are dropped as well).
d_filt_rt <- d_ts %>%
  filter(
    log(RT) < top_bound,
    log(RT) > bottom_bound,
    !is.na(acc),
    familiarity != 0
  )

# Same trials with RT on the log scale.
d_filt_rt_log <- d_filt_rt %>%
  mutate(RT = log(RT))
RTs for accurate and inaccurate
## group: 0
## vars n mean sd median trimmed mad min max range skew
## 1 1 312 4459 3536.01 3308 3817.99 2257.26 698 22075 21377 2.15
## kurtosis se
## 1 5.77 200.19
## --------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew
## 1 1 1071 3702.75 2404.54 3105 3296.17 1405.5 577 22183 21606 2.99
## kurtosis se
## 1 13.07 73.47
RTs for similar and dissimilar
## group: dissimilar
## vars n mean sd median trimmed mad min max range skew
## 1 1 691 3783.56 2732.25 3052 3274.82 1482.6 577 20608 20031 2.83
## kurtosis se
## 1 10.24 103.94
## --------------------------------------------------------
## group: similar
## vars n mean sd median trimmed mad min max range skew
## 1 1 692 3963.03 2703.68 3229.5 3502.1 1674.6 718 22183 21465 2.79
## kurtosis se
## 1 11.65 102.78
Accuracy for similar and dissimilar
## group: dissimilar
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 691 0.84 0.36 1 0.93 0 0 1 1 -1.87 1.51 0.01
## --------------------------------------------------------
## group: similar
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 692 0.71 0.46 1 0.76 0 0 1 1 -0.91 -1.18 0.02
Plot accuracy for touchscreen.
We want to collapse across inaccurate trials because there are few of them. Is there an effect of similarity among inaccurate trials?
# Per-participant mean RT in each accuracy x similarity cell.
mss_ts <- d_filt_rt %>%
group_by(ID, acc, similar)%>%
summarise(RT = mean(RT))
# Welch (unpaired) two-sample t-test on those participant means, restricted to
# inaccurate trials (acc == 0): dissimilar vs. similar.
t.test(subset(mss_ts, acc==0 & similar == "dissimilar")$RT,
subset(mss_ts, acc==0 & similar == "similar")$RT)
##
## Welch Two Sample t-test
##
## data: subset(mss_ts, acc == 0 & similar == "dissimilar")$RT and subset(mss_ts, acc == 0 & similar == "similar")$RT
## t = -1.0556, df = 105.12, p-value = 0.2936
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1470.1487 448.6413
## sample estimates:
## mean of x mean of y
## 4253.841 4764.595
No difference, makes sense to collapse across inaccurate trials.
repeated-measures ANOVA with trial type (similar and accurate, dissimilar and accurate, and inaccurate) as the independent variable.
# Build the three-level within-participant factor for the ANOVA:
# inaccurate (collapsed over similarity), similar-accurate, dissimilar-accurate.

# Label accuracy as text so that spread() below produces columns named `acc`
# and `inacc` (str_replace also coerces the accuracy codes to character).
mss_ts_acc <- d_filt_rt
mss_ts_acc$acc <- str_replace(mss_ts_acc$acc, "0", "inacc")
mss_ts_acc$acc <- str_replace(mss_ts_acc$acc, "1", "acc")

# Per-participant mean RT on inaccurate trials. After spread() the wide
# columns are `acc` and `inacc`; only `inacc` is kept here.
mss_ts_inacc <- mss_ts_acc %>%
  group_by(ID, acc) %>%
  summarise(RT = mean(RT)) %>%
  ungroup() %>%
  spread(acc, RT) %>%
  select(-acc)

# Wide table of per-participant mean RT for accurate and inaccurate trials.
mss_ts_acc <- mss_ts_acc %>%
  group_by(ID, acc) %>%
  summarise(RT = mean(RT)) %>%
  ungroup() %>%
  spread(acc, RT)

# Per-participant mean RT on accurate trials, split by similarity.
# ungroup() matters here: summarise() leaves the result grouped by ID and acc,
# and select() on a grouped table re-adds dropped grouping columns. Without
# ungroup(), `acc` can survive the final select() and then be gathered below
# as a spurious fourth trial type.
mss_ts_sim <- d_filt_rt %>%
  group_by(ID, acc, similar) %>%
  summarise(RT = mean(RT)) %>%
  ungroup() %>%
  filter(acc == 1) %>%
  spread(similar, RT) %>%
  mutate(similar_acc = similar) %>%
  mutate(dissimilar_acc = dissimilar) %>%
  select(-similar, -dissimilar, -acc)

# Keep participants that have a mean in all three cells, then stack to long
# form. gather() stacks whole columns, so within each trial type the rows stay
# in the same participant order; the paired t-tests that follow rely on that.
mss_ts_aov <- mss_ts_inacc %>%
  left_join(mss_ts_sim, by = "ID") %>%
  filter(
    !is.na(similar_acc),
    !is.na(dissimilar_acc),
    !is.na(inacc)
  ) %>%
  gather("trial_type", "RT", inacc:dissimilar_acc)

# Repeated-measures ANOVA with trial_type as the within-participant factor.
TS.aov <- with(
  mss_ts_aov,
  aov(RT ~ trial_type + Error(ID / trial_type))
)
summary(TS.aov)
##
## Error: ID
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 72 429285964 5962305
##
## Error: ID:trial_type
## Df Sum Sq Mean Sq F value Pr(>F)
## trial_type 2 50080294 25040147 16.36 3.97e-07 ***
## Residuals 144 220461786 1530985
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Paired t-test on per-participant mean RT: inaccurate vs. accurate-similar
# trials. paired = TRUE matches values by position, so both subsets must list
# participants in the same order.
t.test(subset(mss_ts_aov, trial_type=="inacc")$RT,
subset(mss_ts_aov, trial_type=="similar_acc")$RT, paired = TRUE)
##
## Paired t-test
##
## data: subset(mss_ts_aov, trial_type == "inacc")$RT and subset(mss_ts_aov, trial_type == "similar_acc")$RT
## t = 4.9725, df = 72, p-value = 4.332e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 643.2736 1504.1750
## sample estimates:
## mean of the differences
## 1073.724
# Paired t-test on per-participant mean RT: inaccurate vs. accurate-dissimilar
# trials (values are paired by row position).
t.test(subset(mss_ts_aov, trial_type=="inacc")$RT,
subset(mss_ts_aov, trial_type=="dissimilar_acc")$RT, paired = TRUE)
##
## Paired t-test
##
## data: subset(mss_ts_aov, trial_type == "inacc")$RT and subset(mss_ts_aov, trial_type == "dissimilar_acc")$RT
## t = 3.976, df = 72, p-value = 0.0001649
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 469.8532 1414.7576
## sample estimates:
## mean of the differences
## 942.3054
# Paired t-test on per-participant mean RT: accurate-similar vs.
# accurate-dissimilar trials (values are paired by row position).
t.test(subset(mss_ts_aov, trial_type=="similar_acc")$RT,
subset(mss_ts_aov, trial_type=="dissimilar_acc")$RT, paired = TRUE)
##
## Paired t-test
##
## data: subset(mss_ts_aov, trial_type == "similar_acc")$RT and subset(mss_ts_aov, trial_type == "dissimilar_acc")$RT
## t = -0.86582, df = 72, p-value = 0.3895
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -433.9976 171.1598
## sample estimates:
## mean of the differences
## -131.4189
Do individual differences in RT differential (for similar vs. dissimilar) correlate with accuracy?
# Make sure accuracy is numeric 0/1 so it can be averaged.
d_filt_rt[["acc"]] <- as.numeric(as.character(d_filt_rt[["acc"]]))

# Per-participant summaries, computed with mutate() so every trial row carries
# its participant's values (the table stays at trial level and grouped by ID).
# A mean over an empty selection is NaN, e.g. for a participant with no
# inaccurate trials.
d_filt_rt_id <- d_filt_rt %>%
  group_by(ID) %>%
  mutate(
    rt_acc = mean(RT[acc == 1]),
    rt_inacc = mean(RT[acc == 0]),
    rt_sim = mean(RT[similar == "similar"]),
    rt_dissim = mean(RT[similar == "dissimilar"]),
    # Differentials: inaccurate minus accurate, and similar minus dissimilar.
    rt_acc_diff = rt_inacc - rt_acc,
    rt_sim_diff = rt_sim - rt_dissim,
    # Proportion of accurate trials for the participant.
    avg_acc = mean(acc)
  )
Correlate differentials with accuracy.
## rt_acc_diff rt_sim_diff
## rt_acc_diff
## rt_sim_diff 0.29*
## avg_acc -0.06 0.15
Individual differences in RT differentials (i.e., RT for similar - dissimilar; RT for inacc - acc) were correlated with each other but not with accuracy. This is expected as accuracy and similarity are correlated.
Accuracy was higher for dissimilar items.
Repeated-measures ANOVA shows that there is a difference between means for similar-accurate, dissimilar-accurate, and inaccurate.
T-test summary:
inaccurate - similar-accurate**
inaccurate - dissimilar-accurate**
similar-accurate - dissimilar-accurate
Individual differences in RT differentials (i.e., RT for similar - dissimilar; RT for inacc - acc) were correlated with each other but not with accuracy. This is expected as accuracy and similarity are correlated.