There are 4 different types of center fixations in this dataset:
##
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
##
## scale
##
## Attaching package: 'kmr'
## The following object is masked from 'package:langcog':
##
## sem
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:GGally':
##
## nasa
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
##
## extract
Eye movement data
df <- read.csv("../data/3_final_merged_data/speed-acc-ss-df.csv",
check.names=F, stringsAsFactors=F)
df %<>% mutate(stimuli = ifelse(stimuli == "V1" | stimuli == "V2", "ASL", stimuli),
stimuli = factor(stimuli, levels = c("ASL", "Face", "Trio", "Bull")))
How many participants in each stimuli set and language modality? Age_code_m is a hacky way to track the different age groups that saw the Trio stimuli.
df %>%
select(Sub.Num, language_modality, age_code, stimuli) %>%
unique() %>%
group_by(language_modality, age_code, stimuli) %>%
summarise(count = n()) %>%
knitr::kable()
| language_modality | age_code | stimuli | count |
|---|---|---|---|
| ASL | adult | ASL | 16 |
| ASL | child | ASL | 27 |
| English | adult | Bull | 6 |
| English | child | Face | 24 |
| English | child | Trio | 40 |
| English | child | Bull | 16 |
What are the age distributions across language modalities?
df %>%
filter(age_code == "child") %>%
select(Sub.Num, language_modality, age_code, stimuli, age_years) %>%
unique() %>%
ggplot(aes(age_years, fill = language_modality)) +
geom_histogram() +
scale_fill_solarized() +
facet_grid(.~language_modality) +
guides(fill = F)
ASL group is a spread of ages; whereas the spoken language kids are tightly clustered around 2 and 3 years of age.
First we need to get proportion correct for each participant in each age group and language modality.
ss_prop <- df %>%
group_by(Sub.Num, age_code, Months, language_modality, stimuli) %>%
filter(trial_type != "no_shift") %>%
summarise(mean_correct = mean(correct))
Plot.
ms <- ss_prop %>%
filter(age_code == "child") %>%
group_by(language_modality, age_code, stimuli) %>%
multi_boot_standard(column = "mean_correct")
ggplot(aes(x = stimuli, y = mean, fill = language_modality), data = ms) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), position = position_dodge(width = 0.9)) +
geom_hline(yintercept = 0.5, lty = "dashed") +
scale_fill_solarized()
When we plot the average proprotion of correct shifting, we see that the ASL group was the most accurate, followed by Face, and then Trio and Bull.
We can model proportion correct as a function of stimuli set.
m1_lm <- lm(mean_correct ~ stimuli, data = filter(ss_prop, age_code == "child"))
knitr::kable(summary(m1_lm)$coef, digits = 3)
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 0.712 | 0.029 | 24.498 | 0.000 |
| stimuliFace | -0.131 | 0.042 | -3.093 | 0.003 |
| stimuliTrio | -0.205 | 0.038 | -5.452 | 0.000 |
| stimuliBull | -0.224 | 0.048 | -4.709 | 0.000 |
All groups were worse than the ASL kids.
TODO: Model 2 tests specific contrasts.
Get medians and CIs for correct/inccorect shifts for each stimuli set and language modality
ms_rt <- df %>%
filter(age_code == "child", RT <= 2000) %>%
group_by(age_code, language_modality, stimuli, correct) %>%
multi_boot_standard(column = "RT", na.rm = T, empirical_function = "median")
Plot the summary measures.
ggplot(aes(x = as.factor(correct), y = median, fill = language_modality), data = ms_rt) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), position = position_dodge(width = 0.9)) +
facet_grid(age_code~stimuli) +
scale_fill_solarized()
We see some evidence of speed accuracy tradeoffs in the ASL, Face, and Bull conditions, but not in the Trio condition. Why do medians look so much better than means?
Note the RT cutoff for the long tail. When you cutoff more of the tail, the difference between median correct and inccorrect shifting increases. It’s interesting to think about whether RTs in these long tails are meaningful.
med_all <- df %>%
filter(RT <= 2000) %>% ## Here you can play with cutoffs to see how the RT distrbutions change shape
group_by(age_code, language_modality, stimuli, correct) %>%
mutate(log_RT = log(RT)) %>%
multi_boot_standard(column = "RT", na.rm = T, empirical_function = "median")
ggplot(aes(x = RT, fill = as.factor(correct)), data = filter(df, age_code == "child", RT <= 2000)) +
geom_density(alpha = 0.5) +
facet_grid(stimuli~age_code) +
geom_vline(aes(xintercept = median, color = as.factor(correct)), size = 1,
data = filter(med_all, age_code == "child")) +
labs(fill = "Correct") +
guides(color = F)
Again we see evidence of a speed-acc tradeoff for ASL, Face, and maybe Bull. But the Trio distributions are right on top of each other.
There were data from both 26 and 36 month olds on the Trio stimuli set. Here we disaggregate and plot the RT dists separately.
med_trio <- df %>%
filter(stimuli == "Trio") %>%
group_by(experiment, language_modality, stimuli, correct) %>%
multi_boot_standard(column = "RT", na.rm = T, empirical_function = "median")
ggplot(aes(x = RT, fill = as.factor(correct)),
data = filter(df, age_code == "child", stimuli == "Trio")) +
geom_density(alpha = 0.5) +
facet_grid(experiment~.) +
geom_vline(aes(xintercept = median, color = as.factor(correct)), size = 1,
data = filter(med_trio, stimuli == "Trio")) +
labs(fill = "Correct") +
guides(color = F)
Kids aren’t getting better at this between 2 and 3?
We can compare the adult data for ASL and Bull.
ms_adults <- ss_prop %>%
filter(age_code == "adult") %>%
group_by(language_modality, stimuli) %>%
multi_boot_standard(column = "mean_correct")
ggplot(aes(x = stimuli, y = mean, fill = language_modality), data = ms_adults) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), position = position_dodge(width = 0.9)) +
geom_hline(yintercept = 0.5, lty = "dashed") +
scale_fill_solarized()
Plot distributions of correct and incorrect RTs
med_adults <- df %>%
filter(RT <= 2000) %>% ## Here you can play with cutoffs to see how the RT distrbutions change shape
filter(age_code == "adult") %>%
group_by(experiment, language_modality, stimuli, correct) %>%
multi_boot_standard(column = "RT", na.rm = T, empirical_function = "median")
ggplot(aes(x = RT, fill = as.factor(correct)),
data = filter(df, age_code == "adult", RT <= 2000)) +
geom_density(alpha = 0.5) +
facet_grid(experiment~.) +
geom_vline(aes(xintercept = median, color = as.factor(correct)), size = 1,
data = med_adults) +
labs(fill = "Correct") +
guides(color = F)
Adult data makes sense. For Bull, we see that incorrect shifts tend to occur sooner. We can also see that there are not very many incorrect shifts for the ASL participants, so hard to see any evidence of speed-accuracy tradeoffs when > 90% accurate.
Best way to model this?
m1 <- lme4::glmer(correct ~ log2(RT) * stimuli + (1|Sub.Num),
data = filter(df, age_code == "child", RT <= 2000),
family = "binomial",
nAGQ=0)
knitr::kable(summary(m1)$coef, digits = 3)
| Estimate | Std. Error | z value | Pr(>|z|) | |
|---|---|---|---|---|
| (Intercept) | -4.908 | 1.555 | -3.156 | 0.002 |
| log2(RT) | 0.603 | 0.157 | 3.836 | 0.000 |
| stimuliFace | 1.517 | 1.827 | 0.830 | 0.406 |
| stimuliTrio | 4.159 | 1.645 | 2.527 | 0.011 |
| stimuliBull | 4.005 | 1.769 | 2.264 | 0.024 |
| log2(RT):stimuliFace | -0.195 | 0.188 | -1.042 | 0.297 |
| log2(RT):stimuliTrio | -0.520 | 0.170 | -3.065 | 0.002 |
| log2(RT):stimuliBull | -0.498 | 0.187 | -2.659 | 0.008 |
RT does predict Accuracy. Longer RTs mean you are more likely to be correct. Depending on the long tail cutoff that you use of the RT distributions, you see different interactions. At cutoff of 3000ms, the interaction between RT and Trio is marginal. At an earlier cutoff of 2500ms, the interaction is robust. At a cutoff of 2000ms, the two-way interactions between RT and both Trio and Bull stimuli are significant.
Workflow taken from Nordmeyer et al. (2016). The goal is to estimate parameters separately for each participant and then we aggregate across participants to get means & confidence intervals on the parameters.
The parameters of the drift drift diffusion model are:
# get the data we care about and format for Rwiener functions
# columns need to be named "q" for RT and "resp" for response
d <- df %>%
filter(age_code == "child", trial_type != "no_shift", is.na(RT_sec) == F, RT_sec > .1) %>%
select(Sub.Num, stimuli, RT_sec, correct) %>%
mutate(resp = factor(correct),
resp = plyr::revalue(resp, c("0" = "lower", "1" = "upper")),
resp = relevel(resp, "upper")) %>%
rename(q = RT_sec)
# weiner plot showing distribution of correct and incorrect responses
# wiener_plot(select(d, q, resp))
sub.pars <- data.frame(Separation = numeric(),
Non.Decision = numeric(),
Bias = numeric(),
Drift = numeric(),
Condition = character(),
Sub.Num = character(),
stringsAsFactors = F)
#because RWiener is finicky:
d$resp <- as.character(d$resp)
conditions <- unique(as.character(d$stimuli))
subs <- unique(as.character(d$Sub.Num))
for (j in 1:length(subs)) {
sid <- as.character(subs[j])
dat <- as.data.frame(filter(d, Sub.Num == sid))
condition_type <- unique(as.character(dat$stimuli))
# fit ddm for each participant
opt <- optim(c(1, .1, .1, 1), wiener_deviance,
dat=select(dat, c(q, resp)), method="Nelder-Mead")
pars <- c(opt$par, condition_type, sid)
sub.pars[j,] <- pars
}
This plot shows the mean parameter values & 95% C.I.s for each stimuli type
sub.pars$Separation <- as.numeric(sub.pars$Separation)
sub.pars$Non.Decision <- as.numeric(sub.pars$Non.Decision)
sub.pars$Bias <- as.numeric(sub.pars$Bias)
sub.pars$Drift <- as.numeric(sub.pars$Drift)
#### Why remove extreme param values?
sub.pars <- sub.pars %>%
group_by(Condition) %>%
filter(Separation < mean(Separation) + 3 * sd(Separation),
Separation > mean(Separation) - 3 * sd(Separation)) %>%
filter(Non.Decision < mean(Non.Decision) + 3 * sd(Non.Decision),
Non.Decision > mean(Non.Decision) - 3 * sd(Non.Decision)) %>%
filter(Bias < mean(Bias) + 3 * sd(Bias),
Bias > mean(Bias) - 3 * sd(Bias)) %>%
filter(Drift < mean(Drift) + 3 * sd(Drift),
Drift > mean(Drift) - 3 * sd(Drift)) %>%
ungroup() %>%
na.omit()
sub.pars %<>% gather(Param, Value, Separation:Drift)
Plot distributions of parameters across conditions
ggplot(aes(x = Value, fill = Condition), data = filter(sub.pars, Param %in% c("Drift", "Separation"))) +
geom_density(alpha = 0.7) +
facet_grid(.~Param, scales = "free") +
scale_fill_solarized()
Get means and CIs for parameter values and plot
sub.pars.ms <- sub.pars %>%
group_by(Condition, Param) %>%
multi_boot_standard(column = "Value", empirical_function = "mean")
ggplot(aes(x = Condition, y = mean, fill = Condition), data = sub.pars.ms) +
geom_bar(stat = "identity") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper)) +
facet_grid(.~Param) +
scale_fill_solarized()
We can also take these parameters and visualize the actual diffusion process for each condition:
Pink = control trials and green = target trials (I removed the legend to make more space)
Can we predict parameter values based on condition: ASL, Bull, Face, Trio