# Path to the Experiment 1 data file, resolved with here().
EXP1_DATA_PATH <- here("data/1_exp_data.csv")

# Read the CSV and keep only the columns listed below.
exp1 <- select(
  read_csv(EXP1_DATA_PATH),
  sub_id, age_months, gender, english,
  exclude2, prop_correct_vocab, trial_type, correct,
  start_time, end_time, resp_start_time, object1
)
# Filter to kids in the target age range (24-48 months, inclusive) who
# completed all trials, i.e. who have exactly NUM_TRIALS rows in exp1.
NUM_TRIALS <- 19

# Subjects whose row count equals NUM_TRIALS.
good_counts <- exp1 %>%
  count(sub_id) %>%
  filter(n == NUM_TRIALS)

exp1_complete <- exp1 %>%
  filter(age_months >= 24 & age_months <= 48,
         sub_id %in% good_counts$sub_id) %>%
  # Two-level age factor: 36 months and up is "3-yo", everything else "2-yo".
  mutate(age_bin = as.factor(case_when(age_months >= 36 ~ "3-yo",
                                       TRUE ~ "2-yo")))

# Number of retained subjects per age bin (one row per subject, then count).
# exp1_complete is already restricted to good_counts$sub_id above, so the
# subject filter is not repeated here.
total_sample_size_by_age <- exp1_complete %>%
  distinct(sub_id, .keep_all = TRUE) %>%
  count(age_bin)
# Control criterion: proportion correct on C-NF trials >= .5
# (the filter is inclusive, so a subject at exactly .5 is kept).
good_controls <- exp1_complete %>%
  filter(trial_type == "C-NF") %>%
  group_by(sub_id) %>%
  summarize(prop_correct = sum(correct) / n()) %>%
  filter(prop_correct >= .5)

# Language criterion: english input >= 75, checked on one row per subject.
good_language <- exp1_complete %>%
  distinct(sub_id, .keep_all = TRUE) %>%
  filter(english >= 75)
# Final sample: subjects that pass both the control-trial criterion and the
# language criterion (intersection of the two id vectors).
final_sub_ids <- list(good_controls$sub_id,
                      good_language$sub_id) %>%
  reduce(intersect)

final_sample <- exp1_complete %>%
  filter(sub_id %in% final_sub_ids)

# One row per subject with age and vocabulary groupings:
#   age_group   - age_months cut at 24/30/36/42/48 (lowest bound included)
#   vocab_group - quartile (ntile, 4 bins) of prop_correct_vocab
demographics <- final_sample %>%
  distinct(sub_id, .keep_all = TRUE) %>%
  select(sub_id, age_months, prop_correct_vocab) %>%
  mutate(age_group = cut(age_months,
                         breaks = c(24, 30, 36, 42, 48),
                         include.lowest = TRUE),
         vocab_group = ntile(prop_correct_vocab, 4))
There are four trials of each of the four trial types (C-NF, NF, C-NN, NN). This is a little silly because the “halves” only have two trials each.
# Split-half scores per subject and trial type: number each subject's trials
# within a trial type in order of resp_start_time, label them odd/even, and
# average `correct` within each half. The result has one row per
# subject x trial type with `odd` and `even` columns plus demographics.
mss_half <- final_sample %>%
  group_by(sub_id, trial_type) %>%
  arrange(resp_start_time) %>%
  mutate(trial_type_num = row_number()) %>%
  filter(trial_type != "FF") %>% # there are only 3 FFs
  mutate(half = case_when(trial_type_num %in% c(1, 3) ~ "odd",
                          trial_type_num %in% c(2, 4) ~ "even")) %>%
  group_by(sub_id, trial_type, half) %>%
  summarize(prop_correct = mean(correct)) %>%
  spread(half, prop_correct) %>%
  # Join key spelled out instead of inferred from shared column names.
  # demographics already contains age_group (same cut() breaks), so it is
  # not recomputed after the join.
  left_join(demographics, by = "sub_id")
# Odd-half vs. even-half accuracy, one panel per trial type, with a linear
# (lm) fit.
ggplot(mss_half, aes(x = odd, y = even)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~trial_type)

# Split-half correlation (cor.test on odd vs. even) for each trial type,
# tidied to one row per trial type and rendered as a table.
mss_half %>%
  group_by(trial_type) %>%
  nest() %>%
  mutate(temp = map(data, ~cor.test(.x$odd, .x$even) %>% tidy())) %>%
  select(-data) %>%
  # Name the list-column explicitly: unnest() with no columns is deprecated
  # in tidyr >= 1.0 and would expand every list-column it finds.
  unnest(temp) %>%
  kable()
| trial_type | estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|
| C-NF | 0.2044752 | 2.575352 | 0.0109675 | 152 | 0.0478624 | 0.3512756 | Pearson’s product-moment correlation | two.sided |
| C-NN | 0.2998126 | 3.874576 | 0.0001584 | 152 | 0.1487032 | 0.4372400 | Pearson’s product-moment correlation | two.sided |
| NF | 0.2349277 | 2.979778 | 0.0033594 | 152 | 0.0797294 | 0.3790059 | Pearson’s product-moment correlation | two.sided |
| NN | 0.3273565 | 4.271265 | 0.0000341 | 152 | 0.1784342 | 0.4616170 | Pearson’s product-moment correlation | two.sided |
# Same odd/even scatter as before, with points and lm fits coloured by
# age group.
ggplot(mss_half, aes(x = odd, y = even, color = age_group)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~trial_type)

# Split-half correlation for every trial type x age group cell.
mss_half_age_corr <- mss_half %>%
  group_by(trial_type, age_group) %>%
  nest() %>%
  mutate(temp = map(data, ~cor.test(.x$odd, .x$even) %>% tidy())) %>%
  select(-data) %>%
  # Explicit column: unnest() with no columns is deprecated in tidyr >= 1.0.
  unnest(temp) %>%
  arrange(trial_type)

# Correlation estimate with its confidence interval per age group, one panel
# per trial type; the dashed line marks a correlation of zero.
ggplot(mss_half_age_corr,
       aes(x = age_group, y = estimate, color = age_group)) +
  ylab("split half correlation") +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
  facet_wrap(~trial_type) +
  geom_hline(aes(yintercept = 0), linetype = 2)
Collapsing the trial types into ME (NF, NN) and control (C-NF, C-NN) trials so that there are more trials per type (8 each).
# Split-half scores after collapsing trial types: NF/NN become "ME" and
# C-NF/C-NN become "control" (FF trials are dropped first). Trials are
# numbered per subject and collapsed type in order of resp_start_time,
# split into odd/even halves, and `correct` is averaged within each half.
mss_half <- final_sample %>%
  filter(trial_type != "FF") %>%
  mutate(trial_type2 = case_when(
    trial_type %in% c("NF", "NN") ~ "ME",
    trial_type %in% c("C-NF", "C-NN") ~ "control"
  )) %>%
  group_by(sub_id, trial_type2) %>%
  arrange(resp_start_time) %>%
  mutate(trial_type_num = row_number(),
         half = case_when(trial_type_num %in% c(1, 3, 5, 7) ~ "odd",
                          trial_type_num %in% c(2, 4, 6, 8) ~ "even")) %>%
  group_by(sub_id, trial_type2, half) %>%
  summarize(prop_correct = mean(correct)) %>%
  spread(half, prop_correct) %>%
  # Join key spelled out instead of inferred from shared column names.
  left_join(demographics, by = "sub_id")
# Odd-half vs. even-half accuracy for the collapsed trial types, with a
# linear (lm) fit per panel.
ggplot(mss_half, aes(x = odd, y = even)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~trial_type2)

# Split-half correlation (cor.test on odd vs. even) for each collapsed
# trial type, rendered as a table.
mss_half %>%
  group_by(trial_type2) %>%
  nest() %>%
  mutate(temp = map(data, ~cor.test(.x$odd, .x$even) %>% tidy())) %>%
  select(-data) %>%
  # Name the list-column explicitly: unnest() with no columns is deprecated
  # in tidyr >= 1.0 and would expand every list-column it finds.
  unnest(temp) %>%
  kable()
| trial_type2 | estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|---|
| control | 0.4045906 | 5.454499 | 2e-07 | 152 | 0.2632771 | 0.5289064 | Pearson’s product-moment correlation | two.sided |
| ME | 0.4557494 | 6.312554 | 0e+00 | 152 | 0.3207057 | 0.5726337 | Pearson’s product-moment correlation | two.sided |
# Odd/even scatter for the collapsed trial types, coloured by age group.
ggplot(mss_half, aes(x = odd, y = even, color = age_group)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~trial_type2)

# Split-half correlation for every collapsed trial type x age group cell.
mss_half_age_corr <- mss_half %>%
  group_by(trial_type2, age_group) %>%
  nest() %>%
  mutate(temp = map(data, ~cor.test(.x$odd, .x$even) %>% tidy())) %>%
  select(-data) %>%
  # Explicit column: unnest() with no columns is deprecated in tidyr >= 1.0.
  unnest(temp) %>%
  arrange(trial_type2)

# Correlation estimate with its confidence interval per age group, one panel
# per collapsed trial type; the dashed line marks a correlation of zero.
ggplot(mss_half_age_corr,
       aes(x = age_group, y = estimate, color = age_group)) +
  ylab("split half correlation") +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
  facet_wrap(~trial_type2) +
  geom_hline(aes(yintercept = 0), linetype = 2)
Here I’m calculating reliability using Cronbach’s alpha, with the control and ME trials treated separately.
# Wide item-level table for the reliability analysis: one row per subject
# and collapsed trial type (ME / control), one column per trial position
# (named `1`, `2`, ...) holding `correct`, plus the demographics columns.
# Trial position is the order of resp_start_time within subject x type.
final_sample_wide <- final_sample %>%
  filter(trial_type != "FF") %>%
  mutate(trial_type2 = case_when(
    trial_type %in% c("NF", "NN") ~ "ME",
    trial_type %in% c("C-NF", "C-NN") ~ "control"
  )) %>%
  group_by(sub_id, trial_type2) %>%
  arrange(resp_start_time) %>%
  mutate(trial_type_num = row_number()) %>%
  select(sub_id, trial_type2, trial_type_num, correct) %>%
  spread(trial_type_num, correct) %>%
  # Join key spelled out instead of inferred from shared column names.
  left_join(demographics, by = "sub_id")
# Coerce `x` to a data frame and drop every column that is entirely NA.
#
# x: anything as.data.frame() accepts (here, a cbind() of item columns).
# Returns a data frame containing only the columns with at least one
# non-NA value. The result is always a data frame, even when a single
# column (or none) survives.
get.frame <- function(x) {
  x <- as.data.frame(x)
  # vapply over columns guarantees a logical vector (also for zero columns)
  # and avoids apply()'s round trip through a matrix.
  keep <- vapply(x, function(col) !all(is.na(col)), logical(1))
  # drop = FALSE: without it a single surviving column collapses to a bare
  # vector, which downstream code expecting a data frame cannot handle.
  x[, keep, drop = FALSE]
}
# Cronbach's alpha per age group and collapsed trial type, computed with
# psy::cronbach() on the eight trial-position columns; get.frame() removes
# columns that are entirely NA within a cell before the call.
#   n               - sample size reported by psy::cronbach()
#   chronbach_alpha - the alpha estimate (column name kept as-is because
#                     the plot further down refers to it)
# `add = FALSE` was dropped from group_by(): replacing the existing groups
# is already the default, and `add` is deprecated in dplyr >= 1.0 (`.add`).
cronbach_values_age <- final_sample_wide %>%
  group_by(age_group, trial_type2) %>%
  summarise(
    n = psy::cronbach(get.frame(cbind(`1`, `2`, `3`, `4`,
                                      `5`, `6`, `7`, `8`)))$sample.size,
    chronbach_alpha = psy::cronbach(get.frame(cbind(`1`, `2`, `3`, `4`,
                                                    `5`, `6`, `7`, `8`)))$alpha
  )

kable(cronbach_values_age)
| age_group | trial_type2 | n | chronbach_alpha |
|---|---|---|---|
| [24,30] | control | 34 | 0.3999217 |
| [24,30] | ME | 34 | 0.5379610 |
| (30,36] | control | 35 | 0.6407373 |
| (30,36] | ME | 35 | 0.5740142 |
| (36,42] | control | 44 | 0.3567399 |
| (36,42] | ME | 44 | 0.5629384 |
| (42,48] | control | 41 | 0.6477973 |
| (42,48] | ME | 41 | 0.6038767 |
# Alpha by age group: one point-and-line series per collapsed trial type.
ggplot(
  cronbach_values_age,
  aes(age_group, chronbach_alpha, color = trial_type2, group = trial_type2)
) +
  geom_point() +
  geom_line()
# Cronbach's alpha per vocabulary quartile and collapsed trial type,
# computed with psy::cronbach() on the eight trial-position columns;
# get.frame() removes columns that are entirely NA within a cell first.
#   n               - sample size reported by psy::cronbach()
#   chronbach_alpha - the alpha estimate (column name kept as-is because
#                     the plot further down refers to it)
# `add = FALSE` was dropped from group_by(): replacing the existing groups
# is already the default, and `add` is deprecated in dplyr >= 1.0 (`.add`).
cronbach_values_vocab <- final_sample_wide %>%
  group_by(vocab_group, trial_type2) %>%
  summarise(
    n = psy::cronbach(get.frame(cbind(`1`, `2`, `3`, `4`,
                                      `5`, `6`, `7`, `8`)))$sample.size,
    chronbach_alpha = psy::cronbach(get.frame(cbind(`1`, `2`, `3`, `4`,
                                                    `5`, `6`, `7`, `8`)))$alpha
  )

kable(cronbach_values_vocab)
| vocab_group | trial_type2 | n | chronbach_alpha |
|---|---|---|---|
| 1 | control | 39 | 0.2680976 |
| 1 | ME | 39 | 0.5156850 |
| 2 | control | 38 | 0.6813402 |
| 2 | ME | 38 | 0.4763237 |
| 3 | control | 39 | 0.4694034 |
| 3 | ME | 39 | 0.3991798 |
| 4 | control | 38 | 0.4426617 |
| 4 | ME | 38 | 0.5866107 |
# Alpha by vocabulary quartile: one point-and-line series per collapsed
# trial type.
ggplot(
  cronbach_values_vocab,
  aes(vocab_group, chronbach_alpha, color = trial_type2, group = trial_type2)
) +
  geom_point() +
  geom_line()
(higher vocab group = more correct)