# Read the Experiment 1 data file and keep only the columns listed below.
EXP1_DATA_PATH <- here("data/1_exp_data.csv")
exp1 <- select(
  read_csv(EXP1_DATA_PATH),
  sub_id, age_months, gender, english,
  exclude2, prop_correct_vocab, trial_type, correct,
  start_time, end_time, resp_start_time, object1
)
# Keep children in the target age range (24-48 months, inclusive) who have
# exactly NUM_TRIALS rows in the raw data.
NUM_TRIALS <- 19
good_counts <- exp1 %>%
  count(sub_id) %>%
  filter(n == NUM_TRIALS)

exp1_complete <- exp1 %>%
  filter(age_months >= 24 & age_months <= 48,
         sub_id %in% good_counts$sub_id) %>%
  # two-level age bin: 36 months and older is "3-yo", everyone else "2-yo"
  mutate(age_bin = as.factor(case_when(age_months >= 36 ~ "3-yo",
                                       TRUE ~ "2-yo")))

# Number of children per age bin (one row per child). exp1_complete is already
# restricted to the ids in good_counts, so no second completeness filter is
# needed here.
total_sample_size_by_age <- exp1_complete %>%
  distinct(sub_id, .keep_all = TRUE) %>%
  count(age_bin)
# Children who got at least half of their C-NF control trials correct
# (prop_correct >= .5).
good_controls <- exp1_complete %>%
  filter(trial_type == "C-NF") %>%
  group_by(sub_id) %>%
  summarize(prop_correct = mean(correct)) %>%
  filter(prop_correct >= .5)

# Children with english input >= 75 (one row per child).
good_language <- exp1_complete %>%
  distinct(sub_id, .keep_all = TRUE) %>%
  filter(english >= 75)

# Final sample: ids that satisfy every inclusion criterion. Add further
# criteria by appending their id vectors to the list.
final_sub_ids <- list(good_controls$sub_id,
                      good_language$sub_id) %>%
  reduce(intersect)

final_sample <- exp1_complete %>%
  filter(sub_id %in% final_sub_ids)

# One row per child with the grouping variables used in later sections.
# cut() intervals are right-closed, so the bins are [24,30], (30,36], (36,42],
# (42,48]; vocab_group is the quartile of prop_correct_vocab (1 = lowest).
demographics <- final_sample %>%
  distinct(sub_id, .keep_all = TRUE) %>%
  select(sub_id, age_months, prop_correct_vocab) %>%
  mutate(age_group = cut(age_months,
                         breaks = c(24, 30, 36, 42, 48),
                         include.lowest = TRUE),
         vocab_group = ntile(prop_correct_vocab, 4))

Split half reliability - by trial type

There are four trials of each of the four trial types (C-NF, NF, C-NN, NN). Splitting them is a little silly because each “half” contains only two trials.

# For each child and trial type, number the trials in response order, split
# them into odd- and even-numbered halves, and compute proportion correct in
# each half. The result has one row per child x trial type with columns
# `even` and `odd`, plus the child's demographics.
mss_half <- final_sample %>%
  filter(trial_type != "FF") %>% # there are only 3 FFs
  # sort first: arrange() ignores grouping, so the order is global either way
  arrange(resp_start_time) %>%
  group_by(sub_id, trial_type) %>%
  mutate(trial_type_num = row_number()) %>%
  mutate(half = case_when(trial_type_num %in% c(1, 3) ~ "odd",
                          trial_type_num %in% c(2, 4) ~ "even")) %>%
  group_by(sub_id, trial_type, half) %>%
  summarize(prop_correct = mean(correct)) %>%
  spread(half, prop_correct) %>%
  ungroup() %>%
  # demographics already carries age_group, so it is not recomputed here
  left_join(demographics, by = "sub_id")

Overall

# Odd-half vs. even-half accuracy with a linear fit, one panel per trial type.
ggplot(mss_half, aes(x = odd, y = even)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~trial_type)

# Pearson correlation between the two halves within each trial type.
mss_half %>%
  group_by(trial_type) %>%
  nest() %>%
  mutate(temp = map(data, ~cor.test(.x$odd, .x$even) %>% tidy())) %>%
  select(-data) %>%
  # name the list column explicitly; argument-less unnest() is deprecated
  unnest(temp) %>%
  kable()
trial_type estimate statistic p.value parameter conf.low conf.high method alternative
C-NF 0.2044752 2.575352 0.0109675 152 0.0478624 0.3512756 Pearson’s product-moment correlation two.sided
C-NN 0.2998126 3.874576 0.0001584 152 0.1487032 0.4372400 Pearson’s product-moment correlation two.sided
NF 0.2349277 2.979778 0.0033594 152 0.0797294 0.3790059 Pearson’s product-moment correlation two.sided
NN 0.3273565 4.271265 0.0000341 152 0.1784342 0.4616170 Pearson’s product-moment correlation two.sided

By age group

# Odd-half vs. even-half accuracy, with points and linear fits colored by
# age group, one panel per trial type.
ggplot(mss_half, aes(x = odd, y = even, color = age_group)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~trial_type)

# Split-half correlation within each trial type x age group cell.
mss_half_age_corr <- mss_half %>%
  group_by(trial_type, age_group) %>%
  nest() %>%
  mutate(temp = map(data, ~cor.test(.x$odd, .x$even) %>% tidy())) %>%
  select(-data) %>%
  # name the list column explicitly; argument-less unnest() is deprecated
  unnest(temp) %>%
  arrange(trial_type)

# Correlation estimates with their confidence intervals; the dashed line
# marks a correlation of zero.
ggplot(mss_half_age_corr,
       aes(x = age_group, y = estimate, color = age_group)) +
  ylab("split half correlation") +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
  facet_wrap(~trial_type) +
  geom_hline(aes(yintercept = 0), linetype = 2)

Split half reliability - by trial type2

Collapsing the four trial types into two, ME (NF and NN) and control (C-NF and C-NN), so that there are more trials per type (8 each).

# Same split-half computation, but on the collapsed trial types: NF and NN
# become "ME", C-NF and C-NN become "control". Trials are numbered in response
# order within child x collapsed type and split into odd/even halves.
mss_half <- final_sample %>%
  filter(trial_type != "FF") %>%
  mutate(trial_type2 = case_when(
    trial_type %in% c("NF", "NN") ~ "ME",
    trial_type %in% c("C-NF", "C-NN") ~ "control"
  )) %>%
  # sort first: arrange() ignores grouping, so the order is global either way
  arrange(resp_start_time) %>%
  group_by(sub_id, trial_type2) %>%
  mutate(trial_type_num = row_number()) %>%
  mutate(half = case_when(trial_type_num %in% c(1, 3, 5, 7) ~ "odd",
                          trial_type_num %in% c(2, 4, 6, 8) ~ "even")) %>%
  group_by(sub_id, trial_type2, half) %>%
  summarize(prop_correct = mean(correct)) %>%
  spread(half, prop_correct) %>%
  ungroup() %>%
  left_join(demographics, by = "sub_id")

Overall

# Odd-half vs. even-half accuracy with a linear fit, one panel per collapsed
# trial type.
ggplot(mss_half, aes(x = odd, y = even)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~trial_type2)

# Pearson correlation between the two halves within each collapsed trial type.
mss_half %>%
  group_by(trial_type2) %>%
  nest() %>%
  mutate(temp = map(data, ~cor.test(.x$odd, .x$even) %>% tidy())) %>%
  select(-data) %>%
  # name the list column explicitly; argument-less unnest() is deprecated
  unnest(temp) %>%
  kable()
trial_type2 estimate statistic p.value parameter conf.low conf.high method alternative
control 0.4045906 5.454499 2e-07 152 0.2632771 0.5289064 Pearson’s product-moment correlation two.sided
ME 0.4557494 6.312554 0e+00 152 0.3207057 0.5726337 Pearson’s product-moment correlation two.sided

By age group

# Odd-half vs. even-half accuracy, with points and linear fits colored by
# age group, one panel per collapsed trial type.
ggplot(mss_half, aes(x = odd, y = even, color = age_group)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~trial_type2)

# Split-half correlation within each collapsed trial type x age group cell.
mss_half_age_corr <- mss_half %>%
  group_by(trial_type2, age_group) %>%
  nest() %>%
  mutate(temp = map(data, ~cor.test(.x$odd, .x$even) %>% tidy())) %>%
  select(-data) %>%
  # name the list column explicitly; argument-less unnest() is deprecated
  unnest(temp) %>%
  arrange(trial_type2)

# Correlation estimates with their confidence intervals; the dashed line
# marks a correlation of zero.
ggplot(mss_half_age_corr,
       aes(x = age_group, y = estimate, color = age_group)) +
  ylab("split half correlation") +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
  facet_wrap(~trial_type2) +
  geom_hline(aes(yintercept = 0), linetype = 2)

Cronbach alpha (as in Frank et al. 2016) - by trial type2

Here I’m calculating reliability using Cronbach’s alpha, computed separately for the control and ME trials.

# Wide item-level data for Cronbach's alpha: one row per child x collapsed
# trial type, with one column per trial position (`1`, `2`, ...) holding
# `correct` for that trial, plus the child's demographics.
final_sample_wide <- final_sample %>%
  filter(trial_type != "FF") %>%
  mutate(trial_type2 = case_when(
    trial_type %in% c("NF", "NN") ~ "ME",
    trial_type %in% c("C-NF", "C-NN") ~ "control"
  )) %>%
  # sort first: arrange() ignores grouping, so the order is global either way
  arrange(resp_start_time) %>%
  group_by(sub_id, trial_type2) %>%
  mutate(trial_type_num = row_number()) %>%
  select(sub_id, trial_type2, trial_type_num, correct) %>%
  spread(trial_type_num, correct) %>%
  ungroup() %>%
  left_join(demographics, by = "sub_id")

Age

 # Coerce `x` to a data frame and drop every column that is entirely NA.
 #
 # x: a matrix or data frame of item scores.
 # Returns a data frame holding only the columns with at least one non-NA
 # value. `drop = FALSE` keeps the result a data frame even when a single
 # column survives.
 get.frame <- function(x) {
   x <- as.data.frame(x)
   has_data <- vapply(x, function(col) !all(is.na(col)), logical(1))
   x[, has_data, drop = FALSE]
 }
 
# Cronbach's alpha for each age group x collapsed trial type. Item columns
# that are entirely NA within a group are dropped by get.frame() before the
# fit. The column name `chronbach_alpha` is kept as-is because the plot of
# this table refers to it.
cronbach_values_age <- final_sample_wide %>%
  group_by(age_group, trial_type2) %>%
  # fit once per group and keep the result in a list column
  summarise(fit = list(psy::cronbach(
    get.frame(cbind(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`))
  ))) %>%
  mutate(n = map_dbl(fit, "sample.size"),
         chronbach_alpha = map_dbl(fit, "alpha")) %>%
  select(-fit)

kable(cronbach_values_age)
age_group trial_type2 n chronbach_alpha
[24,30] control 34 0.3999217
[24,30] ME 34 0.5379610
(30,36] control 35 0.6407373
(30,36] ME 35 0.5740142
(36,42] control 44 0.3567399
(36,42] ME 44 0.5629384
(42,48] control 41 0.6477973
(42,48] ME 41 0.6038767
# Cronbach's alpha by age group: points joined by one line per trial type.
cronbach_values_age %>%
  ggplot(aes(x = age_group,
             y = chronbach_alpha,
             color = trial_type2,
             group = trial_type2)) +
  geom_point() +
  geom_line()

Vocab

# Cronbach's alpha for each vocabulary quartile x collapsed trial type. Item
# columns that are entirely NA within a group are dropped by get.frame()
# before the fit. The column name `chronbach_alpha` is kept as-is because the
# plot of this table refers to it.
cronbach_values_vocab <- final_sample_wide %>%
  group_by(vocab_group, trial_type2) %>%
  # fit once per group and keep the result in a list column
  summarise(fit = list(psy::cronbach(
    get.frame(cbind(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`))
  ))) %>%
  mutate(n = map_dbl(fit, "sample.size"),
         chronbach_alpha = map_dbl(fit, "alpha")) %>%
  select(-fit)

kable(cronbach_values_vocab)
vocab_group trial_type2 n chronbach_alpha
1 control 39 0.2680976
1 ME 39 0.5156850
2 control 38 0.6813402
2 ME 38 0.4763237
3 control 39 0.4694034
3 ME 39 0.3991798
4 control 38 0.4426617
4 ME 38 0.5866107
# Cronbach's alpha by vocabulary quartile: points joined by one line per
# trial type.
cronbach_values_vocab %>%
  ggplot(aes(x = vocab_group,
             y = chronbach_alpha,
             color = trial_type2,
             group = trial_type2)) +
  geom_point() +
  geom_line()

(A higher vocab group number means a higher proportion correct on the vocabulary measure; groups are quartiles of prop_correct_vocab.)