Split half reliability - by trial type
- Overall
- By age group
Split half reliability - by trial type2
- Overall
- By age group
Cronbach alpha (as in Frank et al. 2016) - by trial type2
- Age
- Vocab

# Read the experiment 1 trial-level data and keep only the columns used in
# the reliability analyses below.
EXP1_DATA_PATH <- here("data/1_exp_data.csv")
exp1 <- select(
  read_csv(EXP1_DATA_PATH),
  sub_id, age_months, gender, english,
  exclude2, prop_correct_vocab, trial_type, correct,
  start_time, end_time, resp_start_time, object1
)

# Children who contributed exactly NUM_TRIALS rows (i.e. completed all trials).
NUM_TRIALS <- 19
good_counts <- exp1 %>%
  count(sub_id) %>%
  filter(n == NUM_TRIALS)

# Restrict to complete children aged 24-48 months (inclusive) and label each
# row as belonging to a 2- or 3-year-old (36 months and up is "3-yo").
exp1_complete <- exp1 %>%
  filter(age_months >= 24 & age_months <= 48,
         sub_id %in% good_counts$sub_id) %>%
  mutate(age_bin = as.factor(case_when(age_months >= 36 ~ "3-yo",
                                       TRUE ~ "2-yo")))

# Number of children per age bin (one row per child). exp1_complete is already
# limited to the ids in good_counts, so no further sub_id filter is needed.
total_sample_size_by_age <- exp1_complete %>%
  distinct(sub_id, .keep_all = TRUE) %>%
  count(age_bin)

# Control criterion: proportion correct on C-NF trials >= .5
# (children at exactly .5 are retained by the filter below).
good_controls <- exp1_complete %>%
  filter(trial_type == "C-NF") %>%
  group_by(sub_id) %>%
  summarize(prop_correct = sum(correct) / n()) %>%
  filter(prop_correct >= .5)

# Language criterion: english input >= 75
good_language <- exp1_complete %>%
  distinct(sub_id, .keep_all = TRUE) %>%
  filter(english >= 75)

# Final sample: children who satisfy both criteria.
final_sub_ids <- intersect(good_controls$sub_id, good_language$sub_id)

final_sample <- exp1_complete %>%
  filter(sub_id %in% final_sub_ids)

# One row per child with the grouping variables used in the by-group analyses:
#   age_group   - 6-month bins: [24,30], (30,36], (36,42], (42,48]
#   vocab_group - quartile (1-4) of prop_correct_vocab via ntile()
demographics <- final_sample %>%
  distinct(sub_id, .keep_all = TRUE) %>%
  select(sub_id, age_months, prop_correct_vocab) %>%
  mutate(age_group = cut(age_months,
                         breaks = c(24, 30, 36, 42, 48),
                         include.lowest = TRUE),
         vocab_group = ntile(prop_correct_vocab, 4))

Split half reliability - by trial type

There are four trials of each of the four trial types (C-NF, NF, C-NN, NN). This is a little silly because the “halves” only have two trials each.

# Per-child, per-trial-type proportion correct on the odd vs. even trials.
# Trials are numbered within each child x trial type in order of
# resp_start_time; with four trials per type, each half holds two trials.
mss_half <- final_sample %>%
  filter(trial_type != "FF") %>% # there are only 3 FFs
  group_by(sub_id, trial_type) %>%
  arrange(resp_start_time) %>%
  mutate(trial_type_num = row_number(),
         half = case_when(trial_type_num %in% c(1, 3) ~ "odd",
                          trial_type_num %in% c(2, 4) ~ "even")) %>%
  group_by(sub_id, trial_type, half) %>%
  summarize(prop_correct = mean(correct)) %>%
  spread(half, prop_correct) %>%
  # demographics already carries age_group (same 6-month bins), so it is
  # joined in rather than recomputed here.
  left_join(demographics, by = "sub_id")

Overall

# Odd-half vs. even-half accuracy, one panel per trial type, with a linear fit.
ggplot(mss_half, aes(x = odd, y = even)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~trial_type)

# Split-half correlation (cor.test) for each trial type, tidied into one row
# per trial type.
mss_half %>%
  group_by(trial_type) %>%
  nest() %>%
  mutate(temp = map(data, ~ cor.test(.x$odd, .x$even) %>% tidy())) %>%
  select(-data) %>%
  # name the list-column explicitly; a bare unnest() warns in tidyr >= 1.0
  unnest(temp) %>%
  kable()

trial_type	estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
C-NF	0.2044752	2.575352	0.0109675	152	0.0478624	0.3512756	Pearson’s product-moment correlation	two.sided
C-NN	0.2998126	3.874576	0.0001584	152	0.1487032	0.4372400	Pearson’s product-moment correlation	two.sided
NF	0.2349277	2.979778	0.0033594	152	0.0797294	0.3790059	Pearson’s product-moment correlation	two.sided
NN	0.3273565	4.271265	0.0000341	152	0.1784342	0.4616170	Pearson’s product-moment correlation	two.sided

By age group

# Odd-half vs. even-half accuracy per trial type, colored by age group.
ggplot(mss_half, aes(x = odd, y = even, color = age_group)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~trial_type)

# Split-half correlation within each trial type x age group cell.
mss_half_age_corr <- mss_half %>%
  group_by(trial_type, age_group) %>%
  nest() %>%
  mutate(temp = map(data, ~ cor.test(.x$odd, .x$even) %>% tidy())) %>%
  select(-data) %>%
  # name the list-column explicitly; a bare unnest() warns in tidyr >= 1.0
  unnest(temp) %>%
  arrange(trial_type)

# Correlation estimates with their confidence intervals by age group;
# the dashed line marks zero correlation.
ggplot(mss_half_age_corr,
       aes(x = age_group, y = estimate, color = age_group)) +
  ylab("split half correlation") +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
  facet_wrap(~trial_type) +
  geom_hline(aes(yintercept = 0), linetype = 2)

Split half reliability - by trial type2

Collapsing across trial types within the ME (NF, NN) and control (C-NF, C-NN) conditions so that there are more trials per condition (8 each).

# Per-child proportion correct on odd vs. even trials after collapsing trial
# types into "ME" (NF, NN) and "control" (C-NF, C-NN). Trials are numbered
# within each child x trial_type2 in order of resp_start_time.
mss_half <- final_sample %>%
  filter(trial_type != "FF") %>%
  mutate(trial_type2 = case_when(
    trial_type %in% c("NF", "NN") ~ "ME",
    trial_type %in% c("C-NF", "C-NN") ~ "control"
  )) %>%
  group_by(sub_id, trial_type2) %>%
  arrange(resp_start_time) %>%
  mutate(trial_type_num = row_number(),
         half = case_when(trial_type_num %in% c(1, 3, 5, 7) ~ "odd",
                          trial_type_num %in% c(2, 4, 6, 8) ~ "even")) %>%
  group_by(sub_id, trial_type2, half) %>%
  summarize(prop_correct = mean(correct)) %>%
  spread(half, prop_correct) %>%
  # sub_id is the only shared column; naming it guards against accidental
  # multi-column joins if either table gains columns later.
  left_join(demographics, by = "sub_id")

Overall

# Odd-half vs. even-half accuracy, one panel per collapsed trial type.
ggplot(mss_half, aes(x = odd, y = even)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~trial_type2)

# Split-half correlation (cor.test) for each collapsed trial type.
mss_half %>%
  group_by(trial_type2) %>%
  nest() %>%
  mutate(temp = map(data, ~ cor.test(.x$odd, .x$even) %>% tidy())) %>%
  select(-data) %>%
  # name the list-column explicitly; a bare unnest() warns in tidyr >= 1.0
  unnest(temp) %>%
  kable()

trial_type2	estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
control	0.4045906	5.454499	2e-07	152	0.2632771	0.5289064	Pearson’s product-moment correlation	two.sided
ME	0.4557494	6.312554	0e+00	152	0.3207057	0.5726337	Pearson’s product-moment correlation	two.sided

By age group

# Odd-half vs. even-half accuracy per collapsed trial type, colored by age
# group.
ggplot(mss_half, aes(x = odd, y = even, color = age_group)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~trial_type2)

# Split-half correlation within each trial_type2 x age group cell.
mss_half_age_corr <- mss_half %>%
  group_by(trial_type2, age_group) %>%
  nest() %>%
  mutate(temp = map(data, ~ cor.test(.x$odd, .x$even) %>% tidy())) %>%
  select(-data) %>%
  # name the list-column explicitly; a bare unnest() warns in tidyr >= 1.0
  unnest(temp) %>%
  arrange(trial_type2)

# Correlation estimates with their confidence intervals by age group;
# the dashed line marks zero correlation.
ggplot(mss_half_age_corr,
       aes(x = age_group, y = estimate, color = age_group)) +
  ylab("split half correlation") +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
  facet_wrap(~trial_type2) +
  geom_hline(aes(yintercept = 0), linetype = 2)

Cronbach alpha (as in Frank et al. 2016) - by trial type2

Here I’m calculating reliability using Cronbach’s alpha, separately for the control and ME trials.

# Wide item-level data for Cronbach's alpha: one row per child x trial_type2,
# with one column per trial position (`1`, `2`, ...) holding `correct`.
# Trial positions follow resp_start_time order within child x trial_type2.
final_sample_wide <- final_sample %>%
  filter(trial_type != "FF") %>%
  mutate(trial_type2 = case_when(
    trial_type %in% c("NF", "NN") ~ "ME",
    trial_type %in% c("C-NF", "C-NN") ~ "control"
  )) %>%
  group_by(sub_id, trial_type2) %>%
  arrange(resp_start_time) %>%
  mutate(trial_type_num = row_number()) %>%
  select(sub_id, trial_type2, trial_type_num, correct) %>%
  spread(trial_type_num, correct) %>%
  # sub_id is the only shared column; naming it keeps the join key explicit.
  left_join(demographics, by = "sub_id")

Age

 #' Coerce to a data frame and drop columns that are entirely NA.
 #'
 #' @param x A matrix or data frame (anything `as.data.frame()` accepts).
 #' @return A data frame holding the columns of `x` that contain at least one
 #'   non-missing value. The result stays a data frame even when only one
 #'   column (or none) survives.
 get.frame <- function(x) {
   x <- as.data.frame(x)
   has_data <- vapply(x, function(col) !all(is.na(col)), logical(1))
   # drop = FALSE: without it a single surviving column collapses to a bare
   # vector, which downstream matrix/data-frame code cannot handle.
   x[, has_data, drop = FALSE]
 }
 
# Cronbach's alpha for each age group x trial_type2 cell, computed over the
# wide item columns `1`-`8`. get.frame() drops item columns that are entirely
# NA within a cell. psy::cronbach() is run once per cell and both the sample
# size and alpha are read from that single result.
# The column name `chronbach_alpha` (sic) is kept because the plot below
# refers to it.
cronbach_values_age <- final_sample_wide %>%
  group_by(age_group, trial_type2) %>%
  summarise(fit = list(psy::cronbach(get.frame(
    cbind(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`)
  )))) %>%
  mutate(n = map_dbl(fit, "sample.size"),
         chronbach_alpha = map_dbl(fit, "alpha")) %>%
  select(-fit)

kable(cronbach_values_age)

age_group	trial_type2	n	chronbach_alpha
[24,30]	control	34	0.3999217
[24,30]	ME	34	0.5379610
(30,36]	control	35	0.6407373
(30,36]	ME	35	0.5740142
(36,42]	control	44	0.3567399
(36,42]	ME	44	0.5629384
(42,48]	control	41	0.6477973
(42,48]	ME	41	0.6038767

# Alpha across age groups, one connected line per trial_type2.
cronbach_values_age %>%
  ggplot(aes(x = age_group,
             y = chronbach_alpha,
             color = trial_type2,
             group = trial_type2)) +
  geom_point() +
  geom_line()

Vocab

# Cronbach's alpha for each vocabulary quartile x trial_type2 cell, computed
# over the wide item columns `1`-`8`. get.frame() drops item columns that are
# entirely NA within a cell. psy::cronbach() is run once per cell and both the
# sample size and alpha are read from that single result.
# The column name `chronbach_alpha` (sic) is kept because the plot below
# refers to it.
cronbach_values_vocab <- final_sample_wide %>%
  group_by(vocab_group, trial_type2) %>%
  summarise(fit = list(psy::cronbach(get.frame(
    cbind(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`)
  )))) %>%
  mutate(n = map_dbl(fit, "sample.size"),
         chronbach_alpha = map_dbl(fit, "alpha")) %>%
  select(-fit)

kable(cronbach_values_vocab)

vocab_group	trial_type2	n	chronbach_alpha
1	control	39	0.2680976
1	ME	39	0.5156850
2	control	38	0.6813402
2	ME	38	0.4763237
3	control	39	0.4694034
3	ME	39	0.3991798
4	control	38	0.4426617
4	ME	38	0.5866107

# Alpha across vocabulary quartiles, one connected line per trial_type2.
cronbach_values_vocab %>%
  ggplot(aes(x = vocab_group,
             y = chronbach_alpha,
             color = trial_type2,
             group = trial_type2)) +
  geom_point() +
  geom_line()

(higher vocab group = more correct)

Experiment 1 reliability

2019-08-13

Split half reliability - by trial type

Overall

By age group

Split half reliability - by trial type2

Overall

By age group

Cronbach alpha (as in Frank et al. 2016) - by trial type2

Age

Vocab