# Location of the Experiment 1 data file, resolved by here().
EXP1_DATA_PATH <- here("data/1_exp_data.csv")

# Read the raw data and keep only the columns listed below.
exp1 <- EXP1_DATA_PATH %>%
  read_csv() %>%
  select(
    sub_id, age_months, gender, english,
    exclude2, prop_correct_vocab, trial_type, correct,
    start_time, end_time, resp_start_time, object1
  )
# Keep children who completed every trial and are in the target age range.
# A participant counts as complete when they have exactly NUM_TRIALS rows.
NUM_TRIALS <- 19
good_counts <- count(exp1, sub_id) %>%
  filter(n == NUM_TRIALS)

# Restrict to 24-48 months (both ends inclusive) and to participants with a
# complete set of trials, then add a two-level age bin: 36 months and older
# is "3-yo" (this includes 48-month-olds), anything younger is "2-yo".
exp1_complete <- exp1 %>%
  filter(sub_id %in% good_counts$sub_id) %>%
  filter(age_months >= 24, age_months <= 48) %>%
  mutate(age_bin = factor(if_else(age_months >= 36, "3-yo", "2-yo")))

# Number of participants per age bin among those with complete data in the
# target age range.
# distinct() collapses the rows to one per participant. exp1_complete is
# already limited to good_counts$sub_id, so no further filtering on sub_id
# is needed here.
total_sample_size_by_age <- exp1_complete %>%
  distinct(sub_id, .keep_all = TRUE) %>%
  count(age_bin)
# Control-trial criterion: proportion correct on "C-NF" trials of at least .5
# (the cutoff is inclusive).
good_controls <- exp1_complete %>%
  filter(trial_type == "C-NF") %>%
  group_by(sub_id) %>%
  summarize(prop_correct = sum(correct) / n()) %>%
  filter(prop_correct >= .5)

# Language criterion: English input (`english`) of at least 75, checked on
# one row per participant.
good_language <- exp1_complete %>%
  distinct(sub_id, .keep_all = TRUE) %>%
  filter(english >= 75)

# Final sample: participants who satisfy every inclusion criterion in the
# list. reduce(intersect) folds the ID vectors down to their common
# elements, so further criteria can be added as extra list entries.
final_sub_ids <- list(good_controls$sub_id,
                      good_language$sub_id) %>%
  reduce(intersect)

# All rows of exp1_complete belonging to participants in the final sample.
final_sample <- exp1_complete %>%
  filter(sub_id %in% final_sub_ids)

# Analysis data: drop "FF" trials, recode trial types, and mean-center the
# continuous predictors.
crit_sample <- final_sample %>%
  filter(trial_type != "FF") %>%
  mutate(
    # "N" for NF/NN trials; every other remaining trial type is "F".
    condition_type = ifelse(trial_type %in% c("NF", "NN"), "N", "F"),
    # "NN" for NN/C-NN trials; every other remaining trial type is "NF".
    trial_type2 = ifelse(trial_type %in% c("NN", "C-NN"), "NN", "NF"),
    # Center on the mean without dividing by the SD. The mean is taken over
    # all rows of this table. scale() returns a one-column matrix with a
    # "scaled:center" attribute; as.numeric() reduces it to a plain numeric
    # vector so the tibble holds ordinary columns.
    age_months = as.numeric(scale(age_months, scale = FALSE, center = TRUE)),
    prop_correct_vocab = as.numeric(
      scale(prop_correct_vocab, scale = FALSE, center = TRUE)
    )
  )


# Per-participant control score: mean accuracy on condition_type "F" trials.
by_subj_control_score <- crit_sample %>%
  filter(condition_type == "F") %>%
  group_by(sub_id) %>%
  summarize(control_score = mean(correct))

# Attach the control score to every row. sub_id is the only column shared by
# the two tables; naming it explicitly avoids the "Joining, by" message and
# guards against accidental matches on columns added later.
crit_sample2 <- crit_sample %>%
  left_join(by_subj_control_score, by = "sub_id")

# Mixed-effects logistic regression on condition_type "N" trials: accuracy
# predicted by vocabulary x trial type x age plus the control score, with a
# by-participant random intercept and random slope for trial type.
# NOTE(review): the recorded output for this model reports a singular fit
# with a random-effect correlation of 1.00; the random-effects structure may
# be too complex for these data -- consider simplifying it.
accuracy_model2 <- glmer(
  correct ~ prop_correct_vocab * trial_type2 * age_months + control_score +
    (trial_type2 | sub_id),
  family = "binomial",
  data = filter(crit_sample2, condition_type == "N"),
  control = glmerControl(optimizer = "bobyqa")
)
m2 <- summary(accuracy_model2)

m2
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## correct ~ prop_correct_vocab * trial_type2 * age_months + control_score +  
##     (trial_type2 | sub_id)
##    Data: filter(crit_sample2, condition_type == "N")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   1098.5   1159.9   -537.2   1074.5     1220 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.6659  0.2049  0.2951  0.4419  1.7213 
## 
## Random effects:
##  Groups Name          Variance Std.Dev. Corr
##  sub_id (Intercept)   0.2394   0.4893       
##         trial_type2NN 0.8381   0.9155   1.00
## Number of obs: 1232, groups:  sub_id, 154
## 
## Fixed effects:
##                                             Estimate Std. Error z value
## (Intercept)                                  1.39766    0.55285   2.528
## prop_correct_vocab                           5.76035    1.10106   5.232
## trial_type2NN                               -0.34341    0.23538  -1.459
## age_months                                   0.01559    0.02242   0.695
## control_score                                0.72809    0.64951   1.121
## prop_correct_vocab:trial_type2NN            -2.54632    1.52705  -1.667
## prop_correct_vocab:age_months               -0.02401    0.14216  -0.169
## trial_type2NN:age_months                     0.01749    0.03138   0.557
## prop_correct_vocab:trial_type2NN:age_months  0.17062    0.20348   0.839
##                                             Pr(>|z|)    
## (Intercept)                                   0.0115 *  
## prop_correct_vocab                          1.68e-07 ***
## trial_type2NN                                 0.1446    
## age_months                                    0.4870    
## control_score                                 0.2623    
## prop_correct_vocab:trial_type2NN              0.0954 .  
## prop_correct_vocab:age_months                 0.8659    
## trial_type2NN:age_months                      0.5772    
## prop_correct_vocab:trial_type2NN:age_months   0.4017    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) prp_c_ tr_2NN ag_mnt cntrl_ pr__:_2NN pr__:_ t_2NN:
## prp_crrct_v  0.338                                                    
## tril_typ2NN -0.137 -0.181                                             
## age_months  -0.034 -0.249  0.015                                      
## control_scr -0.960 -0.277 -0.003  0.028                               
## prp_c_:_2NN -0.063 -0.466  0.198  0.097  0.008                        
## prp_crrc_:_  0.037  0.366  0.079  0.314 -0.104 -0.177                 
## trl_ty2NN:_  0.014  0.099  0.011 -0.521 -0.009 -0.282    -0.217       
## pr__:_2NN:_  0.016 -0.174 -0.230 -0.211  0.006  0.344    -0.467  0.219
## convergence code: 0
## boundary (singular) fit: see ?isSingular