Analysis walkthrough for the paper: Zettersten, M., Bredemann, C., Kaul, M., Vlach, H., Kirkorian, H., & Lupyan, G. (2023). Nameability supports rule-based category learning in children and adults. Child Development. https://doi.org/10.1111/cdev.14008

Preparing the data

We first set up the paths to relevant data files and read in the data. We also do some minimal data processing and remove excluded participants (N=8; a further 8 excluded participants contributed no response data: 4 due to an experimenter error and 4 due to not completing the experiment). See color_rule_kid_data_codebook.csv for information about individual columns.

read_path <- here::here("..","data")
model_output_path <- here::here("model_outputs")
figure_path <- here::here("figures")
d <- read.csv(here::here(read_path,"color_rule_kid_data.csv"))
color_naming_adult <- read.csv(here::here(read_path,"color_nameability_adults.csv")) %>%
  mutate(age_group="adult")
color_naming_kid <- read.csv(here::here(read_path,"color_nameability_kids.csv")) %>%
  mutate(age_group="kid")
color_naming <- color_naming_adult %>%
  bind_rows(color_naming_kid)
#Data on color properties computed in Zettersten & Lupyan (2020)
color_properties_zl <- read.csv(here::here(read_path,"color_properties.csv")) %>%
  filter(colorSet=="colorset1") %>%
  mutate(
    color=case_when(
      colorName == "mustard" ~ "chartreuse",
      colorName == "neonyellow" ~ "honeydew",
      colorName == "darkgreenblue" ~ "teal",
      colorName == "lightred" ~ "sienna",
      colorName == "pink" ~ "mauve",
      TRUE ~ colorName
    ))
  
#remove excluded data
d <- d %>%
  filter(exclude==0|is.na(exclude))
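
As a quick sanity check (a sketch, not part of the original pipeline), the number of unique participants flagged for exclusion in the data file can be compared against the N=8 reported above; this assumes that nonzero values of exclude mark exclusions and that participants without response data do not appear in the file.

#optional sanity check (sketch): count unique subjects flagged for exclusion
read.csv(here::here(read_path,"color_rule_kid_data.csv")) %>%
  filter(!is.na(exclude) & exclude != 0) %>%
  summarize(n_excluded = n_distinct(subject))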

Demographics

Children

Age, Gender

####summarize by subject####
kid_subj <-  d %>%
  filter(age_group=="kid") %>%
  group_by(subject,condition, age_m,gender,race, ethnicity,language_history,household_income,parental_education) %>%
  summarize(
    mean_learning_accuracy=mean(is_right[trial_kind=="learn"]),
    mean_learning_accuracy_block3=mean(is_right[trial_kind=="learn"&block==3]),
    mean_test_accuracy=mean(is_right[trial_kind=="test"]),
    mean_test_accuracy_prototype=mean(is_right[trial_kind=="test"&stimulus_type=="prototype"]),
    mean_test_accuracy_novel=mean(is_right[trial_kind=="test"&stimulus_type=="2-color-diff"]),
    total_time_mins=max(time_elapsed)/1000/60,
    round_1_training_time_mins = max(time_elapsed[trial_kind=="learn"&round==1])/1000/60-max(time_elapsed[trial_kind=="train"])/1000/60,
    round_2_training_time_mins = max(time_elapsed[trial_kind=="learn"&round==2])/1000/60-max(time_elapsed[trial_kind=="learn"&round==1])/1000/60,
    total_training_mins = round_1_training_time_mins+round_2_training_time_mins,
    generalization_time_mins = total_time_mins-max(time_elapsed[trial_kind!="test"])/1000/60,
    mean_training_trial_responses = trial_index[trial_id==48]-trial_index[trial_id==1&trial_kind=="learn"]-1
  )

####summarize demographics####
kid_demographics <-  kid_subj %>%
  ungroup() %>%
  summarize(N=n(), 
            mean_age = round(mean(age_m,na.rm=TRUE),1), 
            sd_age = round(sd(age_m,na.rm=TRUE),1), 
            min_age = min(age_m,na.rm=TRUE), 
            max_age = max(age_m,na.rm=TRUE),
            count_female = sum(gender=='female'),
            english_speaker=paste(100*sum(str_detect(language_history,"1"))/sum(language_history!=""),"%",sep=""),
            bilingual=sum(language_history!="1"&language_history!=""),
            avg_total_time = mean(total_time_mins),
            sd_total_time = sd(total_time_mins),
            avg_round_1_time = mean(round_1_training_time_mins),
            sd_round_1_time = sd(round_1_training_time_mins),
            avg_round_2_time = mean(round_2_training_time_mins),
            sd_round_2_time = sd(round_2_training_time_mins),
            avg_training_time = mean(total_training_mins),
            sd_training_time = sd(total_training_mins),
            avg_training_trial_responses = mean(mean_training_trial_responses),
            sd_training_trial_responses = sd(mean_training_trial_responses),
            avg_generalization_time = mean(generalization_time_mins),
            sd_generalization_time = sd(generalization_time_mins)
            )
kable(kid_demographics)
measure value
N 97
mean_age 63.2
sd_age 7
min_age 48
max_age 81
count_female 46
english_speaker 100%
bilingual 7
avg_total_time 12.14806
sd_total_time 2.758708
avg_round_1_time 5.211619
sd_round_1_time 1.126233
avg_round_2_time 3.973008
sd_round_2_time 1.081146
avg_training_time 9.184627
sd_training_time 2.092541
avg_training_trial_responses 63.13402
sd_training_trial_responses 7.091117
avg_generalization_time 1.122329
sd_generalization_time 0.6171955

Race

kid_subj %>%
  group_by(race) %>%
  summarize(count=n(),
            percent=count/nrow(kid_subj)) %>%
  kable()
race count percent
Asian 4 0.0412371
Black or African American 3 0.0309278
More than one race 8 0.0824742
Prefer not to disclose 2 0.0206186
White 75 0.7731959
NA 5 0.0515464

Ethnicity

kid_subj %>%
  group_by(ethnicity) %>%
  summarize(count=n(),
            percent=count/nrow(kid_subj)) %>%
  kable()
ethnicity count percent
Hispanic or Latino 2 0.0206186
Not Hispanic or Latino 89 0.9175258
Prefer not to disclose 1 0.0103093
NA 5 0.0515464

Household Income

kid_subj %>%
  mutate(
    household_income_category = case_when(
      household_income == 1 ~ "less than $24,999",
       household_income == 2 ~ "$25,000 to $49,999", 
       household_income == 3 ~ "$50,000 to $99,999", 
       household_income == 4 ~ "$100,000 or more", 
       household_income == 5 ~ "Prefer not to disclose"
    ) 
  ) %>%
  mutate(household_income_category = factor(household_income_category,levels=c("$100,000 or more","$50,000 to $99,999","$25,000 to $49,999","less than $24,999","Prefer not to disclose"))) %>%
  group_by(household_income_category) %>%
  summarize(count=n(),
            percent=count/nrow(kid_subj)) %>%
  kable()
household_income_category count percent
$100,000 or more 46 0.4742268
$50,000 to $99,999 23 0.2371134
$25,000 to $49,999 5 0.0515464
less than $24,999 2 0.0206186
Prefer not to disclose 15 0.1546392
NA 6 0.0618557

Parental Education

kid_subj %>%
  mutate(
    parental_education_category = case_when(
      parental_education == 1 ~ "Some high school",
       parental_education == 2 ~ "High school graduate", 
       parental_education == 3 ~ "Some college", 
       parental_education == 4 ~ "Trade/technical/vocational training", 
       parental_education == 5 ~ "College graduate",
      parental_education == 6 ~ "Postgraduate",
      parental_education == 7 ~ "Prefer not to disclose"
    ) 
  ) %>%
  mutate(parental_education_category = factor(parental_education_category,levels=c("Some high school","High school graduate","Some college","Trade/technical/vocational training","College graduate","Postgraduate","Prefer not to disclose"))) %>%
  group_by(parental_education_category) %>%
  summarize(count=n(),
            percent=count/nrow(kid_subj)) %>%
  kable()
parental_education_category count percent
Some college 5 0.0515464
Trade/technical/vocational training 7 0.0721649
College graduate 30 0.3092784
Postgraduate 50 0.5154639
NA 5 0.0515464

Training Time / Trials

kid_subj %>%
  group_by(condition) %>%
  summarize(
    avg_training_time = mean(total_training_mins),
    sd_training_time = sd(total_training_mins),
    avg_training_trial_responses = mean(mean_training_trial_responses),
    sd_training_trial_responses = sd(mean_training_trial_responses)
  )
## # A tibble: 2 × 5
##   condition avg_training_time sd_training_time avg_training_trial_responses
##   <chr>                 <dbl>            <dbl>                        <dbl>
## 1 high                   9.14             2.37                         61.9
## 2 low                    9.23             1.79                         64.4
## # ℹ 1 more variable: sd_training_trial_responses <dbl>
t.test(total_training_mins ~ condition,data=kid_subj)
## 
##  Welch Two Sample t-test
## 
## data:  total_training_mins by condition
## t = -0.20436, df = 89.423, p-value = 0.8385
## alternative hypothesis: true difference in means between group high and group low is not equal to 0
## 95 percent confidence interval:
##  -0.9332199  0.7591460
## sample estimates:
## mean in group high  mean in group low 
##           9.141557           9.228594
t.test(mean_training_trial_responses ~ condition,data=kid_subj)
## 
##  Welch Two Sample t-test
## 
## data:  mean_training_trial_responses by condition
## t = -1.7506, df = 92.606, p-value = 0.08332
## alternative hypothesis: true difference in means between group high and group low is not equal to 0
## 95 percent confidence interval:
##  -5.3314545  0.3357062
## sample estimates:
## mean in group high  mean in group low 
##           61.89796           64.39583

Adults

Age, Gender

####summarize by subject####
adult_subj <-  d %>%
  filter(age_group=="adult") %>%
  group_by(subject,condition, age_y,gender,l1) %>%
  summarize(
    mean_learning_accuracy=mean(is_right[trial_kind=="learn"]),
    mean_test_accuracy=mean(is_right[trial_kind=="test"]),
    mean_test_accuracy_prototype=mean(is_right[trial_kind=="test"&stimulus_type=="prototype"]),
    mean_test_accuracy_novel=mean(is_right[trial_kind=="test"&stimulus_type=="2-color-diff"]),
    total_time_mins=max(time_elapsed)/1000/60,
    round_1_training_time_mins = max(time_elapsed[trial_kind=="learn"&round==1])/1000/60-max(time_elapsed[trial_kind=="train"])/1000/60,
    round_2_training_time_mins = max(time_elapsed[trial_kind=="learn"&round==2])/1000/60-max(time_elapsed[trial_kind=="learn"&round==1])/1000/60,
    total_training_mins = round_1_training_time_mins+round_2_training_time_mins,
    generalization_time_mins = total_time_mins-max(time_elapsed[trial_kind!="test"])/1000/60,
    mean_training_trial_responses = trial_index[trial_id==48]-trial_index[trial_id==1&trial_kind=="learn"]-1
  )
####summarize demographics####
adult_demographics <-  adult_subj %>%
  ungroup() %>%
  summarize(N=n(), 
            mean_age = round(mean(age_y,na.rm=TRUE),1), 
            sd_age = round(sd(age_y,na.rm=TRUE),1), 
            min_age = min(age_y,na.rm=TRUE), 
            max_age = max(age_y,na.rm=TRUE),
            count_female = sum(gender=='female'),
            english_l1 = sum(l1=="English"),
            avg_total_time = mean(total_time_mins),
            sd_total_time = sd(total_time_mins),
            avg_round_1_time = mean(round_1_training_time_mins),
            sd_round_1_time = sd(round_1_training_time_mins),
            avg_round_2_time = mean(round_2_training_time_mins),
            sd_round_2_time = sd(round_2_training_time_mins),
            avg_training_time = mean(total_training_mins),
            sd_training_time = sd(total_training_mins),
            avg_training_trial_responses = mean(mean_training_trial_responses),
            sd_training_trial_responses = sd(mean_training_trial_responses),
            avg_generalization_time = mean(generalization_time_mins),
            sd_generalization_time = sd(generalization_time_mins)
            )
kable(adult_demographics)
measure value
N 90
mean_age 20.1
sd_age 1.2
min_age 18
max_age 23
count_female 70
english_l1 83
avg_total_time 7.892174
sd_total_time 0.6820091
avg_round_1_time 3.426194
sd_round_1_time 0.2994248
avg_round_2_time 2.5252
sd_round_2_time 0.2371402
avg_training_time 5.951395
sd_training_time 0.4888921
avg_training_trial_responses 51.98889
sd_training_trial_responses 4.249242
avg_generalization_time 0.6772491
sd_generalization_time 0.0862937

Training Time / Trials

adult_subj %>%
  group_by(condition) %>%
  summarize(
    avg_training_time = mean(total_training_mins),
    sd_training_time = sd(total_training_mins),
    avg_training_trial_responses = mean(mean_training_trial_responses),
    sd_training_trial_responses = sd(mean_training_trial_responses)
  )
## # A tibble: 2 × 5
##   condition avg_training_time sd_training_time avg_training_trial_responses
##   <chr>                 <dbl>            <dbl>                        <dbl>
## 1 high                   5.82            0.428                         50  
## 2 low                    6.09            0.513                         54.0
## # ℹ 1 more variable: sd_training_trial_responses <dbl>
t.test(total_training_mins ~ condition,data=adult_subj)
## 
##  Welch Two Sample t-test
## 
## data:  total_training_mins by condition
## t = -2.6857, df = 85.282, p-value = 0.008697
## alternative hypothesis: true difference in means between group high and group low is not equal to 0
## 95 percent confidence interval:
##  -0.46574230 -0.06950733
## sample estimates:
## mean in group high  mean in group low 
##           5.817582           6.085207
t.test(mean_training_trial_responses ~ condition,data=adult_subj)
## 
##  Welch Two Sample t-test
## 
## data:  mean_training_trial_responses by condition
## t = -5.0044, df = 56.288, p-value = 5.842e-06
## alternative hypothesis: true difference in means between group high and group low is not equal to 0
## 95 percent confidence interval:
##  -5.569900 -2.385656
## sample estimates:
## mean in group high  mean in group low 
##           50.00000           53.97778

Training Accuracy

Adults

Main model

To test the effect of nameability on category learning, we predicted participants’ trial-by-trial accuracy on training trials from Condition (centered; Low Nameability = -0.5, High Nameability = 0.5), Block Number (centered), and Experiment Round (centered), as well as all interactions among the three predictors, in a logistic mixed-effects model. We fit the model with the maximal by-subject random-effects structure, including a by-subject intercept and by-subject random slopes for Block Number, Experiment Round, and their interaction.
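
The centered predictors used below (condition_c, block_c, round_c) come precomputed in the data file; as a rough sketch of the presumed coding (assuming Block runs 1-3 and Round 1-2, as in the trial structure above; d_check is just a scratch name):

#sketch of the presumed predictor coding (these columns already exist in d)
d_check <- d %>%
  mutate(
    condition_c = ifelse(condition == "high", 0.5, -0.5), #High Nameability = 0.5, Low = -0.5
    block_c = block - mean(block, na.rm = TRUE),          #Block Number, centered
    round_c = round - mean(round, na.rm = TRUE)           #Experiment Round, centered
  )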

#### training modeling ####
m <- glmer(is_right~condition_c*block_c*round_c+(1+block_c*round_c|subject), data=subset(d, age_group=="adult" & trial_kind=="learn"), family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition_c * block_c * round_c + (1 + block_c * round_c |  
##     subject)
##    Data: subset(d, age_group == "adult" & trial_kind == "learn")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   2108.3   2223.0  -1036.2   2072.3     4302 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.2258  0.0710  0.1549  0.3037  1.0864 
## 
## Random effects:
##  Groups  Name            Variance Std.Dev. Corr             
##  subject (Intercept)     1.6816   1.2968                    
##          block_c         0.3381   0.5815    1.00            
##          round_c         0.4148   0.6440    0.85  0.85      
##          block_c:round_c 0.4582   0.6769    0.17  0.17 -0.37
## Number of obs: 4320, groups:  subject, 90
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   3.5775     0.2253  15.882  < 2e-16 ***
## condition_c                   1.6758     0.3634   4.611 4.01e-06 ***
## block_c                       1.2632     0.2027   6.233 4.57e-10 ***
## round_c                       1.2565     0.3191   3.937 8.24e-05 ***
## condition_c:block_c           0.5556     0.2675   2.077   0.0378 *  
## condition_c:round_c           0.3679     0.3990   0.922   0.3565    
## block_c:round_c              -0.2288     0.3563  -0.642   0.5208    
## condition_c:block_c:round_c  -0.1765     0.4473  -0.394   0.6932    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtn_ blck_c rond_c cndtn_c:b_ cndtn_c:r_ blc_:_
## condition_c 0.274                                                   
## block_c     0.765  0.241                                            
## round_c     0.521  0.137  0.417                                     
## cndtn_c:bl_ 0.274  0.730  0.408  0.110                              
## cndtn_c:rn_ 0.187  0.466  0.139  0.420  0.302                       
## blck_c:rnd_ 0.301  0.068  0.502  0.681  0.170      0.282            
## cndtn_c:_:_ 0.084  0.119  0.181  0.267  0.299      0.422      0.399 
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#Anova(m, type="III") # chi-squared test yields virtually identical results
confint(m, method="Wald")[11:18,]
##                                   2.5 %    97.5 %
## (Intercept)                  3.13606404 4.0190322
## condition_c                  0.96348822 2.3881622
## block_c                      0.86602702 1.6604334
## round_c                      0.63102772 1.8819361
## condition_c:block_c          0.03132518 1.0798923
## condition_c:round_c         -0.41417325 1.1499622
## block_c:round_c             -0.92722203 0.4695611
## condition_c:block_c:round_c -1.05315796 0.7002500
coefs <- summary(m)$coef %>%
  as_tibble %>%
  mutate_at(c("Estimate","Std. Error", "z value", "Pr(>|z|)"), 
            function (x) signif(x, digits = 3)) %>%
  rename(SE = `Std. Error`, 
         z = `z value`,
         p = `Pr(>|z|)`)

rownames(coefs) <- c("Intercept", "Condition", "Block Number", "Round", 
                     "Condition * Block Number","Condition * Round", "Block Number * Round", 
                     "Condition * Block Number * Round")

write.table(coefs, file=here::here(model_output_path,"adult_lme_model_output.csv"),sep=",")

Simplified random effects

While the model with the full random-effects structure had a singular fit, simplifying the random-effects structure did not meaningfully affect the main pattern of results. Below, we successively prune random effects until the fit is no longer singular. The simplified model yields highly similar results to the model with the full random-effects structure.
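
Before pruning, the singular fit can be confirmed directly with lme4's built-in diagnostics; a minimal sketch:

#check for a singular fit and inspect which random-effect dimensions carry near-zero variance
isSingular(m)     #TRUE for the maximal model above
summary(rePCA(m)) #principal components of the random-effects covariance matrix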

#remove round random slope
#m <- glmer(is_right~condition_c*block_c*round_c+(1+block_c+block_c:round_c|subject), data=filter(d, age_group=="adult" & trial_kind=="learn"), family=binomial,glmerControl(optimizer="bobyqa"))
#model still yields a singular fit

#remove random slope for block
#m <- glmer(is_right~condition_c*block_c*round_c+(1+round_c+block_c:round_c|subject), data=filter(d, age_group=="adult" & trial_kind=="learn"), family=binomial,glmerControl(optimizer="bobyqa"))
#model still yields a singular fit

#remove both round and block random slopes
m <- glmer(is_right~condition_c*block_c*round_c+(1+block_c:round_c|subject), data=filter(d, age_group=="adult" & trial_kind=="learn"), family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition_c * block_c * round_c + (1 + block_c:round_c |  
##     subject)
##    Data: filter(d, age_group == "adult" & trial_kind == "learn")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   2123.8   2193.9  -1050.9   2101.8     4309 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.8287  0.0971  0.1716  0.3055  1.2036 
## 
## Random effects:
##  Groups  Name            Variance Std.Dev. Corr 
##  subject (Intercept)     0.9409   0.970         
##          block_c:round_c 0.8705   0.933    -0.50
## Number of obs: 4320, groups:  subject, 90
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   3.2404     0.1547  20.949  < 2e-16 ***
## condition_c                   1.6122     0.2835   5.688 1.29e-08 ***
## block_c                       0.8065     0.1034   7.796 6.39e-15 ***
## round_c                       0.7487     0.1761   4.251 2.13e-05 ***
## condition_c:block_c           0.5104     0.2065   2.472  0.01343 *  
## condition_c:round_c           0.2986     0.3517   0.849  0.39595    
## block_c:round_c              -0.8532     0.2847  -2.997  0.00272 ** 
## condition_c:block_c:round_c  -0.2263     0.4776  -0.474  0.63560    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtn_ blck_c rond_c cndtn_c:b_ cndtn_c:r_ blc_:_
## condition_c  0.293                                                  
## block_c      0.298  0.267                                           
## round_c      0.156  0.128  0.055                                    
## cndtn_c:bl_  0.233  0.328  0.618  0.069                             
## cndtn_c:rn_  0.118  0.164  0.070  0.644  0.053                      
## blck_c:rnd_ -0.281 -0.011  0.237  0.394  0.169      0.304           
## cndtn_c:_:_ -0.012 -0.183  0.195  0.372  0.247      0.450      0.457
#note that the round-by-block interaction becomes significant after removing the random slopes; this effect is not of particular theoretical interest in the current study

Plot

#summarize by block
adult_training_by_block <-  d %>%
  filter(age_group=="adult" & trial_kind=="learn") %>%
  group_by(subject,condition,round,block) %>%
  summarize(accuracy=mean(is_right)) %>%
  summarySEwithin(measurevar="accuracy",betweenvars=c("condition"),withinvars=c("round","block"),idvar="subject") %>%
  mutate(round_factor = case_when(
    round=="1" ~ "Round 1",
    round=="2" ~ "Round 2"
  )) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  ) 
# show by block learning accuracy as a table
adult_training_by_block %>%
  select(condition,N,round,block,accuracy,lower_ci,upper_ci) %>%
  kable(digits=3)
condition N round block accuracy lower_ci upper_ci
high 45 1 1 0.856 0.814 0.897
high 45 1 2 0.972 0.954 0.990
high 45 1 3 0.983 0.971 0.996
high 45 2 1 0.961 0.937 0.985
high 45 2 2 0.989 0.977 1.001
high 45 2 3 0.989 0.974 1.004
low 45 1 1 0.739 0.701 0.777
low 45 1 2 0.861 0.829 0.893
low 45 1 3 0.917 0.891 0.942
low 45 2 1 0.881 0.845 0.916
low 45 2 2 0.908 0.883 0.934
low 45 2 3 0.933 0.906 0.960
#learning phase
ggplot(adult_training_by_block, aes(block,accuracy,color=condition,group=condition))+
  geom_line(aes(linetype=condition),position=position_dodge(0.1),size=1.3)+
  geom_point(aes(shape=condition),position=position_dodge(0.1),size=2.5)+
  geom_errorbar(aes(ymin=accuracy-se,ymax=accuracy+se),width=0,size=0.5,position=position_dodge(.1))+
  xlab("Block")+
  ylab("Accuracy")+
  scale_linetype_discrete(name="Nameability")+
  scale_shape_discrete(name="Nameability")+
  scale_color_brewer(palette="Set1",name="Nameability")+
  #ggtitle("Performance during training")+
  geom_hline(yintercept=0.5, linetype="dashed",size=1)+
  theme(legend.position=c(0.3,0.3))+
  theme(text=element_text(size=18))+
  theme(strip.text.x = element_text(size=16), plot.background = element_rect(fill="white",color="white"))+
  facet_wrap(~round_factor)+
  ylim(0,1)

ggsave(here::here("figures","adults_training_half.png"),width=8, height=5,dpi=600)

Overall Accuracy

adult_training_summarized <- adult_subj %>%
  group_by(condition) %>%
  summarize(
    N=n(),
    avg_accuracy = mean(mean_learning_accuracy),
    avg_accuracy_ci = qt(0.975, N-1)*sd(mean_learning_accuracy,na.rm=TRUE)/sqrt(N),
    avg_accuracy_lower_ci = avg_accuracy - avg_accuracy_ci,
    avg_accuracy_upper_ci = avg_accuracy + avg_accuracy_ci
  )
adult_training_summarized %>%
  select(-avg_accuracy_ci) %>%
  mutate(
    ci = str_c("[",round(avg_accuracy_lower_ci,3),", ", round(avg_accuracy_upper_ci,3),"]")) %>%
  select(condition,N,avg_accuracy,ci) %>%
  kable(col.names=c("Condition", "N", "Average Accuracy","CI"),digits=3)
Condition N Average Accuracy CI
high 45 0.958 [0.947, 0.97]
low 45 0.873 [0.841, 0.905]
#effect size
cohens_d(mean_learning_accuracy ~ condition,data=adult_subj)
## Cohen's d |       95% CI
## ------------------------
## 1.06      | [0.61, 1.50]
## 
## - Estimated using pooled SD.

Children

Main model

To test the effect of nameability on category learning, we predicted participants’ trial-by-trial accuracy on training trials from Condition (centered; Low Nameability = -0.5, High Nameability = 0.5), Block Number (centered), and Experiment Round (centered), as well as all interactions among the three predictors, in a logistic mixed-effects model. We fit the model with the maximal by-subject random-effects structure, including a by-subject intercept and by-subject random slopes for Block Number, Experiment Round, and their interaction.

#### training modeling ####
m <- glmer(
  is_right~condition_c*block_c*round_c+(1+block_c*round_c|subject), 
  data=filter(d, age_group=="kid"  & trial_kind=="learn"),
  family=binomial,
  glmerControl(optimizer="bobyqa"))

summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition_c * block_c * round_c + (1 + block_c * round_c |  
##     subject)
##    Data: filter(d, age_group == "kid" & trial_kind == "learn")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   5520.5   5636.5  -2742.3   5484.5     4638 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.6099 -1.0710  0.4750  0.7436  1.1249 
## 
## Random effects:
##  Groups  Name            Variance Std.Dev. Corr             
##  subject (Intercept)     0.64208  0.8013                    
##          block_c         0.03008  0.1734    0.58            
##          round_c         0.35489  0.5957    0.87  0.91      
##          block_c:round_c 0.06064  0.2462   -0.82 -0.94 -1.00
## Number of obs: 4656, groups:  subject, 97
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  0.93598    0.09001  10.399  < 2e-16 ***
## condition_c                  0.27810    0.17796   1.563  0.11812    
## block_c                      0.12478    0.04902   2.545  0.01092 *  
## round_c                      0.45523    0.09683   4.701 2.58e-06 ***
## condition_c:block_c          0.23083    0.08951   2.579  0.00992 ** 
## condition_c:round_c          0.06553    0.18327   0.358  0.72065    
## block_c:round_c             -0.25631    0.09482  -2.703  0.00687 ** 
## condition_c:block_c:round_c -0.11709    0.17181  -0.681  0.49557    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtn_ blck_c rond_c cndtn_c:b_ cndtn_c:r_ blc_:_
## condition_c  0.005                                                  
## block_c      0.212  0.016                                           
## round_c      0.571  0.006  0.181                                    
## cndtn_c:bl_  0.015  0.230  0.048 -0.002                             
## cndtn_c:rn_  0.006  0.575  0.002  0.033  0.229                      
## blck_c:rnd_ -0.214  0.001  0.094 -0.131  0.020      0.030           
## cndtn_c:_:_  0.000 -0.236  0.019  0.029 -0.039     -0.174      0.051
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#Anova(m, type="III") # chi-squared test yields virtually identical results
confint(m, method="Wald")[11:18,]
##                                   2.5 %      97.5 %
## (Intercept)                  0.75956687  1.11239528
## condition_c                 -0.07069404  0.62689610
## block_c                      0.02869696  0.22087051
## round_c                      0.26544627  0.64500576
## condition_c:block_c          0.05538923  0.40627317
## condition_c:round_c         -0.29365852  0.42472752
## block_c:round_c             -0.44214751 -0.07047936
## condition_c:block_c:round_c -0.45382935  0.21965776
coefs <- summary(m)$coef %>%
  as_tibble %>%
  mutate_at(c("Estimate","Std. Error", "z value", "Pr(>|z|)"), 
            function (x) signif(x, digits = 3)) %>%
  rename(SE = `Std. Error`, 
         z = `z value`,
         p = `Pr(>|z|)`)

rownames(coefs) <- c("Intercept", "Condition", "Block Number", "Round", 
                     "Condition * Block Number","Condition * Round", "Block Number * Round", 
                     "Condition * Block Number * Round")

write.table(coefs, file=here::here(model_output_path,"kid_lme_model_output.csv"),sep=",")

Simplified random effects

While the model with the full random-effects structure had a singular fit, simplifying the random-effects structure did not meaningfully affect the pattern of results. Below, we successively prune random effects until the fit is no longer singular. The simplified model yields highly similar results to the model with the full random-effects structure.

#remove block and round random slopes
#m <- glmer(is_right~condition_c*block_c*round_c+(1+block_c:round_c|subject), data=filter(d, age_group=="kid" & trial_kind=="learn"), family=binomial,glmerControl(optimizer="bobyqa"))
#model still yields a singular fit

# remove interaction random effect
#m <- glmer(is_right~condition_c*block_c*round_c+(1+block_c+round_c|subject), data=filter(d, age_group=="kid" & trial_kind=="learn"), family=binomial,glmerControl(optimizer="bobyqa"))
# model still yields a singular fit

# retain only the block random slope
m <- glmer(is_right~condition_c*block_c*round_c+(1+block_c|subject), data=filter(d, age_group=="kid" & trial_kind=="learn"), family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition_c * block_c * round_c + (1 + block_c | subject)
##    Data: filter(d, age_group == "kid" & trial_kind == "learn")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   5546.9   5617.8  -2762.5   5524.9     4645 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0321 -1.0877  0.4765  0.7242  1.1829 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev. Corr
##  subject (Intercept) 0.53648  0.7324       
##          block_c     0.02246  0.1499   0.89
## Number of obs: 4656, groups:  subject, 97
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  0.89129    0.08268  10.779  < 2e-16 ***
## condition_c                  0.25865    0.16408   1.576  0.11494    
## block_c                      0.14694    0.04621   3.180  0.00147 ** 
## round_c                      0.30377    0.06622   4.587 4.49e-06 ***
## condition_c:block_c          0.22574    0.08710   2.592  0.00955 ** 
## condition_c:round_c          0.02884    0.13236   0.218  0.82753    
## block_c:round_c             -0.22863    0.08116  -2.817  0.00484 ** 
## condition_c:block_c:round_c -0.12816    0.16216  -0.790  0.42935    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtn_ blck_c rond_c cndtn_c:b_ cndtn_c:r_ blc_:_
## condition_c  0.001                                                  
## block_c      0.311  0.016                                           
## round_c      0.022  0.003 -0.025                                    
## cndtn_c:bl_  0.014  0.310  0.038 -0.005                             
## cndtn_c:rn_  0.003  0.019 -0.007  0.034 -0.021                      
## blck_c:rnd_ -0.011 -0.003  0.047  0.043  0.004      0.029           
## cndtn_c:_:_ -0.004 -0.011  0.004  0.029  0.037      0.044      0.034

Block 3

We tested for a condition difference in block 3 of each round (round 1 and round 2) by subsetting the data to the relevant block and round and predicting correct responses from Condition in a logistic mixed-effects model with a by-subject random intercept.

Block 3, Round 1

#block 3, round 1
m=glmer(is_right~condition_c+(1|subject), data=filter(d, age_group=="kid" &  trial_kind=="learn"& block==3 & round==1), family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition_c + (1 | subject)
##    Data: filter(d, age_group == "kid" & trial_kind == "learn" & block ==  
##     3 & round == 1)
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    947.4    961.4   -470.7    941.4      773 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9877 -1.0659  0.5031  0.6396  1.0437 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  subject (Intercept) 0.3712   0.6093  
## Number of obs: 776, groups:  subject, 97
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.8694     0.1052   8.261  < 2e-16 ***
## condition_c   0.5517     0.2051   2.690  0.00715 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## condition_c 0.062
confint(m, method="Wald")[2:3,]
##                 2.5 %    97.5 %
## (Intercept) 0.6631645 1.0757091
## condition_c 0.1497043 0.9536001

Block 3, Round 2

#block 3, round 2
m=glmer(is_right~condition_c+(1|subject), data=filter(d, age_group=="kid" &  trial_kind=="learn" & block==3 & round==2), family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition_c + (1 | subject)
##    Data: filter(d, age_group == "kid" & trial_kind == "learn" & block ==  
##     3 & round == 2)
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    913.0    926.9   -453.5    907.0      773 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1515 -1.0418  0.4648  0.6399  0.9723 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  subject (Intercept) 0.5826   0.7633  
## Number of obs: 776, groups:  subject, 97
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.0235     0.1199   8.533   <2e-16 ***
## condition_c   0.5268     0.2306   2.285   0.0223 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## condition_c 0.063
confint(m, method="Wald")[2:3,]
##                  2.5 %    97.5 %
## (Intercept) 0.78840742 1.2585682
## condition_c 0.07488766 0.9787954

Controlling for Age / Interaction with Age

We also checked whether any of these effects interacted with children’s age. We fit the same model structure while including age (centered) as a fixed effect, as well as the interactions between age and all other fixed-effect terms. Overall, accuracy increased with age, but age did not interact with any of the other model terms. All of the lower-order effects observed in the main model remained significant.
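
The age predictor age_c is, like the other centered predictors, already present in the data; presumably it corresponds to children's age in months (age_m) centered at the sample mean, e.g.:

#sketch of the presumed age centering for children
d_check <- d %>%
  filter(age_group == "kid") %>%
  mutate(age_c = age_m - mean(age_m, na.rm = TRUE))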

#### training modeling ####
m <- glmer(
  is_right~condition_c*block_c*round_c*age_c+(1+block_c*round_c|subject), 
  data=filter(d, age_group=="kid"  & trial_kind=="learn"),
  family=binomial,
  glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition_c * block_c * round_c * age_c + (1 + block_c *  
##     round_c | subject)
##    Data: filter(d, age_group == "kid" & trial_kind == "learn")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   5456.1   5623.5  -2702.1   5404.1     4582 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.5009 -1.0636  0.4673  0.7393  1.1258 
## 
## Random effects:
##  Groups  Name            Variance Std.Dev. Corr             
##  subject (Intercept)     0.56228  0.7499                    
##          block_c         0.02762  0.1662    0.52            
##          round_c         0.33671  0.5803    0.85  0.89      
##          block_c:round_c 0.05306  0.2303   -0.80 -0.93 -0.99
## Number of obs: 4608, groups:  subject, 96
## 
## Fixed effects:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        0.969383   0.086729  11.177  < 2e-16 ***
## condition_c                        0.251116   0.171113   1.468 0.142227    
## block_c                            0.125883   0.050122   2.512 0.012022 *  
## round_c                            0.475341   0.098168   4.842 1.28e-06 ***
## age_c                              0.040810   0.012193   3.347 0.000816 ***
## condition_c:block_c                0.235856   0.091420   2.580 0.009882 ** 
## condition_c:round_c                0.052202   0.185587   0.281 0.778497    
## block_c:round_c                   -0.260938   0.097175  -2.685 0.007248 ** 
## condition_c:age_c                  0.004498   0.024327   0.185 0.853300    
## block_c:age_c                      0.008583   0.006528   1.315 0.188597    
## round_c:age_c                      0.021706   0.013196   1.645 0.099989 .  
## condition_c:block_c:round_c       -0.140930   0.175911  -0.801 0.423049    
## condition_c:block_c:age_c          0.002529   0.012857   0.197 0.844075    
## condition_c:round_c:age_c          0.013985   0.026134   0.535 0.592564    
## block_c:round_c:age_c             -0.011247   0.012565  -0.895 0.370741    
## condition_c:block_c:round_c:age_c -0.022898   0.024704  -0.927 0.353993    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
confint(m, method="Wald")
##                                          2.5 %      97.5 %
## .sig01                                      NA          NA
## .sig02                                      NA          NA
## .sig03                                      NA          NA
## .sig04                                      NA          NA
## .sig05                                      NA          NA
## .sig06                                      NA          NA
## .sig07                                      NA          NA
## .sig08                                      NA          NA
## .sig09                                      NA          NA
## .sig10                                      NA          NA
## (Intercept)                        0.799398299  1.13936841
## condition_c                       -0.084258904  0.58649146
## block_c                            0.027644680  0.22412114
## round_c                            0.282935680  0.66774595
## age_c                              0.016913490  0.06470726
## condition_c:block_c                0.056677042  0.41503576
## condition_c:round_c               -0.311543091  0.41594628
## block_c:round_c                   -0.451397035 -0.07047837
## condition_c:age_c                 -0.043181816  0.05217845
## block_c:age_c                     -0.004212311  0.02137873
## round_c:age_c                     -0.004157427  0.04756940
## condition_c:block_c:round_c       -0.485709760  0.20384966
## condition_c:block_c:age_c         -0.022670748  0.02772831
## condition_c:round_c:age_c         -0.037236916  0.06520694
## block_c:round_c:age_c             -0.035873859  0.01338025
## condition_c:block_c:round_c:age_c -0.071316731  0.02552161

Plot

#summarize by block
kid_training_by_block <-  d %>%
  filter(age_group=="kid" & trial_kind=="learn") %>%
  group_by(subject,condition,round,block) %>%
  summarize(accuracy=mean(is_right)) %>%
  summarySEwithin(measurevar="accuracy",betweenvars=c("condition"),withinvars=c("round","block"),idvar="subject") %>%
  mutate(round_factor = case_when(
    round=="1" ~ "Round 1",
    round=="2" ~ "Round 2"
  )) %>%
  mutate(
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  ) 
# show by block learning accuracy as a table
kid_training_by_block %>%
  select(condition,N,round,block,accuracy,lower_ci,upper_ci) %>%
  kable(digits=3)
condition N round block accuracy lower_ci upper_ci
high 49 1 1 0.594 0.537 0.652
high 49 1 2 0.696 0.637 0.755
high 49 1 3 0.742 0.699 0.786
high 49 2 1 0.732 0.692 0.772
high 49 2 2 0.735 0.685 0.784
high 49 2 3 0.758 0.712 0.803
low 48 1 1 0.596 0.550 0.643
low 48 1 2 0.659 0.609 0.709
low 48 1 3 0.633 0.586 0.679
low 48 2 1 0.695 0.652 0.739
low 48 2 2 0.711 0.669 0.753
low 48 2 3 0.661 0.620 0.703
#learning phase plot
ggplot(kid_training_by_block, aes(block,accuracy,color=condition,group=condition))+
  geom_line(aes(linetype=condition),position=position_dodge(0.1),size=1.3)+
  geom_point(aes(shape=condition),position=position_dodge(0.1),size=2.5)+
  geom_errorbar(aes(ymin=accuracy-se,ymax=accuracy+se),width=0,size=0.5,position=position_dodge(.1))+
  xlab("Block")+
  ylab("Accuracy")+
  scale_linetype_discrete(name="Nameability")+
  scale_shape_discrete(name="Nameability")+
  scale_color_brewer(palette="Set1",name="Nameability")+
  #ggtitle("Performance during training")+
  geom_hline(yintercept=0.5, linetype="dashed",size=1)+
  theme(legend.position=c(0.3,0.3))+
  theme(text=element_text(size=18))+
  theme(strip.text.x = element_text(size=16), plot.background = element_rect(fill="white",color="white"))+
  facet_wrap(~round_factor)+
  ylim(0,1)

ggsave(here::here("figures","kids_training_half.png"),width=8, height=5,dpi=600)

Overall Accuracy

kid_training_summarized <- kid_subj %>%
  group_by(condition) %>%
  summarize(
    N=n(),
    avg_accuracy = mean(mean_learning_accuracy),
    avg_accuracy_ci = qt(0.975, N-1)*sd(mean_learning_accuracy,na.rm=TRUE)/sqrt(N),
    avg_accuracy_lower_ci = avg_accuracy - avg_accuracy_ci,
    avg_accuracy_upper_ci = avg_accuracy + avg_accuracy_ci
  )
kid_training_summarized %>%
  select(-avg_accuracy_ci) %>%
  mutate(
    ci = str_c("[",round(avg_accuracy_lower_ci,3),", ", round(avg_accuracy_upper_ci,3),"]")) %>%
  select(condition,N,avg_accuracy,ci) %>%
  kable(col.names=c("Condition", "N", "Average Accuracy","CI"),digits=3)
Condition N Average Accuracy CI
high 49 0.710 [0.672, 0.748]
low 48 0.659 [0.616, 0.703]
#effect size
cohens_d(mean_learning_accuracy ~ condition,data=kid_subj)
## Cohen's d |        95% CI
## -------------------------
## 0.36      | [-0.05, 0.76]
## 
## - Estimated using pooled SD.
# effect size, block 3
cohens_d(mean_learning_accuracy_block3 ~ condition,data=kid_subj)
## Cohen's d |       95% CI
## ------------------------
## 0.57      | [0.16, 0.97]
## 
## - Estimated using pooled SD.

Children vs. Adults

Main Model

#full model
m <- glmer(
  is_right~condition_c*block_c*round_c*age_group_c+(1+block_c*round_c|subject), 
  data=filter(d, trial_kind=="learn"),
  family=binomial,
  glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition_c * block_c * round_c * age_group_c + (1 +  
##     block_c * round_c | subject)
##    Data: filter(d, trial_kind == "learn")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   7629.0   7813.7  -3788.5   7577.0     8950 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.9494  0.0685  0.2398  0.5665  1.1669 
## 
## Random effects:
##  Groups  Name            Variance Std.Dev. Corr             
##  subject (Intercept)     0.84916  0.9215                    
##          block_c         0.04853  0.2203    0.93            
##          round_c         0.36008  0.6001    0.85  0.98      
##          block_c:round_c 0.08436  0.2905   -0.47 -0.76 -0.87
## Number of obs: 8976, groups:  subject, 187
## 
## Fixed effects:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                              2.11355    0.09150  23.098  < 2e-16
## condition_c                              0.92710    0.17230   5.381 7.42e-08
## block_c                                  0.54838    0.06976   7.860 3.83e-15
## round_c                                  0.78755    0.12255   6.426 1.31e-10
## age_group_c                              2.32214    0.17614  13.183  < 2e-16
## condition_c:block_c                      0.37435    0.11778   3.178 0.001481
## condition_c:round_c                      0.17975    0.21457   0.838 0.402193
## block_c:round_c                         -0.35570    0.13395  -2.655 0.007921
## condition_c:age_group_c                  1.28435    0.34374   3.736 0.000187
## block_c:age_group_c                      0.78478    0.12318   6.371 1.88e-10
## round_c:age_group_c                      0.67643    0.22238   3.042 0.002352
## condition_c:block_c:round_c             -0.16717    0.22690  -0.737 0.461267
## condition_c:block_c:age_group_c          0.27926    0.23531   1.187 0.235324
## condition_c:round_c:age_group_c          0.22303    0.42857   0.520 0.602779
## block_c:round_c:age_group_c             -0.24931    0.23576  -1.057 0.290290
## condition_c:block_c:round_c:age_group_c -0.08709    0.45320  -0.192 0.847604
##                                            
## (Intercept)                             ***
## condition_c                             ***
## block_c                                 ***
## round_c                                 ***
## age_group_c                             ***
## condition_c:block_c                     ** 
## condition_c:round_c                        
## block_c:round_c                         ** 
## condition_c:age_group_c                 ***
## block_c:age_group_c                     ***
## round_c:age_group_c                     ** 
## condition_c:block_c:round_c                
## condition_c:block_c:age_group_c            
## condition_c:round_c:age_group_c            
## block_c:round_c:age_group_c                
## condition_c:block_c:round_c:age_group_c    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#Anova(m, type="III") # chi-squared test yields virtually identical results
confint(m, method="Wald")[11:26,]
##                                              2.5 %     97.5 %
## (Intercept)                              1.9342064  2.2928926
## condition_c                              0.5893963  1.2647943
## block_c                                  0.4116436  0.6851146
## round_c                                  0.5473491  1.0277509
## age_group_c                              1.9769002  2.6673756
## condition_c:block_c                      0.1435052  0.6051891
## condition_c:round_c                     -0.2408058  0.6003071
## block_c:round_c                         -0.6182368 -0.0931577
## condition_c:age_group_c                  0.6106202  1.9580755
## block_c:age_group_c                      0.5433471  1.0262184
## round_c:age_group_c                      0.2405812  1.1122861
## condition_c:block_c:round_c             -0.6118915  0.2775464
## condition_c:block_c:age_group_c         -0.1819441  0.7404572
## condition_c:round_c:age_group_c         -0.6169528  1.0630175
## block_c:round_c:age_group_c             -0.7113844  0.2127653
## condition_c:block_c:round_c:age_group_c -0.9753545  0.8011649
coefs <- summary(m)$coef %>%
  as_tibble %>%
  mutate_at(c("Estimate","Std. Error", "z value", "Pr(>|z|)"), 
            function (x) signif(x, digits = 3)) %>%
  rename(SE = `Std. Error`, 
         z = `z value`,
         p = `Pr(>|z|)`)

rownames(coefs) <- c("Intercept",
                     "Condition",
                     "Block Number",
                     "Round",
                     "Age Group",
                     "Condition * Block Number",
                     "Condition * Round",
                     "Block Number * Round",
                     "Condition * Age Group",
                     "Block Number * Age Group",
                     "Round * Age Group",
                     "Condition * Block Number * Round",
                     "Condition * Block Number * Age Group",
                     "Condition * Round * Age Group",
                     "Block * Round * Age Group",
                     "Condition * Block Number * Round * Age Group")

write.table(coefs, file=here::here(model_output_path,"adult_vs_kid_lme_model_output.csv"),sep=",")

Simplified random effects

While the model with the full random-effects structure had a singular fit, simplifying the random-effects structure did not meaningfully affect the pattern of results. Below, we successively prune random effects until the fit is no longer singular. The simplified model yields highly similar results to the model with the full random-effects structure.

#### pruning random effects structure to address the singular fit
# remove random slope for block
# m <- glmer(
#   is_right~condition_c*block_c*round_c*age_group_c+(1+block_c+block_c:round_c|subject), 
#   data=filter(d, trial_kind=="learn"),
#   family=binomial,
#   glmerControl(optimizer="bobyqa"))
# model still yields a singular fit

# remove random slope for round
# m <- glmer(
#   is_right~condition_c*block_c*round_c*age_group_c+(1+round_c+block_c:round_c|subject), 
#   data=filter(d, trial_kind=="learn"),
#   family=binomial,
#   glmerControl(optimizer="bobyqa"))
# model still yields a singular fit

#retain only random intercept and random slope for block by round interaction
m <- glmer(
  is_right~condition_c*block_c*round_c*age_group_c+(1+block_c:round_c|subject), 
  data=filter(d, trial_kind=="learn"),
  family=binomial,
  glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition_c * block_c * round_c * age_group_c + (1 +  
##     block_c:round_c | subject)
##    Data: filter(d, trial_kind == "learn")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   7671.7   7806.6  -3816.8   7633.7     8957 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -10.4395   0.0848   0.2591   0.5560   1.1921 
## 
## Random effects:
##  Groups  Name            Variance Std.Dev. Corr 
##  subject (Intercept)     0.6619   0.8136        
##          block_c:round_c 0.1565   0.3956   -0.80
## Number of obs: 8976, groups:  subject, 187
## 
## Fixed effects:
##                                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)                              2.01412    0.07930  25.399  < 2e-16
## condition_c                              0.91625    0.15417   5.943 2.79e-09
## block_c                                  0.45778    0.05436   8.421  < 2e-16
## round_c                                  0.52080    0.09167   5.681 1.34e-08
## age_group_c                              2.22158    0.15551  14.286  < 2e-16
## condition_c:block_c                      0.37639    0.10856   3.467 0.000526
## condition_c:round_c                      0.15993    0.18313   0.873 0.382498
## block_c:round_c                         -0.52740    0.12305  -4.286 1.82e-05
## condition_c:age_group_c                  1.31402    0.30798   4.267 1.98e-05
## block_c:age_group_c                      0.70070    0.10862   6.451 1.11e-10
## round_c:age_group_c                      0.42989    0.18321   2.346 0.018951
## condition_c:block_c:round_c             -0.16807    0.22659  -0.742 0.458250
## condition_c:block_c:age_group_c          0.30670    0.21711   1.413 0.157762
## condition_c:round_c:age_group_c          0.27354    0.36613   0.747 0.455007
## block_c:round_c:age_group_c             -0.41194    0.23070  -1.786 0.074154
## condition_c:block_c:round_c:age_group_c -0.05257    0.45267  -0.116 0.907548
##                                            
## (Intercept)                             ***
## condition_c                             ***
## block_c                                 ***
## round_c                                 ***
## age_group_c                             ***
## condition_c:block_c                     ***
## condition_c:round_c                        
## block_c:round_c                         ***
## condition_c:age_group_c                 ***
## block_c:age_group_c                     ***
## round_c:age_group_c                     *  
## condition_c:block_c:round_c                
## condition_c:block_c:age_group_c            
## condition_c:round_c:age_group_c            
## block_c:round_c:age_group_c             .  
## condition_c:block_c:round_c:age_group_c    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Color Word and Vocabulary Knowledge

Color Naming

#color sets
high_colors <- c("blue","brown","orange","red","purple","yellow")
low_colors <- c("chartreuse","honeydew","sienna","mauve","teal","turquoise")
color_naming <- color_naming %>%
  mutate(
    color_nameability = case_when(
      color %in% high_colors ~ "high",
      color %in% low_colors ~ "low"
    )
  )

#summarize
summarize_color_naming <- color_naming %>%
  group_by(age_group, color_nameability) %>%
  summarize(
    N = n(),
    mean_simpson_diversity = mean(simpson_diversity,na.rm=TRUE),
    simpson_diversity_ci = qt(0.975, N-1)*sd(simpson_diversity,na.rm=TRUE)/sqrt(N),
    simpson_diversity_lower_ci = mean_simpson_diversity - simpson_diversity_ci,
    simpson_diversity_upper_ci = mean_simpson_diversity + simpson_diversity_ci
  )
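
The simpson_diversity values arrive precomputed in the naming files. As a sketch of the underlying index (assuming the naming-agreement convention of Zettersten & Lupyan, 2020, under which higher values indicate more consistent naming), Simpson diversity can be computed as the probability that two responses drawn at random without replacement use the same name:

#hypothetical helper illustrating the Simpson diversity index over naming responses
simpson_div <- function(responses) {
  n_i <- table(responses) #count of each unique name
  N <- sum(n_i)
  sum(n_i * (n_i - 1)) / (N * (N - 1))
}
simpson_div(c("blue","blue","blue","blue"))  #full agreement -> 1
simpson_div(c("teal","green","blue","aqua")) #no agreement -> 0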

Overall Simpson Diversity

summarize_color_naming %>%
  select(-simpson_diversity_ci) %>%
  kable(digits=2,caption="Average Simpson Diversity by age group and color nameability", col.names=c("Age Group","Color Nameability","N","Average Simpson Diversity", "Lower 95% CI","Upper 95% CI"))
Average Simpson Diversity by age group and color nameability
Age Group Color Nameability N Average Simpson Diversity Lower 95% CI Upper 95% CI
adult high 6 1.00 0.99 1.01
adult low 6 0.20 0.12 0.28
kid high 6 0.96 0.91 1.02
kid low 6 0.24 0.12 0.35

Simpson Diversity by Color

#table of Simpson diversity by color
color_naming %>%
  select(age_group,color_nameability,color,simpson_diversity) %>%
  group_by(color_nameability,color) %>%
  pivot_wider(
    names_from = age_group,
    values_from = simpson_diversity
  ) %>%
  arrange(color_nameability,color) %>%
  kable(digits=2,caption="Average Simpson Diversity by color for each age group",col.names=
          c("Color Nameability","Color","Adults","Children"))
Average Simpson Diversity by color for each age group
Color Nameability Color Adults Children
high blue 1.00 1.00
high brown 1.00 1.00
high orange 1.00 1.00
high purple 0.98 0.94
high red 1.00 0.98
high yellow 1.00 0.86
low chartreuse 0.20 0.32
low honeydew 0.30 0.42
low mauve 0.10 0.15
low sienna 0.18 0.18
low teal 0.16 0.16
low turquoise 0.28 0.20

Adults

Adults named high nameability colors more consistently than low nameability colors.

t.test(
  filter(color_naming, age_group == "adult" & color_nameability == "high")$simpson_diversity,
  filter(color_naming, age_group == "adult" & color_nameability == "low")$simpson_diversity,
  var.equal=T)
## 
##  Two Sample t-test
## 
## data:  filter(color_naming, age_group == "adult" & color_nameability == "high")$simpson_diversity and filter(color_naming, age_group == "adult" & color_nameability == "low")$simpson_diversity
## t = 25.207, df = 10, p-value = 2.212e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.7239879 0.8643924
## sample estimates:
## mean of x mean of y 
## 0.9962963 0.2021062

Children

Children named high nameability colors more consistently than low nameability colors.

t.test(
  filter(color_naming, age_group == "kid" & color_nameability == "high")$simpson_diversity,
  filter(color_naming, age_group == "kid" & color_nameability == "low")$simpson_diversity,
  var.equal=T)
## 
##  Two Sample t-test
## 
## data:  filter(color_naming, age_group == "kid" & color_nameability == "high")$simpson_diversity and filter(color_naming, age_group == "kid" & color_nameability == "low")$simpson_diversity
## t = 14.593, df = 10, p-value = 4.551e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.6146593 0.8361728
## sample estimates:
## mean of x mean of y 
## 0.9635276 0.2381115

Children vs. Adults

Naming consistency did not differ between children and adults, either for the highly nameable colors or for the more difficult-to-name colors.

High-Nameability Colors

#adults vs. kids
## high nameability colors
t.test(
  filter(color_naming, age_group == "adult" & color_nameability == "high")$simpson_diversity,
  filter(color_naming, age_group == "kid" & color_nameability == "high")$simpson_diversity,
  paired=T)
## 
##  Paired t-test
## 
## data:  filter(color_naming, age_group == "adult" & color_nameability == "high")$simpson_diversity and filter(color_naming, age_group == "kid" & color_nameability == "high")$simpson_diversity
## t = 1.4897, df = 5, p-value = 0.1965
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.02377573  0.08931321
## sample estimates:
## mean difference 
##      0.03276874

Low-Nameability Colors

## low nameability colors
t.test(
  filter(color_naming, age_group == "adult" & color_nameability == "low")$simpson_diversity,
  filter(color_naming, age_group == "kid" & color_nameability == "low")$simpson_diversity,
  paired=T)
## 
##  Paired t-test
## 
## data:  filter(color_naming, age_group == "adult" & color_nameability == "low")$simpson_diversity and filter(color_naming, age_group == "kid" & color_nameability == "low")$simpson_diversity
## t = -1.1739, df = 5, p-value = 0.2933
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.11485053  0.04283988
## sample estimates:
## mean difference 
##     -0.03600532
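The paired t-tests above pair the adult and child values by their row order in the filtered data, which assumes both subsets list the colors in the same order. As a transparency check (not part of the original analysis), the pairing can be made explicit by reshaping to one row per color before testing; shown here for the high nameability colors.

#hedged check: pair adult and child Simpson diversity explicitly by color
high_wide_check <- color_naming %>%
  filter(color_nameability == "high") %>%
  select(age_group, color, simpson_diversity) %>%
  pivot_wider(names_from = age_group, values_from = simpson_diversity)
t.test(high_wide_check$adult, high_wide_check$kid, paired = TRUE)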

Correlations

Correlation between children's and adults' color naming data

The correlation between children's and adults' naming was strong. This also held within the low nameability colors alone (there was little variation among the high nameability colors, where naming agreement was essentially at ceiling).

color_naming_wide <- color_naming %>%
  pivot_wider(names_from=age_group,values_from = number_responses:modal_response)

p1 <- ggplot(color_naming_wide,aes(simpson_diversity_kid,simpson_diversity_adult))+
  geom_jitter(aes(color=color_nameability),width=0.02,height=0.02)+
  geom_smooth(method="lm")

cor.test(color_naming_wide$simpson_diversity_kid,color_naming_wide$simpson_diversity_adult)
## 
##  Pearson's product-moment correlation
## 
## data:  color_naming_wide$simpson_diversity_kid and color_naming_wide$simpson_diversity_adult
## t = 19.327, df = 10, p-value = 2.999e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9523682 0.9964301
## sample estimates:
##       cor 
## 0.9868768

Correlation between children's naming data and nameability from the online survey

color_naming_wide <- color_naming_wide %>%
  left_join(color_properties_zl)

ggplot(color_naming_wide,aes(simpson_diversity_kid,simpson_diversity))+
  geom_jitter(aes(color=color_nameability),width=0.02,height=0.02)+
  geom_smooth(method="lm")

cor.test(color_naming_wide$simpson_diversity_kid,color_naming_wide$simpson_diversity)
## 
##  Pearson's product-moment correlation
## 
## data:  color_naming_wide$simpson_diversity_kid and color_naming_wide$simpson_diversity
## t = 13.562, df = 10, p-value = 9.174e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9067802 0.9928592
## sample estimates:
##       cor 
## 0.9738748

Naming Length

subj_color_naming <- d %>%
  distinct(subject,age_group,word_1_high,word_2_high,word_3_high,word_4_high,word_5_high,word_6_high,word_1_low,word_2_low,word_3_low,word_4_low,word_5_low,word_6_low) %>%
  pivot_longer(word_1_high:word_6_low,names_to="item",values_to="name") %>%
  separate(item,into=c("word","item_num","condition"),sep="_",remove=FALSE) %>%
  select(-word) %>%
  #could possibly remove some non-alphanumeric characters, but the resulting description length metric is very, very correlated with the metric where non-alphanumeric characters are not removed, so this has little impact on the results
  #mutate(name_clean=str_replace_all(name, regex("\\W+"), "")) %>%
  mutate(
    name_length = str_length(name)
  ) %>%
  mutate(
    color = case_when(
      item == "word_1_high" ~ "brown",
      item == "word_2_high" ~ "blue",
      item == "word_3_high" ~ "orange",
      item == "word_4_high" ~ "yellow",
      item == "word_5_high" ~   "red",
      item == "word_6_high" ~ "purple",
      item == "word_1_low"   ~ "sienna",
      item == "word_2_low"  ~ "mauve",
      item == "word_3_low"  ~ "turquoise",
      item == "word_4_low"  ~ "chartreuse",
      item == "word_5_low"  ~ "teal",
      item == "word_6_low"  ~ "honeydew"
    )
  ) 
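As a sanity check suggested by the comment in the chunk above (not part of the reported analysis), one can also compute a cleaned length with non-alphanumeric characters stripped and confirm that it is very highly correlated with the raw character count.

#hedged sketch: compare raw name length with a cleaned version that strips
#non-alphanumeric characters (e.g., spaces and hyphens)
naming_length_check <- subj_color_naming %>%
  mutate(
    name_clean = str_replace_all(name, regex("\\W+"), ""),
    name_length_clean = str_length(name_clean)
  )
cor(naming_length_check$name_length, naming_length_check$name_length_clean,
    use = "complete.obs")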

subj_summarized_naming_length <- subj_color_naming %>%
  group_by(subject,age_group,condition) %>%
  summarize(
    avg_length=mean(name_length)
  )

color_summarized_naming_length <- subj_color_naming %>%
  group_by(age_group,condition,color) %>%
  summarize(
    N=n(),
    avg_length=mean(name_length),
    sd = sd(name_length),
    ci = qt(0.975, N-1)*sd/sqrt(N),
  ) %>%
  left_join(color_properties_zl)

color_summarized_naming_length %>%
  select(age_group,condition, color, RGB, N, avg_length, sd, ci) %>%
  kable()
age_group condition color RGB N avg_length sd ci
adult high blue [30, 90, 210] 90 4.000000 0.0000000 0.0000000
adult high brown [120, 80, 40] 90 5.011111 0.1054093 0.0220775
adult high orange [250, 120, 30] 90 6.800000 0.4022409 0.0842477
adult high purple [130, 30, 180] 90 6.711111 0.4557854 0.0954624
adult high red [220, 20, 0] 90 3.000000 0.0000000 0.0000000
adult high yellow [250, 240, 0] 90 6.000000 0.0000000 0.0000000
adult low chartreuse [170, 160, 40] 90 9.511111 4.1926754 0.8781389
adult low honeydew [220, 240, 150] 90 10.333333 3.1265446 0.6548421
adult low mauve [200, 170, 170] 90 7.611111 3.0673871 0.6424518
adult low sienna [200, 100, 70] 90 8.100000 3.7597813 0.7874709
adult low teal [70, 100, 90] 90 9.111111 2.8499732 0.5969153
adult low turquoise [150, 200, 180] 90 6.144444 2.9205245 0.6116920
kid high blue [30, 90, 210] 97 4.000000 0.0000000 0.0000000
kid high brown [120, 80, 40] 97 5.134021 0.3424442 0.0690178
kid high orange [250, 120, 30] 97 6.134021 0.3424442 0.0690178
kid high purple [130, 30, 180] 97 6.690722 0.7268772 0.1464982
kid high red [220, 20, 0] 97 3.206186 0.4773657 0.0962105
kid high yellow [250, 240, 0] 97 6.154639 1.1487331 0.2315210
kid low chartreuse [170, 160, 40] 97 8.402062 4.6852157 0.9442800
kid low honeydew [220, 240, 150] 97 9.206186 4.0232091 0.8108561
kid low mauve [200, 170, 170] 97 6.845361 3.8224670 0.7703977
kid low sienna [200, 100, 70] 97 6.948454 3.7538068 0.7565596
kid low teal [70, 100, 90] 97 8.061856 3.7744051 0.7607110
kid low turquoise [150, 200, 180] 97 6.855670 3.1257989 0.6299879

Color Comprehension

#color comprehension scores
subj_color_comprehension <- d %>%
  distinct(subject,age_group,color_ppvt_high,color_ppvt_low) %>%
  mutate(
    percent_color_ppvt_high = color_ppvt_high / 6,
    percent_color_ppvt_low = color_ppvt_low / 6
  )

#summarize
summarize_color_comprehension <- subj_color_comprehension %>%
  group_by(age_group) %>%
  summarize(
    N = n(),
    mean_color_ppvt_high = mean(color_ppvt_high,na.rm=TRUE),
    mean_color_ppvt_low = mean(color_ppvt_low,na.rm=TRUE),
    mean_percent_color_ppvt_high = mean(percent_color_ppvt_high,na.rm=TRUE),
    mean_percent_color_ppvt_low = mean(percent_color_ppvt_low,na.rm=TRUE),
    percent_color_ppvt_high_ci = qt(0.975, N-1)*sd(percent_color_ppvt_high,na.rm=TRUE)/sqrt(N),
    percent_color_ppvt_high_lower_ci = mean_percent_color_ppvt_high - percent_color_ppvt_high_ci,
    percent_color_ppvt_high_upper_ci = mean_percent_color_ppvt_high + percent_color_ppvt_high_ci,
    percent_color_ppvt_low_ci = qt(0.975, N-1)*sd(percent_color_ppvt_low ,na.rm=TRUE)/sqrt(N),
    percent_color_ppvt_low_lower_ci = mean_percent_color_ppvt_low - percent_color_ppvt_low_ci,
    percent_color_ppvt_low_upper_ci = mean_percent_color_ppvt_low + percent_color_ppvt_low_ci
  )

#accuracy by color
summarize_color_accuracy <- d %>%
  distinct(subject,age_group,ppvt_color1_high,ppvt_color2_high,ppvt_color3_high,ppvt_color4_high,ppvt_color5_high,ppvt_color6_high,ppvt_color1_low,ppvt_color2_low,ppvt_color3_low,ppvt_color4_low,ppvt_color5_low,ppvt_color6_low) %>%
  pivot_longer(cols = ppvt_color1_high:ppvt_color6_low,names_to="color_stim",values_to = "correct") %>%
  mutate(
    color = case_when(
      color_stim == "ppvt_color1_high" ~ "purple",
      color_stim == "ppvt_color2_high"  ~ "orange",
      color_stim == "ppvt_color3_high" ~ "yellow",
      color_stim == "ppvt_color4_high" ~ "brown",
      color_stim == "ppvt_color5_high" ~    "blue",
      color_stim == "ppvt_color6_high" ~ "red",
      color_stim == "ppvt_color1_low"    ~ "mauve",
      color_stim == "ppvt_color2_low"   ~ "chartreuse",
      color_stim == "ppvt_color3_low"   ~ "sienna",
      color_stim == "ppvt_color4_low"   ~ "teal",
      color_stim == "ppvt_color5_low"   ~ "turquoise",
      color_stim == "ppvt_color6_low"   ~ "honeydew"
    )
  ) %>%
  mutate(
    color_nameability = case_when(
      str_detect(color_stim,"high") ~ "high",
      str_detect(color_stim,"low") ~ "low")
  ) %>%
  group_by(age_group,color,color_nameability) %>%
  summarize(
    comprehension_accuracy = mean(correct)
  ) %>%
  pivot_wider(
    names_from = age_group,
    values_from = comprehension_accuracy
  ) %>%
  arrange(color_nameability,color)

Overall Color Comprehension

summarize_color_comprehension %>%
  mutate(
     percent_color_ppvt_high_ci = str_c("[",round(percent_color_ppvt_high_lower_ci,3),", ", round(percent_color_ppvt_high_upper_ci,3),"]"),
    percent_color_ppvt_low_ci = str_c("[",round(percent_color_ppvt_low_lower_ci,3),", ", round(percent_color_ppvt_low_upper_ci,3),"]"),
  ) %>%
  select(age_group,
         mean_percent_color_ppvt_high,
         percent_color_ppvt_high_ci,
         mean_percent_color_ppvt_low,
         percent_color_ppvt_low_ci) %>%
  kable(digits=3,col.names = c("Age Group","High Nameability Accuracy","High Nameability 95% CI","Low Nameability Accuracy","Low Nameability 95% CI"),caption="Average color comprehension by age group and color nameability")
Average color comprehension by age group and color nameability
Age Group High Nameability Accuracy High Nameability 95% CI Low Nameability Accuracy Low Nameability 95% CI
adult 1.000 [1, 1] 0.496 [0.448, 0.545]
kid 0.997 [0.99, 1.003] 0.261 [0.225, 0.298]

Comprehension by Color Word

Below is a table of average comprehension accuracy by color for each age group.

#table of comprehension accuracy by color
summarize_color_accuracy %>%
  kable(digits=2)
color color_nameability adult kid
blue high 1.00 1.00
brown high 1.00 1.00
orange high 1.00 0.99
purple high 1.00 1.00
red high 1.00 1.00
yellow high 1.00 0.99
chartreuse low 0.21 0.15
honeydew low 0.72 0.32
mauve low 0.74 0.46
sienna low 0.41 0.12
teal low 0.23 0.11
turquoise low 0.66 0.39

Adults

Adults were more accurate in identifying high nameability colors than low nameability colors in the comprehension task.

t.test(filter(subj_color_comprehension, age_group=="adult")$percent_color_ppvt_high,filter(subj_color_comprehension, age_group=="adult")$percent_color_ppvt_low,paired=TRUE)
## 
##  Paired t-test
## 
## data:  filter(subj_color_comprehension, age_group == "adult")$percent_color_ppvt_high and filter(subj_color_comprehension, age_group == "adult")$percent_color_ppvt_low
## t = 20.748, df = 89, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.4554652 0.5519422
## sample estimates:
## mean difference 
##       0.5037037

Children

Children were more accurate in identifying high nameability colors than low nameability colors in the comprehension task.

t.test(filter(subj_color_comprehension, age_group=="kid")$percent_color_ppvt_high,filter(subj_color_comprehension, age_group=="kid")$percent_color_ppvt_low,paired=TRUE)
## 
##  Paired t-test
## 
## data:  filter(subj_color_comprehension, age_group == "kid")$percent_color_ppvt_high and filter(subj_color_comprehension, age_group == "kid")$percent_color_ppvt_low
## t = 39.618, df = 96, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.6985496 0.7722407
## sample estimates:
## mean difference 
##       0.7353952

Children were still above chance (1 in 6) for low nameability colors.

t.test(filter(subj_color_comprehension, age_group=="kid")$percent_color_ppvt_low, mu=1/6)
## 
##  One Sample t-test
## 
## data:  filter(subj_color_comprehension, age_group == "kid")$percent_color_ppvt_low
## t = 5.1288, df = 96, p-value = 1.515e-06
## alternative hypothesis: true mean is not equal to 0.1666667
## 95 percent confidence interval:
##  0.2245935 0.2977433
## sample estimates:
## mean of x 
## 0.2611684
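Because these proportion scores are bounded, a nonparametric check can be reassuring. As a robustness sketch (not part of the original analysis), a one-sample Wilcoxon signed-rank test against the same chance level can be run as follows (output not shown here).

#hedged robustness sketch: Wilcoxon signed-rank test against chance (1/6)
wilcox.test(
  filter(subj_color_comprehension, age_group == "kid")$percent_color_ppvt_low,
  mu = 1/6)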

Vocabulary Test

#vocabulary test scores
subj_vocab_comprehension <- d %>%
  distinct(subject,condition,age_group,ppvt) %>%
  mutate(
    percent_ppvt = ppvt / 12
  )

#summarize
summarize_vocab_comprehension <- subj_vocab_comprehension %>%
  group_by(age_group,condition) %>%
  summarize(
    N = n(),
    mean_ppvt = mean(ppvt,na.rm=TRUE),
    mean_percent_ppvt = mean(percent_ppvt,na.rm=TRUE),
    percent_ppvt_ci = qt(0.975, N-1)*sd(percent_ppvt,na.rm=TRUE)/sqrt(N),
    percent_ppvt_lower_ci = mean_percent_ppvt - percent_ppvt_ci,
    percent_ppvt_upper_ci = mean_percent_ppvt + percent_ppvt_ci
  )

Accuracy on the vocabulary test did not differ between High and Low Nameability conditions for either adults or children.

#adults
t.test(filter(subj_vocab_comprehension,age_group=="adult"&condition=="high")$percent_ppvt,filter(subj_vocab_comprehension,age_group=="adult"&condition=="low")$percent_ppvt,var.equal=TRUE)
## 
##  Two Sample t-test
## 
## data:  filter(subj_vocab_comprehension, age_group == "adult" & condition == "high")$percent_ppvt and filter(subj_vocab_comprehension, age_group == "adult" & condition == "low")$percent_ppvt
## t = 0, df = 88, p-value = 1
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02125711  0.02125711
## sample estimates:
## mean of x mean of y 
## 0.9796296 0.9796296
#kids
t.test(filter(subj_vocab_comprehension,age_group=="kid"&condition=="high")$percent_ppvt,filter(subj_vocab_comprehension,age_group=="kid"&condition=="low")$percent_ppvt,var.equal=TRUE)
## 
##  Two Sample t-test
## 
## data:  filter(subj_vocab_comprehension, age_group == "kid" & condition == "high")$percent_ppvt and filter(subj_vocab_comprehension, age_group == "kid" & condition == "low")$percent_ppvt
## t = -0.21534, df = 95, p-value = 0.83
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.05865504  0.04717545
## sample estimates:
## mean of x mean of y 
## 0.7755102 0.7812500

On average, children scored lower than adults on the vocabulary test.

#comparison
t.test(filter(subj_vocab_comprehension,age_group=="adult")$percent_ppvt,filter(subj_vocab_comprehension,age_group=="kid")$percent_ppvt,var.equal=TRUE)
## 
##  Two Sample t-test
## 
## data:  filter(subj_vocab_comprehension, age_group == "adult")$percent_ppvt and filter(subj_vocab_comprehension, age_group == "kid")$percent_ppvt
## t = 13.701, df = 185, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1722968 0.2302614
## sample estimates:
## mean of x mean of y 
## 0.9796296 0.7783505
#adult average performance
t.test(filter(subj_vocab_comprehension,age_group=="adult")$percent_ppvt)
## 
##  One Sample t-test
## 
## data:  filter(subj_vocab_comprehension, age_group == "adult")$percent_ppvt
## t = 184.21, df = 89, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.9690626 0.9901966
## sample estimates:
## mean of x 
## 0.9796296
#kids average performance
t.test(filter(subj_vocab_comprehension,age_group=="kid")$percent_ppvt)
## 
##  One Sample t-test
## 
## data:  filter(subj_vocab_comprehension, age_group == "kid")$percent_ppvt
## t = 58.699, df = 96, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.7520296 0.8046715
## sample estimates:
## mean of x 
## 0.7783505

Relation between category learning and color word knowledge

To investigate whether variability in participants’ knowledge of low nameability color words predicted category learning accuracy, we fit a linear model, separately for adults and for children, predicting overall category learning accuracy from low nameability color comprehension, condition (centered; high=0.5, low=-0.5), and their interaction.
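The centered condition variable condition_c is created upstream of this script. As a quick reference check of the coding described above (not part of the original analysis), the mapping between condition and condition_c can be displayed directly.

#hedged reference check: display the condition coding (expected: high = 0.5, low = -0.5)
d %>%
  distinct(condition, condition_c) %>%
  arrange(condition)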

Adults

#summarize training accuracy by subject
subj_accuracy <- d %>%
  filter(trial_kind=="learn") %>%
  group_by(subject,age_group,age_m,age_c,condition,condition_c,color_ppvt_high,color_ppvt_low) %>%
  dplyr::summarize(
    training_accuracy=mean(is_right,na.rm=TRUE),
    percent_color_ppvt_high = mean(color_ppvt_high) / 6,
    percent_color_ppvt_low = mean(color_ppvt_low) / 6,
    percent_ppvt = mean(ppvt) / 12
  )

#fit interaction model
m <- lm(training_accuracy ~ condition_c * percent_color_ppvt_low,data=filter(subj_accuracy, age_group=="adult"))
summary(m)
## 
## Call:
## lm(formula = training_accuracy ~ condition_c * percent_color_ppvt_low, 
##     data = filter(subj_accuracy, age_group == "adult"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25334 -0.03819  0.02095  0.04213  0.15118 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         0.89882    0.02052  43.794   <2e-16 ***
## condition_c                         0.10001    0.04105   2.436   0.0169 *  
## percent_color_ppvt_low              0.03511    0.03771   0.931   0.3545    
## condition_c:percent_color_ppvt_low -0.03406    0.07542  -0.452   0.6526    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08101 on 86 degrees of freedom
## Multiple R-squared:  0.2314, Adjusted R-squared:  0.2046 
## F-statistic: 8.631 on 3 and 86 DF,  p-value: 4.527e-05

Children

#fit interaction model
m <- lm(training_accuracy ~ condition_c * color_ppvt_low,data=filter(subj_accuracy, age_group=="kid"))
summary(m)
## 
## Call:
## lm(formula = training_accuracy ~ condition_c * color_ppvt_low, 
##     data = filter(subj_accuracy, age_group == "kid"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23621 -0.10748 -0.02415  0.08485  0.32553 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 0.65553    0.02474  26.493  < 2e-16 ***
## condition_c                 0.14425    0.04949   2.915  0.00446 ** 
## color_ppvt_low              0.01663    0.01311   1.268  0.20785    
## condition_c:color_ppvt_low -0.05781    0.02622  -2.205  0.02993 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1378 on 93 degrees of freedom
## Multiple R-squared:  0.103,  Adjusted R-squared:  0.07406 
## F-statistic: 3.559 on 3 and 93 DF,  p-value: 0.01724
confint(m)
##                                   2.5 %      97.5 %
## (Intercept)                 0.606389420  0.70466062
## condition_c                 0.045977937  0.24252034
## color_ppvt_low             -0.009405936  0.04266085
## condition_c:color_ppvt_low -0.109877163 -0.00574359

Low nameability color knowledge predicted accuracy in the low nameability condition.

#recenter to predict effect of low nameability color knowledge in low nameability condition
subj_accuracy <- subj_accuracy %>%
  mutate(
    condition_low = condition_c + 0.5,
    condition_high = condition_c - 0.5
  ) %>%
  ungroup() %>%
  mutate(color_ppvt_low_c=color_ppvt_low-mean(color_ppvt_low,na.rm=TRUE),
         percent_ppvt_c=percent_ppvt-mean(percent_ppvt,na.rm=TRUE))
m <- lm(training_accuracy ~ condition_low * color_ppvt_low,data=filter(subj_accuracy, age_group=="kid"))
summary(m)
## 
## Call:
## lm(formula = training_accuracy ~ condition_low * color_ppvt_low, 
##     data = filter(subj_accuracy, age_group == "kid"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23621 -0.10748 -0.02415  0.08485  0.32553 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   0.58340    0.03480  16.764  < 2e-16 ***
## condition_low                 0.14425    0.04949   2.915  0.00446 ** 
## color_ppvt_low                0.04553    0.01713   2.657  0.00927 ** 
## condition_low:color_ppvt_low -0.05781    0.02622  -2.205  0.02993 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1378 on 93 degrees of freedom
## Multiple R-squared:  0.103,  Adjusted R-squared:  0.07406 
## F-statistic: 3.559 on 3 and 93 DF,  p-value: 0.01724
confint(m)
##                                    2.5 %      97.5 %
## (Intercept)                   0.51429315  0.65250776
## condition_low                 0.04597794  0.24252034
## color_ppvt_low                0.01150656  0.07955873
## condition_low:color_ppvt_low -0.10987716 -0.00574359

There was no effect of low nameability color knowledge in the high nameability condition.

#no effect of low nameability color knowledge in the high nameability condition
m <- lm(training_accuracy ~ condition_high * color_ppvt_low,data=filter(subj_accuracy, age_group=="kid"))
summary(m)
## 
## Call:
## lm(formula = training_accuracy ~ condition_high * color_ppvt_low, 
##     data = filter(subj_accuracy, age_group == "kid"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23621 -0.10748 -0.02415  0.08485  0.32553 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    0.72765    0.03518  20.682  < 2e-16 ***
## condition_high                 0.14425    0.04949   2.915  0.00446 ** 
## color_ppvt_low                -0.01228    0.01985  -0.619  0.53766    
## condition_high:color_ppvt_low -0.05781    0.02622  -2.205  0.02993 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1378 on 93 degrees of freedom
## Multiple R-squared:  0.103,  Adjusted R-squared:  0.07406 
## F-statistic: 3.559 on 3 and 93 DF,  p-value: 0.01724

Qualitatively equivalent results were obtained when controlling for (non-color) vocabulary knowledge and participant age.

m <- lm(training_accuracy ~ condition_c * color_ppvt_low*age_c+percent_ppvt,data=filter(subj_accuracy, age_group=="kid"))
summary(m)
## 
## Call:
## lm(formula = training_accuracy ~ condition_c * color_ppvt_low * 
##     age_c + percent_ppvt, data = filter(subj_accuracy, age_group == 
##     "kid"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26448 -0.09415 -0.01617  0.08270  0.31158 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       0.6512752  0.0871508   7.473 5.75e-11 ***
## condition_c                       0.1350071  0.0496514   2.719   0.0079 ** 
## color_ppvt_low                    0.0142645  0.0131133   1.088   0.2797    
## age_c                             0.0042929  0.0035527   1.208   0.2302    
## percent_ppvt                      0.0138131  0.1095594   0.126   0.9000    
## condition_c:color_ppvt_low       -0.0519919  0.0261441  -1.989   0.0499 *  
## condition_c:age_c                 0.0074234  0.0069948   1.061   0.2915    
## color_ppvt_low:age_c              0.0008444  0.0016308   0.518   0.6059    
## condition_c:color_ppvt_low:age_c -0.0032009  0.0032593  -0.982   0.3288    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1342 on 87 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2019, Adjusted R-squared:  0.1285 
## F-statistic: 2.751 on 8 and 87 DF,  p-value: 0.009332
confint(m, method="Wald")
##                                         2.5 %        97.5 %
## (Intercept)                       0.478053537  8.244969e-01
## condition_c                       0.036319490  2.336946e-01
## color_ppvt_low                   -0.011799644  4.032861e-02
## age_c                            -0.002768447  1.135422e-02
## percent_ppvt                     -0.203947994  2.315742e-01
## condition_c:color_ppvt_low       -0.103956130 -2.764225e-05
## condition_c:age_c                -0.006479558  2.132639e-02
## color_ppvt_low:age_c             -0.002396889  4.085710e-03
## condition_c:color_ppvt_low:age_c -0.009679121  3.277224e-03

Plot

subj_accuracy <- subj_accuracy %>%
  mutate(age_group_clean = case_when(
    age_group == "adult" ~ "adults",
    age_group == "kid" ~ "children"))
ggplot(subj_accuracy,aes(color_ppvt_low,training_accuracy, color=condition))+
  geom_jitter(width=0.1,height=0.02)+
  geom_smooth(method="lm")+
  ylab("Overall Categorization Accuracy")+
  scale_color_brewer(palette="Set1",name="Nameability")+
  xlab("Comprehension of harder to name color words")+
  facet_wrap(~age_group_clean)+
  theme(legend.position=c(0.3,0.3), plot.background = element_rect(fill="white",color="white"))+
  geom_hline(yintercept=0.5, linetype="dashed",size=1)

ggsave(here::here(figure_path,"kids_adults_low_color_ppvt.png"),width=7, height=5,dpi=600)

Additional test: Color Naming Length

subj_summarized_naming_length_wide <- subj_summarized_naming_length %>%
  pivot_wider(names_from = condition,values_from = avg_length) %>%
  rename(high_naming_length=high,low_naming_length=low)
subj_accuracy <- subj_accuracy %>%
  left_join(subj_summarized_naming_length_wide)
subj_accuracy <- subj_accuracy %>%
  group_by(age_group) %>%
  mutate(high_naming_length_c=high_naming_length-mean(high_naming_length,na.rm=TRUE),
         low_naming_length_c=low_naming_length-mean(low_naming_length,na.rm=TRUE))

Children

m <- lm(training_accuracy ~ condition_low * low_naming_length,data=filter(subj_accuracy, age_group=="kid"))
summary(m)
## 
## Call:
## lm(formula = training_accuracy ~ condition_low * low_naming_length, 
##     data = filter(subj_accuracy, age_group == "kid"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23154 -0.10126 -0.03123  0.09928  0.33627 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      0.625212   0.061665  10.139   <2e-16 ***
## condition_low                    0.072673   0.090478   0.803    0.424    
## low_naming_length                0.004445   0.007580   0.586    0.559    
## condition_low:low_naming_length -0.002936   0.011096  -0.265    0.792    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1429 on 93 degrees of freedom
## Multiple R-squared:  0.03512,    Adjusted R-squared:  0.003993 
## F-statistic: 1.128 on 3 and 93 DF,  p-value: 0.3418
confint(m)
##                                       2.5 %     97.5 %
## (Intercept)                      0.50275635 0.74766724
## condition_low                   -0.10699882 0.25234515
## low_naming_length               -0.01060764 0.01949713
## condition_low:low_naming_length -0.02497131 0.01909873
cor.test(filter(subj_accuracy,condition=="low"&age_group=="kid")$training_accuracy,filter(subj_accuracy,condition=="low"&age_group=="kid")$low_naming_length)
## 
##  Pearson's product-moment correlation
## 
## data:  filter(subj_accuracy, condition == "low" & age_group == "kid")$training_accuracy and filter(subj_accuracy, condition == "low" & age_group == "kid")$low_naming_length
## t = 0.5531, df = 46, p-value = 0.5829
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2076506  0.3571658
## sample estimates:
##        cor 
## 0.08127963

Adults

m <- lm(training_accuracy ~ condition_low * low_naming_length,data=filter(subj_accuracy, age_group=="adult"))
summary(m)
## 
## Call:
## lm(formula = training_accuracy ~ condition_low * low_naming_length, 
##     data = filter(subj_accuracy, age_group == "adult"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.24251 -0.03466  0.01655  0.04457  0.14454 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      0.786419   0.056810  13.843   <2e-16 ***
## condition_low                    0.189434   0.077901   2.432   0.0171 *  
## low_naming_length                0.010329   0.006614   1.562   0.1220    
## condition_low:low_naming_length -0.012381   0.008985  -1.378   0.1718    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0803 on 86 degrees of freedom
## Multiple R-squared:  0.2448, Adjusted R-squared:  0.2184 
## F-statistic: 9.291 on 3 and 86 DF,  p-value: 2.184e-05
confint(m)
##                                        2.5 %      97.5 %
## (Intercept)                      0.673485685 0.899352811
## condition_low                    0.034571431 0.344297079
## low_naming_length               -0.002818978 0.023477825
## condition_low:low_naming_length -0.030242750 0.005481175
cor.test(filter(subj_accuracy,condition=="low"&age_group=="adult")$training_accuracy,filter(subj_accuracy,condition=="low"&age_group=="adult")$low_naming_length)
## 
##  Pearson's product-moment correlation
## 
## data:  filter(subj_accuracy, condition == "low" & age_group == "adult")$training_accuracy and filter(subj_accuracy, condition == "low" & age_group == "adult")$low_naming_length
## t = 1.1777, df = 43, p-value = 0.2454
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1231532  0.4471053
## sample estimates:
##      cor 
## 0.176771

Plot

subj_accuracy <- subj_accuracy %>%
  mutate(age_group_clean = case_when(
    age_group == "adult" ~ "adults",
    age_group == "kid" ~ "children"))
ggplot(subj_accuracy,aes(low_naming_length,training_accuracy, color=condition))+
  geom_jitter(width=0.1,height=0.02)+
  geom_smooth(method="lm")+
  ylab("Overall Categorization Accuracy")+
  scale_color_brewer(palette="Set1",name="Nameability")+
  xlab("Naming Length for Low Nameability Colors")+
  facet_wrap(~age_group_clean)+
  theme(legend.position=c(0.3,0.3), plot.background = element_rect(fill="white",color="white"))+
  geom_hline(yintercept=0.5, linetype="dashed",size=1)

Generalization Phase

Adults

Accuracy by each stimulus type

adult_test_summarized <- adult_subj %>%
  group_by(condition) %>%
  summarize(
    N=n(),
    avg_test_prototype = mean(mean_test_accuracy_prototype),
    test_prototype_ci = qt(0.975, N-1)*sd(mean_test_accuracy_prototype,na.rm=TRUE)/sqrt(N),
    test_prototype_lower_ci = avg_test_prototype - test_prototype_ci,
    test_prototype_upper_ci = avg_test_prototype + test_prototype_ci,
    avg_test_novel = mean(mean_test_accuracy_novel),
    test_novel_ci = qt(0.975, N-1)*sd(mean_test_accuracy_novel,na.rm=TRUE)/sqrt(N),
    test_novel_lower_ci = avg_test_novel - test_novel_ci,
    test_novel_upper_ci = avg_test_novel + test_novel_ci,
  )
adult_test_summarized %>%
  select(-test_prototype_ci,-test_novel_ci) %>%
  mutate(
    prototype_ci = str_c("[",round(test_prototype_lower_ci,3),", ", round(test_prototype_upper_ci,3),"]"),
    novel_ci = str_c("[",round(test_novel_lower_ci,3),", ", round(test_novel_upper_ci,3),"]")
  ) %>%
  select(condition,N,avg_test_prototype,prototype_ci,avg_test_novel,novel_ci) %>%
  kable(col.names=c("Condition", "N", "Average Accuracy Prototype","Prototype CI", "Average Accuracy Novel","Novel CI"),digits=3)
Condition N Average Accuracy Prototype Prototype CI Average Accuracy Novel Novel CI
high 45 1.000 [1, 1] 0.694 [0.563, 0.826]
low 45 0.978 [0.951, 1.005] 0.744 [0.63, 0.859]

We also investigated the proportion of participants who consistently sorted the novel generalization exemplars into a single category, i.e., either always in accordance with the 100% predictive color feature or always in accordance with the 75% predictive color features.

#proportion of consistent categorizers
adult_consistent_count <- adult_subj %>%
  group_by(condition) %>%
  summarize(count_consistent=sum(mean_test_accuracy_novel==1 |mean_test_accuracy_novel==0),count_inconsistent=sum(mean_test_accuracy_novel!=1 &mean_test_accuracy_novel!=0), count_consistent_percent=count_consistent/(count_consistent+count_inconsistent))
chisq.test(as.matrix(select(adult_consistent_count, c(count_consistent,count_inconsistent))))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  as.matrix(select(adult_consistent_count, c(count_consistent,     count_inconsistent)))
## X-squared = 1.9003, df = 1, p-value = 0.168

Main Model

To test for differences between conditions, we fit a logistic mixed-effects model predicting trial-by-trial accuracy from condition (centered) while controlling for stimulus type (centered). We included a by-subject random intercept and a by-subject random slope for stimulus type.

m=glmer(is_right ~condition_c+stimulus_test_type_c+(1+stimulus_test_type_c|subject), data=filter(d, age_group=="adult" & trial_kind=="test"), family=binomial, 
  glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## is_right ~ condition_c + stimulus_test_type_c + (1 + stimulus_test_type_c |  
##     subject)
##    Data: filter(d, age_group == "adult" & trial_kind == "test")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    269.5    297.0   -128.8    257.5      714 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.90550  0.00504  0.00690  0.01181  1.67149 
## 
## Random effects:
##  Groups  Name                 Variance Std.Dev. Corr 
##  subject (Intercept)           55.46    7.447        
##          stimulus_test_type_c 260.53   16.141   -0.47
## Number of obs: 720, groups:  subject, 90
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            9.3631     1.3621   6.874 6.23e-12 ***
## condition_c            0.1538     1.3242   0.116    0.908    
## stimulus_test_type_c   1.3186     2.6992   0.489    0.625    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtn_
## condition_c -0.123       
## stmls_tst__  0.660 -0.162
confint(m, method="Wald")[4:6,]
##                          2.5 %    97.5 %
## (Intercept)           6.693492 12.032664
## condition_c          -2.441601  2.749120
## stimulus_test_type_c -3.971702  6.608805
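For interpretability, the logit-scale fixed effects can be converted to odds ratios; this is a reader-side convenience rather than part of the original script.

#hedged convenience: fixed effects expressed as odds ratios
exp(fixef(m))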

Children

Accuracy by each stimulus type

kid_test_summarized <- kid_subj %>%
  group_by(condition) %>%
  summarize(
    N=n(),
    avg_test_prototype = mean(mean_test_accuracy_prototype),
    test_prototype_ci = qt(0.975, N-1)*sd(mean_test_accuracy_prototype,na.rm=TRUE)/sqrt(N),
    test_prototype_lower_ci = avg_test_prototype - test_prototype_ci,
    test_prototype_upper_ci = avg_test_prototype + test_prototype_ci,
    avg_test_novel = mean(mean_test_accuracy_novel),
    test_novel_ci = qt(0.975, N-1)*sd(mean_test_accuracy_novel,na.rm=TRUE)/sqrt(N),
    test_novel_lower_ci = avg_test_novel - test_novel_ci,
    test_novel_upper_ci = avg_test_novel + test_novel_ci,
  )
kid_test_summarized %>%
  select(-test_prototype_ci,-test_novel_ci) %>%
  mutate(
    prototype_ci = str_c("[",round(test_prototype_lower_ci,3),", ", round(test_prototype_upper_ci,3),"]"),
    novel_ci = str_c("[",round(test_novel_lower_ci,3),", ", round(test_novel_upper_ci,3),"]")
  ) %>%
  select(condition,N,avg_test_prototype,prototype_ci,avg_test_novel,novel_ci) %>%
  kable(col.names=c("Condition", "N", "Average Accuracy Prototype","Prototype CI", "Average Accuracy Novel","Novel CI"),digits=3)
Condition N Average Accuracy Prototype Prototype CI Average Accuracy Novel Novel CI
high 49 0.694 [0.608, 0.78] 0.622 [0.521, 0.724]
low 48 0.656 [0.568, 0.744] 0.578 [0.487, 0.669]

We also investigated the proportion of participants who consistently sorted the novel generalization exemplars into a single category, i.e., either always in accordance with the 100% predictive color feature or always in accordance with the 75% predictive color features.

#proportion of consistent categorizers
kid_consistent_count <- kid_subj %>%
  group_by(condition) %>%
  summarize(count_consistent=sum(mean_test_accuracy_novel==1 |mean_test_accuracy_novel==0),count_inconsistent=sum(mean_test_accuracy_novel!=1 &mean_test_accuracy_novel!=0), count_consistent_percent=count_consistent/(count_consistent+count_inconsistent))
chisq.test(as.matrix(select(kid_consistent_count, c(count_consistent,count_inconsistent))))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  as.matrix(select(kid_consistent_count, c(count_consistent, count_inconsistent)))
## X-squared = 3.2064, df = 1, p-value = 0.07335

Main Model

To test for differences between conditions, we fit a logistic mixed-effects model predicting trial-by-trial accuracy from condition (centered) while controlling for stimulus type (centered). We included a by-subject random intercept and a by-subject random slope for stimulus type.

m=glmer(is_right ~condition_c+stimulus_test_type_c+(1+stimulus_test_type_c|subject), data=filter(d, age_group=="kid" & trial_kind=="test"), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## is_right ~ condition_c + stimulus_test_type_c + (1 + stimulus_test_type_c |  
##     subject)
##    Data: filter(d, age_group == "kid" & trial_kind == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##    972.8   1000.8   -480.4    960.8      770 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7238 -0.7941  0.4202  0.6244  1.3015 
## 
## Random effects:
##  Groups  Name                 Variance Std.Dev. Corr 
##  subject (Intercept)          0.7141   0.845         
##          stimulus_test_type_c 3.3492   1.830    -0.19
## Number of obs: 776, groups:  subject, 97
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            0.7467     0.1262   5.917 3.29e-09 ***
## condition_c            0.2362     0.2427   0.973    0.330    
## stimulus_test_type_c   0.3716     0.2622   1.417    0.156    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtn_
## condition_c  0.018       
## stmls_tst__ -0.057  0.004
confint(m, method="Wald")[4:6,]
##                           2.5 %    97.5 %
## (Intercept)           0.4993655 0.9941046
## condition_c          -0.2395276 0.7119437
## stimulus_test_type_c -0.1423476 0.8856098

Qualitatively similar results were obtained in a model additionally controlling for the effect of participant age.

#controlling for age
m=glmer(is_right ~condition_c+stimulus_test_type_c+age_c+(1+stimulus_test_type_c|subject), data=filter(d, age_group=="kid" & trial_kind=="test"), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition_c + stimulus_test_type_c + age_c + (1 +  
##     stimulus_test_type_c | subject)
##    Data: filter(d, age_group == "kid" & trial_kind == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##    952.6    985.1   -469.3    938.6      761 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2458 -0.8144  0.4131  0.6081  1.4270 
## 
## Random effects:
##  Groups  Name                 Variance Std.Dev. Corr 
##  subject (Intercept)          0.6132   0.7831        
##          stimulus_test_type_c 3.4399   1.8547   -0.25
## Number of obs: 768, groups:  subject, 96
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           0.80053    0.12737   6.285 3.28e-10 ***
## condition_c           0.17626    0.23767   0.742  0.45833    
## stimulus_test_type_c  0.35579    0.27062   1.315  0.18861    
## age_c                 0.05237    0.01726   3.035  0.00241 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtn_ stm___
## condition_c  0.006              
## stmls_tst__ -0.077  0.000       
## age_c        0.174 -0.036  0.011
confint(m, method="Wald")[4:7,]
##                            2.5 %     97.5 %
## (Intercept)           0.55088806 1.05016475
## condition_c          -0.28957430 0.64208713
## stimulus_test_type_c -0.17461774 0.88619765
## age_c                 0.01854667 0.08620298

Children vs. Adults

Main Model

Next, we tested for differences between children and adults by fitting a logistic mixed-effects model predicting trial-by-trial accuracy from condition, stimulus type, age group (centered; children vs. adults), and the two-way interactions of age group with condition and with stimulus type. We included a by-subject random intercept and a by-subject random slope for stimulus type.

#full model - does not converge
m <- glmer(
  is_right~(condition_c+stimulus_test_type_c)*age_group_c+(1+stimulus_test_type_c|subject), 
  data=filter(d, trial_kind=="test"),
  family=binomial,
  glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ (condition_c + stimulus_test_type_c) * age_group_c +  
##     (1 + stimulus_test_type_c | subject)
##    Data: filter(d, trial_kind == "test")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   1303.8   1351.6   -642.9   1285.8     1487 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.9331 -0.3284  0.1853  0.3837  1.5383 
## 
## Random effects:
##  Groups  Name                 Variance Std.Dev. Corr 
##  subject (Intercept)          2.035    1.427         
##          stimulus_test_type_c 9.375    3.062    -0.64
## Number of obs: 1496, groups:  subject, 187
## 
## Fixed effects:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       2.30450    0.21607  10.665  < 2e-16 ***
## condition_c                       0.29525    0.34591   0.854 0.393362    
## stimulus_test_type_c              1.55156    0.43676   3.552 0.000382 ***
## age_group_c                       2.83932    0.39241   7.236 4.64e-13 ***
## condition_c:age_group_c           0.05875    0.69225   0.085 0.932364    
## stimulus_test_type_c:age_group_c  2.73960    0.79674   3.438 0.000585 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtn_ stm___ ag_gr_ cn_:__
## condition_c  0.115                            
## stmls_tst__  0.132  0.003                     
## age_group_c  0.600  0.108  0.327              
## cndtn_c:g__  0.090  0.604 -0.008  0.103       
## stmls___:__  0.330  0.035  0.573  0.135  0.024
#Anova(m, type="III") # chi-squared test yields virtually identical results
confint(m, method="Wald")[4:9,]
##                                       2.5 %    97.5 %
## (Intercept)                       1.8810070 2.7279967
## condition_c                      -0.3827240 0.9732161
## stimulus_test_type_c              0.6955247 2.4075945
## age_group_c                       2.0702035 3.6084267
## condition_c:age_group_c          -1.2980295 1.4155325
## stimulus_test_type_c:age_group_c  1.1780082 4.3011823

We also compared adults and children in the consistency with which they sorted novel items into one category or the other. Adults were far more likely to consistently assign a given 2-color difference item to a given category in both the High Nameability condition and the Low Nameability condition.

###adults vs. kids 
adult_consistent_count$ageGroup <- "adults"
kid_consistent_count$ageGroup <- "kids"
consistent_count <- bind_rows(kid_consistent_count, adult_consistent_count)
#high
chisq.test(as.matrix(select(filter(consistent_count,condition=="high"), c(count_consistent,count_inconsistent))))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  as.matrix(select(filter(consistent_count, condition == "high"),     c(count_consistent, count_inconsistent)))
## X-squared = 15.407, df = 1, p-value = 8.668e-05
#low
chisq.test(as.matrix(select(filter(consistent_count,condition=="low"), c(count_consistent,count_inconsistent))))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  as.matrix(select(filter(consistent_count, condition == "low"),     c(count_consistent, count_inconsistent)))
## X-squared = 18.199, df = 1, p-value = 1.99e-05

Plot

kid_test_long <- kid_subj %>%
  pivot_longer(
    cols=c(mean_test_accuracy_prototype, mean_test_accuracy_novel),
    names_to="stimulus_type",
    names_prefix="mean_test_accuracy_",
    values_to="accuracy")

adult_test_long <- adult_subj %>%
  pivot_longer(
    cols=c(mean_test_accuracy_prototype, mean_test_accuracy_novel),
    names_to="stimulus_type",
    names_prefix="mean_test_accuracy_",
    values_to="accuracy")

p1 <- ggplot(kid_test_long,aes(stimulus_type,accuracy,fill=condition,color=condition))+
  geom_hline(yintercept=0.5, linetype="dashed",size=1)+
  geom_pirate(jitter_width=0.2,points_params=list(size=1.8,shape=21,alpha=0.9),violins_params=list(width=0.4))+
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  facet_wrap(~condition)+
  ylab("Accuracy")+
  scale_x_discrete(name="Stimulus Type",
                   limits=c("prototype","novel"))+
  ggtitle("Children")+
  theme(plot.title = element_text(hjust = 0.5,size=24), plot.background = element_rect(fill="white",color="white"))
p2 <- ggplot(adult_test_long,aes(stimulus_type,accuracy,fill=condition,color=condition))+
  geom_hline(yintercept=0.5, linetype="dashed",size=1)+
  geom_pirate(jitter_width=0.2,points_params=list(size=1.8,shape=21,alpha=0.9),violins_params=list(width=0.4))+
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  facet_wrap(~condition)+
  ylab("Accuracy")+
  scale_x_discrete(name="Stimulus Type",
                   limits=c("prototype","novel"))+
  ggtitle("Adults")+
  theme(plot.title = element_text(hjust = 0.5,size=24), plot.background = element_rect(fill="white",color="white"))
plot_grid(p2,p1,labels=c("A","B"),label_size=18)

ggsave(here::here("figures","generalization_accuracy.png"),width=12, height=6,dpi=600)

Relation between generalization phase accuracy and color word knowledge

To investigate the effect of low nameability color word comprehension on final generalization accuracy, we fit linear models, separately for adult and child participants, predicting generalization phase accuracy from low nameability color comprehension, condition (centered), and their interaction.

Adults

#summarize generalization (test) accuracy by subject
subj_test_accuracy <- d %>%
  filter(trial_kind=="test") %>%
  group_by(subject,age_group,age_m,age_c,condition,condition_c,color_ppvt_high,color_ppvt_low) %>%
  dplyr::summarize(
    test_accuracy=mean(is_right,na.rm=TRUE),
    percent_color_ppvt_high = mean(color_ppvt_high) / 6,
    percent_color_ppvt_low = mean(color_ppvt_low) / 6,
    percent_ppvt = mean(ppvt) / 12
  )

m <- lm(test_accuracy ~ condition_c * percent_color_ppvt_low,data=filter(subj_test_accuracy, age_group=="adult"))
summary(m)
## 
## Call:
## lm(formula = test_accuracy ~ condition_c * percent_color_ppvt_low, 
##     data = filter(subj_test_accuracy, age_group == "adult"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4929 -0.2089  0.1308  0.1540  0.2341 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         0.79438    0.05314  14.948   <2e-16 ***
## condition_c                         0.05695    0.10628   0.536    0.593    
## percent_color_ppvt_low              0.12518    0.09764   1.282    0.203    
## condition_c:percent_color_ppvt_low -0.15767    0.19528  -0.807    0.422    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2098 on 86 degrees of freedom
## Multiple R-squared:  0.02549,    Adjusted R-squared:  -0.008508 
## F-statistic: 0.7497 on 3 and 86 DF,  p-value: 0.5255

Children

m <- lm(test_accuracy ~ condition_c * percent_color_ppvt_low,data=filter(subj_test_accuracy, age_group=="kid"))
summary(m)
## 
## Call:
## lm(formula = test_accuracy ~ condition_c * percent_color_ppvt_low, 
##     data = filter(subj_test_accuracy, age_group == "kid"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44285 -0.16924  0.00296  0.14699  0.41825 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         0.63730    0.03996  15.947   <2e-16 ***
## condition_c                         0.11110    0.07993   1.390    0.168    
## percent_color_ppvt_low             -0.00702    0.12704  -0.055    0.956    
## condition_c:percent_color_ppvt_low -0.26920    0.25408  -1.059    0.292    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2225 on 93 degrees of freedom
## Multiple R-squared:  0.02058,    Adjusted R-squared:  -0.01101 
## F-statistic: 0.6515 on 3 and 93 DF,  p-value: 0.584

Session Info

sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.6.8
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] effectsize_0.8.3 ggpirate_0.1.2   janitor_2.2.0    lmerTest_3.1-3  
##  [5] car_3.1-1        carData_3.0-5    lme4_1.1-31      Matrix_1.5-1    
##  [9] cowplot_1.1.1    forcats_0.5.2    stringr_1.5.0    purrr_1.0.1     
## [13] readr_2.1.3      tidyr_1.3.0      tibble_3.2.1     ggplot2_3.4.2   
## [17] tidyverse_1.3.2  dplyr_1.1.2      plyr_1.8.8       here_1.0.1      
## [21] knitr_1.41      
## 
## loaded via a namespace (and not attached):
##  [1] TH.data_1.1-2       googledrive_2.0.0   minqa_1.2.5        
##  [4] colorspace_2.0-3    ellipsis_0.3.2      rprojroot_2.0.3    
##  [7] estimability_1.4.1  snakecase_0.11.0    parameters_0.20.2  
## [10] fs_1.5.2            rstudioapi_0.14     farver_2.1.1       
## [13] fansi_1.0.3         mvtnorm_1.1-3       lubridate_1.9.0    
## [16] xml2_1.3.3          codetools_0.2-18    splines_4.2.2      
## [19] cachem_1.0.6        jsonlite_1.8.4      nloptr_2.0.3       
## [22] broom_1.0.4         dbplyr_2.2.1        compiler_4.2.2     
## [25] httr_1.4.4          emmeans_1.8.4-1     backports_1.4.1    
## [28] assertthat_0.2.1    fastmap_1.1.0       gargle_1.2.1       
## [31] cli_3.6.1           htmltools_0.5.4     tools_4.2.2        
## [34] coda_0.19-4         gtable_0.3.1        glue_1.6.2         
## [37] Rcpp_1.0.9          cellranger_1.1.0    jquerylib_0.1.4    
## [40] vctrs_0.6.2         nlme_3.1-160        insight_0.19.0     
## [43] xfun_0.36           rvest_1.0.3         timechange_0.1.1   
## [46] lifecycle_1.0.3     googlesheets4_1.0.1 MASS_7.3-58.1      
## [49] zoo_1.8-11          scales_1.2.1        ragg_1.2.5         
## [52] hms_1.1.2           sandwich_3.0-2      RColorBrewer_1.1-3 
## [55] yaml_2.3.6          sass_0.4.4          stringi_1.7.8      
## [58] highr_0.10          bayestestR_0.13.0   boot_1.3-28        
## [61] systemfonts_1.0.4   rlang_1.1.0         pkgconfig_2.0.3    
## [64] evaluate_0.19       lattice_0.20-45     labeling_0.4.2     
## [67] tidyselect_1.2.0    magrittr_2.0.3      R6_2.5.1           
## [70] generics_0.1.3      multcomp_1.4-23     DBI_1.1.3          
## [73] mgcv_1.8-41         pillar_1.9.0        haven_2.5.1        
## [76] withr_2.5.0         survival_3.4-0      datawizard_0.6.5   
## [79] abind_1.4-5         modelr_0.1.10       crayon_1.5.2       
## [82] utf8_1.2.2          tzdb_0.3.0          rmarkdown_2.19     
## [85] grid_4.2.2          readxl_1.4.1        reprex_2.0.2       
## [88] digest_0.6.31       xtable_1.8-4        numDeriv_2016.8-1.1
## [91] textshaping_0.3.6   munsell_0.5.0       bslib_0.4.2