Childes data

This analysis covers all children in the North American (NA) and UK CHILDES corpora who have more than one timepoint between 12 and 36 months of age. Two things differ from the previous analyses: (1) I’m using a different age range of children, and (2) I’m using the MTLD measure from the childesr package. This measure is new to the package and not well documented.

# Input files, built with here(): per-collection child statistics
# (IND_VARS_*) and mean hypernym scores (PREDICT_VARS_*) for the
# NA and UK collections.
IND_VARS_NA <- here(
  "exploratory_analyses/17_taxonomic_childes/data/child_stats_eng_na.csv"
)
PREDICT_VARS_NA <- here(
  "exploratory_analyses/17_taxonomic_childes/data/mean_hypernym_eng_na.csv"
)
IND_VARS_UK <- here(
  "exploratory_analyses/17_taxonomic_childes/data/child_stats_eng_uk.csv"
)
PREDICT_VARS_UK <- here(
  "exploratory_analyses/17_taxonomic_childes/data/mean_hypernym_eng_uk.csv"
)


# Load the child-statistics and predictor tables for each collection.
# The child-statistics files are read with explicit column names and are
# tagged with the collection they came from.
# NOTE(review): with col_names given, read_csv() treats the first line of the
# file as data -- confirm these two files really have no header row.
independent_vars_na <- read_csv(
  IND_VARS_NA,
  col_names = c(
    "corpus", "target_child_name", "target_child_age", "num_utterances",
    "mlu_w", "num_types", "num_tokens", "hdd", "mtld", "mlu_m",
    "num_morphemes"
  )
) %>%
  mutate(collection = "NA")
predictor_vars_na <- read_csv(PREDICT_VARS_NA)

independent_vars_uk <- read_csv(
  IND_VARS_UK,
  col_names = c(
    "corpus", "target_child_name", "target_child_age", "num_utterances",
    "mlu_w", "num_types", "num_tokens", "hdd", "mtld", "mlu_m",
    "num_morphemes"
  )
) %>%
  mutate(collection = "UK")
predictor_vars_uk <- read_csv(PREDICT_VARS_UK)


# Stack the two collections into one predictor table and one child-statistics
# table.
tidy_pred_vars <- bind_rows(predictor_vars_na, predictor_vars_uk)

# A child can have several transcripts at the same age; average every numeric
# measure within collection / child / age / corpus so each age point
# contributes a single row.
tidy_ind_vars <- bind_rows(independent_vars_uk, independent_vars_na) %>%
  group_by(collection, target_child_name, target_child_age, corpus) %>%
  summarize_if(.predicate = is.numeric, .funs = mean)

# No `by` is given, so the join keys are whatever columns the two tables share.
full_df <- inner_join(tidy_ind_vars, tidy_pred_vars)

There are 232 children who satisfy these criteria.

# Children (corpus + name) that still have more than one row in full_df, i.e.
# more than one age point once same-age transcripts have been averaged.
multiple_transcript_children <- full_df %>%
  ungroup() %>%
  count(corpus, target_child_name) %>%
  filter(n >= 2) %>%
  select(corpus, target_child_name)

# Restrict full_df to the children listed in multiple_transcript_children,
# sort each child's rows by age, then add a within-child session counter
# (session_num) and a combined identifier (child_id = corpus + "_" + name).
# The result stays grouped by corpus and target_child_name.
full_df_by_session <- full_df %>%
  right_join(multiple_transcript_children) %>%
  arrange(corpus, target_child_name, target_child_age) %>%
  group_by(corpus, target_child_name) %>%
  mutate(
    session_num = seq_len(n()),
    child_id = paste0(corpus, "_", target_child_name)
  ) %>%
  select(child_id, target_child_age, session_num, everything())

# For every session, look up the same child's next session and compute the
# change in age and in MTLD between the two. The last session of each child
# has no successor, so its subsequent_* and delta_* values are NA.
full_df_with_delta <- full_df_by_session %>%
  # lead() must run within a child; state the grouping explicitly rather than
  # relying on whatever grouping the input happens to carry.
  group_by(corpus, target_child_name) %>%
  mutate(
    # order_by is an argument of lead() itself. Passed to mutate() as
    # `by = session_num` it would only create a column named `by` and would
    # not order anything.
    subsequent_age = lead(target_child_age, order_by = session_num),
    subsequent_mtld = lead(mtld, order_by = session_num),
    subsequent_tokens = lead(num_tokens, order_by = session_num),
    delta_age = subsequent_age - target_child_age,
    delta_mtld = subsequent_mtld - mtld
  )

Pairwise correlations

# Correlation plot of the predictor table, with the age column dropped first.
tidy_pred_vars %>%
  select(-target_child_age) %>%
  make_corr_plot()

Models predicting vocab size with previous mean vocab hypernym

These models predict the change in MTLD from the current timepoint to the next timepoint as a function of: (1) the child’s/parent’s mean hypernym level of vocab, (2) the change in age from timepoint 1 to timepoint 2, (3) MTLD at timepoint 1, (4) log number of child tokens at timepoint 1, and (5) log number of child tokens at timepoint 2.

MTLD

hypernyms all

CHILD

# delta_mtld ~ child_hypernyms_all, controlling for delta_age, mtld and the
# log token counts of the current and next session; random intercept and
# session_num slope per child_id. Terms are tidied and printed as a table.
lmer(
  delta_mtld ~ child_hypernyms_all + delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 3.3226528 0.3906014 8.5065054 fixed
child_hypernyms_all -0.5179360 0.1093310 -4.7373205 fixed
delta_age 0.1943315 0.0250463 7.7588755 fixed
mtld -0.7829746 0.0203930 -38.3942621 fixed
log(num_tokens) 0.2125548 0.0492574 4.3151863 fixed
log(subsequent_tokens) -0.0104818 0.0497629 -0.2106357 fixed
sd_(Intercept).child_id 1.1175458 NA NA child_id
sd_session_num.child_id 0.0685614 NA NA child_id
cor_(Intercept).session_num.child_id -0.2562873 NA NA child_id
sd_Observation.Residual 1.0436554 NA NA Residual
# delta_mtld ~ child_hyponyms_all, controlling for delta_age, mtld and the
# log token counts of the current and next session; random intercept and
# session_num slope per child_id. Terms are tidied and printed as a table.
lmer(
  delta_mtld ~ child_hyponyms_all + delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 2.8501344 0.3769596 7.5608478 fixed
child_hyponyms_all 0.3081902 0.1342489 2.2956618 fixed
delta_age 0.1794414 0.0254646 7.0466910 fixed
mtld -0.7753797 0.0202649 -38.2621426 fixed
log(num_tokens) 0.2425965 0.0488150 4.9697150 fixed
log(subsequent_tokens) 0.0005853 0.0498374 0.0117442 fixed
sd_(Intercept).child_id 1.1659156 NA NA child_id
sd_session_num.child_id 0.0715246 NA NA child_id
cor_(Intercept).session_num.child_id -0.2668122 NA NA child_id
sd_Observation.Residual 1.0446271 NA NA Residual
# delta_mtld ~ child_hypernyms_all + child_hyponyms_all, controlling for
# delta_age, mtld and the log token counts of the current and next session;
# random intercept and session_num slope per child_id.
lmer(
  delta_mtld ~ child_hypernyms_all + child_hyponyms_all +
    delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 3.2878750 0.3910386 8.4080579 fixed
child_hypernyms_all -0.4857506 0.1121216 -4.3323534 fixed
child_hyponyms_all 0.1780231 0.1369181 1.3002158 fixed
delta_age 0.1910278 0.0252079 7.5780864 fixed
mtld -0.7826421 0.0203900 -38.3835440 fixed
log(num_tokens) 0.2083489 0.0493616 4.2208717 fixed
log(subsequent_tokens) -0.0127375 0.0497916 -0.2558166 fixed
sd_(Intercept).child_id 1.1156205 NA NA child_id
sd_session_num.child_id 0.0682625 NA NA child_id
cor_(Intercept).session_num.child_id -0.2459639 NA NA child_id
sd_Observation.Residual 1.0435362 NA NA Residual

PARENT

# delta_mtld ~ parent_hypernyms_all, controlling for delta_age, mtld and the
# log token counts of the current and next session; random intercept and
# session_num slope per child_id. Terms are tidied and printed as a table.
lmer(
  delta_mtld ~ parent_hypernyms_all + delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 2.4551272 0.4026144 6.0979615 fixed
parent_hypernyms_all -0.0700735 0.1247350 -0.5617787 fixed
delta_age 0.1890162 0.0244858 7.7194343 fixed
mtld -0.7455593 0.0208114 -35.8245004 fixed
log(num_tokens) 0.2826684 0.0527083 5.3628797 fixed
log(subsequent_tokens) 0.0237697 0.0527309 0.4507745 fixed
sd_(Intercept).child_id 1.0746134 NA NA child_id
sd_session_num.child_id 0.0558001 NA NA child_id
cor_(Intercept).session_num.child_id -0.5392470 NA NA child_id
sd_Observation.Residual 1.0435735 NA NA Residual
# delta_mtld ~ parent_hyponyms_all, controlling for delta_age, mtld and the
# log token counts of the current and next session; random intercept and
# session_num slope per child_id. Terms are tidied and printed as a table.
lmer(
  delta_mtld ~ parent_hyponyms_all + delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 2.5123528 0.4128488 6.0854064 fixed
parent_hyponyms_all -0.1919922 0.1822757 -1.0533071 fixed
delta_age 0.1898546 0.0246572 7.6997691 fixed
mtld -0.7525245 0.0208214 -36.1419621 fixed
log(num_tokens) 0.3035777 0.0528207 5.7473232 fixed
log(subsequent_tokens) 0.0166772 0.0529411 0.3150137 fixed
sd_(Intercept).child_id 1.0847746 NA NA child_id
sd_session_num.child_id 0.0547328 NA NA child_id
cor_(Intercept).session_num.child_id -0.5445583 NA NA child_id
sd_Observation.Residual 1.0507541 NA NA Residual
# delta_mtld ~ parent_hypernyms_all + parent_hyponyms_all, controlling for
# delta_age, mtld and the log token counts of the current and next session;
# random intercept and session_num slope per child_id.
lmer(
  delta_mtld ~ parent_hypernyms_all + parent_hyponyms_all +
    delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 2.7010642 0.4128138 6.5430575 fixed
parent_hypernyms_all -0.1184262 0.1261851 -0.9385115 fixed
parent_hyponyms_all -0.4786621 0.1975930 -2.4224644 fixed
delta_age 0.1888548 0.0245421 7.6951475 fixed
mtld -0.7508370 0.0208349 -36.0374596 fixed
log(num_tokens) 0.2845241 0.0526732 5.4016843 fixed
log(subsequent_tokens) 0.0205256 0.0526775 0.3896460 fixed
sd_(Intercept).child_id 1.0887384 NA NA child_id
sd_session_num.child_id 0.0570999 NA NA child_id
cor_(Intercept).session_num.child_id -0.5427898 NA NA child_id
sd_Observation.Residual 1.0412568 NA NA Residual

BOTH

# delta_mtld ~ child and parent hypernyms_all and hyponyms_all together,
# controlling for delta_age, mtld and the log token counts of the current and
# next session; random intercept and session_num slope per child_id.
lmer(
  delta_mtld ~ child_hypernyms_all + parent_hypernyms_all +
    child_hyponyms_all + parent_hyponyms_all +
    delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 3.2576311 0.4220577 7.7184501 fixed
child_hypernyms_all -0.6109608 0.1197307 -5.1027909 fixed
parent_hypernyms_all 0.0332598 0.1298652 0.2561103 fixed
child_hyponyms_all 0.2460171 0.1407177 1.7483024 fixed
parent_hyponyms_all -0.4778946 0.2001013 -2.3882636 fixed
delta_age 0.1971178 0.0243072 8.1094516 fixed
mtld -0.7649144 0.0210032 -36.4189948 fixed
log(num_tokens) 0.2227676 0.0535333 4.1612941 fixed
log(subsequent_tokens) 0.0018281 0.0524447 0.0348581 fixed
sd_(Intercept).child_id 1.0126951 NA NA child_id
sd_session_num.child_id 0.0548253 NA NA child_id
cor_(Intercept).session_num.child_id -0.5031373 NA NA child_id
sd_Observation.Residual 1.0377438 NA NA Residual

hypernyms first

CHILD

# delta_mtld ~ child_hypernyms_first, controlling for delta_age, mtld and the
# log token counts of the current and next session; random intercept and
# session_num slope per child_id. Terms are tidied and printed as a table.
lmer(
  delta_mtld ~ child_hypernyms_first + delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 3.2235891 0.3864817 8.3408580 fixed
child_hypernyms_first -0.5229381 0.1156237 -4.5227579 fixed
delta_age 0.1954141 0.0251062 7.7835113 fixed
mtld -0.7815793 0.0203871 -38.3369797 fixed
log(num_tokens) 0.2257742 0.0488960 4.6174382 fixed
log(subsequent_tokens) -0.0092622 0.0497916 -0.1860192 fixed
sd_(Intercept).child_id 1.1219413 NA NA child_id
sd_session_num.child_id 0.0678836 NA NA child_id
cor_(Intercept).session_num.child_id -0.2673652 NA NA child_id
sd_Observation.Residual 1.0442476 NA NA Residual
# delta_mtld ~ child_hypernyms_first + child_hyponyms_all, controlling for
# delta_age, mtld and the log token counts of the current and next session;
# random intercept and session_num slope per child_id.
lmer(
  delta_mtld ~ child_hypernyms_first + child_hyponyms_all +
    delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 3.1954904 0.3862055 8.2740685 fixed
child_hypernyms_first -0.4954458 0.1167520 -4.2435753 fixed
child_hyponyms_all 0.2285663 0.1349059 1.6942655 fixed
delta_age 0.1914627 0.0252352 7.5871181 fixed
mtld -0.7813385 0.0203761 -38.3457751 fixed
log(num_tokens) 0.2185597 0.0490708 4.4539703 fixed
log(subsequent_tokens) -0.0126711 0.0498201 -0.2543367 fixed
sd_(Intercept).child_id 1.1166094 NA NA child_id
sd_session_num.child_id 0.0673961 NA NA child_id
cor_(Intercept).session_num.child_id -0.2519085 NA NA child_id
sd_Observation.Residual 1.0440003 NA NA Residual

PARENT

# delta_mtld ~ parent_hypernyms_first, controlling for delta_age, mtld and
# the log token counts of the current and next session; random intercept and
# session_num slope per child_id. Terms are tidied and printed as a table.
lmer(
  delta_mtld ~ parent_hypernyms_first + delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 2.4594054 0.4028852 6.104482 fixed
parent_hypernyms_first -0.0379819 0.1252290 -0.303299 fixed
delta_age 0.1889583 0.0245026 7.711751 fixed
mtld -0.7458942 0.0208189 -35.827676 fixed
log(num_tokens) 0.2823124 0.0527316 5.353765 fixed
log(subsequent_tokens) 0.0243834 0.0527335 0.462389 fixed
sd_(Intercept).child_id 1.0753888 NA NA child_id
sd_session_num.child_id 0.0558765 NA NA child_id
cor_(Intercept).session_num.child_id -0.5394238 NA NA child_id
sd_Observation.Residual 1.0435696 NA NA Residual
# delta_mtld ~ parent_hypernyms_first + parent_hyponyms_all, controlling for
# delta_age, mtld and the log token counts of the current and next session;
# random intercept and session_num slope per child_id.
lmer(
  delta_mtld ~ parent_hypernyms_first + parent_hyponyms_all +
    delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 2.6976939 0.4129212 6.5331929 fixed
parent_hypernyms_first -0.0800480 0.1263789 -0.6333967 fixed
parent_hyponyms_all -0.4667530 0.1971028 -2.3680692 fixed
delta_age 0.1889133 0.0245569 7.6928834 fixed
mtld -0.7509832 0.0208428 -36.0307851 fixed
log(num_tokens) 0.2842725 0.0527005 5.3941180 fixed
log(subsequent_tokens) 0.0213581 0.0526826 0.4054106 fixed
sd_(Intercept).child_id 1.0889180 NA NA child_id
sd_session_num.child_id 0.0571127 NA NA child_id
cor_(Intercept).session_num.child_id -0.5429819 NA NA child_id
sd_Observation.Residual 1.0413706 NA NA Residual

BOTH

# delta_mtld ~ child and parent hypernyms_first and hyponyms_all together,
# controlling for delta_age, mtld and the log token counts of the current and
# next session; random intercept and session_num slope per child_id.
lmer(
  delta_mtld ~ child_hypernyms_first + parent_hypernyms_first +
    child_hyponyms_all + parent_hyponyms_all +
    delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 3.1439936 0.4184528 7.5133770 fixed
child_hypernyms_first -0.6173603 0.1262165 -4.8912821 fixed
parent_hypernyms_first 0.0898299 0.1316641 0.6822653 fixed
child_hyponyms_all 0.3136954 0.1382720 2.2686842 fixed
parent_hyponyms_all -0.4646096 0.1998322 -2.3249989 fixed
delta_age 0.1970364 0.0243631 8.0874858 fixed
mtld -0.7640585 0.0210228 -36.3443010 fixed
log(num_tokens) 0.2338906 0.0533420 4.3847400 fixed
log(subsequent_tokens) 0.0035103 0.0524757 0.0668934 fixed
sd_(Intercept).child_id 1.0164440 NA NA child_id
sd_session_num.child_id 0.0543061 NA NA child_id
cor_(Intercept).session_num.child_id -0.5093642 NA NA child_id
sd_Observation.Residual 1.0384655 NA NA Residual

hyponyms leaf

CHILD

# delta_mtld ~ child_hyponyms_leaf, controlling for delta_age, mtld and the
# log token counts of the current and next session; random intercept and
# session_num slope per child_id. Terms are tidied and printed as a table.
lmer(
  delta_mtld ~ child_hyponyms_leaf + delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 2.8667826 0.3772981 7.5981902 fixed
child_hyponyms_leaf 0.2748196 0.1217628 2.2570081 fixed
delta_age 0.1768955 0.0255840 6.9143075 fixed
mtld -0.7735754 0.0202600 -38.1824003 fixed
log(num_tokens) 0.2415001 0.0489294 4.9356882 fixed
log(subsequent_tokens) 0.0027272 0.0498155 0.0547452 fixed
sd_(Intercept).child_id 1.1705970 NA NA child_id
sd_session_num.child_id 0.0707084 NA NA child_id
cor_(Intercept).session_num.child_id -0.2779667 NA NA child_id
sd_Observation.Residual 1.0448094 NA NA Residual
# delta_mtld ~ child_hypernyms_all + child_hyponyms_leaf, controlling for
# delta_age, mtld and the log token counts of the current and next session;
# random intercept and session_num slope per child_id.
lmer(
  delta_mtld ~ child_hypernyms_all + child_hyponyms_leaf +
    delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 3.3110874 0.3902806 8.4838646 fixed
child_hypernyms_all -0.4988977 0.1098115 -4.5432200 fixed
child_hyponyms_leaf 0.2219834 0.1215583 1.8261476 fixed
delta_age 0.1881750 0.0252821 7.4430112 fixed
mtld -0.7814888 0.0203852 -38.3360132 fixed
log(num_tokens) 0.2036279 0.0495265 4.1114960 fixed
log(subsequent_tokens) -0.0127819 0.0497740 -0.2567987 fixed
sd_(Intercept).child_id 1.1156562 NA NA child_id
sd_session_num.child_id 0.0673405 NA NA child_id
cor_(Intercept).session_num.child_id -0.2492370 NA NA child_id
sd_Observation.Residual 1.0434365 NA NA Residual

PARENT

# delta_mtld ~ parent_hyponyms_leaf, controlling for delta_age, mtld and the
# log token counts of the current and next session; random intercept and
# session_num slope per child_id. Terms are tidied and printed as a table.
lmer(
  delta_mtld ~ parent_hyponyms_leaf + delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 2.4181651 0.4071478 5.9392800 fixed
parent_hyponyms_leaf -0.0169953 0.1389437 -0.1223179 fixed
delta_age 0.1900083 0.0246318 7.7139389 fixed
mtld -0.7495543 0.0208439 -35.9604044 fixed
log(num_tokens) 0.3016714 0.0529134 5.7012280 fixed
log(subsequent_tokens) 0.0182479 0.0529582 0.3445718 fixed
sd_(Intercept).child_id 1.0782239 NA NA child_id
sd_session_num.child_id 0.0542837 NA NA child_id
cor_(Intercept).session_num.child_id -0.5432701 NA NA child_id
sd_Observation.Residual 1.0515024 NA NA Residual
# delta_mtld ~ parent_hypernyms_all + parent_hyponyms_leaf, controlling for
# delta_age, mtld and the log token counts of the current and next session;
# random intercept and session_num slope per child_id.
lmer(
  delta_mtld ~ parent_hypernyms_all + parent_hyponyms_leaf +
    delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 2.6041775 0.4077458 6.3867675 fixed
parent_hypernyms_all -0.0931201 0.1251389 -0.7441342 fixed
parent_hyponyms_leaf -0.3290750 0.1620998 -2.0300772 fixed
delta_age 0.1896830 0.0245327 7.7318395 fixed
mtld -0.7500581 0.0208482 -35.9771450 fixed
log(num_tokens) 0.2851794 0.0527068 5.4106762 fixed
log(subsequent_tokens) 0.0217371 0.0526941 0.4125150 fixed
sd_(Intercept).child_id 1.0848525 NA NA child_id
sd_session_num.child_id 0.0568584 NA NA child_id
cor_(Intercept).session_num.child_id -0.5418679 NA NA child_id
sd_Observation.Residual 1.0419633 NA NA Residual

BOTH

# delta_mtld ~ child and parent hypernyms_all and hyponyms_leaf together,
# controlling for delta_age, mtld and the log token counts of the current and
# next session; random intercept and session_num slope per child_id.
lmer(
  delta_mtld ~ child_hypernyms_all + parent_hypernyms_all +
    child_hyponyms_leaf + parent_hyponyms_leaf +
    delta_age + mtld +
    log(num_tokens) + log(subsequent_tokens) +
    (session_num | child_id),
  data = full_df_with_delta,
  control = lmerControl(optimizer = "bobyqa")
) %>%
  tidy() %>%
  kable()
term estimate std.error statistic group
(Intercept) 3.1929657 0.4175598 7.6467268 fixed
child_hypernyms_all -0.6356928 0.1167819 -5.4434172 fixed
parent_hypernyms_all 0.0654549 0.1283361 0.5100273 fixed
child_hyponyms_leaf 0.2670171 0.1242386 2.1492293 fixed
parent_hyponyms_leaf -0.3139801 0.1627750 -1.9289212 fixed
delta_age 0.1951064 0.0243755 8.0041877 fixed
mtld -0.7629508 0.0210071 -36.3187897 fixed
log(num_tokens) 0.2165729 0.0538395 4.0225656 fixed
log(subsequent_tokens) 0.0046007 0.0524090 0.0877838 fixed
sd_(Intercept).child_id 1.0082473 NA NA child_id
sd_session_num.child_id 0.0539129 NA NA child_id
cor_(Intercept).session_num.child_id -0.5039109 NA NA child_id
sd_Observation.Residual 1.0382978 NA NA Residual