Do kids with different perceptual abilities learn words in a different order?

# Directory holding the word-level CDI exports for the atypical groups.
ATYPICAL_CDI_PATH <- here("data/word_level_CDI/")
# Full paths of every file in that directory. list.files() returns them sorted
# alphabetically, and the code below picks files by position (files[1] to
# files[4]), so adding or renaming a file changes which data set is read where.
files <- list.files(ATYPICAL_CDI_PATH, full.names = TRUE)

# Read one word-level CDI export and reshape it to one row per child x item.
#
# path          CSV file to read.
# group_label   Group code stored in `group` and used as the subj_id prefix.
# form_label    Form code stored in `form`.
# item_cols     Positions of the item-response columns to stack.
# drop_patterns Substrings (of the cleaned column names) marking columns to
#               drop after reshaping.
#
# Returns a long tibble: the remaining child-level columns plus `item` (the
# original column header) and `value` (the raw response).
#
# NOTE(review): subj_id is just the row index within one file, so "hi_1" from
# the WG file and "hi_1" from the WS file share an id. Confirm that rows line
# up to the same children across files before treating subj_id as one child.
read_cdi_form <- function(path, group_label, form_label, item_cols,
                          drop_patterns) {
  read_csv(path) %>%
    # row_number() rather than 1:n(): 1:n() yields c(1, 0) for an empty file.
    # .env$ keeps the labels from being shadowed by same-named CSV columns.
    mutate(subj_id = paste0(.env$group_label, "_", row_number()),
           group = .env$group_label,
           form = .env$form_label) %>%
    pivot_longer(cols = all_of(item_cols),
                 names_to = "item") %>%
    clean_names() %>%
    select(-contains(drop_patterns))
}

# Words & Gestures form: items sit in columns 6:494.
hi_wg <- read_cdi_form(files[1], "hi", "WG", 6:494,
                       c("words_understood", "gestures_percentile"))

vi_wg <- read_cdi_form(files[3], "vi", "WG", 6:494,
                       c("words_understood", "gestures_percentile"))

wg_data <- bind_rows(hi_wg, vi_wg) %>%
  select(subj_id, group, age, sex, form, item, value)


# Words & Sentences form: items sit in columns 3:802.
hi_ws <- read_cdi_form(files[2], "hi", "WS", 3:802, "total_produced")


vi_ws <- read_cdi_form(files[4], "vi", "WS", 3:802, "total_produced")

ws_data <- bind_rows(hi_ws, vi_ws) %>%
  select(subj_id, group, age, sex, form, item, value)

# Stack both forms, keep only rows whose response is missing or one of the
# known answer labels, then recode the response to 0 (missing / negative
# answer) or 1 (any positive answer).
atypical_cdi_data_raw <- wg_data %>%
  bind_rows(ws_data) %>%
  filter(
    is.na(value) |
      value %in% c("understands", "never", "produces", "often", "not yet",
                   "sometimes", "yes", "no")
  ) %>%
  mutate(
    value = case_when(
      is.na(value) | value %in% c("never", "no", "not yet") ~ 0,
      value %in% c("often", "produces", "sometimes", "understands", "yes") ~ 1
    )
  )
# Item dictionary used to attach item metadata to the raw responses.
CDI_DICT <- here("exploratory_analyses/03_aoa_norms/wordbank_dict.csv")
cdi_dict <- read_csv(CDI_DICT)

# Attach the dictionary (joined on whatever columns the two tables share),
# keep word items that are nouns or predicates, drop unused metadata, and
# derive uni_lemma_clean: the part of uni_lemma before the first " (".
atypical_cdi_data <- left_join(atypical_cdi_data_raw, cdi_dict) %>%
  filter(type == "word") %>%
  filter(lexical_category %in% c("nouns", "predicates")) %>%
  select(-c(type, complexity_category, num_item_id, lexical_class)) %>%
  mutate(
    uni_lemma_clean = vapply(
      str_split(uni_lemma, " \\("),
      function(parts) parts[1],
      character(1)
    )
  )

Predicting known words with perceptual predictors for atypical kids

Nouns and predicates only.

# Concreteness norms: keep only the word and its conc_m score.
CONC_PATH <- here("data/brysbaert_concreteness.csv")
conc_data <- select(clean_names(read_csv(CONC_PATH)), word, conc_m)

# Sensorimotor norms: keep the first seven columns and lower-case the word so
# it can be matched against lower-case lemmas.
SENSORY_PATH <- here("data/Lancaster_sensorimotor_norms_for_39707_words.csv")
sensory_data <- select(clean_names(read_csv(SENSORY_PATH)), 1:7)
sensory_data <- mutate(sensory_data, word = tolower(word))

# Attach both norm tables to the item-level responses, matching the cleaned
# lemma against each table's `word` column. Lemmas without a norm keep NA.
atypical_with_norms <- left_join(atypical_cdi_data, sensory_data,
                                 by = c(uni_lemma_clean = "word"))
atypical_with_norms <- left_join(atypical_with_norms, conc_data,
                                 by = c(uni_lemma_clean = "word"))

# Per group and form, summarise each norm over the words marked as known
# (value == 1). multi_boot_standard() supplies the mean / ci_lower / ci_upper
# columns that the plot of means_by_group uses.
means_by_group <- atypical_with_norms %>%
  # NOTE(review): the norm columns are picked by position (14:20); presumably
  # the sensorimotor means plus conc_m. Confirm against
  # names(atypical_with_norms), since any upstream column change shifts them.
  pivot_longer(cols = 14:20, values_to = "norm_value") %>%
  filter(value == 1) %>%
  #group_by(subj_id, group, form, name, value) %>%
#  summarize(norm_value = mean(norm_value, na.rm = T)) %>%
  group_by(group, form, name, value) %>%
  multi_boot_standard(col = "norm_value", na.rm = TRUE)

# Point-range plot of the norm means with their intervals: one colour per
# group, one panel per form x value combination, x labels rotated to fit.
ggplot(data = means_by_group,
       mapping = aes(x = name, y = mean, colour = group, group = group)) +
  geom_pointrange(mapping = aes(ymin = ci_lower, ymax = ci_upper)) +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_wrap(form ~ value)

Additive model with all predictors:

# Additive model: all six sensorimotor means plus group, age, form, sex and
# lexical category, with random intercepts for word and child.
# lmer() is called directly because glmer() without a `family` argument is only
# a deprecated shortcut to lmer(); the fit is a linear mixed model either way.
# NOTE(review): `value` is coded 0/1. If a logistic model was intended, use
# glmer(..., family = binomial) and regenerate the printed output.
lmer(value ~ group + age + form + sex + lexical_category +
       auditory_mean + gustatory_mean + haptic_mean + interoceptive_mean +
       olfactory_mean + visual_mean +
       (1 | uni_lemma_clean) + (1 | subj_id),
     data  = atypical_with_norms) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: value ~ group + age + form + sex + lexical_category + auditory_mean +  
##     gustatory_mean + haptic_mean + interoceptive_mean + olfactory_mean +  
##     visual_mean + (1 | uni_lemma_clean) + (1 | subj_id)
##    Data: atypical_with_norms
## 
## REML criterion at convergence: 2992.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7834 -0.5422 -0.1094  0.1935  3.7708 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  uni_lemma_clean (Intercept) 0.006919 0.08318 
##  subj_id         (Intercept) 0.072169 0.26864 
##  Residual                    0.077579 0.27853 
## Number of obs: 8494, groups:  uni_lemma_clean, 448; subj_id, 16
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)                 0.708559   0.122562   5.781
## groupvi                     0.124697   0.138949   0.897
## age                        -0.051612   0.002679 -19.262
## formWS                      0.590264   0.024793  23.808
## sexMale                    -0.292960   0.012064 -24.284
## lexical_categorypredicates -0.022703   0.014045  -1.616
## auditory_mean               0.012139   0.004707   2.579
## gustatory_mean              0.007839   0.005427   1.444
## haptic_mean                 0.011208   0.004824   2.323
## interoceptive_mean          0.016997   0.006749   2.519
## olfactory_mean              0.002437   0.007187   0.339
## visual_mean                 0.006643   0.007974   0.833
## 
## Correlation of Fixed Effects:
##             (Intr) groupv age    formWS sexMal lxcl_c adtry_ gsttr_ hptc_m
## groupvi     -0.714                                                        
## age         -0.301  0.018                                                 
## formWS       0.255 -0.009 -0.942                                          
## sexMale     -0.174  0.003  0.364 -0.237                                   
## lxcl_ctgryp -0.127  0.000  0.000 -0.012  0.000                            
## auditory_mn -0.109  0.000  0.000  0.010  0.000  0.048                     
## gustatry_mn -0.061  0.000  0.000 -0.011  0.000  0.109  0.211              
## haptic_mean -0.118  0.000  0.000  0.005  0.000  0.224  0.265  0.073       
## intrcptv_mn -0.112  0.000  0.000  0.021  0.000 -0.391 -0.016 -0.083  0.014
## olfactry_mn  0.011  0.000  0.000  0.008  0.000  0.097 -0.117 -0.725 -0.066
## visual_mean -0.282  0.000  0.000  0.015  0.000  0.313  0.110  0.186  0.004
##             intrc_ olfct_
## groupvi                  
## age                      
## formWS                   
## sexMale                  
## lxcl_ctgryp              
## auditory_mn              
## gustatry_mn              
## haptic_mean              
## intrcptv_mn              
## olfactry_mn  0.004       
## visual_mean  0.303 -0.119

Interaction model with group type for WS form:

# WS form only: auditory and visual means, each interacting with group, with
# random intercepts for word and child.
# lmer() is called directly because glmer() without a `family` argument is only
# a deprecated shortcut to lmer(); the fit is a linear mixed model either way.
# NOTE(review): `value` is coded 0/1. If a logistic model was intended, use
# glmer(..., family = binomial) and regenerate the printed output.
lmer(value ~ age + sex + lexical_category +
       auditory_mean * group + visual_mean * group +
       (1 | uni_lemma_clean) + (1 | subj_id),
     data  = atypical_with_norms %>%  filter(form == "WS")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: value ~ age + sex + lexical_category + auditory_mean * group +  
##     visual_mean * group + (1 | uni_lemma_clean) + (1 | subj_id)
##    Data: atypical_with_norms %>% filter(form == "WS")
## 
## REML criterion at convergence: 1535.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3585 -0.3965 -0.0584  0.2471  3.5813 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  uni_lemma_clean (Intercept) 0.009043 0.09509 
##  subj_id         (Intercept) 0.056254 0.23718 
##  Residual                    0.074546 0.27303 
## Number of obs: 4490, groups:  uni_lemma_clean, 438; subj_id, 10
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)                -1.173458   1.134488  -1.034
## age                         0.058178   0.050834   1.144
## sexMale                    -0.164382   0.161938  -1.015
## lexical_categorypredicates -0.041247   0.014601  -2.825
## auditory_mean               0.011940   0.006174   1.934
## groupvi                     0.594412   0.167101   3.557
## visual_mean                 0.012429   0.009663   1.286
## auditory_mean:groupvi       0.006689   0.007378   0.907
## groupvi:visual_mean        -0.032726   0.010295  -3.179
## 
## Correlation of Fixed Effects:
##             (Intr) age    sexMal lxcl_c adtry_ groupv vsl_mn adtr_:
## age         -0.993                                                 
## sexMale     -0.276  0.209                                          
## lxcl_ctgryp -0.019  0.000  0.000                                   
## auditory_mn -0.011  0.000  0.000 -0.061                            
## groupvi     -0.238  0.203 -0.189  0.000  0.047                     
## visual_mean -0.035  0.000  0.000  0.458  0.123  0.102              
## adtry_mn:gr  0.006  0.000  0.000  0.000 -0.478 -0.099 -0.072       
## grpv:vsl_mn  0.014  0.000  0.000  0.000 -0.080 -0.239 -0.426  0.168
# WS form only: every sensorimotor mean interacting with group, with a random
# intercept for child only (no word intercept in this model).
# lmer() is called directly because glmer() without a `family` argument is only
# a deprecated shortcut to lmer(); the fit is a linear mixed model either way.
# NOTE(review): `value` is coded 0/1. If a logistic model was intended, use
# glmer(..., family = binomial) and regenerate the printed output.
lmer(value ~ age + sex + lexical_category +
       auditory_mean * group + visual_mean * group +
       gustatory_mean * group + interoceptive_mean * group +
       haptic_mean * group + olfactory_mean * group +
       (1 | subj_id),
     data  = atypical_with_norms %>%  filter(form == "WS")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: value ~ age + sex + lexical_category + auditory_mean * group +  
##     visual_mean * group + gustatory_mean * group + interoceptive_mean *  
##     group + haptic_mean * group + olfactory_mean * group + (1 |      subj_id)
##    Data: atypical_with_norms %>% filter(form == "WS")
## 
## REML criterion at convergence: 1688.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2404 -0.2892 -0.0948  0.1148  3.5968 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj_id  (Intercept) 0.05624  0.2371  
##  Residual             0.08233  0.2869  
## Number of obs: 4490, groups:  subj_id, 10
## 
## Fixed effects:
##                              Estimate Std. Error t value
## (Intercept)                -1.2100727  1.1344396  -1.067
## age                         0.0581785  0.0508343   1.144
## sexMale                    -0.1643819  0.1619385  -1.015
## lexical_categorypredicates -0.0287414  0.0123194  -2.333
## auditory_mean               0.0137413  0.0051636   2.661
## groupvi                     0.4431705  0.1720879   2.575
## visual_mean                 0.0149765  0.0083467   1.794
## gustatory_mean             -0.0007099  0.0058634  -0.121
## interoceptive_mean          0.0016919  0.0071493   0.237
## haptic_mean                 0.0082815  0.0052491   1.578
## olfactory_mean              0.0002176  0.0079417   0.027
## auditory_mean:groupvi       0.0154581  0.0081599   1.894
## groupvi:visual_mean        -0.0204475  0.0127743  -1.601
## groupvi:gustatory_mean      0.0124043  0.0092136   1.346
## groupvi:interoceptive_mean  0.0295629  0.0106789   2.768
## groupvi:haptic_mean         0.0121849  0.0081416   1.497
## groupvi:olfactory_mean      0.0275351  0.0125408   2.196

Interaction model with group type for WG form:

# WG form only: auditory and visual means, each interacting with group, with
# random intercepts for word and child.
# lmer() is called directly because glmer() without a `family` argument is only
# a deprecated shortcut to lmer(); the fit is a linear mixed model either way.
# NOTE(review): `value` is coded 0/1. If a logistic model was intended, use
# glmer(..., family = binomial) and regenerate the printed output.
lmer(value ~ age + sex + lexical_category +
       auditory_mean * group + visual_mean * group +
       (1 | uni_lemma_clean) + (1 | subj_id),
     data  = atypical_with_norms %>%  filter(form == "WG")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: value ~ age + sex + lexical_category + auditory_mean * group +  
##     visual_mean * group + (1 | uni_lemma_clean) + (1 | subj_id)
##    Data: atypical_with_norms %>% filter(form == "WG")
## 
## REML criterion at convergence: -28.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1247 -0.4227 -0.1037  0.1513  4.0767 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  uni_lemma_clean (Intercept) 0.008016 0.08953 
##  subj_id         (Intercept) 0.005546 0.07447 
##  Residual                    0.052323 0.22874 
## Number of obs: 4004, groups:  uni_lemma_clean, 278; subj_id, 14
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)                -0.286072   0.115072  -2.486
## age                         0.023118   0.006607   3.499
## sexMale                     0.036356   0.049725   0.731
## lexical_categorypredicates -0.010466   0.015999  -0.654
## auditory_mean               0.017609   0.007594   2.319
## groupvi                     0.130002   0.062390   2.084
## visual_mean                 0.019797   0.013017   1.521
## auditory_mean:groupvi      -0.029155   0.007039  -4.142
## groupvi:visual_mean        -0.037407   0.010914  -3.428
## 
## Correlation of Fixed Effects:
##             (Intr) age    sexMal lxcl_c adtry_ groupv vsl_mn adtr_:
## age         -0.756                                                 
## sexMale     -0.409  0.119                                          
## lxcl_ctgryp -0.228  0.000  0.000                                   
## auditory_mn -0.086  0.000  0.000 -0.030                            
## groupvi     -0.432  0.080 -0.030  0.000  0.105                     
## visual_mean -0.451  0.000  0.000  0.429 -0.020  0.402              
## adtry_mn:gr  0.062  0.000  0.000  0.000 -0.662 -0.159  0.005       
## grpv:vsl_mn  0.260  0.000  0.000  0.000  0.005 -0.671 -0.599 -0.008

Interaction models of group type with concreteness (conc_m):

# WS form only: concreteness (conc_m) interacting with group, with random
# intercepts for word and child.
# lmer() is called directly because glmer() without a `family` argument is only
# a deprecated shortcut to lmer(); the fit is a linear mixed model either way.
# NOTE(review): `value` is coded 0/1. If a logistic model was intended, use
# glmer(..., family = binomial) and regenerate the printed output.
lmer(value ~ age + sex + lexical_category + conc_m * group +
       (1 | uni_lemma_clean) + (1 | subj_id),
     data  = atypical_with_norms %>%  filter(form == "WS")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: value ~ age + sex + lexical_category + conc_m * group + (1 |  
##     uni_lemma_clean) + (1 | subj_id)
##    Data: atypical_with_norms %>% filter(form == "WS")
## 
## REML criterion at convergence: 1523.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2620 -0.4017 -0.0427  0.2409  3.4654 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  uni_lemma_clean (Intercept) 0.009139 0.0956  
##  subj_id         (Intercept) 0.056255 0.2372  
##  Residual                    0.074563 0.2731  
## Number of obs: 4490, groups:  uni_lemma_clean, 438; subj_id, 10
## 
## Fixed effects:
##                            Estimate Std. Error t value
## (Intercept)                -1.25891    1.13539  -1.109
## age                         0.05818    0.05083   1.144
## sexMale                    -0.16438    0.16194  -1.015
## lexical_categorypredicates -0.01323    0.01984  -0.667
## conc_m                      0.03164    0.01265   2.502
## groupvi                     0.63664    0.16799   3.790
## conc_m:groupvi             -0.03557    0.01026  -3.468
## 
## Correlation of Fixed Effects:
##             (Intr) age    sexMal lxcl_c conc_m groupv
## age         -0.992                                   
## sexMale     -0.276  0.209                            
## lxcl_ctgryp -0.042  0.000  0.000                     
## conc_m      -0.053  0.000  0.000  0.735              
## groupvi     -0.237  0.202 -0.188  0.000  0.086       
## conc_m:grpv  0.016  0.000  0.000  0.000 -0.324 -0.266
# WG form only: concreteness (conc_m) interacting with group, with random
# intercepts for word and child.
# lmer() is called directly because glmer() without a `family` argument is only
# a deprecated shortcut to lmer(); the fit is a linear mixed model either way.
# NOTE(review): `value` is coded 0/1. If a logistic model was intended, use
# glmer(..., family = binomial) and regenerate the printed output.
lmer(value ~ age + sex + lexical_category + conc_m * group +
       (1 | uni_lemma_clean) + (1 | subj_id),
     data  = atypical_with_norms %>%  filter(form == "WG")) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: value ~ age + sex + lexical_category + conc_m * group + (1 |  
##     uni_lemma_clean) + (1 | subj_id)
##    Data: atypical_with_norms %>% filter(form == "WG")
## 
## REML criterion at convergence: -25.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1378 -0.4390 -0.0683  0.1659  4.1557 
## 
## Random effects:
##  Groups          Name        Variance Std.Dev.
##  uni_lemma_clean (Intercept) 0.007757 0.08808 
##  subj_id         (Intercept) 0.005545 0.07446 
##  Residual                    0.052661 0.22948 
## Number of obs: 4004, groups:  uni_lemma_clean, 278; subj_id, 14
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)                -0.383649   0.123744  -3.100
## age                         0.023118   0.006607   3.499
## sexMale                     0.036356   0.049727   0.731
## lexical_categorypredicates  0.026115   0.021000   1.244
## conc_m                      0.042481   0.014806   2.869
## groupvi                     0.054181   0.064062   0.846
## conc_m:groupvi             -0.024975   0.010287  -2.428
## 
## Correlation of Fixed Effects:
##             (Intr) age    sexMal lxcl_c conc_m groupv
## age         -0.703                                   
## sexMale     -0.381  0.119                            
## lxcl_ctgryp -0.408  0.000  0.000                     
## conc_m      -0.565  0.000  0.000  0.669              
## groupvi     -0.410  0.078 -0.029  0.000  0.352       
## conc_m:grpv  0.262  0.000  0.000  0.000 -0.496 -0.710

Match with typically developing kids from Wordbank

Matched on age and sex.

# One row per WS child: id, group, age, sex, form, and `total`, the number of
# rows marked as known (value == 1) for that child.
atypical_ws_demographics <- atypical_with_norms %>%
  filter(form == "WS", value == 1) %>%
  count(subj_id, group, age, sex, form, name = "total")

# should also match on total N words?
# Age and sex of the American English WS administrations fetched from Wordbank.
english_ws_admins <- get_administration_data(language = "English (American)",
                                             form = "WS") %>%
  select(data_id, age, sex)

# Pair each atypical WS child with every Wordbank administration of the same
# age and sex (one row per pairing; NA data_id when nothing matches). The keys
# are spelled out: age and sex are the only columns the two tables share, and
# naming them guards against a silent key change if either table gains columns.
typical_matches <- atypical_ws_demographics %>%
  left_join(english_ws_admins, by = c("age", "sex"))

# Item-level responses for the American English WS form from Wordbank.
english_ws_data <- get_instrument_data("English (American)", "WS")

# Keep only administrations that were matched to an atypical child.
typical_data <- filter(english_ws_data,
                       data_id %in% typical_matches$data_id)

# Tag each administration with the group of the child it was matched to.
# NOTE(review): typical_matches can hold the same data_id more than once (two
# atypical children with the same age and sex), which repeats that
# administration's rows here. Confirm this weighting is intended.
typical_data <- left_join(typical_data,
                          select(typical_matches, data_id, group))

# Prefix the group with "T" and recode responses to 0 / 1; any response outside
# the two listed sets becomes NA.
typical_data <- mutate(
  typical_data,
  group = paste0("T", group),
  value = case_when(
    is.na(value) | value %in% c("never", "no", "not yet", "") ~ 0,
    value %in% c("often", "produces", "sometimes", "understands", "yes") ~ 1
  )
)

# Attach the WS rows of the item dictionary (joined on the shared columns).
typical_data <- left_join(typical_data, filter(cdi_dict, form == "WS"))


# Lookup table from item to cleaned lemma: the part of uni_lemma that precedes
# the first " (".
clean_uni_lemmas <- typical_data %>%
  distinct(num_item_id, uni_lemma) %>%
  mutate(
    uni_lemma_clean = vapply(
      str_split(uni_lemma, " \\("),
      function(parts) parts[1],
      character(1)
    )
  )

# Add the cleaned lemma and both norm tables to the typical responses, then
# keep word items that are nouns or predicates.
typical_data_with_norms <- left_join(typical_data, clean_uni_lemmas) %>%
  left_join(sensory_data, by = c(uni_lemma_clean = "word")) %>%
  left_join(conc_data, by = c(uni_lemma_clean = "word")) %>%
  filter(type == "word") %>%
  filter(lexical_category %in% c("nouns", "predicates"))

# Same summary as for the atypical groups, computed on the matched typical
# children: per group and form, each norm over the words marked as known.
means_by_group_typical <- typical_data_with_norms %>%
  filter(value == 1) %>%
  # NOTE(review): the norm columns are picked by position (16:22); presumably
  # the sensorimotor means plus conc_m. Confirm against
  # names(typical_data_with_norms).
  pivot_longer(cols = 16:22, values_to = "norm_value") %>%
  group_by(group, form, name, value) %>%
  multi_boot_standard(col = "norm_value", na.rm = TRUE)

# Typical and atypical summaries on one plot: point-ranges dodged so the
# groups do not overlap, one panel per form, x labels rotated to fit.
bind_rows(means_by_group_typical, means_by_group) %>%
  ggplot(mapping = aes(x = name, y = mean, colour = group, group = group)) +
  geom_pointrange(mapping = aes(ymin = ci_lower, ymax = ci_upper),
                  position = position_dodge(width = 0.3)) +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_wrap(form ~ .)