# Setup: load knitr and set chunk defaults for the whole document.
library(knitr)

# Spell out TRUE/FALSE instead of T/F: T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE,
               error = FALSE, cache = TRUE, tidy = FALSE)

library(tidyverse)
library(langcog)
library(feather)
library(lme4)

# Make the classic theme with a 10 pt base font size the default plot theme.
theme_set(theme_classic(base_size = 10))

Hypernym at t1 as predictor of vocab change at t2

Normalize hypernyms by category

Normalize hypernyms by POS

Get hypernym/frequency by kid

Get time point data

Fit models

Raw (unscaled) hypernyms

## Linear mixed model fit by REML ['lmerMod']
## Formula: delta_words_spoken ~ mean_hypernyms + delta_age + words_spoken +  
##     (session_num | child_id)
##    Data: full_df
## 
## REML criterion at convergence: 14641.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.4376 -0.4704 -0.0910  0.3027  8.4565 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  child_id (Intercept)  832.72  28.857        
##           session_num   22.19   4.711   -0.99
##  Residual             1710.75  41.361        
## Number of obs: 1410, groups:  child_id, 224
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)    172.622829  18.777273   9.193
## mean_hypernyms -16.322634   1.943406  -8.399
## delta_age       28.573616   1.434408  19.920
## words_spoken    -0.065999   0.009487  -6.957
## 
## Correlation of Fixed Effects:
##             (Intr) mn_hyp delt_g
## mn_hyprnyms -0.989              
## delta_age   -0.106  0.018       
## words_spokn -0.788  0.726  0.030
## Linear mixed model fit by REML ['lmerMod']
## Formula: delta_words_spoken ~ mean_hypernyms + delta_age + mean_freq +  
##     words_spoken + (session_num | child_id)
##    Data: full_df
## 
## REML criterion at convergence: 14565.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.6865 -0.4549 -0.1072  0.3253  8.2787 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  child_id (Intercept)  655.13  25.596        
##           session_num   16.71   4.087   -1.00
##  Residual             1659.17  40.733        
## Number of obs: 1410, groups:  child_id, 224
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)    436.144058  35.317034  12.349
## mean_hypernyms -13.549667   1.903433  -7.119
## delta_age       28.335029   1.397449  20.276
## mean_freq      -35.904194   4.158070  -8.635
## words_spoken    -0.096033   0.009697  -9.903
## 
## Correlation of Fixed Effects:
##             (Intr) mn_hyp delt_g mn_frq
## mn_hyprnyms -0.343                     
## delta_age   -0.081  0.012              
## mean_freq   -0.859 -0.180  0.031       
## words_spokn -0.684  0.608  0.038  0.355

Hypernyms scaled by category

## Linear mixed model fit by REML ['lmerMod']
## Formula: delta_words_spoken ~ mean_hypernyms_cat_scaled + delta_age +  
##     words_spoken + (session_num | child_id)
##    Data: full_df
## 
## REML criterion at convergence: 14623
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.5325 -0.4715 -0.1077  0.3273  8.2366 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  child_id (Intercept)  891.26  29.854        
##           session_num   24.63   4.963   -1.00
##  Residual             1689.16  41.099        
## Number of obs: 1410, groups:  child_id, 224
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)                31.454280   3.194012   9.848
## mean_hypernyms_cat_scaled -65.536412   7.133650  -9.187
## delta_age                  28.390982   1.430550  19.846
## words_spoken               -0.039919   0.007373  -5.414
## 
## Correlation of Fixed Effects:
##             (Intr) mn_h__ delt_g
## mn_hyprny__ -0.514              
## delta_age   -0.528  0.028       
## words_spokn -0.768  0.485  0.034
## convergence code: 0
## boundary (singular) fit: see ?isSingular
## Linear mixed model fit by REML ['lmerMod']
## Formula: delta_words_spoken ~ mean_hypernyms_cat_scaled + delta_age +  
##     mean_freq + words_spoken + (session_num | child_id)
##    Data: full_df
## 
## REML criterion at convergence: 14569.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.7032 -0.4409 -0.1124  0.3313  8.0944 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  child_id (Intercept)  758.81  27.546        
##           session_num   20.27   4.502   -1.00
##  Residual             1650.42  40.625        
## Number of obs: 1410, groups:  child_id, 224
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)               277.045967  34.984817   7.919
## mean_hypernyms_cat_scaled -47.820527   7.436007  -6.431
## delta_age                  28.226577   1.404331  20.100
## mean_freq                 -30.874200   4.382563  -7.045
## words_spoken               -0.066156   0.008054  -8.214
## 
## Correlation of Fixed Effects:
##             (Intr) mn_h__ delt_g mn_frq
## mn_hyprny__  0.303                     
## delta_age   -0.072  0.017              
## mean_freq   -0.996 -0.348  0.025       
## words_spokn -0.512  0.249  0.042  0.453
## convergence code: 0
## boundary (singular) fit: see ?isSingular

Hypernyms scaled by POS

## Linear mixed model fit by REML ['lmerMod']
## Formula: delta_words_spoken ~ mean_hypernyms_pos_scaled + delta_age +  
##     words_spoken + (session_num | child_id)
##    Data: full_df
## 
## REML criterion at convergence: 14626
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.4953 -0.4853 -0.0978  0.3216  8.2689 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  child_id (Intercept)  786.79  28.050        
##           session_num   21.78   4.667   -1.00
##  Residual             1709.44  41.345        
## Number of obs: 1410, groups:  child_id, 224
## 
## Fixed effects:
##                            Estimate Std. Error t value
## (Intercept)                37.87321    3.62096  10.459
## mean_hypernyms_pos_scaled -60.18991    6.58832  -9.136
## delta_age                  28.76300    1.42817  20.140
## words_spoken               -0.05081    0.00804  -6.320
## 
## Correlation of Fixed Effects:
##             (Intr) mn_h__ delt_g
## mn_hyprny__ -0.659              
## delta_age   -0.456  0.007       
## words_spokn -0.811  0.605  0.023
## convergence code: 0
## boundary (singular) fit: see ?isSingular
## Linear mixed model fit by REML ['lmerMod']
## Formula: delta_words_spoken ~ mean_hypernyms_pos_scaled + delta_age +  
##     mean_freq + words_spoken + (session_num | child_id)
##    Data: full_df
## 
## REML criterion at convergence: 14566.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.6817 -0.4663 -0.1044  0.3179  8.1152 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  child_id (Intercept)  661.21  25.71         
##           session_num   17.64   4.20    -1.00
##  Residual             1661.73  40.76         
## Number of obs: 1410, groups:  child_id, 224
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)               293.515790  34.060085   8.618
## mean_hypernyms_pos_scaled -45.915045   6.741945  -6.810
## delta_age                  28.501014   1.398737  20.376
## mean_freq                 -32.253621   4.279563  -7.537
## words_spoken               -0.076954   0.008499  -9.054
## 
## Correlation of Fixed Effects:
##             (Intr) mn_h__ delt_g mn_frq
## mn_hyprny__  0.232                     
## delta_age   -0.080 -0.003              
## mean_freq   -0.995 -0.299  0.033       
## words_spokn -0.468  0.415  0.034  0.393
## convergence code: 0
## boundary (singular) fit: see ?isSingular

Hypernym as predictor of t

Fit models

Raw (unscaled) hypernyms

## 
##  Pearson's product-moment correlation
## 
## data:  word_coeffs_min5_t2_with_vars_childes$t and word_coeffs_min5_t2_with_vars_childes$hypernyms
## t = -1.0453, df = 447, p-value = 0.2965
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1412745  0.0433610
## sample estimates:
##         cor 
## -0.04937859

Hypernyms scaled by category

## 
##  Pearson's product-moment correlation
## 
## data:  word_coeffs_min5_t2_with_vars_childes$t and word_coeffs_min5_t2_with_vars_childes$hypernyms_scaled_cat
## t = -0.38411, df = 443, p-value = 0.7011
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.11101496  0.07483735
## sample estimates:
##         cor 
## -0.01824642

Hypernyms scaled by POS

## 
##  Pearson's product-moment correlation
## 
## data:  word_coeffs_min5_t2_with_vars_childes$t and word_coeffs_min5_t2_with_vars_childes$hypernyms_scaled_pos
## t = -2.8264, df = 447, p-value = 0.004918
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.22232073 -0.04046012
## sample estimates:
##        cor 
## -0.1325054

Comparing t vs. hypernym as kid-level predictor

Get t by kid

## Linear mixed model fit by REML ['lmerMod']
## Formula: scale(perc_diff) ~ scale(mean_hypernyms) + scale(mean_t_t1) +  
##     scale(age_diff) + scale(perc_t1) + (1 | child_id)
##    Data: .
## 
## REML criterion at convergence: -10378.9
## 
## Scaled residuals: 
##        Min         1Q     Median         3Q        Max 
## -7.777e-05 -1.013e-05  9.740e-07  8.215e-06  7.713e-05 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  child_id (Intercept) 8.197e-02 0.2863106
##  Residual             1.769e-10 0.0000133
## Number of obs: 599, groups:  child_id, 59
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)           -3.718e-02  3.731e-02  -0.996
## scale(mean_hypernyms) -5.719e-11  7.205e-07   0.000
## scale(mean_t_t1)       1.406e-10  7.342e-07   0.000
## scale(age_diff)        2.800e-02  3.634e-02   0.771
## scale(perc_t1)        -3.716e-01  3.724e-02  -9.980
## 
## Correlation of Fixed Effects:
##             (Intr) scl(m_) s(__1) scl(g_)
## scl(mn_hyp)  0.000                       
## scl(mn_t_1)  0.000 -0.318                
## scal(g_dff) -0.038  0.000   0.000        
## scl(prc_t1)  0.025  0.000   0.000  0.014 
## convergence code: 3
## 
## Call:
## lm(formula = scale(perc_diff) ~ scale(mean_hypernyms) + scale(mean_t_t1) + 
##     scale(age_diff) + scale(perc_t1), data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.56806 -0.57689  0.03646  0.45645  2.64890 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -0.002566   0.034228  -0.075  0.94027    
## scale(mean_hypernyms) -0.126250   0.039102  -3.229  0.00131 ** 
## scale(mean_t_t1)       0.389288   0.036342  10.712  < 2e-16 ***
## scale(age_diff)        0.037901   0.034272   1.106  0.26923    
## scale(perc_t1)        -0.374138   0.038931  -9.610  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8377 on 594 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.3052, Adjusted R-squared:  0.3005 
## F-statistic: 65.22 on 4 and 594 DF,  p-value: < 2.2e-16