Age range: 30 - 42

Predicting mtld diff

MODEL: lm(mtld_diff ~ know_word_at_t1 + mtld_t1 + age_t1 + age_diff + log(n_transcripts_t1) + log(n_transcripts_t2), complete_df)

Sanity Check

Do kids who have a high mean t also have a high mtld diff? Yes.

Does it hold when controlling for the number of transcripts at t1, age at t1, and the age difference? Yes.

## 
## Call:
## lm(formula = mtld_diff ~ mean_t + n_transcripts_t1 + age_diff + 
##     age_t1, data = t1_word_counts_with_ts_mtld)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.92715 -0.16640 -0.02852  0.16386  0.61081 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.6260901  1.4800332   0.423 0.673718    
## mean_t            0.8254313  0.2120500   3.893 0.000242 ***
## n_transcripts_t1 -0.0017707  0.0010229  -1.731 0.088346 .  
## age_diff          0.0021861  0.0009960   2.195 0.031858 *  
## age_t1           -0.0006774  0.0013429  -0.504 0.615744    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2776 on 63 degrees of freedom
## Multiple R-squared:  0.3519, Adjusted R-squared:  0.3108 
## F-statistic: 8.552 on 4 and 63 DF,  p-value: 1.409e-05

Predicting t-value of a word with other measures

# Word-frequency table (the file name suggests adult speech in CHILDES).
# The `n` column is dropped; every other column is kept.
freq <- select(
  read_csv("/Users/mollylewis/Documents/research/Projects/1_in_progress/VOCAB_SEEDS/analyses/3_kid_vocabs/data/childes_adult_word_freq.csv"),
  -n
)

# Density norms, loaded as-is (no columns dropped or renamed here).
density_norms <- read_csv(
  file = "/Users/mollylewis/Documents/research/Projects/1_in_progress/VOCAB_SEEDS/analyses/3_kid_vocabs/data/bills_density_norms.csv"
)

# Age-of-acquisition ratings (Kuperman et al., per the file name).
# Only the word and its mean rating are kept, renamed to the column names
# used by the rest of this script.
aoa_norms <- read_csv("/Users/mollylewis/Documents/research/Projects/1_in_progress/next_kids/stimuli_selection/AoA_ratings_Kuperman_et_al_BRM.csv") %>%
  select(word = Word, adult_aoa_estimate = Rating.Mean)

# Per-word embedding distances; `mean_dist_wiki` is renamed to `mean_dist`,
# all other columns are left untouched.
embedding_dist <- rename(
  read_csv("/Users/mollylewis/Documents/research/Projects/1_in_progress/VOCAB_SEEDS/analyses/6_by_word_analyses/data/wiki_embedding_dist_by_word.csv"),
  mean_dist = mean_dist_wiki
)

# Concreteness norms (Brysbaert database, per the file path).
# Keeps the word (renamed from `Word`) and the `Conc.M` column only.
concreteness <- read_csv("/Users/mollylewis/Documents/research/Projects/2_published/ref_complex/corpus/brysbaert_database/brysbaert_corpus.csv") %>%
  select(word = Word, Conc.M)

# Concept feature norms: one `word` column plus `Mean_Distinct_No_Tax`.
# The concept label is lower-cased and cut at the first underscore, so only
# the text before any "_" suffix is kept as the word.
# NOTE(review): cutting the suffix can make several concepts collapse onto the
# same `word`; if that happens, a later join on `word` would duplicate rows --
# confirm whether de-duplication is needed.
concepts <- read_tsv("/Users/mollylewis/Documents/research/Projects/1_in_progress/VOCAB_SEEDS/analyses/3_kid_vocabs/data/CONCS_brm.txt") %>%
  # Same column subset as before: four named columns plus positions 14 to 33.
  select(Concept, Familiarity, Length_Syllables, Bigram, 14:33) %>%
  # Split every label once, then take the first piece of each split.
  mutate(word = map_chr(str_split(tolower(Concept), "_"), 1)) %>%
  select(word, Mean_Distinct_No_Tax)

# Dominant part of speech per word from the SUBTLEX-US list, collapsed to a
# two-level code: "v" when the dominant PoS is "Verb", "o" for anything else.
# A missing PoS stays NA because `ifelse()` propagates NA from its test.
#
# The file name contains literal spaces. They are written as plain spaces:
# "\ " is not a recognised escape sequence in an R string, so the previous
# backslash-escaped form made the script fail to parse.
pos <- read_tsv("/Users/mollylewis/Documents/research/Projects/1_in_progress/VOCAB_SEEDS/analyses/3_kid_vocabs/data/SUBTLEX-US frequency list with PoS information text version.txt") %>%
  select(word = Word, pos = Dom_PoS_SUBTLEX) %>%
  mutate(pos = ifelse(pos == "Verb", "v", "o"))

# Glasgow norms: keep `word` plus every column whose name contains "_M",
# except `AOA_M` and `CNC_M`, which are excluded.
glasgow <- read_csv("/Users/mollylewis/Documents/research/Projects/1_in_progress/IATLANG/data/study1a/raw/GlasgowNorms.csv") %>%
  select(word, contains("_M"), -AOA_M, -CNC_M)

# Warriner et al. ratings (per the file name): the word plus the three
# `*.Mean.Sum` columns (presumably valence, arousal and dominance -- confirm).
ar_va <- read_csv("/Users/mollylewis/Documents/research/Projects/1_in_progress/VOCAB_SEEDS/analyses/6_by_word_analyses/data/Ratings_Warriner_et_al.csv") %>%
  select(word = Word, V.Mean.Sum, A.Mean.Sum, D.Mean.Sum)

# Complexity norms: only the `word` and `complexity` columns are kept.
complexity <- select(
  read_csv("/Users/mollylewis/Documents/research/Projects/2_published/ref_complex/corpus/MRC_database/complexity_norms.csv"),
  word,
  complexity
)

# Attach every norm table to the per-word coefficients.
# The coefficient table (with `word` lower-cased) is the left-most table and
# each norm table is left-joined onto the running result in the order listed,
# so no coefficient row is dropped by a join.
# No `by` is supplied, so each join matches on whatever column names the two
# tables share -- NOTE(review): presumably just `word`; confirm, and consider
# an explicit `by = "word"`.
word_coeffs_min5_t2_with_vars <- list(
  mutate(word_coeffs_min5_t2, word = tolower(word)),
  density_norms,
  freq,
  embedding_dist,
  concepts,
  concreteness,
  aoa_norms,
  pos,
  ar_va,
  glasgow,
  complexity
) %>%
  reduce(left_join) %>%
  # Word length in characters, then keep only rows with `n_know` of 5 or more.
  mutate(word_length = nchar(word)) %>%
  filter(n_know >= 5)