Individual words as predictors

Get t-values for each word by age group

Formula: total ~ known + prop_known

# Model specification kept as text; it is converted with as.formula() at each
# place a model is fitted.
MODEL_FORMULA <- "total ~ known + prop_known"

# Fit the linear model for a single word within one age window and return the
# coefficient-table row for the `known` predictor.
#
# Arguments:
#   word        value of `item` whose rows are kept.
#   mod_formula model formula passed to lm().
#   df          data containing `item`, `childage_days` and the variables
#               named in `mod_formula`.
#   min_age     lower bound on `childage_days` (inclusive).
#   max_age     upper bound on `childage_days` (exclusive).
#
# Returns a data frame with columns predictor, Estimate, Std..Error, t.value,
# Pr...t.. and item. It has zero rows when the coefficient table contains no
# `known` row, or when no observations fall inside the word/age subset.
get_word_beta <- function(word, mod_formula, df, min_age, max_age) {
  relevant_df <- df %>%
    filter(
      item == word,
      childage_days >= min_age,
      childage_days < max_age
    )

  # lm() stops with an error on a zero-row subset, which would abort a whole
  # map over words. Return an empty result with the usual columns instead.
  if (nrow(relevant_df) == 0) {
    return(data.frame(
      predictor = character(0),
      Estimate = numeric(0),
      Std..Error = numeric(0),
      t.value = numeric(0),
      Pr...t.. = numeric(0),
      item = character(0),
      stringsAsFactors = FALSE
    ))
  }

  model <- lm(mod_formula, relevant_df)

  # data.frame() turns the matrix column names "Std. Error", "t value" and
  # "Pr(>|t|)" into Std..Error, t.value and Pr...t..
  summary(model)$coefficients %>%
    data.frame() %>%
    rownames_to_column("predictor") %>%
    filter(predictor == "known") %>%
    mutate(item = word)
}

# Single-word spot checks of get_word_beta(), each within one age window.
ache2 <- get_word_beta(
  word = "ache",
  mod_formula = as.formula(MODEL_FORMULA),
  df = seed_words,
  min_age = 1080,
  max_age = 1260
)
song <- get_word_beta(
  word = "song",
  mod_formula = as.formula(MODEL_FORMULA),
  df = seed_words,
  min_age = 1260,
  max_age = 1440
)

#### Per-word `known` coefficients for each age band ####

# Run get_word_beta() for every unique seed word with childage_days in
# [min_age, max_age), keep and rename the coefficient columns, sort by t-value
# (ascending) and tag every row with `age_label`.
fit_words_in_age_band <- function(min_age, max_age, age_label) {
  # as.character() hands plain strings to map_df() in case `item` is stored
  # as a factor.
  seed_items <- as.character(unique(seed_words$item))

  map_df(
    seed_items,
    get_word_beta,
    as.formula(MODEL_FORMULA),
    seed_words,
    min_age = min_age,
    max_age = max_age
  ) %>%
    select(item, Estimate, SE = Std..Error, tval = t.value, p = Pr...t..) %>%
    arrange(tval) %>%
    mutate(ageGroup = age_label)
}

# Age windows are given in days of `childage_days`.
word_coeffs_3036 <- fit_words_in_age_band(900, 1080, "youngest")
word_coeffs_3642 <- fit_words_in_age_band(1080, 1260, "middle")
word_coeffs_4248 <- fit_words_in_age_band(1260, 1440, "oldest")

word_coeffs_by_age <- bind_rows(
  word_coeffs_3036,
  word_coeffs_3642,
  word_coeffs_4248
)

Plot distribution of t-values

# Overlaid density curves of the per-word t-values, filled by age group.
word_coeffs_by_age %>%
  ggplot(aes(x = tval, fill = ageGroup)) +
  geom_density(alpha = 0.2) +
  theme_classic()

Add category info

# Load the category lookup for the surveyed words. `word` is renamed to `item`
# so it can act as the join key, and the MCDI_Cat column is dropped.
# NOTE(review): this is an absolute, machine-specific path; the chunk only runs
# where that file exists. Consider a project-relative path.
categories <- read.csv("C:/Users/Christina/Dropbox/UW Madison/Grant/AOA parent survey/survey_words_categories_2.csv") %>%
  rename(item = word) %>%
  select(-MCDI_Cat) %>%
  # Make the key's type explicit. Without this, left_join() warns that it is
  # coercing a factor key to a character vector.
  mutate(item = as.character(item))

# Attach category info to every coefficient row, add a numeric code for the
# age group (1 = youngest, 2 = middle, 3 = anything else) and store CatName as
# a factor.
word_coeffs_with_cat <- word_coeffs_by_age %>%
  left_join(categories, by = "item") %>%
  mutate(
    ageGroup_num = ifelse(
      ageGroup == "youngest",
      1,
      ifelse(ageGroup == "middle", 2, 3)
    ),
    CatName = as.factor(CatName)
  )

DT::datatable(word_coeffs_with_cat)

T values by category (seed words)

# Density of t-values for each category, one facet per category; rows with a
# missing CatName are left out.
word_coeffs_with_cat %>%
  filter(!is.na(CatName)) %>%
  ggplot(aes(x = tval, fill = CatName)) +
  geom_density(alpha = 0.5) +
  facet_wrap(~CatName) +
  theme_classic()

T values by age and category (seed words)

Action words

# t-value against age group for each item in the "action words" category,
# drawn as one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "action words") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Games and routines

# t-value against age group for each item in the "games and routines"
# category, drawn as one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "games and routines") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Toys

# t-value against age group for each item in the "toys" category, drawn as
# one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "toys") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Food and drink

# t-value against age group for each item in the "food and drink" category,
# drawn as one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "food and drink") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Outside things

# t-value against age group for each item in the "outside things" category,
# drawn as one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "outside things") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Body parts

# t-value against age group for each item in the "body parts" category, drawn
# as one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "body parts") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Descriptive words

# t-value against age group for each item in the "descriptive words"
# category, drawn as one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "descriptive words") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Places to go

# t-value against age group for each item in the "places to go" category,
# drawn as one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "places to go") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Small household items

# t-value against age group for each item in the "small household items"
# category, drawn as one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "small household items") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Mental states and attributes

# t-value against age group for each item in the "mental states and
# attributes" category, drawn as one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "mental states and attributes") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

People

# t-value against age group for each item in the "people" category, drawn as
# one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "people") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Words about time

# t-value against age group for each item in the "words about time" category,
# drawn as one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "words about time") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Furniture and rooms

# t-value against age group for each item in the "furniture and rooms"
# category, drawn as one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "furniture and rooms") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Quantifiers and articles

# t-value against age group for each item in the "quantifiers and articles"
# category, drawn as one coloured line (with points) per item.
word_coeffs_with_cat %>%
  filter(CatName == "quantifiers and articles") %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

Note: some words are missing at the oldest or youngest age group. This happens when there is no variance in knowledge of the word within that group (i.e., all children know it, or no children know it), so no coefficient for `known` can be estimated.

What are the most helpful seed words across ages?

The most helpful and consistent seed words? (N = 24)

# t-value against age group for each item in `helpful_seed`, drawn as one
# coloured line (with points) per item.
# NOTE(review): `helpful_seed` is not created in the code shown here; confirm
# where it is defined.
helpful_seed %>%
  ggplot(aes(x = ageGroup_num, y = tval, colour = item)) +
  geom_line() +
  geom_point() +
  # Age-group codes 1-3 are relabelled with their age ranges.
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()

What proportion of older (36-42 months) and younger (30-35.9 months) kids know these words?

Understands

Says

Of these words, which might be good to teach?

Understands - known by <50% of younger group

Understands - biggest change between age groups

Says - known by <50% of younger group

Says - biggest change between age groups

Descriptive info for all words surveyed

# Read the per-word summary table and drop its `X` column
# (presumably a saved row index; confirm against the CSV).
all_word_info <- select(read.csv("word_knowledge_by_age_with_info.csv"), -X)

# Swap every "." in the column names for "-" before display.
names(all_word_info) <- str_replace_all(names(all_word_info), "\\.", "-")

DT::datatable(all_word_info)