Formula: total ~ known + prop_known
# Model specification used for the per-word regressions; kept as a string and
# converted with as.formula() at each call site.
MODEL_FORMULA <- "total ~ known + prop_known"
# coefficient function
#' Estimate the `known` coefficient for a single word
#'
#' Subsets `df` to the rows for `word` whose `childage_days` lies in
#' `[min_age, max_age)`, fits `lm(mod_formula, ...)` on that subset, and
#' returns the coefficient-table row for the `known` predictor.
#'
#' @param word Value of `df$item` to fit the model for.
#' @param mod_formula Model formula passed to `lm()`. A row is only returned
#'   for a term named `known`.
#' @param df Data frame with `item` and `childage_days` columns plus every
#'   variable referenced by `mod_formula`.
#' @param min_age Inclusive lower bound on `childage_days`.
#' @param max_age Exclusive upper bound on `childage_days`.
#' @return A data frame with columns `predictor`, `Estimate`, `Std..Error`,
#'   `t.value`, `Pr...t..` and `item`. It has one row, or zero rows when
#'   `lm()` cannot estimate the `known` coefficient (for example when `known`
#'   is constant within the subset).
get_word_beta <- function(word, mod_formula, df, min_age, max_age) {
  missing_cols <- setdiff(c("item", "childage_days"), names(df))
  if (length(missing_cols) > 0) {
    stop(
      "`df` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Reference columns explicitly through `df$`: with data-masked filtering, a
  # column that happens to be called `word`, `min_age` or `max_age` would
  # silently take precedence over the function arguments.
  in_scope <- df$item == word &
    df$childage_days >= min_age &
    df$childage_days < max_age
  # which() drops rows where the comparison is NA.
  relevant_df <- df[which(in_scope), , drop = FALSE]

  model <- lm(mod_formula, relevant_df)

  # data.frame() sanitises the matrix column names ("Std. Error" becomes
  # "Std..Error", "Pr(>|t|)" becomes "Pr...t.."), which downstream code
  # relies on.
  coef_matrix <- summary(model)$coefficients
  coef_table <- data.frame(
    predictor = as.character(rownames(coef_matrix)),
    coef_matrix,
    row.names = NULL,
    stringsAsFactors = FALSE
  )

  known_row <- coef_table[coef_table$predictor == "known", , drop = FALSE]
  rownames(known_row) <- NULL
  known_row$item <- rep(word, nrow(known_row))
  known_row
}
# Spot-check the helper on two individual words.
ache2 <- get_word_beta(
  word = "ache",
  mod_formula = as.formula(MODEL_FORMULA),
  df = seed_words,
  min_age = 1080,
  max_age = 1260
)
song <- get_word_beta(
  word = "song",
  mod_formula = as.formula(MODEL_FORMULA),
  df = seed_words,
  min_age = 1260,
  max_age = 1440
)
#### Fit the per-word model within each age band ####

#' Fit `get_word_beta()` for every item of `df` within one age band
#'
#' @param df Data frame passed through to `get_word_beta()`; its `item` column
#'   supplies the words to iterate over.
#' @param mod_formula Model formula passed through to `get_word_beta()`.
#' @param min_age Inclusive lower bound on `childage_days`.
#' @param max_age Exclusive upper bound on `childage_days`.
#' @param age_group Label stored in the `ageGroup` column of the result.
#' @return A data frame with columns `item`, `Estimate`, `SE`, `tval`, `p` and
#'   `ageGroup`, sorted by ascending `tval`.
fit_age_group <- function(df, mod_formula, min_age, max_age, age_group) {
  # as.character() hands each item to get_word_beta() as a plain string even
  # when `item` is stored as a factor.
  words <- as.character(unique(df$item))

  map_df(
    words,
    function(w) {
      get_word_beta(w, mod_formula, df, min_age = min_age, max_age = max_age)
    }
  ) %>%
    select(item, Estimate, SE = Std..Error, tval = t.value, p = Pr...t..) %>%
    arrange(tval) %>%
    mutate(ageGroup = age_group)
}

word_coeffs_3036 <- fit_age_group(
  seed_words,
  as.formula(MODEL_FORMULA),
  min_age = 900,
  max_age = 1080,
  age_group = "youngest"
)
word_coeffs_3642 <- fit_age_group(
  seed_words,
  as.formula(MODEL_FORMULA),
  min_age = 1080,
  max_age = 1260,
  age_group = "middle"
)
word_coeffs_4248 <- fit_age_group(
  seed_words,
  as.formula(MODEL_FORMULA),
  min_age = 1260,
  max_age = 1440,
  age_group = "oldest"
)

# Stack the three age bands into one long table.
word_coeffs_by_age <- bind_rows(
  word_coeffs_3036,
  word_coeffs_3642,
  word_coeffs_4248
)
# Overlaid densities of `tval`, one translucent curve per age group.
ggplot(data = word_coeffs_by_age, mapping = aes(x = tval, fill = ageGroup)) +
  geom_density(alpha = 0.2) +
  theme_classic()
# Word-to-category lookup table.
# NOTE(review): this is an absolute path on one machine; anyone else running
# the analysis has to point it at their own copy of the file.
# Strings are read as character (not factor) so that `item` has the same type
# on both sides of the join below and no coercion warning is raised.
categories <- read.csv(
  "C:/Users/Christina/Dropbox/UW Madison/Grant/AOA parent survey/survey_words_categories_2.csv",
  stringsAsFactors = FALSE
) %>%
  rename(item = word) %>%
  select(-MCDI_Cat)

# Attach each word's category, a numeric age-group code for plotting on a
# continuous x axis (youngest = 1, middle = 2, anything else = 3), and store
# the category name as a factor.
word_coeffs_with_cat <- left_join(word_coeffs_by_age, categories, by = "item") %>%
  mutate(
    ageGroup_num = ifelse(
      ageGroup == "youngest",
      1,
      ifelse(ageGroup == "middle", 2, 3)
    ),
    CatName = as.factor(CatName)
  )

DT::datatable(word_coeffs_with_cat)
# One density panel of `tval` per category; rows without a category are dropped.
word_coeffs_with_cat %>%
  filter(!is.na(CatName)) %>%
  ggplot(mapping = aes(x = tval, fill = CatName)) +
  geom_density(alpha = 0.5) +
  facet_wrap(~CatName) +
  theme_classic()
#' Plot per-item `tval` trajectories across age groups for one category
#'
#' @param category Category name to keep (compared against `data$CatName`).
#' @param data Data frame with `CatName`, `ageGroup_num`, `tval` and `item`
#'   columns; defaults to `word_coeffs_with_cat`.
#' @return A ggplot object with one line and set of points per item.
plot_category_trajectories <- function(category, data = word_coeffs_with_cat) {
  # Subset through `data$` so a column named `category` cannot shadow the
  # argument; which() drops rows whose CatName is NA.
  in_category <- data[which(data$CatName == category), , drop = FALSE]

  ggplot(in_category, aes(ageGroup_num, tval, color = item)) +
    geom_line() +
    geom_point() +
    # Fixed y limits keep the panels comparable across categories; values
    # outside the limits are not drawn.
    scale_y_continuous(limits = c(-2, 8)) +
    scale_x_continuous(
      name = "Age group",
      breaks = c(1, 2, 3),
      labels = c("30-36m", "36-42m", "42-48m")
    ) +
    theme_classic()
}

# Categories to plot, in display order.
category_names <- c(
  "action words",
  "games and routines",
  "toys",
  "food and drink",
  "outside things",
  "body parts",
  "descriptive words",
  "places to go",
  "small household items",
  "mental states and attributes",
  "people",
  "words about time",
  "furniture and rooms",
  "quantifiers and articles"
)

# ggplot objects are not auto-printed inside a loop, so print explicitly.
for (category_name in category_names) {
  print(plot_category_trajectories(category_name))
}
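Note: some words disappear at the oldest or youngest age group. This happens when there is no variance in knowledge within that group (i.e., all children know the word, or no children know it), so the coefficient for `known` cannot be estimated and no row is returned for that word.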
# Per-item `tval` trajectories across the three age groups for the rows in
# `helpful_seed`.
ggplot(
  data = helpful_seed,
  mapping = aes(x = ageGroup_num, y = tval, colour = item)
) +
  geom_line() +
  geom_point() +
  scale_x_continuous(
    name = "Age group",
    breaks = c(1, 2, 3),
    labels = c("30-36m", "36-42m", "42-48m")
  ) +
  scale_y_continuous(limits = c(-2, 8)) +
  theme_classic()
# Load the per-word summary table and drop its `X` column
# (presumably a row index written out with the CSV; confirm against the file).
all_word_info <- select(read.csv("word_knowledge_by_age_with_info.csv"), -X)

# read.csv() turned non-syntactic characters in the headers into dots; show
# them as hyphens in the rendered table.
names(all_word_info) <- gsub(".", "-", names(all_word_info), fixed = TRUE)

DT::datatable(all_word_info)