For each age (12-144 months), calculate the proportion of children who produce each word.
Remaining questions:
# Render the one-month and three-month age-bin tables as DT widgets.
DT::datatable(
  number_of_kids_1month,
  caption = "One-month age bins"
)
DT::datatable(
  number_of_kids_3month,
  caption = "Three-month age bins"
)
# prop_say against age_group, one facet per word, with a binomial-GLM
# smooth and its confidence band. X-axis breaks fall every 12 units.
# NOTE(review): if prop_say is a non-integer proportion, glm() with
# family = "binomial" and no weights warns about non-integer successes;
# confirm whether weights or a quasibinomial family are wanted.
ggplot(nkids_3month_samplewords, aes(x = age_group, y = prop_say)) +
  geom_point() +
  stat_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = TRUE
  ) +
  scale_x_continuous(breaks = seq(0, 144, 12)) +
  theme_classic() +
  facet_wrap(~ as.factor(word))
For each age (12-144 months), calculate the frequency of word production, controlling for transcript length.
1. Group all data by transcript
2. Transcript length = number of child-produced words in transcript
3. Relative word frequency = N times word produced / N words in transcript
4. Calculate the mean relative word frequency for each word in each age bin (1-month or 3-month)
# mean_proportion_frequency against age_group, one facet per word, with a
# binomial-GLM smooth and its confidence band. Breaks every 12 units.
# NOTE(review): mean_proportion_frequency looks like a non-integer
# proportion; glm() with family = "binomial" warns on such responses.
# Confirm whether a quasibinomial family is the intended model.
ggplot(
  frequency_3month_samplewords,
  aes(x = age_group, y = mean_proportion_frequency)
) +
  geom_point() +
  stat_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = TRUE
  ) +
  scale_x_continuous(breaks = seq(0, 144, 12)) +
  theme_classic() +
  facet_wrap(~ as.factor(word))
# log_mean_freq against age_group as points only (no fitted smooth),
# one facet per word. X-axis breaks fall every 12 units.
ggplot(
  frequency_3month_samplewords,
  aes(x = age_group, y = log_mean_freq)
) +
  geom_point() +
  scale_x_continuous(breaks = seq(0, 144, 12)) +
  theme_classic() +
  facet_wrap(~ as.factor(word))
#' Estimate CHILDES age-of-acquisition (AoA) ranks
#'
#' For every word, keeps the earliest `age_group` at which `prop_say` reaches
#' `proportion`, sorts the words by that age, and ranks them.
#'
#' @param df Data frame with columns `word`, `age_group` and `prop_say`.
#' @param proportion Minimum `prop_say` a word must reach to be kept.
#' @return The qualifying rows of `df` with two added columns:
#'   `raw_rank_childes` (row position after sorting by `age_group`) and
#'   `averaged_rank_childes` (mean raw rank of the rows sharing an
#'   `age_group`). The result is still grouped by `age_group`; call
#'   `ungroup()` before further grouped operations if that is not wanted.
estimate_childes_aoa <- function(df, proportion) {
  df %>%
    group_by(word) %>%
    filter(prop_say >= proportion) %>%
    # Earliest age bin at which each word clears the threshold.
    filter(age_group == min(age_group)) %>%
    ungroup() %>%
    arrange(age_group) %>%
    # row_number() instead of seq(1, nrow(.)): the latter evaluates to
    # c(1, 0) on an empty frame and makes mutate() fail when no word
    # reaches the threshold.
    mutate(raw_rank_childes = row_number()) %>%
    group_by(age_group) %>%
    # Rows tied on age_group share the mean of their raw ranks.
    mutate(averaged_rank_childes = mean(raw_rank_childes))
}
# AoA estimates from nkids_3months at three prop_say cutoffs:
# 0.75 (strictest), 0.60 (middle) and 0.50 (lenient).
strictest_aoa_words <- estimate_childes_aoa(nkids_3months, proportion = 0.75)
middle_aoa_words <- estimate_childes_aoa(nkids_3months, proportion = 0.60)
lenient_aoa_words <- estimate_childes_aoa(nkids_3months, proportion = 0.50)
# One row per cdi entry, with the Kuperman columns and the lenient
# CHILDES AoA columns attached by word. Words absent from either
# right-hand table keep their row, with NA in the joined columns.
spearman_corr_df <- left_join(cdi, kuperman, by = "word")
spearman_corr_df <- left_join(
  spearman_corr_df,
  lenient_aoa_words,
  by = "word"
)
CDI vs. Kuperman
# Spearman rank correlation between the CDI and Kuperman averaged ranks.
cor.test(spearman_corr_df$averaged_rank_cdi, spearman_corr_df$averaged_rank_kuperman, method="spearman")
##
## Spearman's rank correlation rho
##
## data: spearman_corr_df$averaged_rank_cdi and spearman_corr_df$averaged_rank_kuperman
## S = 4739399, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5813061
#rho = .581, p < .001
CHILDES vs. Kuperman
# Spearman rank correlation between the CHILDES and Kuperman averaged ranks.
cor.test(spearman_corr_df$averaged_rank_childes, spearman_corr_df$averaged_rank_kuperman, method="spearman")
##
## Spearman's rank correlation rho
##
## data: spearman_corr_df$averaged_rank_childes and spearman_corr_df$averaged_rank_kuperman
## S = 69889, p-value = 8.222e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4050951
#rho = .41, p < .001
CHILDES vs. CDI
# Spearman rank correlation between the CDI and CHILDES averaged ranks.
cor.test(spearman_corr_df$averaged_rank_cdi, spearman_corr_df$averaged_rank_childes, method="spearman")
##
## Spearman's rank correlation rho
##
## data: spearman_corr_df$averaged_rank_cdi and spearman_corr_df$averaged_rank_childes
## S = 99201, p-value = 0.1454
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1555928
#rho = .16, p = .15