Highly arousing words tend to be less frequent in the children’s text corpus, whereas the opposite is true for the adult corpus. Note that these two corpora also differ on the written vs. spoken dimension, so the difference cannot be attributed to audience age alone.
# Mean gender rating per word.
# Only the first space-delimited token of each `word` entry is kept; anything
# after the first space is discarded (presumably a sense annotation, given the
# `_no_sense` name -- TODO confirm). Entries that collapse to the same token
# are then averaged into a single `gender` value.
RATING_RATINGS_PATH <- here("data/processed/words/gender_ratings_mean.csv")
mean_rating_no_sense <- read_csv(RATING_RATINGS_PATH) %>%
  # Vectorised equivalent of splitting on " " and taking the first piece:
  # the longest prefix that contains no space (NA stays NA).
  mutate(word = str_extract(word, "^[^ ]*")) %>%
  group_by(word) %>%
  summarize(gender = mean(mean))
# TASA norms: read the tab-separated file, standardise the column names, and
# keep each word with the natural log of its `all` frequency column.
TASA_PATH <- here("data/raw/other_norms/TASA formatted.txt")
tasa_norms <- clean_names(read_tsv(TASA_PATH))
tasa_norms_tidy <- tasa_norms %>%
  select(word, log_tasa_freq = all) %>%
  mutate(log_tasa_freq = log(log_tasa_freq))
# SUBTLEXus norms: keep each word with its `lg10wf` column, exposed under the
# name `log_subt_freq`.
SUBTLEXUS_PATH <- here("data/raw/other_norms/SUBTLEXus_corpus.txt")
subtlexus_norms <- read_tsv(SUBTLEXUS_PATH) %>%
  clean_names() %>%
  select(word, log_subt_freq = lg10wf)
# Valence and arousal norms: `v_mean_sum` and `a_mean_sum` are kept per word
# and renamed to `valence` and `arousal`.
EMOT_PATH <- here("data/raw/other_norms/BRM-emot-submit.csv")
emot_norms <- clean_names(read_csv(EMOT_PATH))
emot_norms_tidy <- select(
  emot_norms,
  word,
  valence = v_mean_sum,
  arousal = a_mean_sum
)
# Concreteness norms: `conc_m` is kept per word under the name `concreteness`.
CONC_PATH <- here("data/raw/other_norms/brysbaert_corpus.csv")
conc_norms <- CONC_PATH %>%
  read_csv() %>%
  clean_names()
conc_norms_tidy <- conc_norms %>%
  rename(concreteness = conc_m) %>%
  select(word, concreteness)
# Age-of-acquisition norms: `rating_mean` is kept per word under the name
# `adult_aoa`.
AOA_PATH <- here("data/raw/other_norms/AoA_ratings_Kuperman_et_al_BRM.csv")
aoa_norms <- clean_names(read_csv(AOA_PATH))
aoa_norms_tidy <- aoa_norms %>%
  select(word, adult_aoa = rating_mean)
# Attach every norm table to the gender ratings, matching on `word`.
# Left joins keep every rated word; a norm that has no entry for a word comes
# through as NA. The tables are joined in the same left-to-right order as the
# column order of the result.
all_words_with_norms <- mean_rating_no_sense %>%
  select(word, gender) %>%
  left_join(tasa_norms_tidy, by = "word") %>%
  left_join(subtlexus_norms, by = "word") %>%
  left_join(emot_norms_tidy, by = "word") %>%
  left_join(conc_norms_tidy, by = "word") %>%
  left_join(aoa_norms_tidy, by = "word")

# Complete cases only: rows with an NA in any column are removed.
all_words_with_norms_no_na <- drop_na(all_words_with_norms)
# Arousal against log TASA frequency, one dot per word, with a linear fit.
all_words_with_norms_no_na %>%
  ggplot(aes(log_tasa_freq, arousal)) +
  geom_point(size = 0.4) +
  geom_smooth(method = "lm") +
  # geom_smooth(color = "red") +  # default-method smoother, currently disabled
  theme_classic()

# Same relationship, with each word drawn as its own text label.
all_words_with_norms_no_na %>%
  ggplot(aes(log_tasa_freq, arousal)) +
  geom_text(aes(label = word), size = 1.2) +
  geom_smooth(method = "lm") +
  theme_classic()
# Arousal against log SUBTLEXus frequency, one dot per word, with a linear fit.
ggplot(
  data = all_words_with_norms_no_na,
  mapping = aes(x = log_subt_freq, y = arousal)
) +
  theme_classic() +
  geom_point(size = 0.2) +
  # geom_smooth(color = "red") +  # default-method smoother, currently disabled
  geom_smooth(method = "lm")

# Same relationship, with each word drawn as its own text label.
ggplot(
  data = all_words_with_norms_no_na,
  mapping = aes(x = log_subt_freq, y = arousal)
) +
  theme_classic() +
  geom_text(mapping = aes(label = word), size = 1) +
  geom_smooth(method = "lm")