# Load the model gender-bias scores (the `run` column used further down
# suggests one row per model_type / run / word -- TODO confirm).
SCORE_PATH <- here("analysis/books/adult_comparision/correlation_analysis_2/tidy_gender_scores.csv")
model_biases <- read_csv(SCORE_PATH)

# Collapse to a single mean male_score per (model_type, word).
# summarize() only drops the innermost grouping variable (word), so the result
# would otherwise stay grouped by model_type; ungroup() explicitly so later
# steps never inherit that grouping by accident.
model_scores <- model_biases %>%
  group_by(model_type, word) %>%
  summarize(male_score = mean(male_score)) %>%
  ungroup()
# Merge with human data: load the human gender ratings and reduce them to one
# rating per normalized word.
GENDER_NORMS <- here("data/processed/words/gender_ratings_mean.csv")
gender_words <- read_csv(GENDER_NORMS)
gender_norms <- gender_words %>%
  mutate(
    # Keep only the text before the first space (vectorized equivalent of
    # splitting on " " and taking the first piece), then lowercase it and
    # strip punctuation -- presumably so it lines up with the model words.
    word = str_extract(word, "^[^ ]*"),
    word = tolower(word),
    word = str_remove_all(word, '[:punct:]')
  ) %>%
  # NOTE(review): distinct() keeps only the first row per normalized word, so
  # the mean() below is always computed over a single rating. If the intent was
  # to average duplicate entries, distinct() should be removed -- confirm
  # before changing, since it would alter every downstream number.
  distinct(word, .keep_all = TRUE) %>%
  group_by(word) %>%
  summarize(human_gender_rating = mean(mean, na.rm = TRUE))
# One row per (word, model_type) that has both a human rating and a model score.
# The join key is spelled out: `word` is the only column the two tables share,
# and naming it avoids the "Joining, by = ..." message and guards against a
# silently different key if either table gains a column later.
all_scores <- gender_norms %>%
  inner_join(model_scores, by = "word")
# Scatter of model male_score against the human gender rating, one colour per
# model type, with a linear fit for each.
ggplot(
  all_scores,
  aes(x = male_score, y = human_gender_rating, color = model_type)
) +
  geom_point(alpha = .2, size = .5) +
  geom_smooth(method = "lm") +
  theme_classic()

# Correlation test (cor.test defaults) between model and human scores,
# computed separately for each model type and printed as a table.
all_scores %>%
  group_by(model_type) %>%
  nest() %>%
  mutate(test = map(data, ~ tidy(cor.test(.x$male_score,
                                          .x$human_gender_rating)))) %>%
  select(-data) %>%
  # Name the list-column explicitly: calling unnest() without columns is
  # deprecated in tidyr >= 1.0 and emits a warning.
  unnest(test) %>%
  # Keep model_type plus the first six tidy() columns (presumably estimate,
  # statistic, p.value, parameter, conf.low, conf.high -- TODO confirm).
  select(1:7) %>%
  mutate(sig = case_when(p.value < .05 ~ "*", TRUE ~ "")) %>%
  kable()
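| model_type | estimate | statistic | p.value | parameter | conf.low | conf.high | sig |
|:-----------|---------:|----------:|--------:|----------:|---------:|----------:|:----|
| coca | -0.4080415 | -15.36596 | 0 | 1182 | -0.4544480 | -0.3594261 | * |
| kid | -0.3612783 | -12.93749 | 0 | 1115 | -0.4112195 | -0.3091748 | * |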
# Same correlation test, but on the unaveraged scores: one test per
# (model_type, run) combination.
gender_norms %>%
  # NOTE(review): natural join on every shared column -- presumably just
  # `word`; model_biases' full column set is not visible here, so the implicit
  # key is left as-is. Confirm and switch to by = "word".
  inner_join(model_biases) %>%
  group_by(model_type, run) %>%
  nest() %>%
  mutate(test = map(data, ~ tidy(cor.test(.x$male_score,
                                          .x$human_gender_rating)))) %>%
  select(-data) %>%
  # Name the list-column explicitly: calling unnest() without columns is
  # deprecated in tidyr >= 1.0 and emits a warning.
  unnest(test) %>%
  # Keep model_type, run and the first six tidy() columns.
  select(1:8) %>%
  mutate(sig = case_when(p.value < .05 ~ "*", TRUE ~ "")) %>%
  kable()
| model_type | run | estimate | statistic | p.value | parameter | conf.low | conf.high | sig |
|:-----------|:----|---------:|----------:|--------:|----------:|---------:|----------:|:----|
| coca | 1 | -0.3005056 | -9.087952 | 0 | 832 | -0.3610265 | -0.2374641 | * |
| coca | 10 | -0.3015096 | -8.949804 | 0 | 801 | -0.3631195 | -0.2372746 | * |
| coca | 2 | -0.3352277 | -10.244789 | 0 | 829 | -0.3942478 | -0.2734536 | * |
| coca | 3 | -0.3960748 | -12.329411 | 0 | 817 | -0.4523073 | -0.3367057 | * |
| coca | 4 | -0.2815993 | -8.383340 | 0 | 816 | -0.3435154 | -0.2172458 | * |
| coca | 5 | -0.3375960 | -10.307739 | 0 | 826 | -0.3966052 | -0.2758084 | * |
| coca | 6 | -0.3766578 | -11.699999 | 0 | 828 | -0.4335935 | -0.3167266 | * |
| coca | 7 | -0.3281883 | -9.894183 | 0 | 811 | -0.3881862 | -0.2654202 | * |
| coca | 8 | -0.2932725 | -8.874891 | 0 | 837 | -0.3539302 | -0.2301579 | * |
| coca | 9 | -0.3610774 | -11.181957 | 0 | 834 | -0.4186327 | -0.3006332 | * |
| kid | 1 | -0.3392428 | -12.041968 | 0 | 1115 | -0.3901350 | -0.2862842 | * |
| kid | 10 | -0.3244020 | -11.451617 | 0 | 1115 | -0.3759045 | -0.2709014 | * |
| kid | 2 | -0.3497329 | -12.465340 | 0 | 1115 | -0.4001790 | -0.2971738 | * |
| kid | 3 | -0.3341407 | -11.837901 | 0 | 1115 | -0.3852454 | -0.2809926 | * |
| kid | 4 | -0.3497485 | -12.465973 | 0 | 1115 | -0.4001939 | -0.2971900 | * |
| kid | 5 | -0.3104181 | -10.904019 | 0 | 1115 | -0.3624735 | -0.2564319 | * |
| kid | 6 | -0.3103981 | -10.903241 | 0 | 1115 | -0.3624543 | -0.2564112 | * |
| kid | 7 | -0.3521110 | -12.562052 | 0 | 1115 | -0.4024543 | -0.2996444 | * |
| kid | 8 | -0.3212276 | -11.326595 | 0 | 1115 | -0.3728575 | -0.2676147 | * |
| kid | 9 | -0.3284790 | -11.612826 | 0 | 1115 | -0.3798163 | -0.2751246 | * |
Same words across all models
# Words that have a score from every model type. all_scores holds one row per
# (word, model_type), so a word is shared by all models exactly when its row
# count equals the number of distinct model types. Deriving that number from
# the data (instead of hard-coding 2) keeps this correct if a model is added.
common_words <- all_scores %>%
  count(word) %>%
  filter(n == n_distinct(all_scores$model_type)) %>%
  pull(word)
# Same scatter as before, restricted to the words listed in common_words.
ggplot(
  filter(all_scores, word %in% common_words),
  aes(x = male_score, y = human_gender_rating, color = model_type)
) +
  geom_point(alpha = .2, size = .5) +
  geom_smooth(method = "lm") +
  theme_classic()

# Per-model-type correlation test, restricted to the words in common_words so
# every model is evaluated on the same vocabulary.
all_scores %>%
  filter(word %in% common_words) %>%
  group_by(model_type) %>%
  nest() %>%
  mutate(test = map(data, ~ tidy(cor.test(.x$male_score,
                                          .x$human_gender_rating)))) %>%
  select(-data) %>%
  # Name the list-column explicitly: calling unnest() without columns is
  # deprecated in tidyr >= 1.0 and emits a warning.
  unnest(test) %>%
  # Keep model_type plus the first six tidy() columns.
  select(1:7) %>%
  mutate(sig = case_when(p.value < .05 ~ "*", TRUE ~ "")) %>%
  kable()
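| model_type | estimate | statistic | p.value | parameter | conf.low | conf.high | sig |
|:-----------|---------:|----------:|--------:|----------:|---------:|----------:|:----|
| coca | -0.4562223 | -14.95605 | 0 | 851 | -0.5077967 | -0.4013893 | * |
| kid | -0.3867795 | -12.23535 | 0 | 851 | -0.4424183 | -0.3281747 | * |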
Middle range only
# Scatter limited to words whose model score lies strictly inside (-.1, .1).
ggplot(
  filter(all_scores, abs(male_score) < .1),
  aes(x = male_score, y = human_gender_rating, color = model_type)
) +
  geom_point(alpha = .2, size = .5) +
  geom_smooth(method = "lm") +
  theme_classic()

# Per-model-type correlation test using only words whose model score lies
# strictly between -.1 and .1.
all_scores %>%
  filter(male_score > -.1, male_score < .1) %>%
  group_by(model_type) %>%
  nest() %>%
  mutate(test = map(data, ~ tidy(cor.test(.x$male_score,
                                          .x$human_gender_rating)))) %>%
  select(-data) %>%
  # Name the list-column explicitly: calling unnest() without columns is
  # deprecated in tidyr >= 1.0 and emits a warning.
  unnest(test) %>%
  # Keep model_type plus the first six tidy() columns.
  select(1:7) %>%
  mutate(sig = case_when(p.value < .05 ~ "*", TRUE ~ "")) %>%
  kable()
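| model_type | estimate | statistic | p.value | parameter | conf.low | conf.high | sig |
|:-----------|---------:|----------:|--------:|----------:|---------:|----------:|:----|
| coca | -0.3142719 | -11.323480 | 0 | 1170 | -0.3649659 | -0.2617199 | * |
| kid | -0.2822794 | -9.763462 | 0 | 1101 | -0.3357123 | -0.2270358 | * |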