Read in model data
This is with 10 runs per model each.
model_biases <- read_csv(here('data/processed/other/tidy_gender_scores_10.csv')) %>%
mutate(window_size = 10) %>%
bind_rows( read_csv(here('data/processed/other/tidy_gender_scores_5.csv')) %>%
mutate(window_size = 5)) %>%
bind_rows( read_csv(here('data/processed/other/tidy_gender_scores_20.csv')) %>%
mutate(window_size = 20))
# bind_rows( read_csv(here('data/processed/other/tidy_gender_scores_40.csv')) %>%
#mutate(window_size = 40))
model_biases %>%
mutate(path_short = basename(path)) %>%
group_by(path_short, window_size, model_type)%>%
multi_boot_standard(col = "male_score") %>%
ggplot(aes(x = path_short, y = mean, color = model_type)) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
geom_hline(aes(yintercept = 0), linetype = 2)+
facet_wrap(~window_size) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
# take mean across model runs
model_scores <- model_biases %>%
group_by(model_type, window_size, word) %>%
summarize(male_score = mean(male_score))
# female_target = mean(female_target),
# male_target = mean(male_target)) %>%
# mutate(female_target = log(1-female_target),
# male_target = log(1-male_target))
Merge with human data
GENDER_NORMS <- here("data/processed/words/gender_ratings_mean.csv")
gender_norms <- read_csv(GENDER_NORMS) %>%
select(word, mean) %>%
rename(human_gender_rating = mean)
all_scores <- gender_norms %>%
left_join(model_scores)
# filter to only those words that are present in both adult and kid models
good_words <- count(all_scores, word, window_size) %>%
filter(n == 2) %>%
pull(word)
all_scores_tidy <- all_scores %>%
filter(word %in% good_words) %>%
gather("language_measure", "value", -1:-4)
all_scores_tidy %>%
ggplot(aes( x = value,
y = human_gender_rating,
color = model_type)) +
geom_point(size = .5, alpha = .2)+
geom_smooth(method = "lm") +
facet_grid(window_size~language_measure, scale = "free") +
theme_classic()
all_scores_tidy %>%
group_by(model_type, window_size, language_measure) %>%
nest() %>%
mutate(test = map(data, ~ tidy(cor.test(.x$value,
.x$human_gender_rating)))) %>%
select(-data) %>%
unnest() %>%
select(1:7) %>%
mutate(sig = case_when(p.value < .05 ~ "*", TRUE ~ "")) %>%
arrange(window_size, language_measure)%>%
kable()
| model_type | window_size | language_measure | estimate | statistic | p.value | parameter | sig |
|---|---|---|---|---|---|---|---|
| adult | 5 | male_score | -0.0930577 | -2.669845 | 0.0077395 | 816 | * |
| kid | 5 | male_score | -0.1093975 | -3.143888 | 0.0017275 | 816 | * |
| adult | 10 | male_score | -0.0808065 | -2.315867 | 0.0208126 | 816 | * |
| kid | 10 | male_score | -0.0896267 | -2.570597 | 0.0103286 | 816 | * |
| adult | 20 | male_score | -0.0916545 | -2.614704 | 0.0090971 | 807 | * |
| kid | 20 | male_score | -0.0943975 | -2.708627 | 0.0068978 | 816 | * |
window size = 10; for verb/adjectives, kid > adult with human data.
freq_data <- read_tsv("/Users/mollylewis/Documents/research/Projects/1_in_progress/VOCAB_SEEDS/analyses/3_kid_vocabs/data/SUBTLEX-US\ frequency\ list\ with\ PoS\ information\ text\ version.txt") %>% janitor::clean_names() %>%
select(word, dom_po_s_subtlex, lg10wf)
target_data <- all_scores_tidy %>%
left_join(freq_data) %>%
filter(dom_po_s_subtlex %in% c("Verb","Adjective")) %>%
filter(window_size == 10)
target_data %>%
mutate_if(is.numeric, scale) %>%
ggplot(aes( x = value,
y = human_gender_rating,
color = model_type)) +
geom_point(size = .5, alpha = .2)+
geom_smooth(method = "lm") +
theme_classic()
target_data %>%
group_by(model_type, window_size, language_measure) %>%
nest() %>%
mutate(test = map(data, ~ tidy(cor.test(.x$value,
.x$human_gender_rating)))) %>%
select(-data) %>%
unnest() %>%
#select(1:7) %>%
mutate(sig = case_when(p.value < .05 ~ "*", TRUE ~ "")) %>%
arrange(window_size, language_measure)%>%
kable()
| model_type | window_size | language_measure | estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative | sig |
|---|---|---|---|---|---|---|---|---|---|---|---|
| adult | 10 | male_score | -0.1005891 | -1.886031 | 0.0601233 | 348 | -0.2032754 | 0.0042859 | Pearson’s product-moment correlation | two.sided | |
| kid | 10 | male_score | -0.1655878 | -3.132240 | 0.0018819 | 348 | -0.2658036 | -0.0618312 | Pearson’s product-moment correlation | two.sided | * |
df = target_data %>%
spread(model_type, value)
r.jk <- cor(df$human_gender_rating, df$kid) # Correlation (age, intelligence)
r.jh <- cor(df$human_gender_rating, df$adult) # Correlation (age, shoe size)
r.kh <- cor(df$adult, df$kid) # cor(df$adult, df$kid)
n <- nrow(df)# Size of the group
cocor::cocor.dep.groups.overlap(r.jk, r.jh, r.kh, n, var.labels=c("human", "kid",
"adult"))
##
## Results of a comparison of two overlapping correlations based on dependent groups
##
## Comparison between r.jk (human, kid) = -0.1656 and r.jh (human, adult) = -0.1006
## Difference: r.jk - r.jh = -0.065
## Related correlation: r.kh = 0.8844
## Data: j = human, k = kid, h = adult
## Group size: n = 350
## Null hypothesis: r.jk is equal to r.jh
## Alternative hypothesis: r.jk is not equal to r.jh (two-sided)
## Alpha: 0.05
##
## pearson1898: Pearson and Filon's z (1898)
## z = -2.5556, p-value = 0.0106
## Null hypothesis rejected
##
## hotelling1940: Hotelling's t (1940)
## t = -2.5660, df = 347, p-value = 0.0107
## Null hypothesis rejected
##
## williams1959: Williams' t (1959)
## t = -2.5659, df = 347, p-value = 0.0107
## Null hypothesis rejected
##
## olkin1967: Olkin's z (1967)
## z = -2.5556, p-value = 0.0106
## Null hypothesis rejected
##
## dunn1969: Dunn and Clark's z (1969)
## z = -2.5445, p-value = 0.0109
## Null hypothesis rejected
##
## hendrickson1970: Hendrickson, Stanley, and Hills' (1970) modification of Williams' t (1959)
## t = -2.5660, df = 347, p-value = 0.0107
## Null hypothesis rejected
##
## steiger1980: Steiger's (1980) modification of Dunn and Clark's z (1969) using average correlations
## z = -2.5429, p-value = 0.0110
## Null hypothesis rejected
##
## meng1992: Meng, Rosenthal, and Rubin's z (1992)
## z = -2.5416, p-value = 0.0110
## Null hypothesis rejected
## 95% confidence interval for r.jk - r.jh: -0.1172 -0.0151
## Null hypothesis rejected (Interval does not include 0)
##
## hittner2003: Hittner, May, and Silver's (2003) modification of Dunn and Clark's z (1969) using a backtransformed average Fisher's (1921) Z procedure
## z = -2.5428, p-value = 0.0110
## Null hypothesis rejected
##
## zou2007: Zou's (2007) confidence interval
## 95% confidence interval for r.jk - r.jh: -0.1149 -0.0150
## Null hypothesis rejected (Interval does not include 0)
target_data <- all_scores_tidy %>%
left_join(freq_data) %>%
filter(dom_po_s_subtlex %in% c("Noun")) %>%
filter(window_size == 10)
target_data %>%
mutate_if(is.numeric, scale) %>%
ggplot(aes( x = value,
y = human_gender_rating,
color = model_type)) +
geom_point(size = .5, alpha = .2)+
geom_smooth(method = "lm") +
theme_classic()
target_data %>%
group_by(model_type, window_size, language_measure) %>%
nest() %>%
mutate(test = map(data, ~ tidy(cor.test(.x$value,
.x$human_gender_rating)))) %>%
select(-data) %>%
unnest() %>%
#select(1:7) %>%
mutate(sig = case_when(p.value < .05 ~ "*", TRUE ~ "")) %>%
arrange(window_size, language_measure)%>%
kable()
| model_type | window_size | language_measure | estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative | sig |
|---|---|---|---|---|---|---|---|---|---|---|---|
| adult | 10 | male_score | -0.1015569 | -1.671199 | 0.0958496 | 268 | -0.2182864 | 0.0180376 | Pearson’s product-moment correlation | two.sided | |
| kid | 10 | male_score | -0.0740142 | -1.214998 | 0.2254362 | 268 | -0.1916964 | 0.0457660 | Pearson’s product-moment correlation | two.sided |
df = target_data %>%
spread(model_type, value)
r.jk <- cor(df$human_gender_rating, df$kid) # Correlation (age, intelligence)
r.jh <- cor(df$human_gender_rating, df$adult) # Correlation (age, shoe size)
r.kh <- cor(df$adult, df$kid) # cor(df$adult, df$kid)
n <- nrow(df)# Size of the group
cocor::cocor.dep.groups.overlap(r.jk, r.jh, r.kh, n, var.labels=c("human", "kid",
"adult"))
##
## Results of a comparison of two overlapping correlations based on dependent groups
##
## Comparison between r.jk (human, kid) = -0.074 and r.jh (human, adult) = -0.1016
## Difference: r.jk - r.jh = 0.0275
## Related correlation: r.kh = 0.8036
## Data: j = human, k = kid, h = adult
## Group size: n = 270
## Null hypothesis: r.jk is equal to r.jh
## Alternative hypothesis: r.jk is not equal to r.jh (two-sided)
## Alpha: 0.05
##
## pearson1898: Pearson and Filon's z (1898)
## z = 0.7254, p-value = 0.4682
## Null hypothesis retained
##
## hotelling1940: Hotelling's t (1940)
## t = 0.7219, df = 267, p-value = 0.4710
## Null hypothesis retained
##
## williams1959: Williams' t (1959)
## t = 0.7218, df = 267, p-value = 0.4710
## Null hypothesis retained
##
## olkin1967: Olkin's z (1967)
## z = 0.7254, p-value = 0.4682
## Null hypothesis retained
##
## dunn1969: Dunn and Clark's z (1969)
## z = 0.7213, p-value = 0.4708
## Null hypothesis retained
##
## hendrickson1970: Hendrickson, Stanley, and Hills' (1970) modification of Williams' t (1959)
## t = 0.7219, df = 267, p-value = 0.4710
## Null hypothesis retained
##
## steiger1980: Steiger's (1980) modification of Dunn and Clark's z (1969) using average correlations
## z = 0.7212, p-value = 0.4708
## Null hypothesis retained
##
## meng1992: Meng, Rosenthal, and Rubin's z (1992)
## z = 0.7211, p-value = 0.4708
## Null hypothesis retained
## 95% confidence interval for r.jk - r.jh: -0.0477 0.1032
## Null hypothesis retained (Interval includes 0)
##
## hittner2003: Hittner, May, and Silver's (2003) modification of Dunn and Clark's z (1969) using a backtransformed average Fisher's (1921) Z procedure
## z = 0.7212, p-value = 0.4708
## Null hypothesis retained
##
## zou2007: Zou's (2007) confidence interval
## 95% confidence interval for r.jk - r.jh: -0.0471 0.1020
## Null hypothesis retained (Interval includes 0)