Word gender bias

POS
- Verb/adjectives
- Nouns

Read in model data

This is with 10 runs per model each.

model_biases <- read_csv(here('data/processed/other/tidy_gender_scores_10.csv')) %>%
  mutate(window_size = 10) %>%
  bind_rows( read_csv(here('data/processed/other/tidy_gender_scores_5.csv')) %>%
  mutate(window_size = 5)) %>%
  bind_rows( read_csv(here('data/processed/other/tidy_gender_scores_20.csv')) %>%
  mutate(window_size = 20))
  # bind_rows( read_csv(here('data/processed/other/tidy_gender_scores_40.csv')) %>%
  #mutate(window_size = 40))


model_biases %>%
  mutate(path_short = basename(path))  %>%
  group_by(path_short, window_size, model_type)%>%
  multi_boot_standard(col = "male_score") %>%
  ggplot(aes(x = path_short, y = mean, color = model_type)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_hline(aes(yintercept = 0), linetype = 2)+
  facet_wrap(~window_size) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# take mean across model runs
model_scores <- model_biases %>%
  group_by(model_type, window_size, word) %>%
  summarize(male_score = mean(male_score))
           # female_target = mean(female_target),
           # male_target = mean(male_target)) %>%
#    mutate(female_target = log(1-female_target),
        #   male_target = log(1-male_target))

Merge with human data

GENDER_NORMS <- here("data/processed/words/gender_ratings_mean.csv")

gender_norms <- read_csv(GENDER_NORMS) %>%
  select(word, mean) %>%
  rename(human_gender_rating = mean)

all_scores <- gender_norms %>%
  left_join(model_scores) 

# filter to only those words that are present in both adult and kid models
good_words <- count(all_scores, word, window_size) %>%
  filter(n == 2) %>%
  pull(word)

all_scores_tidy <- all_scores %>%
  filter(word %in% good_words) %>%
  gather("language_measure", "value", -1:-4)

all_scores_tidy %>%
  ggplot(aes( x = value,
              y = human_gender_rating,
              color = model_type)) +
  geom_point(size = .5, alpha = .2)+
  geom_smooth(method = "lm") +
  facet_grid(window_size~language_measure, scale = "free") +
  theme_classic()

all_scores_tidy %>%
  group_by(model_type, window_size, language_measure) %>%
  nest() %>%
  mutate(test = map(data, ~ tidy(cor.test(.x$value, 
                                          .x$human_gender_rating)))) %>%
  select(-data) %>%
  unnest()  %>%
  select(1:7) %>%
  mutate(sig = case_when(p.value  < .05  ~ "*", TRUE ~ "")) %>%
  arrange(window_size, language_measure)%>%
  kable()

model_type	window_size	language_measure	estimate	statistic	p.value	parameter	sig
adult	5	male_score	-0.0930577	-2.669845	0.0077395	816	*
kid	5	male_score	-0.1093975	-3.143888	0.0017275	816	*
adult	10	male_score	-0.0808065	-2.315867	0.0208126	816	*
kid	10	male_score	-0.0896267	-2.570597	0.0103286	816	*
adult	20	male_score	-0.0916545	-2.614704	0.0090971	807	*
kid	20	male_score	-0.0943975	-2.708627	0.0068978	816	*

POS

Verb/adjectives

window size = 10; for verb/adjectives, kid > adult with human data.

freq_data <- read_tsv("/Users/mollylewis/Documents/research/Projects/1_in_progress/VOCAB_SEEDS/analyses/3_kid_vocabs/data/SUBTLEX-US\ frequency\ list\ with\ PoS\ information\ text\ version.txt") %>% janitor::clean_names() %>%
  select(word, dom_po_s_subtlex, lg10wf)

target_data <-  all_scores_tidy %>%
      left_join(freq_data) %>%
      filter(dom_po_s_subtlex %in% c("Verb","Adjective")) %>%
      filter(window_size == 10) 

target_data %>%
  mutate_if(is.numeric, scale) %>%
  ggplot(aes( x = value,
              y = human_gender_rating,
              color = model_type)) +
  geom_point(size = .5, alpha = .2)+
  geom_smooth(method = "lm") +
  theme_classic()

target_data %>%
  group_by(model_type, window_size, language_measure) %>%
  nest() %>%
  mutate(test = map(data, ~ tidy(cor.test(.x$value, 
                                          .x$human_gender_rating)))) %>%
  select(-data) %>%
  unnest()  %>%
  #select(1:7) %>%
  mutate(sig = case_when(p.value  < .05  ~ "*", TRUE ~ "")) %>%
  arrange(window_size, language_measure)%>%
  kable()

model_type	window_size	language_measure	estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative	sig
adult	10	male_score	-0.1005891	-1.886031	0.0601233	348	-0.2032754	0.0042859	Pearson’s product-moment correlation	two.sided
kid	10	male_score	-0.1655878	-3.132240	0.0018819	348	-0.2658036	-0.0618312	Pearson’s product-moment correlation	two.sided	*

df = target_data %>%
  spread(model_type, value)

r.jk <- cor(df$human_gender_rating, df$kid) # Correlation (age, intelligence)
r.jh <- cor(df$human_gender_rating, df$adult) # Correlation (age, shoe size)
r.kh <- cor(df$adult, df$kid) # cor(df$adult, df$kid)
n <- nrow(df)# Size of the group
cocor::cocor.dep.groups.overlap(r.jk, r.jh, r.kh, n, var.labels=c("human", "kid",
"adult"))

## 
##   Results of a comparison of two overlapping correlations based on dependent groups
## 
## Comparison between r.jk (human, kid) = -0.1656 and r.jh (human, adult) = -0.1006
## Difference: r.jk - r.jh = -0.065
## Related correlation: r.kh = 0.8844
## Data: j = human, k = kid, h = adult
## Group size: n = 350
## Null hypothesis: r.jk is equal to r.jh
## Alternative hypothesis: r.jk is not equal to r.jh (two-sided)
## Alpha: 0.05
## 
## pearson1898: Pearson and Filon's z (1898)
##   z = -2.5556, p-value = 0.0106
##   Null hypothesis rejected
## 
## hotelling1940: Hotelling's t (1940)
##   t = -2.5660, df = 347, p-value = 0.0107
##   Null hypothesis rejected
## 
## williams1959: Williams' t (1959)
##   t = -2.5659, df = 347, p-value = 0.0107
##   Null hypothesis rejected
## 
## olkin1967: Olkin's z (1967)
##   z = -2.5556, p-value = 0.0106
##   Null hypothesis rejected
## 
## dunn1969: Dunn and Clark's z (1969)
##   z = -2.5445, p-value = 0.0109
##   Null hypothesis rejected
## 
## hendrickson1970: Hendrickson, Stanley, and Hills' (1970) modification of Williams' t (1959)
##   t = -2.5660, df = 347, p-value = 0.0107
##   Null hypothesis rejected
## 
## steiger1980: Steiger's (1980) modification of Dunn and Clark's z (1969) using average correlations
##   z = -2.5429, p-value = 0.0110
##   Null hypothesis rejected
## 
## meng1992: Meng, Rosenthal, and Rubin's z (1992)
##   z = -2.5416, p-value = 0.0110
##   Null hypothesis rejected
##   95% confidence interval for r.jk - r.jh: -0.1172 -0.0151
##   Null hypothesis rejected (Interval does not include 0)
## 
## hittner2003: Hittner, May, and Silver's (2003) modification of Dunn and Clark's z (1969) using a backtransformed average Fisher's (1921) Z procedure
##   z = -2.5428, p-value = 0.0110
##   Null hypothesis rejected
## 
## zou2007: Zou's (2007) confidence interval
##   95% confidence interval for r.jk - r.jh: -0.1149 -0.0150
##   Null hypothesis rejected (Interval does not include 0)

Nouns

target_data <-  all_scores_tidy %>%
      left_join(freq_data) %>%
      filter(dom_po_s_subtlex %in% c("Noun")) %>%
      filter(window_size == 10) 

target_data %>%
  mutate_if(is.numeric, scale) %>%
  ggplot(aes( x = value,
              y = human_gender_rating,
              color = model_type)) +
  geom_point(size = .5, alpha = .2)+
  geom_smooth(method = "lm") +
  theme_classic()

target_data %>%
  group_by(model_type, window_size, language_measure) %>%
  nest() %>%
  mutate(test = map(data, ~ tidy(cor.test(.x$value, 
                                          .x$human_gender_rating)))) %>%
  select(-data) %>%
  unnest()  %>%
  #select(1:7) %>%
  mutate(sig = case_when(p.value  < .05  ~ "*", TRUE ~ "")) %>%
  arrange(window_size, language_measure)%>%
  kable()

model_type	window_size	language_measure	estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative	sig
adult	10	male_score	-0.1015569	-1.671199	0.0958496	268	-0.2182864	0.0180376	Pearson’s product-moment correlation	two.sided
kid	10	male_score	-0.0740142	-1.214998	0.2254362	268	-0.1916964	0.0457660	Pearson’s product-moment correlation	two.sided

df = target_data %>%
  spread(model_type, value)

r.jk <- cor(df$human_gender_rating, df$kid) # Correlation (age, intelligence)
r.jh <- cor(df$human_gender_rating, df$adult) # Correlation (age, shoe size)
r.kh <- cor(df$adult, df$kid) # cor(df$adult, df$kid)
n <- nrow(df)# Size of the group
cocor::cocor.dep.groups.overlap(r.jk, r.jh, r.kh, n, var.labels=c("human", "kid",
"adult"))

## 
##   Results of a comparison of two overlapping correlations based on dependent groups
## 
## Comparison between r.jk (human, kid) = -0.074 and r.jh (human, adult) = -0.1016
## Difference: r.jk - r.jh = 0.0275
## Related correlation: r.kh = 0.8036
## Data: j = human, k = kid, h = adult
## Group size: n = 270
## Null hypothesis: r.jk is equal to r.jh
## Alternative hypothesis: r.jk is not equal to r.jh (two-sided)
## Alpha: 0.05
## 
## pearson1898: Pearson and Filon's z (1898)
##   z = 0.7254, p-value = 0.4682
##   Null hypothesis retained
## 
## hotelling1940: Hotelling's t (1940)
##   t = 0.7219, df = 267, p-value = 0.4710
##   Null hypothesis retained
## 
## williams1959: Williams' t (1959)
##   t = 0.7218, df = 267, p-value = 0.4710
##   Null hypothesis retained
## 
## olkin1967: Olkin's z (1967)
##   z = 0.7254, p-value = 0.4682
##   Null hypothesis retained
## 
## dunn1969: Dunn and Clark's z (1969)
##   z = 0.7213, p-value = 0.4708
##   Null hypothesis retained
## 
## hendrickson1970: Hendrickson, Stanley, and Hills' (1970) modification of Williams' t (1959)
##   t = 0.7219, df = 267, p-value = 0.4710
##   Null hypothesis retained
## 
## steiger1980: Steiger's (1980) modification of Dunn and Clark's z (1969) using average correlations
##   z = 0.7212, p-value = 0.4708
##   Null hypothesis retained
## 
## meng1992: Meng, Rosenthal, and Rubin's z (1992)
##   z = 0.7211, p-value = 0.4708
##   Null hypothesis retained
##   95% confidence interval for r.jk - r.jh: -0.0477 0.1032
##   Null hypothesis retained (Interval includes 0)
## 
## hittner2003: Hittner, May, and Silver's (2003) modification of Dunn and Clark's z (1969) using a backtransformed average Fisher's (1921) Z procedure
##   z = 0.7212, p-value = 0.4708
##   Null hypothesis retained
## 
## zou2007: Zou's (2007) confidence interval
##   95% confidence interval for r.jk - r.jh: -0.0471 0.1020
##   Null hypothesis retained (Interval includes 0)

Word gender bias

Molly Lewis

2019-06-06

POS

Verb/adjectives

Nouns