Get human gender correlations by model

# Read every per-model ratings file in MODELS and stack them into one table,
# then recover the training configuration from the `model` name column.
MODELS <- here("exploratory_studies/15_udbank/gender_ratings/")
all_model_ratings <- map_df(list.files(MODELS, full.names = TRUE), fread) %>%
  mutate(
    # case_when() takes the first matching branch, so "5ep" has to come last:
    # it is a substring of "15ep" and would otherwise capture those models.
    epochs = case_when(
      str_detect(model, "15ep") ~ 15,
      str_detect(model, "30ep") ~ 30,
      str_detect(model, "50ep") ~ 50,
      str_detect(model, "5ep") ~ 5
    ),
    # Size is kept as a string label ("200"/"300"), not a number.
    size = case_when(
      str_detect(model, "300d") ~ "300",
      str_detect(model, "200d") ~ "200"
    )
  ) %>%
  select(epochs, size, word, male_score)

# Human gender ratings from the Glasgow Norms file: keep the GEND_M column
# (renamed to human_gender_rating) and reduce each entry to its lower-cased
# first space-delimited token.
GENDER_NORMS <- here("data/study1a/raw/GlasgowNorms.csv")
glasgow_norms <- read_csv(GENDER_NORMS) %>%
  select(word, GEND_M) %>%
  rename(human_gender_rating = GEND_M) %>%
  # Vectorised replacement for the former rowwise() + str_split(...)[1]:
  # column 1 of the fixed split is everything before the first space.
  mutate(
    word = str_split_fixed(word, " ", n = 2)[, 1],
    word = tolower(word)
  ) %>%
  # NOTE(review): distinct() only drops rows identical in both word and
  # rating. Entries that collapse to the same token but carry different
  # ratings stay as separate rows and will multiply rows in any join on
  # `word` -- confirm this is intended.
  distinct()

# Attach the human rating to every model rating. The key is spelled out so
# the join cannot silently pick up other shared columns; model words with no
# human norm are kept with an NA human_gender_rating.
all_ratings <- all_model_ratings %>%
  left_join(glasgow_norms, by = "word")

# Human rating (x) against model male_score (y) with a linear fit,
# faceted by epochs (rows) and size (columns).
all_ratings %>%
  ggplot(mapping = aes(x = human_gender_rating, y = male_score)) +
  geom_point(size = 0.2, alpha = 0.2) +
  geom_smooth(method = "lm") +
  facet_grid(epochs ~ size) +
  theme_bw()

# Correlation test between human and model gender scores, run separately for
# each (epochs, size) configuration; tidy() turns each test into one row.
cor_values <- all_ratings %>%
  group_by(epochs, size) %>%
  nest() %>%
  mutate(
    temp = map(
      data,
      ~ tidy(cor.test(.x$human_gender_rating, .x$male_score))
    )
  ) %>%
  select(-data) %>%
  # Name the list-column explicitly: a bare unnest() relies on deprecated
  # implicit column selection.
  unnest(temp) %>%
  # Drop any grouping left over from group_by() so later steps see a plain
  # table.
  ungroup()


kable(cor_values)

epochs	size	estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
15	200	0.1103832	5.854764	0.00e+00	2779	0.0735158	0.1469494	Pearson’s product-moment correlation	two.sided
30	200	0.1103498	5.852967	0.00e+00	2779	0.0734821	0.1469162	Pearson’s product-moment correlation	two.sided
50	200	0.1064774	5.645176	0.00e+00	2779	0.0695837	0.1430802	Pearson’s product-moment correlation	two.sided
5	200	0.0831735	4.399839	1.12e-05	2779	0.0461471	0.1199717	Pearson’s product-moment correlation	two.sided
15	300	0.1122461	5.954820	0.00e+00	2779	0.0753915	0.1487944	Pearson’s product-moment correlation	two.sided
5	300	0.0746779	3.947760	8.08e-05	2779	0.0376132	0.1115374	Pearson’s product-moment correlation	two.sided

# Correlation estimate as a function of training epochs, one line per size.
cor_values %>%
  ggplot(mapping = aes(x = epochs, y = estimate, group = size, color = size)) +
  geom_point() +
  geom_line() +
  theme_bw()

# Target word lists for the career vs. family comparison.
CAREER_WORDS <- c(
  "career", "executive", "management", "professional",
  "corporation", "salary", "office", "business"
)
FAMILY_WORDS <- c(
  "family", "home", "parents", "children",
  "cousins", "marriage", "wedding", "relatives"
)

# parents and cousins
# NOTE(review): the original comment singles out these two family words
# without saying why -- confirm what needs checking for them.

# Summarise male_score per model configuration and word type. After the
# filter every remaining word is in exactly one list, so a two-way if_else()
# labels them.
all_ratings %>%
  filter(word %in% c(CAREER_WORDS, FAMILY_WORDS)) %>%
  mutate(
    career_word_type = if_else(word %in% CAREER_WORDS, "career", "family")
  ) %>%
  group_by(epochs, size, career_word_type) %>%
  multi_boot_standard(col = "male_score")

## # A tibble: 12 x 6
## # Groups:   epochs, size, career_word_type [12]
##    epochs size  career_word_type ci_lower   ci_upper    mean
##     <dbl> <chr> <chr>               <dbl>      <dbl>   <dbl>
##  1      5 200   career            -0.0519 -0.00265   -0.0285
##  2      5 200   family            -0.0768  0.0124    -0.0334
##  3      5 300   career            -0.0529 -0.00205   -0.0304
##  4      5 300   family            -0.0740  0.0127    -0.0324
##  5     15 200   career            -0.0394  0.00947   -0.0175
##  6     15 200   family            -0.0762  0.0114    -0.0303
##  7     15 300   career            -0.0411  0.00699   -0.0201
##  8     15 300   family            -0.0806 -0.0000623 -0.0403
##  9     30 200   career            -0.0279  0.00304   -0.0131
## 10     30 200   family            -0.0543  0.0171    -0.0213
## 11     50 200   career            -0.0272 -0.00149   -0.0146
## 12     50 200   family            -0.0561  0.00726   -0.0240

Get human gender correlations by model

Molly Lewis

2019-09-16