# Directory containing the per-model gender-rating files.
MODELS <- here("exploratory_studies/13_europarl/gender_ratings/")

# Read every file in MODELS, stack the rows, and derive two columns from
# the `model` identifier:
#   * epochs     - the text between the first "en_" and the next "ep"
#                  (kept as character; NA if `model` contains no "en_")
#   * model_type - "translated" if `model` contains "trans_f",
#                  "untranslated" if it contains "untrans", otherwise NA
all_model_ratings <- list.files(MODELS, full.names = TRUE) %>%
  map_df(fread) %>%
  mutate(
    # Split on "en_" and keep the 2nd piece, then split that on "ep" and
    # keep the 1st piece. map_chr() returns a plain character vector, so
    # no list-column / as.character() round trip is needed.
    epochs = str_split(model, "en_") %>%
      map_chr(2, .default = NA_character_) %>%
      str_split("ep") %>%
      map_chr(1),
    model_type = case_when(
      str_detect(model, "trans_f") ~ "translated",
      str_detect(model, "untrans") ~ "untranslated"
    )
  ) %>%
  select(model_type, epochs, word, male_score)

# Path to the raw Glasgow Norms CSV.
GENDER_NORMS <- here("data/study1a/raw/GlasgowNorms.csv")

# Load the norms and keep one column of ratings (GEND_M, renamed to
# human_gender_rating) keyed by a normalised word:
#   * only the first space-delimited token of `word` is kept
#   * the token is lower-cased
# distinct() drops rows that are identical on both columns; entries that
# share a first token but have different ratings remain as separate rows.
glasgow_norms <- read_csv(GENDER_NORMS) %>%
  select(word, human_gender_rating = GEND_M) %>%
  mutate(
    # Vectorised replacement for the former rowwise() split.
    word = str_split(word, " ") %>% map_chr(1),
    word = tolower(word)
  ) %>%
  distinct()

# Attach the human rating to every model rating. Rows whose word has no
# match in glasgow_norms are kept with human_gender_rating = NA.
# The key is spelled out so the join does not depend on (or print a
# message about) whichever column names happen to be shared.
all_ratings <- all_model_ratings %>%
  left_join(glasgow_norms, by = "word")

# Scatter of model score against human rating with a linear fit, one panel
# per (epochs, model_type) combination.
# `epochs` is stored as character in all_ratings, which would order the
# facet rows alphabetically (10, 20, 5); facet on its numeric value so the
# rows appear in increasing order.
ggplot(
  data = all_ratings,
  mapping = aes(x = human_gender_rating, y = male_score)
) +
  geom_point(size = 0.2, alpha = 0.2) +
  geom_smooth(method = "lm") +
  facet_grid(rows = vars(as.numeric(epochs)), cols = vars(model_type)) +
  theme_bw()

# For each (model_type, epochs) combination, run cor.test() between the
# human rating and the model score and tidy the result into one row.
cor_values <- all_ratings %>%
  mutate(epochs = as.numeric(epochs)) %>%
  group_by(model_type, epochs) %>%
  nest() %>%
  mutate(
    temp = map(
      data,
      ~ tidy(cor.test(.x$human_gender_rating, .x$male_score))
    )
  ) %>%
  select(-data) %>%
  # `cols` is required by current tidyr; a bare unnest() is deprecated.
  unnest(cols = temp) %>%
  # Drop the grouping so later steps see a plain table.
  ungroup()


# Display the correlation table.
kable(cor_values)
model_type epochs estimate statistic p.value parameter conf.low conf.high method alternative
translated 10 0.1337143 8.903065 0.0e+00 4354 0.1044310 0.1627658 Pearson’s product-moment correlation two.sided
translated 20 0.1117646 7.421269 0.0e+00 4354 0.0823400 0.1409946 Pearson’s product-moment correlation two.sided
translated 5 0.1340506 8.925870 0.0e+00 4354 0.1047698 0.1630992 Pearson’s product-moment correlation two.sided
untranslated 10 0.0802058 4.949089 8.0e-07 3783 0.0484700 0.1117798 Pearson’s product-moment correlation two.sided
untranslated 20 0.0726241 4.478651 7.7e-06 3783 0.0408590 0.1042425 Pearson’s product-moment correlation two.sided
untranslated 5 0.1023304 6.327158 0.0e+00 3783 0.0707013 0.1337539 Pearson’s product-moment correlation two.sided
# Correlation estimate as a function of epochs, one panel per model type,
# drawn as points joined by a line.
ggplot(
  data = cor_values,
  mapping = aes(x = epochs, y = estimate, group = model_type)
) +
  facet_wrap(facets = vars(model_type)) +
  geom_point() +
  geom_line() +
  theme_bw()