# Significance threshold shared by the correlation plot and the pairwise
# correlation table further down.
ALPHA <- 0.1

Six gender parity indicators:

We exclude GPI because it is missing for about half of the countries (TODO: look into why).

IAT bias measures

# Load the country -> language lookup table.
all_countries <- read_csv("data/other/country_to_lang.csv")

# Overwrite the country code of the row(s) named "UK" with "GB"
# (presumably to match the ISO two-letter code used elsewhere -- confirm).
all_countries[all_countries$country_name == "UK", "country_code"] <- "GB"
          
# Load the wide table of bias effect sizes, discarding the wps_index column.
bias_measures <- select(
  read_csv("data/other/all_es_wide.csv"),
  -wps_index
)

# Disabled filters that would blank out low weapons/flowers estimates:
#bias_measures[bias_measures$weapons_google < .5, "weapons_google"] = NA
#bias_measures[bias_measures$flowers_google < .9, "flowers_google"] = NA

# One box per bias measure. Columns 1 and 6 are kept out of the reshape, so
# every other column is stacked into measure/value pairs.
ggplot(
  gather(bias_measures, "measure", "value", c(-1, -6)),
  aes(x = measure, y = value)
) +
  geom_boxplot() +
  theme_bw()

Objective gender measures

# Read the HDI indicator table and reduce it to one median-age value per
# country.
#
# Steps:
#   1. Coerce columns 3-28 to numeric and average them row-wise, ignoring
#      missing entries (presumably one column per year -- confirm).
#   2. Convert country names to ISO two-letter codes.
#   3. Spread the indicators into columns and keep only `Median Age`.
hdi <- read_csv("data/gender_measures/HDI_complete.csv") %>%
  mutate_at(3:28, as.numeric) %>%
  mutate(mean_value = rowMeans(.[, 3:28], na.rm = TRUE)) %>%
  mutate(country_code = countrycode(country, "country.name", "iso2c")) %>%
  select(kpi_name, country_code, mean_value) %>%
  # spread() takes only key and value here. A third positional argument is
  # `fill`, so the previous `-2` replaced every missing indicator value with
  # the number -2 instead of leaving it missing, which then leaked into the
  # median_age regression and correlations.
  spread(kpi_name, mean_value) %>%
  select(country_code, `Median Age`) %>%
  rename(median_age = `Median Age`)

# Load the objective gender measures, drop the indicators that are not used
# in this analysis, and attach median age from `hdi`.
all_gender_measures <- left_join(
  read_csv("data/gender_measures/all_gender_measures.csv") %>%
    select(
      -sigi,
      -sigi_physical,
      -wb_cpia,
      -contains("schooling"),
      -gpi_literacy,
      -contains("ggi_"),
      -sigi_son
    ),
  hdi
)
# Histogram of each untransformed measure (all columns after the first
# three), one facet per measure with independent axes.
ggplot(
  gather(all_gender_measures, measure, value, -1:-3),
  aes(x = value)
) +
  geom_histogram() +
  facet_wrap(~measure, scales = "free", ncol = 4) +
  ggtitle("raw") +
  theme_bw()

Transform skewed measures.

# Replace the skewed measures with transformed versions:
#   sigi_fam -> log (infinite results, e.g. from log(0), become NA)
#   gii      -> log
#   gdi      -> tenth power
# The original columns are dropped afterwards.
all_gender_measures_transformed <- all_gender_measures %>%
  mutate(
    sigi_fam_log = log(sigi_fam),
    sigi_fam_log = ifelse(is.infinite(sigi_fam_log), NA, sigi_fam_log),
    gii_log = log(gii),
    gdi_exp = gdi^10
  ) %>%
  select(-sigi_fam, -gii, -gdi)

# Same histogram grid as for the raw data, now on the transformed measures.
ggplot(
  gather(all_gender_measures_transformed, measure, value, -1:-3),
  aes(x = value)
) +
  geom_histogram() +
  ggtitle("transformed") +
  facet_wrap(~measure, scales = "free", ncol = 4) +
  theme_bw()

Correlation between measures

# Merge everything: bias measures are attached by wiki language, gender
# measures by country code. Duplicate ".y" columns created by the joins are
# discarded, columns are reordered by position, and every measure from
# career_google through gdi_exp is averaged within each wiki language
# (missing values ignored).
full_df_partial <- left_join(all_countries, bias_measures,
                             by = "wiki_language_code") %>%
  left_join(all_gender_measures_transformed, by = "country_code") %>%
  select(-contains(".y")) %>%
  select(1:5, 8, everything()) %>%
  group_by(wiki_language_code.x) %>%
  summarise_at(vars(career_google:gdi_exp), mean, na.rm = TRUE)
# Linear model of the behavioral career IAT on median age; its residuals are
# the part of the IAT score not linearly explained by median age.
mod1 <- lm(career_behavioral_iat ~ median_age, data = full_df_partial)

# Append the residuals under a descriptive name and move that column to
# position 5, directly after the first four columns.
full_df <- modelr::add_residuals(full_df_partial, mod1) %>%
  rename(career_behavioral_iat_resid = resid) %>%
  select(1:4, career_behavioral_iat_resid, 5, 6, everything())
# Pairwise correlations between all measures. The first column is the
# grouping (language) column and is left out.
corr_mat <- cor(full_df[, -1], use = "pairwise.complete.obs")

# Matching matrix of p-values for the same set of columns.
p.mat <- cor.mtest(
  full_df[, -1],
  conf.level = (1 - ALPHA),
  use = "pairwise.complete.obs"
)$p

# 100-step colour ramp, reversed so it runs blue -> white -> red.
cols <- rev(colorRampPalette(c("red", "white", "blue"))(100))

# Upper-triangle heat map with coefficients printed in each cell; cells whose
# p-value is not below ALPHA are left blank.
corrplot(
  corr_mat,
  method = "color",
  col = cols,
  type = "upper",
  order = "original",
  diag = FALSE,
  addCoef.col = "black",
  number.cex = .7,
  p.mat = p.mat,
  sig.level = ALPHA,
  insig = "blank",
  tl.col = "black",
  tl.srt = 90
)

# Names of every measure column, i.e. all columns of full_df except the
# first one.
MEASURES <- names(full_df)[-1]

# Every unordered pair of distinct measures, one row per pair, with test1
# sorting before test2.
#
# The columns are named directly in crossing(): unnamed arguments would both
# be called "MEASURES", and duplicate column names are rejected by newer
# tidyr versions. Keeping only rows with test1 < test2 drops self-pairs and
# mirrored duplicates in a single step. The earlier row-wise sort overwrote
# test1 before computing test2 and only yielded the right pairs because the
# collapsed rows were filtered out afterwards.
unique_pairs <- tidyr::crossing(test1 = MEASURES, test2 = MEASURES) %>%
  filter(test1 < test2)

# Run a correlation test between two columns of a data frame.
#
# Arguments:
#   test1, test2  Column names (character scalars) of the variables to test.
#   df            Data frame containing both columns.
#
# Returns the one-row tidy() summary of cor.test() with two extra columns,
# `test1` and `test2`, recording which variables were compared.
#
# Stops with an error if either column is absent from `df`.
get_corr <- function(test1, test2, df){

  missing_cols <- setdiff(c(test1, test2), names(df))
  if (length(missing_cols) > 0) {
    stop("Column(s) not found in df: ",
         paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }

  # Extract by name with [[ ]] rather than select(test1, test2): select()
  # would pick up a column literally called "test1"/"test2" if one existed,
  # and passing external character vectors to it is deprecated.
  result <- tidy(cor.test(df[[test1]], df[[test2]]))

  # Plain assignment keeps the labels independent of whatever columns the
  # tidy() output happens to contain.
  result$test1 <- test1
  result$test2 <- test2
  result
}

# Table of correlation tests for every unique pair of measures; pairs whose
# p-value is below ALPHA are starred. Rows keep the order of unique_pairs
# (sorting by p-value is available but disabled: arrange(p.value)).
kable(
  map2_df(unique_pairs$test1, unique_pairs$test2, get_corr, full_df) %>%
    select(test1, test2, estimate, statistic, p.value, parameter) %>%
    mutate(sig = ifelse(p.value < ALPHA, "*", ""))
)
test1 test2 estimate statistic p.value parameter sig
career_behavioral_iat career_behavioral_iat_resid 0.7426395 5.3182321 0.0000213 23 *
career_behavioral_iat career_google 0.2198718 1.0809194 0.2909371 23
career_behavioral_iat career_hand 0.4862996 2.2261570 0.0407233 16 *
career_behavioral_iat flowers_google 0.2170289 1.0662478 0.2973769 23
career_behavioral_iat gdi_exp 0.0786312 0.3699586 0.7149509 22
career_behavioral_iat ggi 0.1221328 0.5771745 0.5696815 22
career_behavioral_iat gii_log -0.4335201 -2.3071667 0.0303913 23 *
career_behavioral_iat median_age 0.6696914 4.3247454 0.0002506 23 *
career_behavioral_iat sigi_fam_log -0.5507151 -3.0946494 0.0052909 22 *
career_behavioral_iat weapons_google 0.5435156 3.1053274 0.0049843 23 *
career_behavioral_iat wps 0.4501826 2.4178650 0.0239371 23 *
career_behavioral_iat_resid career_google 0.2195204 1.0791046 0.2917282 23
career_behavioral_iat_resid career_hand 0.4162002 1.8309140 0.0858011 16 *
career_behavioral_iat_resid flowers_google -0.0507677 -0.2437876 0.8095567 23
career_behavioral_iat_resid gdi_exp -0.3960735 -2.0232100 0.0553688 22 *
career_behavioral_iat_resid ggi -0.1984857 -0.9498793 0.3524927 22
career_behavioral_iat_resid gii_log 0.1051289 0.5069901 0.6169883 23
career_behavioral_iat_resid median_age 0.0000000 0.0000000 1.0000000 23
career_behavioral_iat_resid sigi_fam_log -0.1231142 -0.5818833 0.5665609 22
career_behavioral_iat_resid weapons_google 0.3154230 1.5940920 0.1245664 23
career_behavioral_iat_resid wps -0.1782034 -0.8685354 0.3940742 23
career_google career_hand 0.7110338 4.0448036 0.0009393 16 *
career_google flowers_google 0.1572407 0.7635988 0.4528687 23
career_google gdi_exp 0.0957022 0.4509532 0.6564367 22
career_google ggi 0.2928861 1.4367636 0.1648552 22
career_google gii_log -0.4149863 -2.1874523 0.0391340 23 *
career_google median_age 0.0848857 0.4085722 0.6866328 23
career_google sigi_fam_log -0.4031658 -2.0663965 0.0507618 22 *
career_google weapons_google 0.1591782 0.7732506 0.4472489 23
career_google wps 0.3504214 1.7943372 0.0859162 23 *
career_hand flowers_google 0.1634815 0.6628436 0.5168667 16
career_hand gdi_exp 0.0317343 0.1270012 0.9005218 16
career_hand ggi 0.3909580 1.6451108 0.1207349 15
career_hand gii_log -0.4128510 -1.8131387 0.0886129 16 *
career_hand median_age 0.1802039 0.7328122 0.4742730 16
career_hand sigi_fam_log -0.4644305 -2.0976759 0.0521716 16 *
career_hand weapons_google 0.4630674 2.0898354 0.0529579 16 *
career_hand wps 0.2754849 1.1462951 0.2685280 16
flowers_google gdi_exp 0.0028962 0.0135845 0.9892839 22
flowers_google ggi 0.3607534 1.8142531 0.0833011 22 *
flowers_google gii_log -0.4980699 -2.7546505 0.0112836 23 *
flowers_google median_age 0.3803707 1.9724552 0.0606975 23 *
flowers_google sigi_fam_log -0.5102751 -2.7829899 0.0108461 22 *
flowers_google weapons_google 0.4098315 2.1547536 0.0418896 23 *
flowers_google wps 0.4879503 2.6809530 0.0133427 23 *
gdi_exp ggi 0.3994582 2.2641316 0.0318086 27 *
gdi_exp gii_log -0.2678917 -1.4713283 0.1523530 28
gdi_exp median_age 0.4827340 2.9167436 0.0068948 28 *
gdi_exp sigi_fam_log -0.1649747 -0.8691428 0.3924299 27
gdi_exp weapons_google -0.0443527 -0.2082376 0.8369592 22
gdi_exp wps 0.5704860 3.6755160 0.0009958 28 *
ggi gii_log -0.4872814 -2.9527236 0.0063122 28 *
ggi median_age 0.2565616 1.4046120 0.1711336 28
ggi sigi_fam_log -0.5676059 -3.5823702 0.0013209 27 *
ggi weapons_google 0.1439558 0.6823194 0.5021586 22
ggi wps 0.5833856 3.8007932 0.0007150 28 *
gii_log median_age -0.7809519 -6.7332923 0.0000002 29 *
gii_log sigi_fam_log 0.8299267 7.8719765 0.0000000 28 *
gii_log weapons_google -0.2938248 -1.4742068 0.1539828 23
gii_log wps -0.8248617 -7.8573228 0.0000000 29 *
median_age sigi_fam_log -0.6418743 -4.4293628 0.0001317 28 *
median_age weapons_google 0.4618097 2.4969718 0.0201281 23 *
median_age wps 0.8352517 8.1800899 0.0000000 29 *
sigi_fam_log weapons_google -0.3109451 -1.5345318 0.1391558 22
sigi_fam_log wps -0.7096519 -5.3298023 0.0000113 28 *
weapons_google wps 0.4093577 2.1517607 0.0421504 23 *