# Significance threshold used by the correlation tests and plots in this file.
ALPHA <- 0.1

Six gender parity indicators:

We exclude GPI because it is missing for about half of the countries; we should look into this.

IAT bias measures

# Read the country-to-language lookup table.
all_countries <- read_csv("data/other/country_to_lang.csv")

# Recode rows whose country_name is "UK" to country_code "GB".
# `%in%` never returns NA (unlike `==`), so rows with a missing country_name
# cannot put an NA into the row index of the subscripted assignment.
all_countries[all_countries$country_name %in% "UK", "country_code"] <- "GB"
          
# Load the bias measures and discard the wps_index column.
bias_measures <- select(read_csv("data/other/all_es_wide.csv"), -wps_index)

# Disabled threshold filters, kept for reference:
# bias_measures[bias_measures$weapons_google < .5, "weapons_google"] = NA
# bias_measures[bias_measures$flowers_google < .9, "flowers_google"] = NA

# Boxplot per measure: every column except the 1st and the 6th is stacked
# into long (measure, value) form before plotting.
ggplot(
  data = gather(bias_measures, "measure", "value", c(-1, -6)),
  mapping = aes(x = measure, y = value)
) +
  geom_boxplot() +
  theme_bw()

Objective gender measures

# Read the HDI indicator file and reduce it to one median_age value per
# country code.
hdi <- read_csv("data/gender_measures/HDI_complete.csv") %>%
  mutate_at(3:28, as.numeric) %>%
  # row-wise mean over columns 3-28, ignoring missing entries
  mutate(mean_value = rowMeans(.[, 3:28], na.rm = TRUE)) %>%
  mutate(country_code = countrycode(country, "country.name", "iso2c")) %>%
  select(kpi_name, country_code, mean_value) %>%
  # The third positional argument of spread() is `fill`; the former `-2`
  # there wrote -2 into every missing country/indicator cell. Use the default
  # so that absent values stay NA instead of becoming a fake median age.
  spread(kpi_name, mean_value) %>%
  select(country_code, median_age = `Median Age`)

# Read the gender measures, drop the indicators that are not analysed, and
# attach median_age from `hdi`.
all_gender_measures <- read_csv("data/gender_measures/all_gender_measures.csv") %>%
  select(
    -sigi, -sigi_physical, -sigi_son, -wb_cpia, -gpi_literacy,
    -contains("schooling"), -contains("ggi_")
  ) %>%
  # Name the join key explicitly instead of relying on a natural join over
  # whatever column names the two tables happen to share.
  left_join(hdi, by = "country_code")
# Histogram of every raw gender measure, one free-scaled panel per measure.
# Columns 1-3 are kept out of the long (measure, value) reshaping.
ggplot(
  data = gather(all_gender_measures, measure, value, -1:-3),
  mapping = aes(x = value)
) +
  geom_histogram() +
  ggtitle("raw") +
  theme_bw() +
  facet_wrap(~measure, scales = "free", ncol = 4)

Transform skewed measures.

# Transform the skewed measures: natural log of sigi_fam and gii, tenth power
# of gdi (stored as gdi_exp). The untransformed columns are dropped.
all_gender_measures_transformed <- all_gender_measures %>%
  mutate(sigi_fam_log = log(sigi_fam),
         gii_log = log(gii),
         gdi_exp = gdi^10) %>%
  select(-sigi_fam, -gii, -gdi) %>%
  # log(0) is -Inf; turn infinite values into NA for BOTH log-transformed
  # columns (previously only sigi_fam_log was scrubbed) so they are treated
  # as missing by the correlations downstream.
  mutate(sigi_fam_log = ifelse(is.infinite(sigi_fam_log), NA, sigi_fam_log),
         gii_log = ifelse(is.infinite(gii_log), NA, gii_log))

# Histogram of every gender measure after transformation, one free-scaled
# panel per measure. Columns 1-3 are kept out of the reshaping.
ggplot(
  data = gather(all_gender_measures_transformed, measure, value, -1:-3),
  mapping = aes(x = value)
) +
  geom_histogram() +
  facet_wrap(~measure, scales = "free", ncol = 4) +
  ggtitle("transformed") +
  theme_bw()

Correlation between measures

# Left-join the bias measures (by wiki_language_code) and the transformed
# gender measures (by country_code) onto the country table, drop any column
# whose name contains ".y", and move columns 1-5 and 8 to the front.
full_df_partial <- local({
  merged <- left_join(all_countries, bias_measures,
                      by = "wiki_language_code")
  merged <- left_join(merged, all_gender_measures_transformed,
                      by = "country_code")
  merged <- select(merged, -contains(".y"))
  # disabled step, kept for reference: select(c(-10:-13, -16))
  select(merged, 1:5, 8, everything())
})

# Linear model of the behavioral career IAT measure on median age.
mod1 <- lm(career_behavioral_iat ~ median_age, data = full_df_partial)

# Append the residuals of mod1 as career_behavioral_iat_resid and place that
# column directly after the first six columns.
full_df <- full_df_partial %>%
  modelr::add_residuals(mod1, var = "career_behavioral_iat_resid") %>%
  select(1:6, career_behavioral_iat_resid, everything())
# Correlation matrix over every column of full_df except the first three,
# using pairwise-complete observations.
corr_mat <- cor(full_df[, -(1:3)], use = "pairwise.complete.obs")

# Matrix of p-values for the same columns, at confidence level 1 - ALPHA.
p.mat <- cor.mtest(
  full_df[, -(1:3)],
  conf.level = 1 - ALPHA,
  use = "pairwise.complete.obs"
)[["p"]]

# 100-step red-white-blue ramp, reversed.
cols <- rev(colorRampPalette(c("red", "white", "blue"))(100))

# Upper-triangle correlation plot with coefficients printed in black; cells
# whose p-value is not below ALPHA are left blank.
corrplot(
  corr = corr_mat,
  method = "color",
  type = "upper",
  order = "original",
  diag = FALSE,
  col = cols,
  addCoef.col = "black",
  number.cex = .7,
  tl.col = "black",
  tl.srt = 90,
  p.mat = p.mat,
  sig.level = ALPHA,
  insig = "blank"
)

# Names of all measure columns (every column of full_df but the first three).
MEASURES <- names(full_df)[-1:-3]

# Every unordered pair of distinct measures, with test1 sorting before test2.
# The columns are named directly in crossing(): two unnamed copies of MEASURES
# produce duplicate column names, which current tidyr rejects. Keeping only
# rows with test1 < test2 replaces the former rowwise sort, whose second
# assignment read the already-overwritten test1 and was correct only because
# the collapsed rows were then filtered out. The result is an ordinary
# (non-rowwise) tibble.
unique_pairs <- tidyr::crossing(test1 = MEASURES, test2 = MEASURES) %>%
  filter(test1 < test2)

# Correlation test between two columns of a data frame.
#
# Arguments:
#   test1, test2  column names (single strings) of the two variables in `df`
#   df            data frame containing both columns
#
# Returns the tidy() summary of cor.test() for the pair, with the two column
# names appended as columns `test1` and `test2`.
# Stops with an error if either name is not a column of `df`.
get_corr <- function(test1, test2, df){
  missing_cols <- setdiff(c(test1, test2), names(df))
  if (length(missing_cols) > 0) {
    stop("Column(s) not found in df: ",
         paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }

  # Extract by name with [[ ]]: the strings held in test1/test2 are always
  # used, even if df itself has a column literally called "test1" or "test2",
  # and the call also works when both names refer to the same column.
  result <- tidy(cor.test(df[[test1]], df[[test2]]))
  result$test1 <- test1
  result$test2 <- test2
  result
}

# Run get_corr() on every pair in unique_pairs, mark pairs whose p-value is
# below ALPHA with "*", keep the reported statistics, and render the table.
map2_df(
  .x = unique_pairs$test1,
  .y = unique_pairs$test2,
  .f = get_corr,
  df = full_df
) %>%
  mutate(sig = ifelse(p.value < ALPHA, "*", "")) %>%
  select(test1, test2, estimate, statistic, p.value, parameter, sig) %>%
  # arrange(p.value) %>%
  kable()
test1 test2 estimate statistic p.value parameter sig
career_behavioral_iat career_behavioral_iat_resid 0.7566632 7.0397187 0.0000000 37 *
career_behavioral_iat career_google 0.2387366 1.4954187 0.1432865 37
career_behavioral_iat career_hand 0.4450900 2.6766289 0.0121068 29 *
career_behavioral_iat flowers_google 0.1403559 0.8622875 0.3940839 37
career_behavioral_iat gdi_exp 0.0180289 0.1081909 0.9144450 36
career_behavioral_iat ggi 0.1713978 1.0438335 0.3035214 36
career_behavioral_iat gii_log -0.4891714 -3.3651343 0.0018293 36 *
career_behavioral_iat median_age 0.6538049 5.2558919 0.0000064 37 *
career_behavioral_iat sigi_fam_log -0.5532607 -3.9292949 0.0003829 35 *
career_behavioral_iat weapons_google 0.5251447 3.7535628 0.0005979 37 *
career_behavioral_iat wps 0.4970552 3.4843870 0.0012863 37 *
career_behavioral_iat_resid career_google 0.2723334 1.7216112 0.0934922 37 *
career_behavioral_iat_resid career_hand 0.4081303 2.4074835 0.0226513 29 *
career_behavioral_iat_resid flowers_google 0.0068319 0.0415577 0.9670747 37
career_behavioral_iat_resid gdi_exp -0.2756604 -1.7206277 0.0939043 36 *
career_behavioral_iat_resid ggi -0.0802939 -0.4833240 0.6317924 36
career_behavioral_iat_resid gii_log 0.0406180 0.2439091 0.8086859 36
career_behavioral_iat_resid median_age 0.0000000 0.0000000 1.0000000 37
career_behavioral_iat_resid sigi_fam_log -0.2065382 -1.2488231 0.2200183 35
career_behavioral_iat_resid weapons_google 0.3713526 2.4328155 0.0199352 37 *
career_behavioral_iat_resid wps -0.0784735 -0.4788123 0.6348897 37
career_google career_hand 0.7135672 5.4849400 0.0000066 29 *
career_google flowers_google 0.2833024 1.7968784 0.0805220 37 *
career_google gdi_exp 0.0991326 0.5977400 0.5537553 36
career_google ggi 0.3155068 1.9949355 0.0536626 36 *
career_google gii_log -0.3211243 -2.0344987 0.0493183 36 *
career_google median_age 0.0499719 0.3043477 0.7625683 37
career_google sigi_fam_log -0.2895104 -1.7893981 0.0822074 35 *
career_google weapons_google 0.2985659 1.9028982 0.0648572 37 *
career_google wps 0.3320650 2.1413819 0.0388958 37 *
career_hand flowers_google 0.3338619 1.9073408 0.0664250 29 *
career_hand gdi_exp 0.0418846 0.2257537 0.8229765 29
career_hand ggi 0.4117935 2.3911565 0.0237578 28 *
career_hand gii_log -0.3481839 -1.9653986 0.0593581 28 *
career_hand median_age 0.1629994 0.8896769 0.3809606 29
career_hand sigi_fam_log -0.3377715 -1.8989217 0.0679238 28 *
career_hand weapons_google 0.5723906 3.7591327 0.0007662 29 *
career_hand wps 0.3164743 1.7966101 0.0828217 29 *
flowers_google gdi_exp 0.0092419 0.0554539 0.9560835 36
flowers_google ggi 0.3823689 2.4828885 0.0178286 36 *
flowers_google gii_log -0.2903693 -1.8206593 0.0769794 36 *
flowers_google median_age 0.2067689 1.2855059 0.2066036 37
flowers_google sigi_fam_log -0.3115866 -1.9399461 0.0604780 35 *
flowers_google weapons_google 0.5031787 3.5417480 0.0010945 37 *
flowers_google wps 0.4333633 2.9249772 0.0058530 37 *
gdi_exp ggi 0.3056067 2.0800727 0.0436584 42 *
gdi_exp gii_log -0.1555047 -1.0201960 0.3134763 42
gdi_exp median_age 0.3308739 2.3257684 0.0247030 44 *
gdi_exp sigi_fam_log -0.0993460 -0.6470367 0.5211290 42
gdi_exp weapons_google -0.0402823 -0.2418900 0.8102380 36
gdi_exp wps 0.4281157 3.1064158 0.0033488 43 *
ggi gii_log -0.4748966 -3.4971999 0.0011249 42 *
ggi median_age 0.2786336 1.9024650 0.0638211 43 *
ggi sigi_fam_log -0.5573804 -4.2986386 0.0001034 41 *
ggi weapons_google 0.2453969 1.5188228 0.1375415 36
ggi wps 0.5705411 4.5554970 0.0000428 43 *
gii_log median_age -0.8006062 -8.7616930 0.0000000 43 *
gii_log sigi_fam_log 0.7240202 6.7209832 0.0000000 41 *
gii_log weapons_google -0.2702176 -1.6839498 0.1008443 36
gii_log wps -0.8311771 -9.8027272 0.0000000 43 *
median_age sigi_fam_log -0.5790572 -4.6574184 0.0000308 43 *
median_age weapons_google 0.3734384 2.4486878 0.0191977 37 *
median_age wps 0.8312096 9.9173126 0.0000000 44 *
sigi_fam_log weapons_google -0.2877750 -1.7776997 0.0841436 35 *
sigi_fam_log wps -0.6535854 -5.5964887 0.0000015 42 *
weapons_google wps 0.4334082 2.9253505 0.0058474 37 *

Flowers

# Scatter plots of flowers_google against each stacked measure, with a linear
# fit, one free-scaled panel per measure. Columns 1, 8 and 10-15 of full_df
# are kept; the first two of those stay fixed and the rest are stacked into
# long (measure, value) form.
ggplot(
  data = gather(select(full_df, 1, 8, 10:15), "measure", "value", c(-1, -2)),
  mapping = aes(x = value, y = flowers_google)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_bw() +
  facet_wrap(~measure, scales = "free")

Weapons

# Scatter plots of weapons_google against each stacked measure, with a linear
# fit, one free-scaled panel per measure. Columns 1, 9 and 10-15 of full_df
# are kept; the first two of those stay fixed and the rest are stacked into
# long (measure, value) form.
ggplot(
  data = gather(select(full_df, 1, 9, 10:15), "measure", "value", c(-1, -2)),
  mapping = aes(x = value, y = weapons_google)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_bw() +
  facet_wrap(~measure, scales = "free")

Career

# Scatter plots of career_google against each stacked measure, with a linear
# fit, one free-scaled panel per measure. Columns 1, 4 and 10-15 of full_df
# are kept; the first two of those stay fixed and the rest are stacked into
# long (measure, value) form.
ggplot(
  data = gather(select(full_df, 1, 4, 10:15), "measure", "value", c(-1, -2)),
  mapping = aes(x = value, y = career_google)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_bw() +
  facet_wrap(~measure, scales = "free")