# Significance threshold used below for the correlation plot and the "sig"
# column of the pairwise correlation table.
ALPHA <- 0.1
Six gender parity indicators:
We exclude GPI because it is missing for about half of the countries; we should look into this.
# Load the country-to-language lookup table.
all_countries <- read_csv("data/other/country_to_lang.csv")

# Rows whose country name is "UK" are given the country code "GB".
all_countries[all_countries[["country_name"]] == "UK", "country_code"] <- "GB"
# Load the wide table of bias effect sizes and drop the wps_index column.
bias_measures <- select(
  read_csv("data/other/all_es_wide.csv"),
  -wps_index
)

# Disabled thresholding of two of the Google-based measures, kept for reference:
#bias_measures[bias_measures$weapons_google < .5, "weapons_google"] = NA
#bias_measures[bias_measures$flowers_google < .9, "flowers_google"] = NA

# Boxplot with one box per measure; columns 1 and 6 are left out of the
# reshaping and therefore are not plotted as measures.
ggplot(
  gather(bias_measures, "measure", "value", c(-1, -6)),
  aes(x = measure, y = value)
) +
  geom_boxplot() +
  theme_bw()
# Read the HDI indicator file and reduce it to one median-age value per
# country code.
#
# Columns 3:28 are coerced to numeric and averaged row-wise, ignoring NAs.
# NOTE(review): these are presumably the per-year value columns -- confirm
# against the CSV header.
# Country names are converted to ISO 2-letter codes before reshaping.
hdi <- read_csv("data/gender_measures/HDI_complete.csv") %>%
  mutate_at(3:28, as.numeric) %>%
  mutate(mean_value = rowMeans(.[, 3:28], na.rm = TRUE)) %>%
  mutate(country_code = countrycode(country, "country.name", "iso2c")) %>%
  select(kpi_name, country_code, mean_value) %>%
  # No third positional argument here: in spread() that slot is `fill`, so the
  # former `-2` replaced every missing indicator value with -2 instead of
  # leaving it NA, which then leaked into the downstream means and models.
  spread(kpi_name, mean_value) %>%
  select(country_code, `Median Age`) %>%
  rename(median_age = `Median Age`)
# Load the gender measures, discard the ones excluded from this analysis, and
# attach the columns of `hdi` (joined on whatever columns the two tables share).
all_gender_measures <- left_join(
  select(
    read_csv("data/gender_measures/all_gender_measures.csv"),
    -sigi, -sigi_physical, -wb_cpia, -contains("schooling"), -gpi_literacy,
    -contains("ggi_"), -sigi_son
  ),
  hdi
)

# Histogram of each raw measure, one facet per measure; the first three
# columns are kept out of the reshaping.
ggplot(
  gather(all_gender_measures, measure, value, -1:-3),
  aes(x = value)
) +
  geom_histogram() +
  facet_wrap(~measure, scales = "free", ncol = 4) +
  ggtitle("raw") +
  theme_bw()
Transform skewed measures.
# Reduce skew: log-transform sigi_fam and gii, raise gdi to the 10th power,
# and drop the untransformed columns. Infinite values of sigi_fam_log are
# replaced by NA.
all_gender_measures_transformed <- all_gender_measures %>%
  mutate(
    sigi_fam_log = log(sigi_fam),
    sigi_fam_log = ifelse(is.infinite(sigi_fam_log), NA, sigi_fam_log),
    gii_log = log(gii),
    gdi_exp = gdi^10
  ) %>%
  select(-sigi_fam, -gii, -gdi)

# Histogram of each measure after transformation, one facet per measure.
ggplot(
  gather(all_gender_measures_transformed, measure, value, -1:-3),
  aes(x = value)
) +
  geom_histogram() +
  ggtitle("transformed") +
  facet_wrap(~measure, scales = "free", ncol = 4) +
  theme_bw()
# Merge the three sources: country/language lookup, then bias measures (by
# wiki language code), then transformed gender measures (by country code).
# Columns whose name contains ".y" are dropped, columns are reordered by
# position, and every measure from career_google through gdi_exp is averaged
# (ignoring NAs) within each wiki language code.
full_df_partial <- all_countries %>%
  left_join(bias_measures, by = "wiki_language_code") %>%
  left_join(all_gender_measures_transformed, by = "country_code") %>%
  select(-contains(".y")) %>%
  #select(c(-10:-13, -16)) %>%
  select(1:5, 8, everything()) %>%
  group_by(wiki_language_code.x) %>%
  summarize_at(
    vars(career_google:gdi_exp),
    mean,
    na.rm = TRUE
  )
# Linear model of the behavioral IAT career score on median age.
mod1 <- lm(
  formula = career_behavioral_iat ~ median_age,
  data = full_df_partial
)

# Append the residuals of mod1 as career_behavioral_iat_resid and move that
# column to the fifth position.
full_df <- full_df_partial %>%
  modelr::add_residuals(mod1, var = "career_behavioral_iat_resid") %>%
  select(1:4, career_behavioral_iat_resid, 5, 6, everything())
# Correlation matrix over every column of full_df except the first, using
# pairwise-complete observations.
corr_mat <- full_df %>%
  select(-1) %>%
  cor(use = "pairwise.complete.obs")

# Matrix of p-values for the same set of columns.
p.mat <- cor.mtest(
  select(full_df, -1),
  conf.level = (1 - ALPHA),
  use = "pairwise.complete.obs"
)[["p"]]

# 100-step palette, reversed so that it runs blue -> white -> red.
cols <- rev(colorRampPalette(c("red", "white", "blue"))(100))

# Upper-triangle correlation plot; cells with p-value above ALPHA are blanked.
corrplot(
  corr_mat,
  method = "color",
  col = cols,
  type = "upper",
  order = "original",
  number.cex = .7,
  addCoef.col = "black",
  p.mat = p.mat,
  sig.level = ALPHA,
  insig = "blank",
  tl.col = "black",
  tl.srt = 90,
  diag = FALSE
)
# Names of all measures: every column of full_df except the first.
MEASURES <- names(full_df)[-1]

# Every unordered pair of distinct measures, listed exactly once with test1
# sorting before test2.
#
# The crossing() arguments are named so the two output columns get distinct
# names up front (the unnamed form yields two columns both called "MEASURES").
# Keeping only rows with test1 < test2 replaces the former row-wise sort: that
# version recomputed test2 from the already-overwritten test1, so it produced
# the right pairs only because the swapped rows collapsed to (a, a) and were
# then filtered out.
unique_pairs <- tidyr::crossing(test1 = MEASURES, test2 = MEASURES) %>%
  filter(test1 < test2)
# Correlation test between two columns of a data frame.
#
# Arguments:
#   test1, test2  Names of the two columns to correlate (character scalars).
#   df            Data frame containing both columns.
#
# Returns the tidy() summary of cor.test() for the two columns, with the
# column names appended as `test1` and `test2`.
get_corr <- function(test1, test2, df) {
  # Extract by name with [[ ]]. select(test1, test2) looks for columns that are
  # literally called "test1"/"test2" before falling back to the variables, and
  # passing bare character variables to select() is deprecated in tidyselect.
  tidy(cor.test(df[[test1]], df[[test2]])) %>%
    mutate(
      test1 = test1,
      test2 = test2
    )
}
# Run get_corr() on every measure pair, keep the main test statistics, flag
# p-values below ALPHA with "*", and render the result as a table.
map2_df(
  .x = unique_pairs$test1,
  .y = unique_pairs$test2,
  .f = get_corr,
  df = full_df
) %>%
  select(test1, test2, estimate, statistic, p.value, parameter) %>%
  mutate(sig = ifelse(p.value < ALPHA, "*", "")) %>%
  #arrange(p.value) %>%
  kable()
test1 | test2 | estimate | statistic | p.value | parameter | sig |
---|---|---|---|---|---|---|
career_behavioral_iat | career_behavioral_iat_resid | 0.7426395 | 5.3182321 | 0.0000213 | 23 | * |
career_behavioral_iat | career_google | 0.2198718 | 1.0809194 | 0.2909371 | 23 | |
career_behavioral_iat | career_hand | 0.4862996 | 2.2261570 | 0.0407233 | 16 | * |
career_behavioral_iat | flowers_google | 0.2170289 | 1.0662478 | 0.2973769 | 23 | |
career_behavioral_iat | gdi_exp | 0.0786312 | 0.3699586 | 0.7149509 | 22 | |
career_behavioral_iat | ggi | 0.1221328 | 0.5771745 | 0.5696815 | 22 | |
career_behavioral_iat | gii_log | -0.4335201 | -2.3071667 | 0.0303913 | 23 | * |
career_behavioral_iat | median_age | 0.6696914 | 4.3247454 | 0.0002506 | 23 | * |
career_behavioral_iat | sigi_fam_log | -0.5507151 | -3.0946494 | 0.0052909 | 22 | * |
career_behavioral_iat | weapons_google | 0.5435156 | 3.1053274 | 0.0049843 | 23 | * |
career_behavioral_iat | wps | 0.4501826 | 2.4178650 | 0.0239371 | 23 | * |
career_behavioral_iat_resid | career_google | 0.2195204 | 1.0791046 | 0.2917282 | 23 | |
career_behavioral_iat_resid | career_hand | 0.4162002 | 1.8309140 | 0.0858011 | 16 | * |
career_behavioral_iat_resid | flowers_google | -0.0507677 | -0.2437876 | 0.8095567 | 23 | |
career_behavioral_iat_resid | gdi_exp | -0.3960735 | -2.0232100 | 0.0553688 | 22 | * |
career_behavioral_iat_resid | ggi | -0.1984857 | -0.9498793 | 0.3524927 | 22 | |
career_behavioral_iat_resid | gii_log | 0.1051289 | 0.5069901 | 0.6169883 | 23 | |
career_behavioral_iat_resid | median_age | 0.0000000 | 0.0000000 | 1.0000000 | 23 | |
career_behavioral_iat_resid | sigi_fam_log | -0.1231142 | -0.5818833 | 0.5665609 | 22 | |
career_behavioral_iat_resid | weapons_google | 0.3154230 | 1.5940920 | 0.1245664 | 23 | |
career_behavioral_iat_resid | wps | -0.1782034 | -0.8685354 | 0.3940742 | 23 | |
career_google | career_hand | 0.7110338 | 4.0448036 | 0.0009393 | 16 | * |
career_google | flowers_google | 0.1572407 | 0.7635988 | 0.4528687 | 23 | |
career_google | gdi_exp | 0.0957022 | 0.4509532 | 0.6564367 | 22 | |
career_google | ggi | 0.2928861 | 1.4367636 | 0.1648552 | 22 | |
career_google | gii_log | -0.4149863 | -2.1874523 | 0.0391340 | 23 | * |
career_google | median_age | 0.0848857 | 0.4085722 | 0.6866328 | 23 | |
career_google | sigi_fam_log | -0.4031658 | -2.0663965 | 0.0507618 | 22 | * |
career_google | weapons_google | 0.1591782 | 0.7732506 | 0.4472489 | 23 | |
career_google | wps | 0.3504214 | 1.7943372 | 0.0859162 | 23 | * |
career_hand | flowers_google | 0.1634815 | 0.6628436 | 0.5168667 | 16 | |
career_hand | gdi_exp | 0.0317343 | 0.1270012 | 0.9005218 | 16 | |
career_hand | ggi | 0.3909580 | 1.6451108 | 0.1207349 | 15 | |
career_hand | gii_log | -0.4128510 | -1.8131387 | 0.0886129 | 16 | * |
career_hand | median_age | 0.1802039 | 0.7328122 | 0.4742730 | 16 | |
career_hand | sigi_fam_log | -0.4644305 | -2.0976759 | 0.0521716 | 16 | * |
career_hand | weapons_google | 0.4630674 | 2.0898354 | 0.0529579 | 16 | * |
career_hand | wps | 0.2754849 | 1.1462951 | 0.2685280 | 16 | |
flowers_google | gdi_exp | 0.0028962 | 0.0135845 | 0.9892839 | 22 | |
flowers_google | ggi | 0.3607534 | 1.8142531 | 0.0833011 | 22 | * |
flowers_google | gii_log | -0.4980699 | -2.7546505 | 0.0112836 | 23 | * |
flowers_google | median_age | 0.3803707 | 1.9724552 | 0.0606975 | 23 | * |
flowers_google | sigi_fam_log | -0.5102751 | -2.7829899 | 0.0108461 | 22 | * |
flowers_google | weapons_google | 0.4098315 | 2.1547536 | 0.0418896 | 23 | * |
flowers_google | wps | 0.4879503 | 2.6809530 | 0.0133427 | 23 | * |
gdi_exp | ggi | 0.3994582 | 2.2641316 | 0.0318086 | 27 | * |
gdi_exp | gii_log | -0.2678917 | -1.4713283 | 0.1523530 | 28 | |
gdi_exp | median_age | 0.4827340 | 2.9167436 | 0.0068948 | 28 | * |
gdi_exp | sigi_fam_log | -0.1649747 | -0.8691428 | 0.3924299 | 27 | |
gdi_exp | weapons_google | -0.0443527 | -0.2082376 | 0.8369592 | 22 | |
gdi_exp | wps | 0.5704860 | 3.6755160 | 0.0009958 | 28 | * |
ggi | gii_log | -0.4872814 | -2.9527236 | 0.0063122 | 28 | * |
ggi | median_age | 0.2565616 | 1.4046120 | 0.1711336 | 28 | |
ggi | sigi_fam_log | -0.5676059 | -3.5823702 | 0.0013209 | 27 | * |
ggi | weapons_google | 0.1439558 | 0.6823194 | 0.5021586 | 22 | |
ggi | wps | 0.5833856 | 3.8007932 | 0.0007150 | 28 | * |
gii_log | median_age | -0.7809519 | -6.7332923 | 0.0000002 | 29 | * |
gii_log | sigi_fam_log | 0.8299267 | 7.8719765 | 0.0000000 | 28 | * |
gii_log | weapons_google | -0.2938248 | -1.4742068 | 0.1539828 | 23 | |
gii_log | wps | -0.8248617 | -7.8573228 | 0.0000000 | 29 | * |
median_age | sigi_fam_log | -0.6418743 | -4.4293628 | 0.0001317 | 28 | * |
median_age | weapons_google | 0.4618097 | 2.4969718 | 0.0201281 | 23 | * |
median_age | wps | 0.8352517 | 8.1800899 | 0.0000000 | 29 | * |
sigi_fam_log | weapons_google | -0.3109451 | -1.5345318 | 0.1391558 | 22 | |
sigi_fam_log | wps | -0.7096519 | -5.3298023 | 0.0000113 | 28 | * |
weapons_google | wps | 0.4093577 | 2.1517607 | 0.0421504 | 23 | * |