# Significance threshold used by the correlation tests and plots in this file.
ALPHA <- 0.1
Six gender parity indicators:
We exclude GPI because it’s missing for about half of the countries - should look into this.
# Load the country -> language lookup table.
all_countries <- read_csv("data/other/country_to_lang.csv")

# Rows whose country_name is "UK" get the country code "GB".
all_countries[all_countries$country_name == "UK", "country_code"] <- "GB"

# Load the bias measures, leaving out the wps_index column.
bias_measures <- select(read_csv("data/other/all_es_wide.csv"), -wps_index)

# Disabled thresholds, kept for reference:
#bias_measures[bias_measures$weapons_google < .5, "weapons_google"] = NA
#bias_measures[bias_measures$flowers_google < .9, "flowers_google"] = NA

# One boxplot per bias measure; columns 1 and 6 are excluded from the
# wide -> long reshape.
bias_measures %>%
  gather("measure", "value", c(-1, -6)) %>%
  ggplot(aes(x = measure, y = value)) +
  geom_boxplot() +
  theme_bw()
# Read in gender measures.

# Median age per country code, taken from the HDI file: columns 3-28 are
# coerced to numeric and averaged per row (ignoring missing values), the
# country name is mapped to an ISO2 code, and only the "Median Age"
# indicator is kept.
hdi <- read_csv("data/gender_measures/HDI_complete.csv") %>%
  mutate_at(3:28, as.numeric) %>%
  mutate(mean_value = rowMeans(.[, 3:28], na.rm = TRUE)) %>%
  mutate(country_code = countrycode(country, "country.name", "iso2c")) %>%
  select(kpi_name, country_code, mean_value) %>%
  # spread()'s third positional argument is `fill`; the `-2` that used to be
  # passed here replaced every missing indicator value with the number -2.
  # Leaving it out keeps missing median ages as missing.
  spread(kpi_name, mean_value) %>%
  select(country_code, median_age = `Median Age`)

# Gender measures with the unused indices dropped and median age attached.
all_gender_measures <- read_csv("data/gender_measures/all_gender_measures.csv") %>%
  select(
    -sigi, -sigi_physical, -wb_cpia, -contains("schooling"),
    -gpi_literacy, -contains("ggi_"), -sigi_son
  ) %>%
  # Explicit key: hdi only carries country_code and median_age.
  left_join(hdi, by = "country_code")

# Histogram of every raw measure (the first three columns are not reshaped).
all_gender_measures %>%
  gather(measure, value, -1:-3) %>%
  ggplot(aes(x = value)) +
  geom_histogram() +
  facet_wrap(~measure, scales = "free", ncol = 4) +
  ggtitle("raw") +
  theme_bw()
Transform skewed measures.
# Reduce skew: log-transform sigi_fam and gii, raise gdi to the 10th power,
# then drop the raw versions of those three columns.
all_gender_measures_transformed <- all_gender_measures %>%
  mutate(
    sigi_fam_log = log(sigi_fam),
    gii_log = log(gii),
    gdi_exp = gdi^10
  ) %>%
  # Infinite values of the logged sigi_fam are treated as missing.
  mutate(sigi_fam_log = ifelse(is.infinite(sigi_fam_log), NA, sigi_fam_log)) %>%
  select(-sigi_fam, -gii, -gdi)

# Histogram of every measure after transformation (the first three columns
# are not reshaped).
all_gender_measures_transformed %>%
  gather(measure, value, -1:-3) %>%
  ggplot(aes(x = value)) +
  geom_histogram() +
  facet_wrap(~measure, scales = "free", ncol = 4) +
  ggtitle("transformed") +
  theme_bw()
# Merge together: attach the bias measures (by wiki language code) and the
# transformed gender measures (by country code) to the country table, drop
# columns whose name contains ".y", and move columns 1-5 and 8 to the front.
full_df_partial <- all_countries %>%
  left_join(bias_measures, by = "wiki_language_code") %>%
  left_join(all_gender_measures_transformed, by = "country_code") %>%
  select(-contains(".y")) %>%
  #select(c(-10:-13, -16)) %>%
  select(1:5, 8, everything())

# Linear model of the behavioral career IAT score on median age.
mod1 <- lm(
  formula = career_behavioral_iat ~ median_age,
  data = full_df_partial
)

# Add the model residuals as career_behavioral_iat_resid, placed right after
# the first six columns.
full_df <- full_df_partial %>%
  modelr::add_residuals(mod1) %>%
  rename(career_behavioral_iat_resid = resid) %>%
  select(1:6, career_behavioral_iat_resid, everything())
# Pairwise correlations between all columns after the first three.
corr_mat <- cor(
  full_df[, -(1:3)],
  use = "pairwise.complete.obs"
)

# Matching matrix of p-values for those correlations.
p.mat <- cor.mtest(
  full_df[, -(1:3)],
  conf.level = 1 - ALPHA,
  use = "pairwise.complete.obs"
)$p

# 100-step palette; rev() puts blue at the low end and red at the high end.
cols <- rev(colorRampPalette(c("red", "white", "blue"))(100))

# Upper-triangle correlation plot with coefficients printed in each cell;
# cells whose p-value is not below ALPHA are left blank.
corrplot(
  corr_mat,
  method = "color",
  col = cols,
  type = "upper",
  order = "original",
  number.cex = .7,
  addCoef.col = "black",
  p.mat = p.mat,
  sig.level = ALPHA,
  insig = "blank",
  tl.col = "black",
  tl.srt = 90,
  diag = FALSE
)
# Names of every measure column (all columns after the first three).
MEASURES <- names(full_df)[-(1:3)]

# Every unordered pair of distinct measures, one row per pair, with
# test1 sorting before test2. combn() on the sorted names yields the pairs
# directly, so no cross product, row-wise sorting or de-duplication is needed.
unique_pairs <- t(combn(sort(unique(MEASURES)), 2)) %>%
  magrittr::set_colnames(c("test1", "test2")) %>%
  as_tibble()
# Correlation test between two columns of a data frame.
#
# Arguments:
#   test1, test2  Column names, each given as a single string.
#   df            Data frame containing both columns.
#
# Returns the one-row tidy() summary of cor.test() on the two columns, with
# the column names added as `test1` and `test2`.
get_corr <- function(test1, test2, df) {
  # `[[` looks the columns up strictly by the names passed in. The previous
  # select(test1, test2) would have picked up data columns that happened to
  # be called "test1"/"test2" instead of the requested ones.
  result <- tidy(cor.test(df[[test1]], df[[test2]]))
  result$test1 <- test1
  result$test2 <- test2
  result
}
# Run get_corr() on every measure pair, keep the key statistics, flag the
# pairs whose p-value is below ALPHA with "*", and render the table.
map2_df(
  .x = unique_pairs$test1,
  .y = unique_pairs$test2,
  .f = get_corr,
  df = full_df
) %>%
  select(test1, test2, estimate, statistic, p.value, parameter) %>%
  mutate(sig = ifelse(p.value < ALPHA, "*", "")) %>%
  #arrange(p.value) %>%
  kable()
| test1 | test2 | estimate | statistic | p.value | parameter | sig |
|---|---|---|---|---|---|---|
| career_behavioral_iat | career_behavioral_iat_resid | 0.7566632 | 7.0397187 | 0.0000000 | 37 | * |
| career_behavioral_iat | career_google | 0.2387366 | 1.4954187 | 0.1432865 | 37 | |
| career_behavioral_iat | career_hand | 0.4450900 | 2.6766289 | 0.0121068 | 29 | * |
| career_behavioral_iat | flowers_google | 0.1403559 | 0.8622875 | 0.3940839 | 37 | |
| career_behavioral_iat | gdi_exp | 0.0180289 | 0.1081909 | 0.9144450 | 36 | |
| career_behavioral_iat | ggi | 0.1713978 | 1.0438335 | 0.3035214 | 36 | |
| career_behavioral_iat | gii_log | -0.4891714 | -3.3651343 | 0.0018293 | 36 | * |
| career_behavioral_iat | median_age | 0.6538049 | 5.2558919 | 0.0000064 | 37 | * |
| career_behavioral_iat | sigi_fam_log | -0.5532607 | -3.9292949 | 0.0003829 | 35 | * |
| career_behavioral_iat | weapons_google | 0.5251447 | 3.7535628 | 0.0005979 | 37 | * |
| career_behavioral_iat | wps | 0.4970552 | 3.4843870 | 0.0012863 | 37 | * |
| career_behavioral_iat_resid | career_google | 0.2723334 | 1.7216112 | 0.0934922 | 37 | * |
| career_behavioral_iat_resid | career_hand | 0.4081303 | 2.4074835 | 0.0226513 | 29 | * |
| career_behavioral_iat_resid | flowers_google | 0.0068319 | 0.0415577 | 0.9670747 | 37 | |
| career_behavioral_iat_resid | gdi_exp | -0.2756604 | -1.7206277 | 0.0939043 | 36 | * |
| career_behavioral_iat_resid | ggi | -0.0802939 | -0.4833240 | 0.6317924 | 36 | |
| career_behavioral_iat_resid | gii_log | 0.0406180 | 0.2439091 | 0.8086859 | 36 | |
| career_behavioral_iat_resid | median_age | 0.0000000 | 0.0000000 | 1.0000000 | 37 | |
| career_behavioral_iat_resid | sigi_fam_log | -0.2065382 | -1.2488231 | 0.2200183 | 35 | |
| career_behavioral_iat_resid | weapons_google | 0.3713526 | 2.4328155 | 0.0199352 | 37 | * |
| career_behavioral_iat_resid | wps | -0.0784735 | -0.4788123 | 0.6348897 | 37 | |
| career_google | career_hand | 0.7135672 | 5.4849400 | 0.0000066 | 29 | * |
| career_google | flowers_google | 0.2833024 | 1.7968784 | 0.0805220 | 37 | * |
| career_google | gdi_exp | 0.0991326 | 0.5977400 | 0.5537553 | 36 | |
| career_google | ggi | 0.3155068 | 1.9949355 | 0.0536626 | 36 | * |
| career_google | gii_log | -0.3211243 | -2.0344987 | 0.0493183 | 36 | * |
| career_google | median_age | 0.0499719 | 0.3043477 | 0.7625683 | 37 | |
| career_google | sigi_fam_log | -0.2895104 | -1.7893981 | 0.0822074 | 35 | * |
| career_google | weapons_google | 0.2985659 | 1.9028982 | 0.0648572 | 37 | * |
| career_google | wps | 0.3320650 | 2.1413819 | 0.0388958 | 37 | * |
| career_hand | flowers_google | 0.3338619 | 1.9073408 | 0.0664250 | 29 | * |
| career_hand | gdi_exp | 0.0418846 | 0.2257537 | 0.8229765 | 29 | |
| career_hand | ggi | 0.4117935 | 2.3911565 | 0.0237578 | 28 | * |
| career_hand | gii_log | -0.3481839 | -1.9653986 | 0.0593581 | 28 | * |
| career_hand | median_age | 0.1629994 | 0.8896769 | 0.3809606 | 29 | |
| career_hand | sigi_fam_log | -0.3377715 | -1.8989217 | 0.0679238 | 28 | * |
| career_hand | weapons_google | 0.5723906 | 3.7591327 | 0.0007662 | 29 | * |
| career_hand | wps | 0.3164743 | 1.7966101 | 0.0828217 | 29 | * |
| flowers_google | gdi_exp | 0.0092419 | 0.0554539 | 0.9560835 | 36 | |
| flowers_google | ggi | 0.3823689 | 2.4828885 | 0.0178286 | 36 | * |
| flowers_google | gii_log | -0.2903693 | -1.8206593 | 0.0769794 | 36 | * |
| flowers_google | median_age | 0.2067689 | 1.2855059 | 0.2066036 | 37 | |
| flowers_google | sigi_fam_log | -0.3115866 | -1.9399461 | 0.0604780 | 35 | * |
| flowers_google | weapons_google | 0.5031787 | 3.5417480 | 0.0010945 | 37 | * |
| flowers_google | wps | 0.4333633 | 2.9249772 | 0.0058530 | 37 | * |
| gdi_exp | ggi | 0.3056067 | 2.0800727 | 0.0436584 | 42 | * |
| gdi_exp | gii_log | -0.1555047 | -1.0201960 | 0.3134763 | 42 | |
| gdi_exp | median_age | 0.3308739 | 2.3257684 | 0.0247030 | 44 | * |
| gdi_exp | sigi_fam_log | -0.0993460 | -0.6470367 | 0.5211290 | 42 | |
| gdi_exp | weapons_google | -0.0402823 | -0.2418900 | 0.8102380 | 36 | |
| gdi_exp | wps | 0.4281157 | 3.1064158 | 0.0033488 | 43 | * |
| ggi | gii_log | -0.4748966 | -3.4971999 | 0.0011249 | 42 | * |
| ggi | median_age | 0.2786336 | 1.9024650 | 0.0638211 | 43 | * |
| ggi | sigi_fam_log | -0.5573804 | -4.2986386 | 0.0001034 | 41 | * |
| ggi | weapons_google | 0.2453969 | 1.5188228 | 0.1375415 | 36 | |
| ggi | wps | 0.5705411 | 4.5554970 | 0.0000428 | 43 | * |
| gii_log | median_age | -0.8006062 | -8.7616930 | 0.0000000 | 43 | * |
| gii_log | sigi_fam_log | 0.7240202 | 6.7209832 | 0.0000000 | 41 | * |
| gii_log | weapons_google | -0.2702176 | -1.6839498 | 0.1008443 | 36 | |
| gii_log | wps | -0.8311771 | -9.8027272 | 0.0000000 | 43 | * |
| median_age | sigi_fam_log | -0.5790572 | -4.6574184 | 0.0000308 | 43 | * |
| median_age | weapons_google | 0.3734384 | 2.4486878 | 0.0191977 | 37 | * |
| median_age | wps | 0.8312096 | 9.9173126 | 0.0000000 | 44 | * |
| sigi_fam_log | weapons_google | -0.2877750 | -1.7776997 | 0.0841436 | 35 | * |
| sigi_fam_log | wps | -0.6535854 | -5.5964887 | 0.0000015 | 42 | * |
| weapons_google | wps | 0.4334082 | 2.9253505 | 0.0058474 | 37 | * |
# Scatter plots of one bias measure against a set of other measures.
#
# Arguments:
#   df            Data frame holding the measures.
#   bias_measure  Name (string) of the column plotted on the y axis.
#   gender_cols   Positions of the columns plotted on the x axes, one facet
#                 per column. Defaults to columns 10-15.
#
# Returns a ggplot object: points plus a linear fit, faceted by measure with
# free scales.
plot_bias_by_gender_measure <- function(df, bias_measure, gender_cols = 10:15) {
  # Keep the first column, the bias measure (by name rather than by a
  # hard-coded position) and the x-axis columns, in that order, so that the
  # reshape below can leave the first two columns alone.
  keep <- c(names(df)[1], bias_measure, names(df)[gender_cols])

  df[keep] %>%
    gather("measure", "value", c(-1, -2)) %>%
    ggplot(aes(x = value, y = .data[[bias_measure]])) +
    geom_point() +
    geom_smooth(method = "lm") +
    facet_wrap(~measure, scales = "free") +
    labs(y = bias_measure) +
    theme_bw()
}

plot_bias_by_gender_measure(full_df, "flowers_google")
plot_bias_by_gender_measure(full_df, "weapons_google")
plot_bias_by_gender_measure(full_df, "career_google")