Correlations between all measures at the country level

gdp_2017 is per capita GDP taken from the World Bank database (the 2017 “GDP per capita (current US$)” indicator); it is missing values for one country (Taiwan). This measure is correlated with the GDP measure from 2013 (gdp_2013) at r = .96. Models below are presented with both the 2013 and 2017 measures. Note that all of the variables are centered and scaled.

# Fetch the 2017 "GDP per capita (current US$)" indicator from the World Bank
# API and keep only the ISO2 country code plus the value, renamed to gdp_2017.
gdp_data <- wbstats::wb(
  indicator = "NY.GDP.PCAP.CD",
  startdate = 2017,
  enddate = 2017
) %>%
  select(iso2c, gdp_2017 = value)

# data from Bill von Hippel
INPATH <- "Molly data2.csv"

# Read the country-level measures, clean the column names with janitor, then
# attach gdp_2017 by matching country_code against the World Bank iso2c code.
# A left join keeps every row of the input file even when no GDP value matches.
country_raw <- left_join(
  janitor::clean_names(read_csv(INPATH)),
  gdp_data,
  by = c("country_code" = "iso2c")
)

# save country data with GDP 2017 data merged in (unscaled)
# OUTPATH <- "country_level_data_with_GDP.csv"
# write_csv(country_raw, OUTPATH)

# Centre and scale every numeric column (z-scores).
# base::scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes; as.numeric() flattens it so each column stays a
# plain numeric vector instead of becoming a matrix column in the data frame.
country_level <- country_raw %>%
  mutate_if(is.numeric, ~ as.numeric(base::scale(.)))

# Numeric columns only: input for the correlation matrix and its p-values.
plot_data <- country_level %>%
  select_if(is.numeric)

# Correlation matrix of all numeric measures (pairwise-complete observations),
# reshaped to long format: one row per (v2, v1) pair with its estimate.
long_corr <- gather(
  rownames_to_column(
    as.data.frame(cor(plot_data, use = "pairwise.complete.obs")),
    var = "v2"
  ),
  key = "v1",
  value = "estimate",
  -v2
)

# Matrix of p-values for every pairwise correlation test.
# NOTE(review): `use` is forwarded through cor.mtest()'s `...`; it looks like
# the underlying test already drops incomplete pairs itself - confirm whether
# this argument has any effect.
cor_p_matrix <- corrplot::cor.mtest(plot_data,
                                    use = "pairwise.complete.obs")$p

# Label rows and columns with the variable names directly on the matrix, so
# the long format below lines up with long_corr regardless of whether
# cor.mtest() returned dimnames.
dimnames(cor_p_matrix) <- list(names(plot_data), names(plot_data))

# Long format: one row per (v2, v1) pair with its p-value.
long_p <- cor_p_matrix %>%
  as.data.frame() %>%
  rownames_to_column("v2") %>%
  gather("v1", "p", -v2)

# Combine estimates and p-values, one row per variable pair.
# The join keys are spelled out so the join cannot silently change if either
# input gains another shared column name.
corr_df <- full_join(long_corr, long_p, by = c("v2", "v1")) %>%
  mutate(
    # Text shown inside each tile: blank on the diagonal, r rounded to 2 dp
    # elsewhere.
    estimate_char = case_when(
      v1 == v2 ~ "",
      TRUE ~ as.character(round(estimate, 2))
    ),
    # Blank out the diagonal (a variable correlated with itself).
    estimate = case_when(
      v1 == v2 ~ NA_real_,
      TRUE ~ estimate
    ),
    # Fill value: the estimate when p < .05, otherwise 0 (the midpoint of the
    # fill scale). Computed after `estimate` was blanked above, so it uses the
    # blanked values.
    estimate_color = case_when(
      p < .05 ~ estimate,
      TRUE ~ 0
    )
  )

# Heatmap of the pairwise correlations: one tile per variable pair, filled by
# estimate_color and annotated with the rounded coefficient.
ggplot(
  data = corr_df,
  mapping = aes(x = v1, y = fct_rev(v2), fill = estimate_color)
) +
  # one rectangle per correlation
  geom_tile() +
  # print the correlation value inside its rectangle
  geom_text(aes(label = estimate_char), size = 3) +
  # diverging palette centred on zero
  scale_fill_gradient2(
    name = "Pearson's r",
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = 0,
    space = "Lab",
    guide = "colourbar"
  ) +
  labs(title = "Pairwise Correlation Coefficients") +
  theme_classic(base_size = 12) +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    # slant the variable names on the x axis
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
Pairwise correlation between all country-level measures. Red and blue correspond to positive and negative correlations, respectively. Non-significant correlations (\textit{p} >= .05) are indicated with white squares.

Pairwise correlation between all country-level measures. Red and blue correspond to positive and negative correlations, respectively. Non-significant correlations (p >= .05) are indicated with white squares.

Target correlations

STEM ~ language bias

# Scatter plot of women_stem against lang_es_sub, one labelled point per
# country, with a linear fit.
# Layer order and theme match the other two target-correlation plots: the
# fit line and its ribbon are drawn first so they sit underneath the points
# and country labels, and the base font size is 12.
country_level %>%
  ggplot(aes(x = lang_es_sub, y = women_stem, label = country)) +
  ylab("Perc. Women in STEM") +
  xlab("Linguistic Gender Bias\n(effect size)") +
  ggtitle("Perc. Women in STEM vs. Language Bias") +
  geom_smooth(method = "lm", alpha = .2) +
  geom_point() +
  geom_text_repel(size = 3) +
  theme_classic(base_size = 12)

GDP ~ language bias

# Scatter plot of gdp_2017 against lang_es_sub, one labelled point per
# country; the linear fit is drawn first so points and labels sit on top.
ggplot(
  data = country_level,
  mapping = aes(x = lang_es_sub, y = gdp_2017, label = country)
) +
  labs(
    title = "GDP vs. Language Bias",
    x = "Linguistic Gender Bias\n(effect size)",
    y = "GDP (2017)"
  ) +
  geom_smooth(method = "lm", alpha = .2) +
  geom_point() +
  geom_text_repel(size = 3) +
  theme_classic(base_size = 12)

GDP ~ STEM

# Scatter plot of gdp_2017 against women_stem, one labelled point per
# country; the linear fit is drawn first so points and labels sit on top.
ggplot(
  data = country_level,
  mapping = aes(x = women_stem, y = gdp_2017, label = country)
) +
  labs(
    title = "GDP vs. Perc. Women in STEM ",
    x = "Perc. Women in STEM",
    y = "GDP (2017)"
  ) +
  geom_smooth(method = "lm", alpha = .2) +
  geom_point() +
  geom_text_repel(size = 3) +
  theme_classic(base_size = 12)

Models predicting gender-career IAT from language biases

GDP 2017

# Linear model: implicit_resid predicted by language bias (lang_es_sub),
# controlling for 2017 GDP. Rows with missing values are dropped by lm().
summary(
  lm(implicit_resid ~ lang_es_sub + gdp_2017, data = country_level)
)
## 
## Call:
## lm(formula = implicit_resid ~ lang_es_sub + gdp_2017, data = country_level)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.85379 -0.34616 -0.01211  0.24129  1.53997 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.07787    0.15120   0.515   0.6106  
## lang_es_sub  0.43528    0.18426   2.362   0.0253 *
## gdp_2017     0.13578    0.18936   0.717   0.4793  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8289 on 28 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.2982, Adjusted R-squared:  0.2481 
## F-statistic: 5.949 on 2 and 28 DF,  p-value: 0.007028

GDP 2013

# Linear model: implicit_resid predicted by language bias (lang_es_sub),
# controlling for 2013 GDP. Rows with missing values are dropped by lm().
summary(
  lm(implicit_resid ~ lang_es_sub + gdp_2013, data = country_level)
)
## 
## Call:
## lm(formula = implicit_resid ~ lang_es_sub + gdp_2013, data = country_level)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.97320 -0.42823  0.02827  0.26116  1.58712 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.08256    0.16201   0.510   0.6146  
## lang_es_sub  0.49167    0.21807   2.255   0.0328 *
## gdp_2013     0.05449    0.20952   0.260   0.7969  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8659 on 26 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.2677, Adjusted R-squared:  0.2114 
## F-statistic: 4.752 on 2 and 26 DF,  p-value: 0.01742

Models predicting women-in-STEM bias from language biases

GDP 2017

# Linear model: women_stem predicted by language bias (lang_es_sub),
# controlling for 2017 GDP. Rows with missing values are dropped by lm().
summary(
  lm(women_stem ~ lang_es_sub + gdp_2017, data = country_level)
)
## 
## Call:
## lm(formula = women_stem ~ lang_es_sub + gdp_2017, data = country_level)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.42632 -0.33009 -0.03366  0.35866  1.47446 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.1859     0.1422  -1.307    0.203
## lang_es_sub  -0.2943     0.1730  -1.701    0.101
## gdp_2017     -0.2096     0.1776  -1.180    0.249
## 
## Residual standard error: 0.7221 on 25 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.2943, Adjusted R-squared:  0.2379 
## F-statistic: 5.213 on 2 and 25 DF,  p-value: 0.01281

GDP 2013

# Linear model: women_stem predicted by language bias (lang_es_sub),
# controlling for 2013 GDP. Rows with missing values are dropped by lm().
summary(
  lm(women_stem ~ lang_es_sub + gdp_2013, data = country_level)
)
## 
## Call:
## lm(formula = women_stem ~ lang_es_sub + gdp_2013, data = country_level)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.40641 -0.35171 -0.02454  0.46282  1.36665 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.2139     0.1456  -1.469    0.155
## lang_es_sub  -0.1728     0.2002  -0.863    0.397
## gdp_2013     -0.2972     0.1908  -1.558    0.133
## 
## Residual standard error: 0.7273 on 23 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.2719, Adjusted R-squared:  0.2086 
## F-statistic: 4.296 on 2 and 23 DF,  p-value: 0.02599