`gdp_2017` is per capita GDP taken from the World Bank database (the 2017 “GDP per capita (current US$)” indicator); it is missing a value for one country (Taiwan). This measure is correlated with the GDP measure from 2013 (`gdp_2013`) at r = .96. Models below are presented with both the 2013 and 2017 measures. Note that all of the variables are centered and scaled.
# Query the World Bank API for indicator NY.GDP.PCAP.CD, restricted to 2017,
# keeping only the iso2c code and the value (stored as gdp_2017)
gdp_data <- select(
  wbstats::wb(
    indicator = "NY.GDP.PCAP.CD",
    startdate = 2017,
    enddate = 2017
  ),
  iso2c,
  gdp_2017 = value
)
# Country-level measures (data from Bill von Hippel): read the CSV, clean the
# column names, then attach gdp_2017 by matching country_code to iso2c
INPATH <- "Molly data2.csv"
country_raw <- left_join(
  janitor::clean_names(read_csv(INPATH)),
  gdp_data,
  by = c(country_code = "iso2c")
)
# save country data with GDP 2017 data merged in (unscaled)
# OUTPATH <- "country_level_data_with_GDP.csv"
# write_csv(country_raw, OUTPATH)
# scale variables: center and scale every numeric column.
# base::scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes; as.numeric() turns the result back into a plain
# numeric vector so the columns of country_level are ordinary vectors rather
# than matrix columns. The scaled values themselves are unchanged.
country_level <- country_raw %>%
  mutate_if(is.numeric, ~ as.numeric(base::scale(.)))
# numeric columns only; this is the input to the correlation computations
plot_data <- select_if(country_level, is.numeric)
# Correlation matrix of plot_data (pairwise-complete observations), reshaped
# to long form: one row per (v2, v1) pair with the coefficient in `estimate`
long_corr <- plot_data %>%
  cor(use = "pairwise.complete.obs") %>%
  as.data.frame() %>%
  rownames_to_column(var = "v2") %>%
  gather(key = "v1", value = "estimate", -v2)
# p-value matrix from corrplot::cor.mtest(), with rows and columns labelled
# by the plot_data variable names, reshaped to long form: one row per
# (v2, v1) pair with the p-value in `p`
long_p <- corrplot::cor.mtest(plot_data, use = "pairwise.complete.obs")$p %>%
  as.data.frame(row.names = names(plot_data)) %>%
  setNames(names(plot_data)) %>%
  rownames_to_column(var = "v2") %>%
  gather(key = "v1", value = "p", -v2)
# Combine coefficients and p-values, one row per variable pair.
#   estimate_char:  rounded coefficient as text, blank when v1 == v2
#   estimate:       coefficient, set to NA when v1 == v2
#   estimate_color: the coefficient when p < .05, otherwise 0
# The join keys are spelled out so the join does not depend on whichever
# column names the two tables happen to share (and emits no "Joining, by"
# message).
corr_df <- full_join(long_corr, long_p, by = c("v2", "v1")) %>%
  mutate(
    estimate_char = case_when(
      v1 == v2 ~ "",
      TRUE ~ as.character(round(estimate, 2))
    ),
    estimate = case_when(
      v1 == v2 ~ NA_real_,
      TRUE ~ estimate
    ),
    estimate_color = case_when(
      p < .05 ~ estimate,
      TRUE ~ 0
    )
  )
# Heatmap of the pairwise correlations: one tile per variable pair, filled by
# estimate_color on a blue-white-red scale centred at 0 and labelled with
# estimate_char
ggplot(
  corr_df,
  aes(x = v1, y = fct_rev(v2), fill = estimate_color)
) +
  # rectangles for each correlation
  geom_tile() +
  # actual correlation value inside the rectangle
  geom_text(aes(label = estimate_char), size = 3) +
  scale_fill_gradient2(
    name = "Pearson's r",
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = 0,
    space = "Lab",
    guide = "colourbar"
  ) +
  labs(title = "Pairwise Correlation Coefficients") +
  theme_classic(base_size = 12) +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
Pairwise correlation between all country-level measures. Red and blue correspond to positive and negative correlations, respectively. Non-significant correlations (p >= .05) are indicated with white squares.
# women_stem against lang_es_sub: points labelled by country, with a linear
# fit drawn after (on top of) the points and labels
ggplot(
  country_level,
  aes(x = lang_es_sub, y = women_stem, label = country)
) +
  geom_point() +
  geom_text_repel(size = 3) +
  geom_smooth(method = "lm", alpha = .2) +
  labs(
    title = "Perc. Women in STEM vs. Language Bias",
    x = "Linguistic Gender Bias\n(effect size)",
    y = "Perc. Women in STEM"
  ) +
  theme_classic()
# gdp_2017 against lang_es_sub: linear fit drawn first, then the points and
# their country labels on top
ggplot(
  country_level,
  aes(x = lang_es_sub, y = gdp_2017, label = country)
) +
  geom_smooth(method = "lm", alpha = .2) +
  geom_point() +
  geom_text_repel(size = 3) +
  labs(
    title = "GDP vs. Language Bias",
    x = "Linguistic Gender Bias\n(effect size)",
    y = "GDP (2017)"
  ) +
  theme_classic(base_size = 12)
# gdp_2017 against women_stem: linear fit drawn first, then the points and
# their country labels on top
ggplot(
  country_level,
  aes(x = women_stem, y = gdp_2017, label = country)
) +
  geom_smooth(method = "lm", alpha = .2) +
  geom_point() +
  geom_text_repel(size = 3) +
  labs(
    title = "GDP vs. Perc. Women in STEM ",
    x = "Perc. Women in STEM",
    y = "GDP (2017)"
  ) +
  theme_classic(base_size = 12)
# Linear model: implicit_resid predicted by lang_es_sub and gdp_2017
# (the numeric columns of country_level are centered and scaled)
summary(
  lm(implicit_resid ~ lang_es_sub + gdp_2017, data = country_level)
)
##
## Call:
## lm(formula = implicit_resid ~ lang_es_sub + gdp_2017, data = country_level)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.85379 -0.34616 -0.01211 0.24129 1.53997
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.07787 0.15120 0.515 0.6106
## lang_es_sub 0.43528 0.18426 2.362 0.0253 *
## gdp_2017 0.13578 0.18936 0.717 0.4793
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8289 on 28 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.2982, Adjusted R-squared: 0.2481
## F-statistic: 5.949 on 2 and 28 DF, p-value: 0.007028
# Same model with the 2013 GDP measure: implicit_resid predicted by
# lang_es_sub and gdp_2013
summary(
  lm(implicit_resid ~ lang_es_sub + gdp_2013, data = country_level)
)
##
## Call:
## lm(formula = implicit_resid ~ lang_es_sub + gdp_2013, data = country_level)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.97320 -0.42823 0.02827 0.26116 1.58712
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.08256 0.16201 0.510 0.6146
## lang_es_sub 0.49167 0.21807 2.255 0.0328 *
## gdp_2013 0.05449 0.20952 0.260 0.7969
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8659 on 26 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.2677, Adjusted R-squared: 0.2114
## F-statistic: 4.752 on 2 and 26 DF, p-value: 0.01742
# Linear model: women_stem predicted by lang_es_sub and gdp_2017
# (the numeric columns of country_level are centered and scaled)
summary(
  lm(women_stem ~ lang_es_sub + gdp_2017, data = country_level)
)
##
## Call:
## lm(formula = women_stem ~ lang_es_sub + gdp_2017, data = country_level)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.42632 -0.33009 -0.03366 0.35866 1.47446
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1859 0.1422 -1.307 0.203
## lang_es_sub -0.2943 0.1730 -1.701 0.101
## gdp_2017 -0.2096 0.1776 -1.180 0.249
##
## Residual standard error: 0.7221 on 25 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.2943, Adjusted R-squared: 0.2379
## F-statistic: 5.213 on 2 and 25 DF, p-value: 0.01281
# Same model with the 2013 GDP measure: women_stem predicted by lang_es_sub
# and gdp_2013
summary(
  lm(women_stem ~ lang_es_sub + gdp_2013, data = country_level)
)
##
## Call:
## lm(formula = women_stem ~ lang_es_sub + gdp_2013, data = country_level)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.40641 -0.35171 -0.02454 0.46282 1.36665
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2139 0.1456 -1.469 0.155
## lang_es_sub -0.1728 0.2002 -0.863 0.397
## gdp_2013 -0.2972 0.1908 -1.558 0.133
##
## Residual standard error: 0.7273 on 23 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.2719, Adjusted R-squared: 0.2086
## F-statistic: 4.296 on 2 and 23 DF, p-value: 0.02599