gdp_2013 is the GDP measure Bill sent; it is missing three values. gdp_2017 is a measure I obtained from the World Bank (indicator NY.GDP.PCAP.CD, GDP per capita in current US$) for 2017.
# Download the 2017 values of World Bank indicator NY.GDP.PCAP.CD and keep
# only the ISO2 country code and the value, with the value named gdp_2017.
gdp_data <- wb(
  indicator = "NY.GDP.PCAP.CD",
  startdate = 2017,
  enddate = 2017
) %>%
  select(iso2c, gdp_2017 = value)
#wbsearch("gdp per capita")
#NY.GDP.MKTP.CD
#NY.GDP.PCAP.PP.CD
# Read the CSV at PATH, normalise its column names, attach gdp_2017 by
# matching country_code to the World Bank iso2c code, and z-score every
# numeric column.
PATH <- "Molly data2.csv"
bill_data <- read_csv(PATH) %>%
  janitor::clean_names() %>%
  left_join(gdp_data, by = c("country_code" = "iso2c")) %>%
  # scale() returns a one-column matrix carrying "scaled:center" and
  # "scaled:scale" attributes. as.numeric() keeps the same z-scores but stores
  # each column as a plain numeric vector instead of a matrix column.
  mutate_if(is.numeric, ~ as.numeric(scale(.)))
# Keep only the numeric columns; these feed the correlation matrix.
plot_data <- select_if(bill_data, is.numeric)
# Pairwise correlations between all columns of plot_data, reshaped to long
# format: one row per (v2, v1) pair with the coefficient in `estimate`.
long_corr <- plot_data %>%
  cor(use = "pairwise.complete.obs") %>%
  as.data.frame() %>%
  rownames_to_column(var = "v2") %>%
  gather(key = "v1", value = "estimate", -v2)
# p-values for every pairwise correlation, reshaped to long format: one row
# per (v2, v1) pair with the p-value in `p`.
# NOTE(review): `use` is forwarded through `...` to the underlying test
# function; confirm it has any effect there.
long_p <- corrplot::cor.mtest(
  plot_data,
  use = "pairwise.complete.obs"
)$p %>%
  # Label rows and columns with the variable names explicitly, so the result
  # does not depend on whether the p-value matrix carries dimnames.
  as.data.frame(row.names = names(plot_data)) %>%
  # A direct setNames() call replaces the superseded do() wrapper.
  setNames(names(plot_data)) %>%
  rownames_to_column("v2") %>%
  gather("v1", "p", -v2)
# Combine coefficients and p-values into one long table and derive the
# columns used for plotting:
#   estimate_char  - rounded coefficient as text, blank on the diagonal
#   estimate       - coefficient, NA on the diagonal
#   estimate_color - coefficient where p < .05, otherwise 0
# The join keys are spelled out rather than inferred from shared column
# names, so an extra shared column cannot silently change the join.
corr_df <- full_join(long_corr, long_p, by = c("v2", "v1")) %>%
  mutate(
    estimate_char = case_when(
      v1 == v2 ~ "",
      TRUE ~ as.character(round(estimate, 2))
    ),
    estimate = case_when(
      v1 == v2 ~ NA_real_,
      TRUE ~ estimate
    ),
    # Uses the `estimate` just redefined above, so the diagonal stays NA here.
    estimate_color = case_when(
      p < .05 ~ estimate,
      TRUE ~ 0
    )
  )
# Heatmap of the pairwise correlations: one tile per variable pair, filled by
# estimate_color and labelled with estimate_char. The legend is hidden.
ggplot(
  data = corr_df,
  mapping = aes(x = v1, y = fct_rev(v2), fill = estimate_color)
) +
  # One rectangle per correlation.
  geom_tile() +
  # Print the correlation value inside its rectangle.
  geom_text(mapping = aes(label = estimate_char), size = 3) +
  scale_fill_gradient2(
    name = "Pearson's r",
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = 0,
    space = "Lab",
    guide = "colourbar"
  ) +
  ggtitle("Pairwise Correlation Coefficients") +
  theme_classic(base_size = 12) +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# Scatter of women_stem against lang_es_sub with a linear fit.
ggplot(data = bill_data, mapping = aes(x = lang_es_sub, y = women_stem)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", alpha = .2) +
  theme_classic()
# Scatter of gdp_2017 against lang_es_sub with a linear fit.
ggplot(data = bill_data, mapping = aes(x = lang_es_sub, y = gdp_2017)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", alpha = .2) +
  theme_classic()
Bill’s measure:
# Mixed model of women_stem on lang_es_sub, controlling for gdp_2013, with a
# random intercept for each language_name.
summary(
  lmer(
    women_stem ~ lang_es_sub + gdp_2013 + (1 | language_name),
    data = bill_data
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: women_stem ~ lang_es_sub + gdp_2013 + (1 | language_name)
## Data: bill_data
##
## REML criterion at convergence: 55
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.34115 -0.35934 0.00647 0.28640 1.99820
##
## Random effects:
## Groups Name Variance Std.Dev.
## language_name (Intercept) 0.4927 0.7019
## Residual 0.1725 0.4153
## Number of obs: 26, groups: language_name, 16
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.1068 0.2009 -0.532
## lang_es_sub -0.1045 0.2163 -0.483
## gdp_2013 -0.4438 0.1850 -2.398
##
## Correlation of Fixed Effects:
## (Intr) lng_s_
## lang_es_sub 0.117
## gdp_2013 -0.129 -0.524
My measure:
# Mixed model of women_stem on lang_es_sub, controlling for gdp_2017, with a
# random intercept for each language_name.
summary(
  lmer(
    women_stem ~ lang_es_sub + gdp_2017 + (1 | language_name),
    data = bill_data
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: women_stem ~ lang_es_sub + gdp_2017 + (1 | language_name)
## Data: bill_data
##
## REML criterion at convergence: 60.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.22030 -0.34556 -0.06633 0.47187 1.87289
##
## Random effects:
## Groups Name Variance Std.Dev.
## language_name (Intercept) 0.3913 0.6255
## Residual 0.2104 0.4587
## Number of obs: 28, groups: language_name, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.0953 0.1817 -0.524
## lang_es_sub -0.2643 0.1814 -1.458
## gdp_2017 -0.3018 0.1756 -1.718
##
## Correlation of Fixed Effects:
## (Intr) lng_s_
## lang_es_sub 0.215
## gdp_2017 -0.160 -0.471
additive:
# Additive mixed model: lang_es_sub and median_country_age enter as main
# effects only, alongside gdp_2017 and a random intercept per language_name.
summary(
  lmer(
    women_stem ~ lang_es_sub + median_country_age + gdp_2017 +
      (1 | language_name),
    data = bill_data
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: women_stem ~ lang_es_sub + median_country_age + gdp_2017 + (1 |
## language_name)
## Data: bill_data
##
## REML criterion at convergence: 61.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.16106 -0.36787 -0.05786 0.48451 1.78441
##
## Random effects:
## Groups Name Variance Std.Dev.
## language_name (Intercept) 0.3948 0.6283
## Residual 0.2236 0.4728
## Number of obs: 28, groups: language_name, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.10492 0.18728 -0.560
## lang_es_sub -0.27544 0.18853 -1.461
## median_country_age 0.04798 0.20063 0.239
## gdp_2017 -0.31225 0.18814 -1.660
##
## Correlation of Fixed Effects:
## (Intr) lng_s_ mdn_c_
## lang_es_sub 0.247
## mdn_cntry_g -0.186 -0.216
## gdp_2017 -0.094 -0.376 -0.304
interactive:
# Interactive mixed model: lang_es_sub, median_country_age and their
# interaction, alongside gdp_2017 and a random intercept per language_name.
summary(
  lmer(
    women_stem ~ lang_es_sub * median_country_age + gdp_2017 +
      (1 | language_name),
    data = bill_data
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: women_stem ~ lang_es_sub * median_country_age + gdp_2017 + (1 |
## language_name)
## Data: bill_data
##
## REML criterion at convergence: 56.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.29879 -0.60406 0.02726 0.34444 1.52770
##
## Random effects:
## Groups Name Variance Std.Dev.
## language_name (Intercept) 0.2259 0.4753
## Residual 0.2192 0.4681
## Number of obs: 28, groups: language_name, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.3702 0.1814 -2.041
## lang_es_sub -0.3272 0.1626 -2.012
## median_country_age 0.3967 0.2230 1.779
## gdp_2017 -0.3848 0.1734 -2.220
## lang_es_sub:median_country_age 0.5089 0.1851 2.748
##
## Correlation of Fixed Effects:
## (Intr) lng_s_ mdn_c_ g_2017
## lang_es_sub 0.261
## mdn_cntry_g -0.425 -0.234
## gdp_2017 0.020 -0.364 -0.381
## lng_s_sb:__ -0.504 -0.089 0.567 -0.210
# Same interactive mixed model, with lang_es_wiki in place of lang_es_sub as
# the language predictor.
summary(
  lmer(
    women_stem ~ lang_es_wiki * median_country_age + gdp_2017 +
      (1 | language_name),
    data = bill_data
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: women_stem ~ lang_es_wiki * median_country_age + gdp_2017 + (1 |
## language_name)
## Data: bill_data
##
## REML criterion at convergence: 81.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.24681 -0.25976 -0.00787 0.25881 1.73528
##
## Random effects:
## Groups Name Variance Std.Dev.
## language_name (Intercept) 0.7772 0.8816
## Residual 0.2288 0.4783
## Number of obs: 32, groups: language_name, 22
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.3003 0.2388 1.258
## lang_es_wiki 0.2206 0.2436 0.905
## median_country_age -0.2935 0.2306 -1.272
## gdp_2017 -0.4988 0.2216 -2.251
## lang_es_wiki:median_country_age -0.1767 0.2213 -0.798
##
## Correlation of Fixed Effects:
## (Intr) lng_s_ mdn_c_ g_2017
## lang_es_wik 0.244
## mdn_cntry_g -0.268 -0.258
## gdp_2017 -0.140 -0.527 -0.103
## lng_s_wk:__ -0.450 -0.251 0.510 0.243
# Regress women_stem on gdp_2017, then plot the residuals against
# lang_es_sub. Points and linear fits are coloured by a two-bucket ntile()
# split of median_country_age (bucket 1 = "younger", otherwise "older").
mod <- lm(women_stem ~ gdp_2017, data = bill_data)

bill_data %>%
  modelr::add_residuals(model = mod, var = "women_stem_gdp_resid") %>%
  mutate(
    median_country_age_tile = ntile(median_country_age, 2),
    country_age = ifelse(median_country_age_tile == 1, "younger", "older")
  ) %>%
  ggplot(
    mapping = aes(
      x = lang_es_sub,
      y = women_stem_gdp_resid,
      color = country_age
    )
  ) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", alpha = .2) +
  theme_classic()
# Collapse bill_data to one row per language_name by taking the mean of every
# numeric column, ignoring missing values.
lang_level <- bill_data %>%
  group_by(language_name) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  summarize_if(is.numeric, mean, na.rm = TRUE)
# Language-level linear model of women_stem on lang_es_sub, controlling for
# gdp_2017.
summary(
  lm(
    women_stem ~ lang_es_sub + gdp_2017,
    data = lang_level
  )
)
##
## Call:
## lm(formula = women_stem ~ lang_es_sub + gdp_2017, data = lang_level)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.39590 -0.30211 -0.03374 0.37335 1.47381
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1036 0.1829 -0.567 0.5793
## lang_es_sub -0.3425 0.1885 -1.817 0.0893 .
## gdp_2017 -0.1506 0.2307 -0.653 0.5238
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.752 on 15 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.3237, Adjusted R-squared: 0.2336
## F-statistic: 3.59 on 2 and 15 DF, p-value: 0.0532
additive:
# Language-level additive model: lang_es_sub, median_country_age and gdp_2017
# as main effects only.
summary(
  lm(
    women_stem ~ lang_es_sub + median_country_age + gdp_2017,
    data = lang_level
  )
)
##
## Call:
## lm(formula = women_stem ~ lang_es_sub + median_country_age +
## gdp_2017, data = lang_level)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.41746 -0.25454 -0.05535 0.35757 1.42377
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.11819 0.19701 -0.600 0.5581
## lang_es_sub -0.35673 0.20229 -1.764 0.0996 .
## median_country_age 0.07654 0.29547 0.259 0.7994
## gdp_2017 -0.17817 0.26095 -0.683 0.5059
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7765 on 14 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.327, Adjusted R-squared: 0.1827
## F-statistic: 2.267 on 3 and 14 DF, p-value: 0.1256
interactive:
# Language-level interactive model: lang_es_sub, median_country_age and their
# interaction, controlling for gdp_2017.
summary(
  lm(
    women_stem ~ lang_es_sub * median_country_age + gdp_2017,
    data = lang_level
  )
)
##
## Call:
## lm(formula = women_stem ~ lang_es_sub * median_country_age +
## gdp_2017, data = lang_level)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4148 -0.3087 0.1236 0.3030 0.9878
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.4186 0.1958 -2.138 0.0521 .
## lang_es_sub -0.3810 0.1671 -2.280 0.0401 *
## median_country_age 0.5999 0.3092 1.940 0.0744 .
## gdp_2017 -0.4585 0.2382 -1.925 0.0764 .
## lang_es_sub:median_country_age 0.5431 0.1974 2.751 0.0165 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6406 on 13 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.5746, Adjusted R-squared: 0.4438
## F-statistic: 4.391 on 4 and 13 DF, p-value: 0.01829
# Language-level version of the residual plot: regress women_stem on gdp_2017
# using lang_level, then plot the residuals against lang_es_sub. Points and
# linear fits are coloured by a two-bucket ntile() split of
# median_country_age (bucket 1 = "younger", otherwise "older").
mod <- lm(women_stem ~ gdp_2017, data = lang_level)

lang_level %>%
  modelr::add_residuals(model = mod, var = "women_stem_gdp_resid") %>%
  mutate(
    median_country_age_tile = ntile(median_country_age, 2),
    country_age = ifelse(median_country_age_tile == 1, "younger", "older")
  ) %>%
  ggplot(
    mapping = aes(
      x = lang_es_sub,
      y = women_stem_gdp_resid,
      color = country_age
    )
  ) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", alpha = .2) +
  theme_classic()
# Language-level scatter of women_stem against lang_es_sub with a linear fit.
ggplot(data = lang_level, mapping = aes(x = lang_es_sub, y = women_stem)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", alpha = .2) +
  theme_classic()
# Language-level scatter of gdp_2017 against lang_es_sub with a linear fit.
ggplot(data = lang_level, mapping = aes(x = lang_es_sub, y = gdp_2017)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", alpha = .2) +
  theme_classic()