# ---- Input data ----

# Occupation form overlap, by language.
OCCUPATION_OVERLAP_PATH <- here("data/study2/occupation_gender_scores.csv")
by_lang_scores_tidy <- OCCUPATION_OVERLAP_PATH %>%
  read_csv()

# Occupation semantics, by language.
BY_LANGUAGE_OCCUPATION_PATH <- here(
  "data/study2/occupation_gender_score_by_language.csv"
)
occupation_semantics <- BY_LANGUAGE_OCCUPATION_PATH %>%
  read_csv()

# Behavioral IAT measures, by language. `wiki_language_code` is renamed to
# `language_code`, and only the columns listed in `select()` are kept.
BEHAVIORAL_IAT_PATH <- here("data/study0/processed/by_language_df.csv")
iat_behavioral_es <- BEHAVIORAL_IAT_PATH %>%
  read_csv() %>%
  rename(language_code = wiki_language_code) %>%
  select(
    language_code,
    median_country_age,
    prop_male,
    log_age,
    es_iat_sex_age_order_explicit_resid,
    es_iat_sex_age_order_implicit_resid,
    per_women_stem_2012_2017,
    n_participants
  )

# Contents of the study 1b file (presumably language-derived IAT effect
# sizes, going by the file name -- confirm against the data).
LANG_IAT_PATH <- here("data/study1b/iat_es_lang.csv")
iat_lang_es <- LANG_IAT_PATH %>%
  read_csv()
# Combine the per-language measures into a single analysis table.
#
# * No `by =` is given, so each join matches on whatever column names the two
#   tables share (presumably `language_code` -- TODO confirm and make the key
#   explicit).
# * Numeric columns are z-scored BEFORE the "zu" row is removed, so that row
#   still contributes to every column's mean and SD.
# * `scale()` returns a one-column matrix; `as.numeric()` turns it back into a
#   plain numeric vector so the table does not carry matrix columns into the
#   modelling functions used later.
all_es_tidy2 <- full_join(by_lang_scores_tidy, iat_behavioral_es) %>%
  left_join(iat_lang_es) %>%
  left_join(occupation_semantics) %>%
  mutate(across(where(is.numeric), ~ as.numeric(scale(.x)))) %>%
  filter(language_code != "zu") %>%
  select(
    language_code,
    subt_occu_semantics_fm,
    wiki_occu_semantics_fm,
    median_country_age,
    lang_es_wiki,
    lang_es_sub,
    es_iat_sex_age_order_implicit_resid,
    es_iat_sex_age_order_explicit_resid,
    mean_prop_distinct_occs
  )
# Left disabled: rows with missing values are retained, and each model drops
# its own incomplete rows.
# drop_na()
Here are the models for the classic mediation test, using the subtitle-based semantics measure (`subt_occu_semantics_fm`) as the mediator:
y ~ x
# Step 1 (y ~ x): regress `es_iat_sex_age_order_implicit_resid` on
# `mean_prop_distinct_occs`.
# NOTE(review): rows with a missing mediator are not excluded here, so this
# model is fit on more rows than the m ~ x and y ~ x + m models that follow.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs,
    data = all_es_tidy2
  )
)
##
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs,
## data = all_es_tidy2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.41290 -0.42885 -0.04407 0.58367 1.48053
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0007338 0.1712522 0.004 0.99662
## mean_prop_distinct_occs 0.5706558 0.1744658 3.271 0.00336 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8556 on 23 degrees of freedom
## Multiple R-squared: 0.3175, Adjusted R-squared: 0.2878
## F-statistic: 10.7 on 1 and 23 DF, p-value: 0.003357
m ~ x
# Step 2 (m ~ x): regress the mediator `subt_occu_semantics_fm` on
# `mean_prop_distinct_occs`.
summary(
  lm(
    subt_occu_semantics_fm ~ mean_prop_distinct_occs,
    data = all_es_tidy2
  )
)
##
## Call:
## lm(formula = subt_occu_semantics_fm ~ mean_prop_distinct_occs,
## data = all_es_tidy2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3179 -0.4499 0.1155 0.3120 1.5309
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1165 0.1539 -0.757 0.459041
## mean_prop_distinct_occs 0.7341 0.1527 4.808 0.000141 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6798 on 18 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.5622, Adjusted R-squared: 0.5379
## F-statistic: 23.11 on 1 and 18 DF, p-value: 0.000141
y ~ x + m
# Step 3 (y ~ x + m): regress `es_iat_sex_age_order_implicit_resid` on the
# predictor and the mediator together.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~
      mean_prop_distinct_occs + subt_occu_semantics_fm,
    data = all_es_tidy2
  )
)
##
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs +
## subt_occu_semantics_fm, data = all_es_tidy2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.18854 -0.42849 -0.00183 0.48098 1.33172
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.08296 0.17151 0.484 0.635
## mean_prop_distinct_occs 0.21081 0.25313 0.833 0.416
## subt_occu_semantics_fm 0.43765 0.25855 1.693 0.109
##
## Residual standard error: 0.7457 on 17 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.4325, Adjusted R-squared: 0.3658
## F-statistic: 6.479 on 2 and 17 DF, p-value: 0.008101
# Sobel test: `mean_prop_distinct_occs` as predictor,
# `subt_occu_semantics_fm` as mediator, implicit IAT residual as outcome.
with(
  all_es_tidy2,
  multilevel::sobel(
    pred = mean_prop_distinct_occs,
    med = subt_occu_semantics_fm,
    out = es_iat_sex_age_order_implicit_resid
  )
)
## $`Mod1: Y~X`
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.03198319 0.1773796 0.1803093 0.858924234
## pred 0.53207686 0.1759516 3.0239958 0.007294376
##
## $`Mod2: Y~X+M`
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.08295606 0.1715113 0.4836769 0.6347866
## pred 0.21080932 0.2531302 0.8328097 0.4164993
## med 0.43764747 0.2585511 1.6926927 0.1087585
##
## $`Mod3: M~X`
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1164702 0.1539256 -0.7566653 0.4590405606
## pred 0.7340784 0.1526864 4.8077526 0.0001409921
##
## $Indirect.Effect
## [1] 0.3212675
##
## $SE
## [1] 0.2012165
##
## $z.value
## [1] 1.596626
##
## $N
## [1] 20
library(robmed)
# Bootstrap test of the indirect effect through `subt_occu_semantics_fm`.
# NOTE(review): no RNG seed is set in the visible code, so the bootstrap
# estimates are expected to vary between runs.
test_mediation(
  fit_mediation(
    all_es_tidy2,
    x = "mean_prop_distinct_occs",
    y = "es_iat_sex_age_order_implicit_resid",
    m = "subt_occu_semantics_fm"
  )
)
## Bootstrap results for indirect effect
##
## Indirect effect (ab path):
## Data Boot
## subt_occu_semantics_fm 0.2427 0.2908
##
## 95 percent confidence interval:
## Lower Upper
## subt_occu_semantics_fm -0.09183 0.7031
##
## Number of bootstrap replicates: 5000
With median country age (`median_country_age`) as a covariate:
# Same bootstrap mediation test, with `median_country_age` added as a
# covariate.
# NOTE(review): no RNG seed is set in the visible code, so the bootstrap
# estimates are expected to vary between runs.
test_mediation(
  fit_mediation(
    all_es_tidy2,
    x = "mean_prop_distinct_occs",
    y = "es_iat_sex_age_order_implicit_resid",
    m = "subt_occu_semantics_fm",
    covariates = "median_country_age"
  )
)
## Bootstrap results for indirect effect
##
## Indirect effect (ab path):
## Data Boot
## subt_occu_semantics_fm 0.1947 0.2143
##
## 95 percent confidence interval:
## Lower Upper
## subt_occu_semantics_fm -0.1387 0.631
##
## Number of bootstrap replicates: 5000
# Columns needed for the `wiki_occu_semantics_fm` mediation models.
# `all_es_tidy2` is built by the same join / scale / filter pipeline and
# already holds every column selected here, so it is reused instead of
# re-running that pipeline a second time.
all_es_tidy3 <- all_es_tidy2 %>%
  select(
    wiki_occu_semantics_fm,
    median_country_age,
    es_iat_sex_age_order_implicit_resid,
    mean_prop_distinct_occs,
    language_code
  )
# Sobel test: `mean_prop_distinct_occs` as predictor,
# `wiki_occu_semantics_fm` as mediator, implicit IAT residual as outcome.
with(
  all_es_tidy3,
  multilevel::sobel(
    pred = mean_prop_distinct_occs,
    med = wiki_occu_semantics_fm,
    out = es_iat_sex_age_order_implicit_resid
  )
)
## $`Mod1: Y~X`
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0007337709 0.1712522 0.004284739 0.996618231
## pred 0.5706558112 0.1744658 3.270875531 0.003357305
##
## $`Mod2: Y~X+M`
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01104695 0.1654187 0.06678176 0.9473586
## pred 0.30074741 0.2355332 1.27687901 0.2149583
## med 0.38646329 0.2357833 1.63906109 0.1154238
##
## $`Mod3: M~X`
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.02668605 0.1461817 -0.1825539 0.856747578
## pred 0.69840632 0.1489249 4.6896560 0.000100884
##
## $Indirect.Effect
## [1] 0.2699084
##
## $SE
## [1] 0.1744406
##
## $z.value
## [1] 1.54728
##
## $N
## [1] 25
Bootstrapped mediation tests have more power than the Sobel test: https://journals.sagepub.com/doi/pdf/10.1111/j.1467-9280.2007.01882.x
# Bootstrap test of the indirect effect through `wiki_occu_semantics_fm`.
# The result is stored so it can be printed here and queried again later.
# NOTE(review): "bootsrapped" is a misspelling of "bootstrapped"; the name is
# kept unchanged because later code refers to it.
# NOTE(review): no RNG seed is set in the visible code, so the bootstrap
# estimates are expected to vary between runs.
mediation_model_bootsrapped <- test_mediation(
  fit_mediation(
    all_es_tidy3,
    x = "mean_prop_distinct_occs",
    y = "es_iat_sex_age_order_implicit_resid",
    m = "wiki_occu_semantics_fm"
  )
)
mediation_model_bootsrapped
## Bootstrap results for indirect effect
##
## Indirect effect (ab path):
## Data Boot
## wiki_occu_semantics_fm 0.3496 0.3399
##
## 95 percent confidence interval:
## Lower Upper
## wiki_occu_semantics_fm 0.02426 0.8618
##
## Number of bootstrap replicates: 5000
# p-value for the stored bootstrap mediation test (presumably for the
# indirect effect -- confirm against the robmed documentation).
mediation_model_bootsrapped %>%
  p_value()
## [1] 0.0372
# Sobel test with the roles swapped relative to the earlier subtitle test:
# `subt_occu_semantics_fm` is the predictor and `mean_prop_distinct_occs` is
# the mediator.
with(
  all_es_tidy2,
  multilevel::sobel(
    pred = subt_occu_semantics_fm,
    med = mean_prop_distinct_occs,
    out = es_iat_sex_age_order_implicit_resid
  )
)
## $`Mod1: Y~X`
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1164034 0.1653174 0.7041211 0.490367275
## pred 0.5990970 0.1696120 3.5321611 0.002380461
##
## $`Mod2: Y~X+M`
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.08295606 0.1715113 0.4836769 0.6347866
## pred 0.43764747 0.2585511 1.6926927 0.1087585
## med 0.21080932 0.2531302 0.8328097 0.4164993
##
## $`Mod3: M~X`
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1586617 0.1552625 1.021893 0.3203701708
## pred 0.7658557 0.1592960 4.807753 0.0001409921
##
## $Indirect.Effect
## [1] 0.1614495
##
## $SE
## [1] 0.1967482
##
## $z.value
## [1] 0.8205894
##
## $N
## [1] 20
# wiki: implicit IAT residual on `mean_prop_distinct_occs`,
# `wiki_occu_semantics_fm` and `median_country_age`.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~
      mean_prop_distinct_occs + wiki_occu_semantics_fm + median_country_age,
    data = all_es_tidy2
  )
)
##
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs +
## wiki_occu_semantics_fm + median_country_age, data = all_es_tidy2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.33644 -0.41351 -0.03424 0.67875 1.12992
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.006843 0.144955 -0.047 0.9628
## mean_prop_distinct_occs 0.204695 0.209075 0.979 0.3387
## wiki_occu_semantics_fm 0.303686 0.208554 1.456 0.1601
## median_country_age 0.446497 0.160836 2.776 0.0113 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.723 on 21 degrees of freedom
## Multiple R-squared: 0.555, Adjusted R-squared: 0.4915
## F-statistic: 8.732 on 3 and 21 DF, p-value: 0.0005916
# Same model as the preceding one, but with the `wiki_occu_semantics_fm` term
# left out.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~
      mean_prop_distinct_occs + median_country_age,
    data = all_es_tidy2
  )
)
##
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs +
## median_country_age, data = all_es_tidy2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.16469 -0.49085 -0.06985 0.55277 1.18407
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.01612 0.14846 -0.109 0.91450
## mean_prop_distinct_occs 0.40525 0.16125 2.513 0.01979 *
## median_country_age 0.47998 0.16319 2.941 0.00755 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7412 on 22 degrees of freedom
## Multiple R-squared: 0.5101, Adjusted R-squared: 0.4656
## F-statistic: 11.45 on 2 and 22 DF, p-value: 0.0003899
# Implicit IAT residual on `mean_prop_distinct_occs`, `lang_es_wiki` and
# `median_country_age`.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~
      mean_prop_distinct_occs + lang_es_wiki + median_country_age,
    data = all_es_tidy2
  )
)
##
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs +
## lang_es_wiki + median_country_age, data = all_es_tidy2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.13417 -0.47215 0.07784 0.39840 1.09875
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.05389 0.13920 -0.387 0.7025
## mean_prop_distinct_occs 0.38358 0.15029 2.552 0.0186 *
## lang_es_wiki 0.36448 0.17286 2.109 0.0472 *
## median_country_age 0.42476 0.15398 2.759 0.0118 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6892 on 21 degrees of freedom
## Multiple R-squared: 0.5957, Adjusted R-squared: 0.538
## F-statistic: 10.31 on 3 and 21 DF, p-value: 0.0002229
# subt: implicit IAT residual on `mean_prop_distinct_occs`,
# `subt_occu_semantics_fm` and `median_country_age`.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~
      mean_prop_distinct_occs + subt_occu_semantics_fm + median_country_age,
    data = all_es_tidy2
  )
)
##
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs +
## subt_occu_semantics_fm + median_country_age, data = all_es_tidy2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0514 -0.4102 0.1029 0.3519 1.1883
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.06898 0.16851 0.409 0.688
## mean_prop_distinct_occs 0.18417 0.24904 0.740 0.470
## subt_occu_semantics_fm 0.37338 0.25830 1.446 0.168
## median_country_age 0.25227 0.19444 1.297 0.213
##
## Residual standard error: 0.7311 on 16 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.4865, Adjusted R-squared: 0.3903
## F-statistic: 5.054 on 3 and 16 DF, p-value: 0.01188
# Implicit IAT residual on `mean_prop_distinct_occs`, `lang_es_sub` and
# `median_country_age`.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~
      mean_prop_distinct_occs + lang_es_sub + median_country_age,
    data = all_es_tidy2
  )
)
##
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs +
## lang_es_sub + median_country_age, data = all_es_tidy2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2603 -0.3930 0.0297 0.4701 1.1567
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.06848 0.15347 0.446 0.6608
## mean_prop_distinct_occs 0.33204 0.16097 2.063 0.0539 .
## lang_es_sub 0.27296 0.16236 1.681 0.1100
## median_country_age 0.30536 0.17192 1.776 0.0926 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7008 on 18 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.4948, Adjusted R-squared: 0.4106
## F-statistic: 5.877 on 3 and 18 DF, p-value: 0.005571