# Occupation form overlap by language: resolve the project-relative path
# with here() and load the CSV.
OCCUPATION_OVERLAP_PATH <- here("data/study2/occupation_gender_scores.csv")
by_lang_scores_tidy <- OCCUPATION_OVERLAP_PATH %>%
  read_csv()

# Occupation semantics by language: resolve the project-relative path
# with here() and load the CSV.
BY_LANGUAGE_OCCUPATION_PATH <- here("data/study2/occupation_gender_score_by_language.csv")
occupation_semantics <- BY_LANGUAGE_OCCUPATION_PATH %>%
  read_csv()

# Behavioral IAT measures by language. Only the columns listed below are
# kept; `wiki_language_code` is renamed to `language_code` while selecting
# so that it lines up with the key name used by the other tables.
BEHAVIORAL_IAT_PATH <- here("data/study0/processed/by_language_df.csv")
iat_behavioral_es <- select(
  read_csv(BEHAVIORAL_IAT_PATH),
  language_code = wiki_language_code,
  median_country_age,
  prop_male,
  log_age,
  es_iat_sex_age_order_explicit_resid,
  es_iat_sex_age_order_implicit_resid,
  per_women_stem_2012_2017,
  n_participants
)

# Language-based IAT effect sizes: resolve the project-relative path with
# here() and load the CSV.
LANG_IAT_PATH <- here("data/study1b/iat_es_lang.csv")
iat_lang_es <- LANG_IAT_PATH %>%
  read_csv()

# Build the analysis table: one row per language with all measures joined
# and every numeric column standardized (z-scored).
#
# - The joins rely on dplyr's natural join (all shared column names are used
#   as keys). NOTE(review): presumably the only shared key is
#   `language_code` -- confirm, and consider passing `by =` explicitly.
# - `scale()` returns a one-column matrix with "scaled:*" attributes;
#   wrapping it in `as.numeric()` keeps the same values but stores them as
#   plain numeric vectors, so the columns are not matrix columns.
# - Standardization happens BEFORE "zu" is filtered out, so that language
#   still contributes to the means/SDs used for scaling.
# - Rows with missing values are intentionally kept (no `drop_na()`); each
#   model below drops incomplete cases on its own.
all_es_tidy2 <- full_join(by_lang_scores_tidy, iat_behavioral_es) %>%
  left_join(iat_lang_es) %>%
  left_join(occupation_semantics) %>%
  mutate(across(where(is.numeric), function(x) as.numeric(scale(x)))) %>%
  filter(language_code != "zu") %>%
  select(
    language_code,
    subt_occu_semantics_fm,
    wiki_occu_semantics_fm,
    median_country_age,
    lang_es_wiki,
    lang_es_sub,
    es_iat_sex_age_order_implicit_resid,
    es_iat_sex_age_order_explicit_resid,
    mean_prop_distinct_occs
  )

Mediation models

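Here are the models for the classic mediation test (subtitle models), where x = mean_prop_distinct_occs, m = subt_occu_semantics_fm, and y = es_iat_sex_age_order_implicit_resid.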

y ~ x

# y ~ x: regress es_iat_sex_age_order_implicit_resid on
# mean_prop_distinct_occs and print the model summary.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs,
    data = all_es_tidy2
  )
)
## 
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs, 
##     data = all_es_tidy2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.41290 -0.42885 -0.04407  0.58367  1.48053 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)             0.0007338  0.1712522   0.004  0.99662   
## mean_prop_distinct_occs 0.5706558  0.1744658   3.271  0.00336 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8556 on 23 degrees of freedom
## Multiple R-squared:  0.3175, Adjusted R-squared:  0.2878 
## F-statistic:  10.7 on 1 and 23 DF,  p-value: 0.003357

m ~ x

# m ~ x: regress subt_occu_semantics_fm on mean_prop_distinct_occs and
# print the model summary.
summary(
  lm(
    subt_occu_semantics_fm ~ mean_prop_distinct_occs,
    data = all_es_tidy2
  )
)
## 
## Call:
## lm(formula = subt_occu_semantics_fm ~ mean_prop_distinct_occs, 
##     data = all_es_tidy2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3179 -0.4499  0.1155  0.3120  1.5309 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -0.1165     0.1539  -0.757 0.459041    
## mean_prop_distinct_occs   0.7341     0.1527   4.808 0.000141 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6798 on 18 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.5622, Adjusted R-squared:  0.5379 
## F-statistic: 23.11 on 1 and 18 DF,  p-value: 0.000141

y ~ x + m

# y ~ x + m: regress es_iat_sex_age_order_implicit_resid on both
# mean_prop_distinct_occs and subt_occu_semantics_fm, then print the
# model summary.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~
      mean_prop_distinct_occs + subt_occu_semantics_fm,
    data = all_es_tidy2
  )
)
## 
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs + 
##     subt_occu_semantics_fm, data = all_es_tidy2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.18854 -0.42849 -0.00183  0.48098  1.33172 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)              0.08296    0.17151   0.484    0.635
## mean_prop_distinct_occs  0.21081    0.25313   0.833    0.416
## subt_occu_semantics_fm   0.43765    0.25855   1.693    0.109
## 
## Residual standard error: 0.7457 on 17 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.4325, Adjusted R-squared:  0.3658 
## F-statistic: 6.479 on 2 and 17 DF,  p-value: 0.008101

Formal tests of mediation

Sobel test

# Sobel test of the indirect effect for the subtitle models:
# mean_prop_distinct_occs -> subt_occu_semantics_fm ->
# es_iat_sex_age_order_implicit_resid.
with(
  all_es_tidy2,
  multilevel::sobel(
    pred = mean_prop_distinct_occs,
    med = subt_occu_semantics_fm,
    out = es_iat_sex_age_order_implicit_resid
  )
)
## $`Mod1: Y~X`
##               Estimate Std. Error   t value    Pr(>|t|)
## (Intercept) 0.03198319  0.1773796 0.1803093 0.858924234
## pred        0.53207686  0.1759516 3.0239958 0.007294376
## 
## $`Mod2: Y~X+M`
##               Estimate Std. Error   t value  Pr(>|t|)
## (Intercept) 0.08295606  0.1715113 0.4836769 0.6347866
## pred        0.21080932  0.2531302 0.8328097 0.4164993
## med         0.43764747  0.2585511 1.6926927 0.1087585
## 
## $`Mod3: M~X`
##               Estimate Std. Error    t value     Pr(>|t|)
## (Intercept) -0.1164702  0.1539256 -0.7566653 0.4590405606
## pred         0.7340784  0.1526864  4.8077526 0.0001409921
## 
## $Indirect.Effect
## [1] 0.3212675
## 
## $SE
## [1] 0.2012165
## 
## $z.value
## [1] 1.596626
## 
## $N
## [1] 20

Bootstrap

library(robmed)

# Bootstrap test of the indirect effect (subtitle models).
# The bootstrap draws random resamples, so the seed is fixed first;
# otherwise the reported estimate and confidence interval change on every
# knit.
set.seed(1234)

all_es_tidy2 %>%
  fit_mediation(
    x = "mean_prop_distinct_occs",
    y = "es_iat_sex_age_order_implicit_resid",
    m = "subt_occu_semantics_fm"
  ) %>%
  test_mediation()
## Bootstrap results for indirect effect
## 
## Indirect effect (ab path):
##                          Data   Boot
## subt_occu_semantics_fm 0.2427 0.2908
## 
## 95 percent confidence interval:
##                           Lower  Upper
## subt_occu_semantics_fm -0.09183 0.7031
## 
## Number of bootstrap replicates: 5000

With age as a covariate

# Bootstrap test of the indirect effect (subtitle models), controlling for
# median_country_age as a covariate.
# The bootstrap draws random resamples, so the seed is fixed first;
# otherwise the reported estimate and confidence interval change on every
# knit.
set.seed(1234)

all_es_tidy2 %>%
  fit_mediation(
    x = "mean_prop_distinct_occs",
    y = "es_iat_sex_age_order_implicit_resid",
    m = "subt_occu_semantics_fm",
    covariates = "median_country_age"
  ) %>%
  test_mediation()
## Bootstrap results for indirect effect
## 
## Indirect effect (ab path):
##                          Data   Boot
## subt_occu_semantics_fm 0.1947 0.2143
## 
## 95 percent confidence interval:
##                          Lower Upper
## subt_occu_semantics_fm -0.1387 0.631
## 
## Number of bootstrap replicates: 5000

Wiki models

Sobel test

# Table for the Wikipedia models: the five columns those models need.
# It is derived from all_es_tidy2 (same joins, scaling and "zu" filter)
# instead of repeating the whole pipeline, so the two tables cannot drift
# apart. All selected columns are already present in all_es_tidy2.
all_es_tidy3 <- all_es_tidy2 %>%
  select(
    wiki_occu_semantics_fm,
    median_country_age,
    es_iat_sex_age_order_implicit_resid,
    mean_prop_distinct_occs,
    language_code
  )
# Sobel test of the indirect effect for the Wikipedia models:
# mean_prop_distinct_occs -> wiki_occu_semantics_fm ->
# es_iat_sex_age_order_implicit_resid.
with(
  all_es_tidy3,
  multilevel::sobel(
    pred = mean_prop_distinct_occs,
    med = wiki_occu_semantics_fm,
    out = es_iat_sex_age_order_implicit_resid
  )
)
## $`Mod1: Y~X`
##                 Estimate Std. Error     t value    Pr(>|t|)
## (Intercept) 0.0007337709  0.1712522 0.004284739 0.996618231
## pred        0.5706558112  0.1744658 3.270875531 0.003357305
## 
## $`Mod2: Y~X+M`
##               Estimate Std. Error    t value  Pr(>|t|)
## (Intercept) 0.01104695  0.1654187 0.06678176 0.9473586
## pred        0.30074741  0.2355332 1.27687901 0.2149583
## med         0.38646329  0.2357833 1.63906109 0.1154238
## 
## $`Mod3: M~X`
##                Estimate Std. Error    t value    Pr(>|t|)
## (Intercept) -0.02668605  0.1461817 -0.1825539 0.856747578
## pred         0.69840632  0.1489249  4.6896560 0.000100884
## 
## $Indirect.Effect
## [1] 0.2699084
## 
## $SE
## [1] 0.1744406
## 
## $z.value
## [1] 1.54728
## 
## $N
## [1] 25

Bootstrap

Bootstrap mediation models have more power: https://journals.sagepub.com/doi/pdf/10.1111/j.1467-9280.2007.01882.x

# Bootstrap test of the indirect effect (Wikipedia models). The result is
# stored so that its p-value can be extracted afterwards.
# The bootstrap draws random resamples, so the seed is fixed first;
# otherwise the reported estimate, confidence interval and p-value change
# on every knit.
set.seed(1234)

mediation_model_bootsrapped <- all_es_tidy3 %>%
  fit_mediation(
    x = "mean_prop_distinct_occs",
    y = "es_iat_sex_age_order_implicit_resid",
    m = "wiki_occu_semantics_fm"
  ) %>%
  test_mediation()

mediation_model_bootsrapped
## Bootstrap results for indirect effect
## 
## Indirect effect (ab path):
##                          Data   Boot
## wiki_occu_semantics_fm 0.3496 0.3399
## 
## 95 percent confidence interval:
##                          Lower  Upper
## wiki_occu_semantics_fm 0.02426 0.8618
## 
## Number of bootstrap replicates: 5000
# p-value of the bootstrapped indirect effect (Wikipedia models).
mediation_model_bootsrapped %>%
  p_value()
## [1] 0.0372

Occupation as mediator (roles reversed: mean_prop_distinct_occs as the mediator, subt_occu_semantics_fm as the predictor)

# Sobel test with the roles reversed relative to the subtitle model:
# subt_occu_semantics_fm is the predictor and mean_prop_distinct_occs is
# the mediator.
with(
  all_es_tidy2,
  multilevel::sobel(
    pred = subt_occu_semantics_fm,
    med = mean_prop_distinct_occs,
    out = es_iat_sex_age_order_implicit_resid
  )
)
## $`Mod1: Y~X`
##              Estimate Std. Error   t value    Pr(>|t|)
## (Intercept) 0.1164034  0.1653174 0.7041211 0.490367275
## pred        0.5990970  0.1696120 3.5321611 0.002380461
## 
## $`Mod2: Y~X+M`
##               Estimate Std. Error   t value  Pr(>|t|)
## (Intercept) 0.08295606  0.1715113 0.4836769 0.6347866
## pred        0.43764747  0.2585511 1.6926927 0.1087585
## med         0.21080932  0.2531302 0.8328097 0.4164993
## 
## $`Mod3: M~X`
##              Estimate Std. Error  t value     Pr(>|t|)
## (Intercept) 0.1586617  0.1552625 1.021893 0.3203701708
## pred        0.7658557  0.1592960 4.807753 0.0001409921
## 
## $Indirect.Effect
## [1] 0.1614495
## 
## $SE
## [1] 0.1967482
## 
## $z.value
## [1] 0.8205894
## 
## $N
## [1] 20

Additive regression models

# wiki
# Additive model: mean_prop_distinct_occs + wiki_occu_semantics_fm +
# median_country_age as predictors of the implicit IAT residual.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~
      mean_prop_distinct_occs + wiki_occu_semantics_fm + median_country_age,
    data = all_es_tidy2
  )
)
## 
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs + 
##     wiki_occu_semantics_fm + median_country_age, data = all_es_tidy2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.33644 -0.41351 -0.03424  0.67875  1.12992 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             -0.006843   0.144955  -0.047   0.9628  
## mean_prop_distinct_occs  0.204695   0.209075   0.979   0.3387  
## wiki_occu_semantics_fm   0.303686   0.208554   1.456   0.1601  
## median_country_age       0.446497   0.160836   2.776   0.0113 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.723 on 21 degrees of freedom
## Multiple R-squared:  0.555,  Adjusted R-squared:  0.4915 
## F-statistic: 8.732 on 3 and 21 DF,  p-value: 0.0005916
# Baseline additive model without the wiki_occu_semantics_fm term: only
# mean_prop_distinct_occs and median_country_age as predictors.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~
      mean_prop_distinct_occs + median_country_age,
    data = all_es_tidy2
  )
)
## 
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs + 
##     median_country_age, data = all_es_tidy2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.16469 -0.49085 -0.06985  0.55277  1.18407 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)   
## (Intercept)             -0.01612    0.14846  -0.109  0.91450   
## mean_prop_distinct_occs  0.40525    0.16125   2.513  0.01979 * 
## median_country_age       0.47998    0.16319   2.941  0.00755 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7412 on 22 degrees of freedom
## Multiple R-squared:  0.5101, Adjusted R-squared:  0.4656 
## F-statistic: 11.45 on 2 and 22 DF,  p-value: 0.0003899
# Additive model with lang_es_wiki in place of the occupation-semantics
# term, alongside mean_prop_distinct_occs and median_country_age.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~
      mean_prop_distinct_occs + lang_es_wiki + median_country_age,
    data = all_es_tidy2
  )
)
## 
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs + 
##     lang_es_wiki + median_country_age, data = all_es_tidy2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.13417 -0.47215  0.07784  0.39840  1.09875 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             -0.05389    0.13920  -0.387   0.7025  
## mean_prop_distinct_occs  0.38358    0.15029   2.552   0.0186 *
## lang_es_wiki             0.36448    0.17286   2.109   0.0472 *
## median_country_age       0.42476    0.15398   2.759   0.0118 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6892 on 21 degrees of freedom
## Multiple R-squared:  0.5957, Adjusted R-squared:  0.538 
## F-statistic: 10.31 on 3 and 21 DF,  p-value: 0.0002229
# subt
# Additive model: mean_prop_distinct_occs + subt_occu_semantics_fm +
# median_country_age as predictors of the implicit IAT residual.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~
      mean_prop_distinct_occs + subt_occu_semantics_fm + median_country_age,
    data = all_es_tidy2
  )
)
## 
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs + 
##     subt_occu_semantics_fm + median_country_age, data = all_es_tidy2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0514 -0.4102  0.1029  0.3519  1.1883 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)              0.06898    0.16851   0.409    0.688
## mean_prop_distinct_occs  0.18417    0.24904   0.740    0.470
## subt_occu_semantics_fm   0.37338    0.25830   1.446    0.168
## median_country_age       0.25227    0.19444   1.297    0.213
## 
## Residual standard error: 0.7311 on 16 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.4865, Adjusted R-squared:  0.3903 
## F-statistic: 5.054 on 3 and 16 DF,  p-value: 0.01188
# Additive model with lang_es_sub in place of the occupation-semantics
# term, alongside mean_prop_distinct_occs and median_country_age.
summary(
  lm(
    es_iat_sex_age_order_implicit_resid ~
      mean_prop_distinct_occs + lang_es_sub + median_country_age,
    data = all_es_tidy2
  )
)
## 
## Call:
## lm(formula = es_iat_sex_age_order_implicit_resid ~ mean_prop_distinct_occs + 
##     lang_es_sub + median_country_age, data = all_es_tidy2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2603 -0.3930  0.0297  0.4701  1.1567 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)              0.06848    0.15347   0.446   0.6608  
## mean_prop_distinct_occs  0.33204    0.16097   2.063   0.0539 .
## lang_es_sub              0.27296    0.16236   1.681   0.1100  
## median_country_age       0.30536    0.17192   1.776   0.0926 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7008 on 18 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.4948, Adjusted R-squared:  0.4106 
## F-statistic: 5.877 on 3 and 18 DF,  p-value: 0.005571