# CSV read into `language_df_with_ggi` below (relative path).
LANGUAGE_DF_PATH <- "by_language_df.csv"
# CSV supplying the `sYXab` column that becomes `career_hand_new` below.
# NOTE(review): absolute path into one user's home directory -- this will not
# resolve on another machine; confirm whether it should be project-relative.
LANGUAGE_DF_PATH_NEW <- "/Users/mollylewis/Documents/research/Projects/1_in_progress/IATLANG/analyses/2_pretrained_w2v_models/embedding_IAT_v2/es_career_separate.csv"

Language Level

Raw correlations

# Read the new per-language file and keep only the two columns needed
# downstream, renaming them in the same step:
#   language_code -> wiki_language_code
#   sYXab         -> career_hand_new
language_df_new <- read_csv(LANGUAGE_DF_PATH_NEW) %>%
  select(
    wiki_language_code = language_code,
    career_hand_new = sYXab
  )

# Load the existing per-language table, drop its `career_hand` column, and
# attach `career_hand_new` from `language_df_new`.
#
# The join key is stated explicitly: without `by`, left_join() joins on every
# column name the two tables happen to share (and prints a message), so a
# stray shared column would silently change which rows match.
# `wiki_language_code` is the only non-measure column kept in
# `language_df_new`. Rows of the left table with no match get NA for
# `career_hand_new`.
language_df_with_ggi <- read_csv(LANGUAGE_DF_PATH) %>%
  select(-career_hand) %>%
  left_join(language_df_new, by = "wiki_language_code")

# Columns entering the correlation matrix: drop `country_name` and every
# column whose name contains "weighted" or "stoet", then keep only the
# numeric columns that remain.
language_df_corr <- language_df_with_ggi %>%
  select(-country_name, -contains("weighted"), -contains("stoet")) %>%
  select_if(is.numeric)


# Pairwise correlations between all columns of `language_df_corr`; each pair
# uses the rows where both columns are non-missing.
corr_mat <- language_df_corr %>%
  cor(use = "pairwise.complete.obs")

# Matrix of p-values for the same column pairs, taken from the `p` element of
# the cor.mtest() result. Extra arguments are forwarded by cor.mtest().
# NOTE(review): `use` is presumably ignored by the underlying test -- confirm.
p.mat <- corrplot::cor.mtest(
  language_df_corr,
  conf.level = 1 - ALPHA,
  use = "pairwise.complete.obs"
)[["p"]]

# Draw the correlation matrix as coloured tiles with the coefficient printed
# in each cell; cells whose p-value exceeds ALPHA are left blank and the
# diagonal is omitted.
corrplot::corrplot(
  corr_mat,
  method = "color",
  col = cols,
  order = "original",
  # coefficient labels
  addCoef.col = "black",
  number.cex = .7,
  # significance masking
  p.mat = p.mat,
  sig.level = ALPHA,
  insig = "blank",
  # axis text labels
  tl.col = "black",
  tl.srt = 90,
  diag = FALSE
)

Regressions predicting behavioral data

Career language

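Behavioral IAT is correlated with the language measure. Note: in the output below the effect remains significant when controlling for median country age (p = .015); it only becomes marginal (p = .066) once GGI and the explicit measure are also included.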

predicting implicit

# Analysis subset: only languages that have a `career_hand_new` value,
# restricted to the identifier columns and the variables used in the models
# that are fit on `df`.
df <- language_df_with_ggi %>%
  filter(!is.na(career_hand_new)) %>%
  select(
    language_name,
    wiki_language_code,
    es_behavioral_iat_resid_simple,
    career_hand_new,
    wps,
    median_country_age
  )

# Simple regression of es_behavioral_iat_resid_simple on career_hand_new
# alone, fit on `df`; the model summary is printed.
summary(
  lm(
    formula = es_behavioral_iat_resid_simple ~ career_hand_new,
    data = df
  )
)
## 
## Call:
## lm(formula = es_behavioral_iat_resid_simple ~ career_hand_new, 
##     data = df)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.033500 -0.020209  0.001705  0.013565  0.032828 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     -0.016785   0.006662   -2.52   0.0209 *
## career_hand_new  0.036085   0.013463    2.68   0.0148 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02073 on 19 degrees of freedom
## Multiple R-squared:  0.2744, Adjusted R-squared:  0.2362 
## F-statistic: 7.185 on 1 and 19 DF,  p-value: 0.0148
# Same outcome as the simple model, adding median_country_age as a second
# predictor; fit on `df` and summarised.
summary(
  lm(
    formula = es_behavioral_iat_resid_simple ~
      career_hand_new + median_country_age,
    data = df
  )
)
## 
## Call:
## lm(formula = es_behavioral_iat_resid_simple ~ career_hand_new + 
##     median_country_age, data = df)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.037169 -0.007753 -0.001271  0.008321  0.037241 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.080890   0.020129  -4.019 0.000806 ***
## career_hand_new     0.029715   0.011079   2.682 0.015219 *  
## median_country_age  0.001772   0.000536   3.306 0.003930 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0168 on 18 degrees of freedom
## Multiple R-squared:  0.5485, Adjusted R-squared:  0.4983 
## F-statistic: 10.93 on 2 and 18 DF,  p-value: 0.0007795
# es_behavioral_iat_resid_simple predicted from career_hand_new and ggi.
# Fit on the full `language_df_with_ggi` table (not `df`); lm() drops rows
# with missing values in any model variable.
summary(
  lm(
    formula = es_behavioral_iat_resid_simple ~ career_hand_new + ggi,
    data = language_df_with_ggi
  )
)
## 
## Call:
## lm(formula = es_behavioral_iat_resid_simple ~ career_hand_new + 
##     ggi, data = language_df_with_ggi)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03847 -0.01477  0.00220  0.01249  0.03381 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     -0.04918    0.06489  -0.758   0.4588  
## career_hand_new  0.03417    0.01483   2.304   0.0341 *
## ggi              0.04779    0.09319   0.513   0.6147  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02116 on 17 degrees of freedom
##   (14 observations deleted due to missingness)
## Multiple R-squared:  0.3053, Adjusted R-squared:  0.2235 
## F-statistic: 3.735 on 2 and 17 DF,  p-value: 0.04523
# es_behavioral_iat_resid_simple predicted from career_hand_new,
# median_country_age and ggi together; fit on `language_df_with_ggi`.
summary(
  lm(
    formula = es_behavioral_iat_resid_simple ~
      career_hand_new + median_country_age + ggi,
    data = language_df_with_ggi
  )
)
## 
## Call:
## lm(formula = es_behavioral_iat_resid_simple ~ career_hand_new + 
##     median_country_age + ggi, data = language_df_with_ggi)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.033276 -0.008056 -0.001151  0.003874  0.043405 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)        -0.072175   0.052877  -1.365  0.19115   
## career_hand_new     0.029098   0.012076   2.410  0.02837 * 
## median_country_age  0.002379   0.000749   3.176  0.00586 **
## ggi                -0.045528   0.080758  -0.564  0.58074   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01708 on 16 degrees of freedom
##   (14 observations deleted due to missingness)
## Multiple R-squared:  0.5739, Adjusted R-squared:  0.494 
## F-statistic: 7.184 on 3 and 16 DF,  p-value: 0.002857
# Largest model for es_behavioral_iat_resid_simple: career_hand_new,
# median_country_age, ggi and es_behavioral_explicit_resid as predictors;
# fit on `language_df_with_ggi`.
# NOTE(review): this uses `es_behavioral_explicit_resid`, whereas the models
# under "predicting explicit" use `es_behavioral_explicit_resid_simple` --
# confirm the non-"_simple" column is intended here.
summary(
  lm(
    formula = es_behavioral_iat_resid_simple ~
      career_hand_new +
      median_country_age +
      ggi +
      es_behavioral_explicit_resid,
    data = language_df_with_ggi
  )
)
## 
## Call:
## lm(formula = es_behavioral_iat_resid_simple ~ career_hand_new + 
##     median_country_age + ggi + es_behavioral_explicit_resid, 
##     data = language_df_with_ggi)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.022852 -0.006838 -0.002470  0.009745  0.038340 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                  -0.1015404  0.0530657  -1.913  0.07497 . 
## career_hand_new               0.0236046  0.0119007   1.983  0.06593 . 
## median_country_age            0.0023306  0.0007103   3.281  0.00505 **
## ggi                          -0.0001391  0.0811528  -0.002  0.99866   
## es_behavioral_explicit_resid  0.0655850  0.0390487   1.680  0.11374   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01619 on 15 degrees of freedom
##   (14 observations deleted due to missingness)
## Multiple R-squared:  0.6414, Adjusted R-squared:  0.5457 
## F-statistic: 6.706 on 4 and 15 DF,  p-value: 0.002655
# Scatter plot of es_behavioral_iat_resid_simple against career_hand_new with
# a fitted linear trend line; each point is labelled with its
# wiki_language_code, with the label's x position offset by 0.1.
language_df_with_ggi %>%
  ggplot(aes(x = career_hand_new, y = es_behavioral_iat_resid_simple)) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_text(aes(x = career_hand_new + .1, label = wiki_language_code)) +
  theme_classic()

predicting explicit

# es_behavioral_explicit_resid_simple predicted from career_hand_new,
# median_country_age, wps and es_behavioral_iat_resid_simple; fit on
# `language_df_with_ggi` and summarised.
summary(
  lm(
    formula = es_behavioral_explicit_resid_simple ~
      career_hand_new +
      median_country_age +
      wps +
      es_behavioral_iat_resid_simple,
    data = language_df_with_ggi
  )
)
## 
## Call:
## lm(formula = es_behavioral_explicit_resid_simple ~ career_hand_new + 
##     median_country_age + wps + es_behavioral_iat_resid_simple, 
##     data = language_df_with_ggi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2084 -0.1215 -0.0119  0.1123  0.3394 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     0.30480    0.30803   0.989    0.337
## career_hand_new                -0.03346    0.14497  -0.231    0.820
## median_country_age             -0.01338    0.01437  -0.931    0.366
## wps                             0.33682    0.82903   0.406    0.690
## es_behavioral_iat_resid_simple  3.98263    2.60224   1.530    0.145
## 
## Residual standard error: 0.1707 on 16 degrees of freedom
##   (13 observations deleted due to missingness)
## Multiple R-squared:  0.1647, Adjusted R-squared:  -0.04417 
## F-statistic: 0.7885 on 4 and 16 DF,  p-value: 0.5494
# es_behavioral_explicit_resid_simple predicted from
# es_behavioral_iat_resid_simple alone; fit on `language_df_with_ggi`.
summary(
  lm(
    formula = es_behavioral_explicit_resid_simple ~
      es_behavioral_iat_resid_simple,
    data = language_df_with_ggi
  )
)
## 
## Call:
## lm(formula = es_behavioral_explicit_resid_simple ~ es_behavioral_iat_resid_simple, 
##     data = language_df_with_ggi)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36144 -0.12194  0.01977  0.10849  0.40263 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     0.01282    0.03184   0.403    0.690
## es_behavioral_iat_resid_simple  1.76773    1.09420   1.616    0.117
## 
## Residual standard error: 0.1705 on 29 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.08257,    Adjusted R-squared:  0.05093 
## F-statistic:  2.61 on 1 and 29 DF,  p-value: 0.117
# career_hand_new predicted from per_women_stem alone; fit on
# `language_df_with_ggi` and summarised.
summary(
  lm(
    formula = career_hand_new ~ per_women_stem,
    data = language_df_with_ggi
  )
)


# Three-column subset of the language table.
# NOTE(review): `m` is not referenced again in the visible code -- confirm it
# is used elsewhere before relying on it.
m <- language_df_with_ggi %>%
  select(es_behavioral_iat_resid_simple, career_hand_new, per_women_stem)

# Three fits on `language_df_with_ggi`:
#   m1: per_women_stem from career_hand_new
#   m2: es_behavioral_iat_resid_simple from career_hand_new
#   m3: per_women_stem from career_hand_new, es_behavioral_iat_resid_simple
#       and median_country_age
# NOTE(review): m1-m3 are reassigned a few lines further down without being
# used in between in the visible code -- confirm these fits are still needed.
m1 <- lm(per_women_stem ~ career_hand_new, data = language_df_with_ggi)
m2 <- lm(
  es_behavioral_iat_resid_simple ~ career_hand_new,
  data = language_df_with_ggi
)
m3 <- lm(
  per_women_stem ~
    career_hand_new + es_behavioral_iat_resid_simple + median_country_age,
  data = language_df_with_ggi
)

# Three fits on `language_df_with_ggi`, all with per_women_stem as a
# predictor:
#   m1: career_hand_new from per_women_stem
#   m2: ggi_stoet from per_women_stem
#   m3: ggi_stoet from per_women_stem and career_hand_new
# These assignments replace any earlier objects named m1, m2 and m3.
m1 <- lm(career_hand_new ~ per_women_stem, data = language_df_with_ggi)
m2 <- lm(ggi_stoet ~ per_women_stem, data = language_df_with_ggi)
m3 <- lm(
  ggi_stoet ~ per_women_stem + career_hand_new,
  data = language_df_with_ggi
)