# Input files ----

# Per-language table; resolved relative to the current working directory.
LANGUAGE_DF_PATH <- "by_language_df.csv"
# CSV whose `sYXab` column is loaded below as `career_hand_new`.
# NOTE(review): absolute, machine-specific path -- the script only runs where
# this directory exists; consider a project-relative path.
LANGUAGE_DF_PATH_NEW <- "/Users/mollylewis/Documents/research/Projects/1_in_progress/IATLANG/analyses/2_pretrained_w2v_models/embedding_IAT_v2/es_career_separate.csv"

# Read the new estimates and keep only the language code and the renamed
# `career_hand_new` column.
language_df_new <- read_csv(LANGUAGE_DF_PATH_NEW) %>%
  rename(
    career_hand_new = sYXab,
    wiki_language_code = language_code
  ) %>%
  select(wiki_language_code, career_hand_new)

# Drop the existing `career_hand` column from the per-language table and attach
# `career_hand_new` in its place.
# The join key is stated explicitly so the join cannot silently pick up other
# shared column names (assumes `wiki_language_code` is the only column the two
# tables are meant to share -- TODO confirm).
language_df_with_ggi <- read_csv(LANGUAGE_DF_PATH) %>%
  select(-career_hand) %>%
  left_join(language_df_new, by = "wiki_language_code")
# Pairwise correlations between the numeric measures ----

# Numeric columns only, after removing `country_name` and every column whose
# name contains "weighted" or "stoet".
language_df_corr <- select(
  select_if(select(language_df_with_ggi, -country_name), is.numeric),
  -contains("weighted"),
  -contains("stoet")
)

# Correlation matrix computed on pairwise-complete observations.
corr_mat <- cor(language_df_corr, use = "pairwise.complete.obs")

# Matrix of p-values for the same pairs of columns.
p.mat <- corrplot::cor.mtest(
  language_df_corr,
  conf.level = 1 - ALPHA,
  use = "pairwise.complete.obs"
)[["p"]]

# Colour-coded plot with coefficients printed in each cell; cells with
# p-values above ALPHA are left blank and the diagonal is not drawn.
corrplot::corrplot(
  corr_mat,
  method = "color",
  col = cols,
  order = "original",
  number.cex = .7,
  addCoef.col = "black",
  p.mat = p.mat,
  sig.level = ALPHA,
  insig = "blank",
  tl.col = "black",
  tl.srt = 90,
  diag = FALSE
)
Behavioral IAT is correlated with the language measure, but the correlation goes away when you control for mean age.
# Analysis subset: the columns listed below, restricted to rows where
# `career_hand_new` is present.
df <- filter(
  select(
    language_df_with_ggi,
    language_name, wiki_language_code, es_behavioral_iat_resid_simple,
    career_hand_new, wps, median_country_age
  ),
  !is.na(career_hand_new)
)

# Simple regression of the behavioral IAT measure on `career_hand_new`.
summary(
  lm(es_behavioral_iat_resid_simple ~ career_hand_new,
     data = df)
)
##
## Call:
## lm(formula = es_behavioral_iat_resid_simple ~ career_hand_new,
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.033500 -0.020209 0.001705 0.013565 0.032828
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.016785 0.006662 -2.52 0.0209 *
## career_hand_new 0.036085 0.013463 2.68 0.0148 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02073 on 19 degrees of freedom
## Multiple R-squared: 0.2744, Adjusted R-squared: 0.2362
## F-statistic: 7.185 on 1 and 19 DF, p-value: 0.0148
# Same outcome and predictor as the simple model, with `median_country_age`
# added as a covariate; fitted on `df`.
summary(
  lm(es_behavioral_iat_resid_simple ~ career_hand_new + median_country_age,
     data = df)
)
##
## Call:
## lm(formula = es_behavioral_iat_resid_simple ~ career_hand_new +
## median_country_age, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.037169 -0.007753 -0.001271 0.008321 0.037241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.080890 0.020129 -4.019 0.000806 ***
## career_hand_new 0.029715 0.011079 2.682 0.015219 *
## median_country_age 0.001772 0.000536 3.306 0.003930 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0168 on 18 degrees of freedom
## Multiple R-squared: 0.5485, Adjusted R-squared: 0.4983
## F-statistic: 10.93 on 2 and 18 DF, p-value: 0.0007795
# Behavioral IAT measure regressed on `career_hand_new` and `ggi`, fitted on
# the full joined table rather than on `df`.
summary(
  lm(es_behavioral_iat_resid_simple ~ career_hand_new + ggi,
     data = language_df_with_ggi)
)
##
## Call:
## lm(formula = es_behavioral_iat_resid_simple ~ career_hand_new +
## ggi, data = language_df_with_ggi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03847 -0.01477 0.00220 0.01249 0.03381
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.04918 0.06489 -0.758 0.4588
## career_hand_new 0.03417 0.01483 2.304 0.0341 *
## ggi 0.04779 0.09319 0.513 0.6147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02116 on 17 degrees of freedom
## (14 observations deleted due to missingness)
## Multiple R-squared: 0.3053, Adjusted R-squared: 0.2235
## F-statistic: 3.735 on 2 and 17 DF, p-value: 0.04523
# Behavioral IAT measure regressed on `career_hand_new`, `median_country_age`
# and `ggi` together, fitted on the full joined table.
summary(
  lm(es_behavioral_iat_resid_simple ~ career_hand_new + median_country_age + ggi ,
     data = language_df_with_ggi)
)
##
## Call:
## lm(formula = es_behavioral_iat_resid_simple ~ career_hand_new +
## median_country_age + ggi, data = language_df_with_ggi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.033276 -0.008056 -0.001151 0.003874 0.043405
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.072175 0.052877 -1.365 0.19115
## career_hand_new 0.029098 0.012076 2.410 0.02837 *
## median_country_age 0.002379 0.000749 3.176 0.00586 **
## ggi -0.045528 0.080758 -0.564 0.58074
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01708 on 16 degrees of freedom
## (14 observations deleted due to missingness)
## Multiple R-squared: 0.5739, Adjusted R-squared: 0.494
## F-statistic: 7.184 on 3 and 16 DF, p-value: 0.002857
# Extends the three-predictor model with `es_behavioral_explicit_resid` as an
# additional predictor; fitted on the full joined table.
summary(
  lm(es_behavioral_iat_resid_simple ~ career_hand_new + median_country_age + ggi + es_behavioral_explicit_resid,
     data = language_df_with_ggi)
)
##
## Call:
## lm(formula = es_behavioral_iat_resid_simple ~ career_hand_new +
## median_country_age + ggi + es_behavioral_explicit_resid,
## data = language_df_with_ggi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.022852 -0.006838 -0.002470 0.009745 0.038340
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1015404 0.0530657 -1.913 0.07497 .
## career_hand_new 0.0236046 0.0119007 1.983 0.06593 .
## median_country_age 0.0023306 0.0007103 3.281 0.00505 **
## ggi -0.0001391 0.0811528 -0.002 0.99866
## es_behavioral_explicit_resid 0.0655850 0.0390487 1.680 0.11374
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01619 on 15 degrees of freedom
## (14 observations deleted due to missingness)
## Multiple R-squared: 0.6414, Adjusted R-squared: 0.5457
## F-statistic: 6.706 on 4 and 15 DF, p-value: 0.002655
# Scatter plot of the behavioral IAT measure against `career_hand_new`, with a
# linear fit and each point labelled by its language code (label shifted 0.1
# to the right of the point).
ggplot(
  data = language_df_with_ggi,
  mapping = aes(x = career_hand_new, y = es_behavioral_iat_resid_simple)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_text(
    mapping = aes(label = wiki_language_code, x = career_hand_new + .1)
  ) +
  theme_classic()
# Explicit measure as the outcome, regressed on `career_hand_new`,
# `median_country_age`, `wps` and the behavioral IAT measure.
summary(
  lm(es_behavioral_explicit_resid_simple ~ career_hand_new + median_country_age + wps + es_behavioral_iat_resid_simple,
     data = language_df_with_ggi)
)
##
## Call:
## lm(formula = es_behavioral_explicit_resid_simple ~ career_hand_new +
## median_country_age + wps + es_behavioral_iat_resid_simple,
## data = language_df_with_ggi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2084 -0.1215 -0.0119 0.1123 0.3394
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.30480 0.30803 0.989 0.337
## career_hand_new -0.03346 0.14497 -0.231 0.820
## median_country_age -0.01338 0.01437 -0.931 0.366
## wps 0.33682 0.82903 0.406 0.690
## es_behavioral_iat_resid_simple 3.98263 2.60224 1.530 0.145
##
## Residual standard error: 0.1707 on 16 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1647, Adjusted R-squared: -0.04417
## F-statistic: 0.7885 on 4 and 16 DF, p-value: 0.5494
# Explicit measure regressed on the behavioral IAT measure alone.
summary(
  lm(es_behavioral_explicit_resid_simple ~ es_behavioral_iat_resid_simple ,
     data = language_df_with_ggi)
)
##
## Call:
## lm(formula = es_behavioral_explicit_resid_simple ~ es_behavioral_iat_resid_simple,
## data = language_df_with_ggi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36144 -0.12194 0.01977 0.10849 0.40263
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01282 0.03184 0.403 0.690
## es_behavioral_iat_resid_simple 1.76773 1.09420 1.616 0.117
##
## Residual standard error: 0.1705 on 29 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.08257, Adjusted R-squared: 0.05093
## F-statistic: 2.61 on 1 and 29 DF, p-value: 0.117
# `career_hand_new` regressed on `per_women_stem`.
summary(
  lm(career_hand_new ~ per_women_stem ,
     data = language_df_with_ggi)
)
# Three-column subset: behavioral IAT measure, `career_hand_new`,
# `per_women_stem`.
m <- select(
  language_df_with_ggi,
  es_behavioral_iat_resid_simple, career_hand_new, per_women_stem
)

# First set of models. These were originally assigned to m1/m2/m3 and then
# immediately overwritten by the second set below, so the fits were lost
# before they could be inspected; they now have their own names.
m_stem_career <- lm(per_women_stem ~ career_hand_new,
                    data = language_df_with_ggi)
m_iat_career <- lm(es_behavioral_iat_resid_simple ~ career_hand_new,
                   data = language_df_with_ggi)
m_stem_full <- lm(
  per_women_stem ~ career_hand_new + es_behavioral_iat_resid_simple + median_country_age,
  data = language_df_with_ggi
)

# Second set of models; m1/m2/m3 end up holding exactly these fits, as before.
m1 <- lm(career_hand_new ~ per_women_stem, data = language_df_with_ggi)
m2 <- lm(ggi_stoet ~ per_women_stem, data = language_df_with_ggi)
m3 <- lm(ggi_stoet ~ per_women_stem + career_hand_new,
         data = language_df_with_ggi)