Here we look at whether IAT scores correlate with language bias for a range of different languages. A number of different IAT measures are examined. Note that this is at the language level (averaging across countries that speak the same language for the IAT data).
Read in the behavioral IAT measures and the language bias estimates.
# Lookup table read from countries_lang.csv. Two compound language labels are
# collapsed to their short form; every other label (including NA) passes
# through unchanged.
countries_langs <- read_csv("../../data/other/countries_lang.csv") %>%
  mutate(
    language_name = case_when(
      language_name == "Spanish; Castilian" ~ "Spanish",
      language_name == "Dutch; Flemish" ~ "Dutch",
      TRUE ~ language_name
    )
  )
# Language-level IAT behavioral measures.
#
# Steps:
#   1. Read the per-country measures and attach language_name / language_code
#      from `countries_langs`. The join key is implicit (all shared column
#      names) -- presumably `country_name`; TODO confirm against the CSV header.
#   2. Keep only the five measure types analysed below.
#   3. Average `mean` within each (type, language_code, sex) cell, i.e. across
#      the countries that share a language. `mean()` is called without
#      `na.rm`, so a cell containing an NA value yields NA.
#   4. Drop the grouping that `summarize()` leaves behind. Without this the
#      result stays grouped by `type` and `language_code`, so downstream
#      `mutate_if(is.character, as.factor)` silently skips those two columns
#      and every later verb runs group-wise.
IAT_behavior_measures <- read_csv("../behavior_IAT/IAT_behavior_measures.csv") %>%
  left_join(countries_langs %>% select(country_name, language_name, language_code)) %>%
  filter(type %in% c("country_means_D_score",
                     "country_gender_D_score",
                     "country_gender_D_score_diff",
                     "country_RT_mean",
                     "country_RT_ratio")) %>%
  group_by(type, language_code, sex) %>%
  summarize(mean = mean(mean)) %>%
  ungroup()
# Language bias effect sizes, read with readr's default header handling.
lang_bias <- read_csv("career_effect_sizes.csv")

# Google Translate based effect sizes. Column names are supplied explicitly
# here rather than taken from the file.
# Rows with language_code "he", "zu" or "th" are excluded (rows whose
# language_code is NA are dropped by these comparisons as well).
# Additional exclusions that are currently commented out: "ha", "id", "zh".
lang_bias_google <- read_csv(
  "google_translate/career_effect_sizes_google.csv",
  col_names = c("language_code", "test", "test_name", "lang_bias")
) %>%
  filter(
    language_code != "he" &
      language_code != "zu" &
      language_code != "th"
  )
# Analysis table: every language-level IAT measure paired with that language's
# bias estimate from `lang_bias`.
# The join key is implicit (all column names shared by the two tables) --
# presumably just `language_code`; TODO confirm against career_effect_sizes.csv.
# Inside filter(), `lang_bias` refers to the joined column, so languages with
# no bias estimate are removed.
d <- IAT_behavior_measures %>%
  left_join(lang_bias) %>%
  mutate_if(is.character, as.factor) %>%
  filter(!is.na(lang_bias))
There are 22 languages represented here.
# Distribution of the language-level IAT values, one panel per measure type
# (each panel gets its own x range).
d %>%
  ggplot(mapping = aes(x = mean)) +
  theme_bw() +
  facet_wrap(~type, scales = "free_x") +
  geom_histogram()

# Distribution of the language bias estimates.
lang_bias %>%
  ggplot(mapping = aes(x = lang_bias)) +
  theme_bw() +
  geom_histogram()

# Distribution of the Google Translate based language bias estimates.
lang_bias_google %>%
  ggplot(mapping = aes(x = lang_bias)) +
  theme_bw() +
  geom_histogram()
How correlated are the two language measures?
# Pearson correlation between the two language bias measures, matched on
# language_code. After the join `lang_bias.x` is the Google Translate estimate
# and `lang_bias.y` is the estimate from `lang_bias`.
lang_bias_google %>%
  left_join(lang_bias, by = "language_code") %>%
  do(tidy(cor.test(.$lang_bias.x, .$lang_bias.y))) %>%
  kable()
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| 0.7007927 | 4.167868 | 0.0005781 | 18 | 0.3743697 | 0.8726815 | Pearson’s product-moment correlation | two.sided |
# Language bias against each IAT measure, with a separate linear fit per sex.
# `type2` is a coarse family label used for the point shape and as the outer
# facet: the three D-score measures -> "D-score", the two RT measures -> "RT",
# anything else -> "explicit".
d %>%
  mutate(
    type2 = case_when(
      type %in% c("country_gender_D_score_diff",
                  "country_means_D_score",
                  "country_gender_D_score") ~ "D-score",
      type %in% c("country_RT_ratio", "country_RT_mean") ~ "RT",
      TRUE ~ "explicit"
    )
  ) %>%
  ggplot(aes(x = mean, y = lang_bias, group = sex, color = sex, shape = type2)) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  facet_grid(. ~ type2 + type, scales = "free_x", drop = TRUE) +
  theme_bw()
Two effects: (1) the D-score is correlated with language bias (driven by males); (2) the explicit family-male association is correlated with language bias.
# Correlation between the IAT measure and language bias, computed separately
# for every (type, sex) combination and listed from smallest to largest
# p-value. The parameter / method / alternative columns are dropped from the
# printed table.
d %>%
  group_by(type, sex) %>%
  do(tidy(cor.test(.$mean, .$lang_bias))) %>%
  select(-c(parameter, method, alternative)) %>%
  arrange(p.value) %>%
  kable()
| type | sex | estimate | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| country_gender_D_score | m | 0.4477692 | 2.2395419 | 0.0366428 | 0.0322487 | 0.7313172 |
| country_means_D_score | NA | 0.3787269 | 1.8300407 | 0.0821900 | -0.0510297 | 0.6901379 |
| country_gender_D_score_diff | NA | -0.3476880 | -1.6583734 | 0.1128442 | -0.6709440 | 0.0866176 |
| country_RT_mean | m | -0.2243384 | -1.0295130 | 0.3155291 | -0.5901306 | 0.2178775 |
| country_gender_D_score | f | 0.1534496 | 0.6944725 | 0.4953750 | -0.2867076 | 0.5401150 |
| country_RT_mean | f | -0.0955694 | -0.4293647 | 0.6722478 | -0.4971464 | 0.3397276 |
| country_RT_ratio | NA | -0.0351527 | -0.1573048 | 0.8765819 | -0.4500905 | 0.3922695 |
# Rebuild the analysis table `d`, this time pairing each language-level IAT
# measure with the Google Translate based bias estimate. This overwrites the
# earlier `d`.
# The key is given explicitly: `language_code` is the only column the two
# tables share (type / language_code / sex / mean on the left;
# language_code / test / test_name / lang_bias on the right), so this matches
# the implicit natural join while no longer depending on column-name overlap.
# Inside filter(), `lang_bias` is the joined column; languages without a
# Google estimate are removed.
d <- IAT_behavior_measures %>%
  left_join(lang_bias_google, by = "language_code") %>%
  mutate_if(is.character, as.factor) %>%
  filter(!is.na(lang_bias))
There are 24 languages represented here.
# Language bias against each IAT measure for the current `d`, with a separate
# linear fit per sex.
# `type2` is a coarse family label used for the point shape and as the outer
# facet: the three D-score measures -> "D-score", the two RT measures -> "RT",
# anything else -> "explicit".
d %>%
  mutate(
    type2 = case_when(
      type %in% c("country_gender_D_score_diff",
                  "country_means_D_score",
                  "country_gender_D_score") ~ "D-score",
      type %in% c("country_RT_ratio", "country_RT_mean") ~ "RT",
      TRUE ~ "explicit"
    )
  ) %>%
  ggplot(aes(x = mean, y = lang_bias, group = sex, color = sex, shape = type2)) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  facet_grid(. ~ type2 + type, scales = "free_x", drop = TRUE) +
  theme_bw()
# Correlation between the IAT measure and language bias for the current `d`,
# computed separately for every (type, sex) combination and listed from
# smallest to largest p-value. The parameter / method / alternative columns
# are dropped from the printed table.
d %>%
  group_by(type, sex) %>%
  do(tidy(cor.test(.$mean, .$lang_bias))) %>%
  select(-c(parameter, method, alternative)) %>%
  arrange(p.value) %>%
  kable()
| type | sex | estimate | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| country_gender_D_score | m | 0.4009012 | 2.0525596 | 0.0521991 | -0.0029769 | 0.6923324 |
| country_means_D_score | NA | 0.3602098 | 1.8111114 | 0.0838011 | -0.0505291 | 0.6667262 |
| country_gender_D_score_diff | NA | -0.3143286 | -1.5530493 | 0.1346797 | -0.6369594 | 0.1020019 |
| country_RT_ratio | NA | 0.2759421 | 1.3465644 | 0.1918307 | -0.1434192 | 0.6112932 |
| country_gender_D_score | f | 0.1756703 | 0.8369825 | 0.4116025 | -0.2450950 | 0.5407471 |
| country_RT_mean | m | -0.0159656 | -0.0748948 | 0.9409753 | -0.4166787 | 0.3899425 |
| country_RT_mean | f | 0.0116495 | 0.0546448 | 0.9569146 | -0.3935968 | 0.4131049 |