Here we look at whether IAT scores correlate with language bias for a range of different languages. A number of different IAT measures are examined. This is at the country level.
Read in behavior and language biases
# Country-to-language lookup table. The two compound language labels are
# collapsed to their short names; every other label passes through unchanged.
countries_langs <- "../../data/other/countries_lang.csv" %>%
  read_csv() %>%
  mutate(
    language_name = case_when(
      language_name == "Spanish; Castilian" ~ "Spanish",
      language_name == "Dutch; Flemish" ~ "Dutch",
      TRUE ~ language_name
    )
  )
# IAT behavioral measures, restricted to the five D-score / RT summary types
# and annotated with each row's language name and code.
# The join has no explicit `by`, so it matches on whichever columns the two
# tables have in common. The type filter only touches a left-table column, so
# applying it before the join yields the same rows.
IAT_behavior_measures <- "../behavior_IAT/IAT_behavior_measures.csv" %>%
  read_csv() %>%
  filter(
    type %in% c(
      "country_means_D_score",
      "country_gender_D_score",
      "country_gender_D_score_diff",
      "country_RT_mean",
      "country_RT_ratio"
    )
  ) %>%
  left_join(
    select(countries_langs, country_name, language_name, language_code)
  )
# Language bias effect sizes (first language measure).
lang_bias <- read_csv(file = "career_effect_sizes.csv")

# Language bias effect sizes from the Google Translate file. Column names are
# supplied explicitly here rather than taken from the file.
# Hebrew (he), Zulu (zu) and Thai (th) codes are excluded; rows with a missing
# code are dropped as well, exactly as a chain of `!=` comparisons would do.
# Codes "ha", "id" and "zh" were also candidates for exclusion but are kept.
lang_bias_google <- "google_translate/career_effect_sizes_google.csv" %>%
  read_csv(col_names = c("language_code", "test", "test_name", "lang_bias")) %>%
  filter(
    !is.na(language_code),
    !(language_code %in% c("he", "zu", "th"))
  )
# Analysis table for the first language measure: behavioral measures joined to
# language bias (natural join on shared columns). Character columns become
# factors BEFORE the NA filter, so factor levels still include values whose
# rows are then dropped for lacking a language bias estimate.
d <- IAT_behavior_measures %>%
  left_join(lang_bias) %>%
  mutate_if(is.character, as.factor) %>%
  filter(!is.na(lang_bias))
There are 22 languages represented here.
# Distribution of the behavioral measure values, one panel per measure type,
# each panel with its own x-axis range.
d %>%
  ggplot(mapping = aes(x = mean)) +
  geom_histogram() +
  facet_wrap(~ type, scales = "free_x") +
  theme_bw()

# Distribution of the first language bias measure.
lang_bias %>%
  ggplot(mapping = aes(x = lang_bias)) +
  geom_histogram() +
  theme_bw()

# Distribution of the Google Translate language bias measure.
lang_bias_google %>%
  ggplot(mapping = aes(x = lang_bias)) +
  geom_histogram() +
  theme_bw()
How correlated are the two language measures?
# Pearson correlation between the two language bias measures, matched on
# language code. After the join the Google estimate is `lang_bias.x` and the
# other estimate is `lang_bias.y`.
lang_bias_google %>%
  left_join(lang_bias, by = "language_code") %>%
  with(cor.test(lang_bias.x, lang_bias.y)) %>%
  tidy() %>%
  kable()
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| 0.7007927 | 4.167868 | 0.0005781 | 18 | 0.3743697 | 0.8726815 | Pearson’s product-moment correlation | two.sided |
# Scatter plot of each behavioral measure against language bias, with a linear
# fit per sex, faceted by measure family (`type2`) and measure type.
# `type2` groups the measure types into "D-score" and "RT"; any other type
# falls through to "explicit".
d %>%
  mutate(
    type2 = case_when(
      type %in% c(
        "country_gender_D_score_diff",
        "country_means_D_score",
        "country_gender_D_score"
      ) ~ "D-score",
      type %in% c("country_RT_ratio", "country_RT_mean") ~ "RT",
      TRUE ~ "explicit"
    )
  ) %>%
  ggplot(
    aes(x = mean, y = lang_bias, group = sex, color = sex, shape = type2)
  ) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  facet_grid(. ~ type2 + type, scales = "free_x", drop = TRUE) +
  theme_bw()
Two effects: (1) D-score is correlated with language bias (driven by males); (2) explicit family-male association is correlated with language bias.
# Correlation between the behavioral measure and language bias, computed
# separately for every measure type x sex combination and listed from the
# smallest p-value to the largest. The test metadata columns are dropped to
# keep the table narrow.
d %>%
  group_by(type, sex) %>%
  do(tidy(cor.test(x = .$mean, y = .$lang_bias))) %>%
  select(-c(parameter, method, alternative)) %>%
  arrange(p.value) %>%
  kable()
| type | sex | estimate | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| country_gender_D_score | m | 0.3435225 | 2.1639976 | 0.0373728 | 0.0219463 | 0.6006810 |
| country_means_D_score | NA | 0.2873735 | 1.7749964 | 0.0845964 | -0.0404081 | 0.5593124 |
| country_gender_D_score_diff | NA | -0.2438632 | -1.4876262 | 0.1458043 | -0.5262962 | 0.0870328 |
| country_gender_D_score | f | 0.1533071 | 0.9178272 | 0.3649912 | -0.1796351 | 0.4547373 |
| country_RT_mean | m | -0.1024097 | -0.6090661 | 0.5464140 | -0.4127331 | 0.2292153 |
| country_RT_mean | f | -0.0613774 | -0.3637995 | 0.7181962 | -0.3778813 | 0.2679709 |
| country_RT_ratio | NA | -0.0359346 | -0.2127295 | 0.8327721 | -0.3558107 | 0.2914782 |
# Rebuild the analysis table using the Google Translate language measure.
# `d` is overwritten. Character columns are converted to factors before rows
# without a language bias estimate are removed, so the factor levels reflect
# the pre-filter data.
d <- IAT_behavior_measures %>%
  left_join(lang_bias_google) %>%
  mutate_if(is.character, as.factor) %>%
  filter(!is.na(lang_bias))
There are 24 languages represented here.
# Scatter plot of each behavioral measure against the language bias currently
# held in `d`, with a linear fit per sex, faceted by measure family (`type2`)
# and measure type.
# `type2` groups the measure types into "D-score" and "RT"; any other type
# falls through to "explicit".
d %>%
  mutate(
    type2 = case_when(
      type %in% c(
        "country_gender_D_score_diff",
        "country_means_D_score",
        "country_gender_D_score"
      ) ~ "D-score",
      type %in% c("country_RT_ratio", "country_RT_mean") ~ "RT",
      TRUE ~ "explicit"
    )
  ) %>%
  ggplot(
    aes(x = mean, y = lang_bias, group = sex, color = sex, shape = type2)
  ) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  facet_grid(. ~ type2 + type, scales = "free_x", drop = TRUE) +
  theme_bw()
# Correlation between the behavioral measure and the language bias currently
# held in `d`, computed separately for every measure type x sex combination
# and listed from the smallest p-value to the largest. The test metadata
# columns are dropped to keep the table narrow.
d %>%
  group_by(type, sex) %>%
  do(tidy(cor.test(x = .$mean, y = .$lang_bias))) %>%
  select(-c(parameter, method, alternative)) %>%
  arrange(p.value) %>%
  kable()
| type | sex | estimate | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| country_RT_ratio | NA | 0.3745995 | 2.2494730 | 0.0317270 | 0.0359084 | 0.6361034 |
| country_means_D_score | NA | 0.3057319 | 1.7878496 | 0.0835802 | -0.0419837 | 0.5873883 |
| country_gender_D_score | f | 0.2778738 | 1.6105637 | 0.1174119 | -0.0723358 | 0.5670848 |
| country_gender_D_score | m | 0.2150913 | 1.2262801 | 0.2293269 | -0.1384405 | 0.5200019 |
| country_RT_mean | f | 0.1430089 | 0.8045093 | 0.4272300 | -0.2106418 | 0.4635589 |
| country_RT_mean | m | 0.1375692 | 0.7733055 | 0.4451986 | -0.2159383 | 0.4591912 |
| country_gender_D_score_diff | NA | -0.0576810 | -0.3216897 | 0.7498469 | -0.3932036 | 0.2913984 |