Here we look at whether IAT scores correlate with language bias for a range of different languages. A number of different IAT measures are examined. Note that this analysis is at the language level (the IAT data are averaged across countries that speak the same language).

Read in behavior and language biases

# Country-to-language lookup table. Two compound language labels are
# shortened to a single name; every other label is kept as read.
countries_langs <- read_csv(file = "../../data/other/countries_lang.csv") %>%
  mutate(
    language_name = case_when(
      language_name == "Spanish; Castilian" ~ "Spanish",
      language_name == "Dutch; Flemish" ~ "Dutch",
      TRUE ~ language_name
    )
  )

# Behavioural IAT measures, averaged within each combination of measure
# type, language code and sex.
IAT_behavior_measures <- read_csv(
  file = "../behavior_IAT/IAT_behavior_measures.csv"
) %>%
  # Keep only the D-score and reaction-time measures.
  filter(
    type %in% c(
      "country_means_D_score",
      "country_gender_D_score",
      "country_gender_D_score_diff",
      "country_RT_mean",
      "country_RT_ratio"
    )
  ) %>%
  # Attach language information. No `by` is given, so the join uses whatever
  # column names the two tables share.
  # NOTE(review): presumably this is `country_name` only -- confirm.
  left_join(select(countries_langs, country_name, language_name, language_code)) %>%
  group_by(type, language_code, sex) %>%
  # summarise() peels off only the last grouping level, so the result is
  # still grouped by `type` and `language_code`.
  summarise(mean = mean(mean))

# Language bias effect sizes (the "hand translations" measure plotted below).
lang_bias <- read_csv(file = "career_effect_sizes.csv")

# Language bias effect sizes from the google_translate directory. Column
# names are supplied here rather than taken from the file.
# NOTE(review): this assumes the CSV has no header row -- confirm.
lang_bias_google <- read_csv(
  file = "google_translate/career_effect_sizes_google.csv",
  col_names = c("language_code", "test", "test_name", "lang_bias")
) %>%
  # Drop the rows coded "he", "zu" and "th".
  # Further exclusions that are currently disabled: "ha", "id", "zh".
  filter(language_code != "he" & language_code != "zu" & language_code != "th")


# Combine the language-level behavioural measures with the hand-translation
# language bias estimates, keeping only rows that have a bias estimate.
d <- IAT_behavior_measures %>%
  # Join on an explicit key instead of relying on the implicit natural join,
  # which silently changes meaning if either table gains a shared column.
  # NOTE(review): assumes `language_code` is the only column the two tables
  # have in common -- confirm against career_effect_sizes.csv.
  left_join(lang_bias, by = "language_code") %>%
  mutate_if(is.character, as.factor) %>%
  filter(!is.na(lang_bias))

There are 22 languages represented here.

Measures

Behavior

# Histogram of the behavioural means, one panel per measure type. Each panel
# gets its own x-axis range.
d %>%
  ggplot(mapping = aes(x = mean)) +
  geom_histogram() +
  theme_bw() +
  facet_wrap(~ type, scales = "free_x")

Language - Hand translations

# Histogram of the hand-translation language bias estimates.
ggplot(data = lang_bias, mapping = aes(x = lang_bias)) +
  theme_bw() +
  geom_histogram()

Language - Google translations

# Histogram of the Google-translation language bias estimates.
lang_bias_google %>%
  ggplot(mapping = aes(x = lang_bias)) +
  theme_bw() +
  geom_histogram()

How correlated are the two language measures?

# Correlation between the two language bias measures. After the join,
# `lang_bias.x` is the Google-translation estimate and `lang_bias.y` the
# hand-translation estimate for the same language code.
lang_bias_google %>%
  left_join(lang_bias, by = "language_code") %>%
  do({
    translation_cor <- cor.test(.$lang_bias.x, .$lang_bias.y)
    tidy(translation_cor)
  }) %>%
  kable()
estimate statistic p.value parameter conf.low conf.high method alternative
0.7007927 4.167868 0.0005781 18 0.3743697 0.8726815 Pearson’s product-moment correlation two.sided

Correlation between measures - Hand-translations

# Scatter plot of each behavioural measure (x) against language bias (y),
# with a linear fit per sex and one panel per measure family / measure type.
d %>%
  mutate(
    # Coarse family label for each measure type. "explicit" is the fallback
    # for any type outside the D-score and RT sets; none of the types kept
    # when IAT_behavior_measures is built currently falls into it.
    type2 = case_when(
      type %in% c(
        "country_gender_D_score_diff",
        "country_means_D_score",
        "country_gender_D_score"
      ) ~ "D-score",
      type %in% c("country_RT_ratio", "country_RT_mean") ~ "RT",
      TRUE ~ "explicit"
    )
  ) %>%
  ggplot(aes(x = mean, y = lang_bias, group = sex, color = sex, shape = type2)) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  facet_grid(. ~ type2 + type, scales = "free_x", drop = TRUE) +
  theme_bw()

Two effects: (1) D-score is correlated with language bias (driven by males); (2) explicit family-male association is correlated with language bias.

# Correlation between the behavioural mean and language bias, computed
# separately for each measure type and sex, listed by ascending p-value.
d %>%
  group_by(type, sex) %>%
  do({
    group_cor <- cor.test(.$mean, .$lang_bias)
    tidy(group_cor)
  }) %>%
  arrange(p.value) %>%
  # Drop the columns that are not shown in the printed table.
  select(-c(parameter, method, alternative)) %>%
  kable()
type sex estimate statistic p.value conf.low conf.high
country_gender_D_score m 0.4477692 2.2395419 0.0366428 0.0322487 0.7313172
country_means_D_score NA 0.3787269 1.8300407 0.0821900 -0.0510297 0.6901379
country_gender_D_score_diff NA -0.3476880 -1.6583734 0.1128442 -0.6709440 0.0866176
country_RT_mean m -0.2243384 -1.0295130 0.3155291 -0.5901306 0.2178775
country_gender_D_score f 0.1534496 0.6944725 0.4953750 -0.2867076 0.5401150
country_RT_mean f -0.0955694 -0.4293647 0.6722478 -0.4971464 0.3397276
country_RT_ratio NA -0.0351527 -0.1573048 0.8765819 -0.4500905 0.3922695

Correlation between measures - Google-translations

# Rebuild `d` with the Google-translation language bias estimates, keeping
# only rows that have a bias estimate. This overwrites the `d` used for the
# hand-translation analysis.
d <- IAT_behavior_measures %>%
  # Join on an explicit key instead of relying on the implicit natural join.
  # `language_code` is the only column name the two tables share
  # (type / language_code / sex / mean vs. the col_names given to read_csv).
  left_join(lang_bias_google, by = "language_code") %>%
  mutate_if(is.character, as.factor) %>%
  filter(!is.na(lang_bias))

There are 24 languages represented here.

# Scatter plot of each behavioural measure (x) against language bias (y),
# with a linear fit per sex and one panel per measure family / measure type.
d %>%
  mutate(
    # Coarse family label for each measure type. "explicit" is the fallback
    # for any type outside the D-score and RT sets; none of the types kept
    # when IAT_behavior_measures is built currently falls into it.
    type2 = case_when(
      type %in% c(
        "country_gender_D_score_diff",
        "country_means_D_score",
        "country_gender_D_score"
      ) ~ "D-score",
      type %in% c("country_RT_ratio", "country_RT_mean") ~ "RT",
      TRUE ~ "explicit"
    )
  ) %>%
  ggplot(aes(x = mean, y = lang_bias, group = sex, color = sex, shape = type2)) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  facet_grid(. ~ type2 + type, scales = "free_x", drop = TRUE) +
  theme_bw()

# Correlation between the behavioural mean and language bias, computed
# separately for each measure type and sex, listed by ascending p-value.
d %>%
  group_by(type, sex) %>%
  do({
    group_cor <- cor.test(.$mean, .$lang_bias)
    tidy(group_cor)
  }) %>%
  arrange(p.value) %>%
  # Drop the columns that are not shown in the printed table.
  select(-c(parameter, method, alternative)) %>%
  kable()
type sex estimate statistic p.value conf.low conf.high
country_gender_D_score m 0.4009012 2.0525596 0.0521991 -0.0029769 0.6923324
country_means_D_score NA 0.3602098 1.8111114 0.0838011 -0.0505291 0.6667262
country_gender_D_score_diff NA -0.3143286 -1.5530493 0.1346797 -0.6369594 0.1020019
country_RT_ratio NA 0.2759421 1.3465644 0.1918307 -0.1434192 0.6112932
country_gender_D_score f 0.1756703 0.8369825 0.4116025 -0.2450950 0.5407471
country_RT_mean m -0.0159656 -0.0748948 0.9409753 -0.4166787 0.3899425
country_RT_mean f 0.0116495 0.0546448 0.9569146 -0.3935968 0.4131049