Here we look at whether IAT scores correlate with language bias across a range of different languages. A number of different IAT measures are examined. The analysis is at the country level.

Read in behavior and language biases

# Country-to-language lookup table. The two compound language names are
# shortened to "Spanish" and "Dutch"; every other name (including NA) is
# passed through unchanged.
countries_langs <- "../../data/other/countries_lang.csv" %>%
  read_csv() %>%
  mutate(
    language_name = case_when(
      language_name == "Spanish; Castilian" ~ "Spanish",
      language_name == "Dutch; Flemish" ~ "Dutch",
      TRUE ~ language_name
    )
  )

# Country-level IAT behavioural measures, restricted to the five measure
# types listed here and annotated with language name and code.
# The join has no explicit `by`, so dplyr matches on whatever columns the
# two tables have in common. Filtering first is safe because `type` comes
# from the measures file, not from the language lookup.
IAT_behavior_measures <-
  read_csv("../behavior_IAT/IAT_behavior_measures.csv") %>%
  filter(
    type %in% c(
      "country_means_D_score",
      "country_gender_D_score",
      "country_gender_D_score_diff",
      "country_RT_mean",
      "country_RT_ratio"
    )
  ) %>%
  left_join(select(countries_langs, country_name, language_name, language_code))

# Language bias effect sizes based on the hand translations.
lang_bias <- read_csv("career_effect_sizes.csv")

# Language bias effect sizes based on the Google translations. Column names
# are supplied here rather than taken from the file.
# Rows for "he", "zu" and "th" are excluded; rows with a missing language
# code are dropped as well, exactly as chained `!=` comparisons would do.
# Further exclusions that were tried and are currently disabled:
# "ha", "id", "zh".
lang_bias_google <- read_csv(
  "google_translate/career_effect_sizes_google.csv",
  col_names = c("language_code", "test", "test_name", "lang_bias")
) %>%
  filter(
    !is.na(language_code),
    !language_code %in% c("he", "zu", "th")
  )

# Attach the hand-translation language bias to every behavioural measure
# and keep only rows that have a bias estimate.
# Character columns are converted to factors *before* rows are dropped, so
# the factor levels come from the full joined table.
# Inside filter(), `lang_bias` is the joined column, not the data frame.
d <- IAT_behavior_measures %>%
  left_join(lang_bias) %>%
  mutate_if(is.character, as.factor) %>%
  filter(!is.na(lang_bias))

There are 22 languages represented here.

Measures

Behavior

# Histogram of the behavioural measure values, one panel per measure type.
# The x axis is freed so that each panel gets its own range.
d %>%
  ggplot(aes(mean)) +
  geom_histogram() +
  theme_bw() +
  facet_wrap(~type, scales = "free_x")

Language - Hand translations

# Histogram of the hand-translation language bias estimates.
lang_bias %>%
  ggplot(aes(lang_bias)) +
  theme_bw() +
  geom_histogram()

Language - Google translations

# Histogram of the Google-translation language bias estimates.
lang_bias_google %>%
  ggplot(aes(lang_bias)) +
  theme_bw() +
  geom_histogram()

How correlated are the two language measures?

# Correlation between the two language bias measures, matched by language.
# After the join the Google estimate is `lang_bias.x` and the
# hand-translation estimate is `lang_bias.y`.
lang_bias_google %>%
  left_join(lang_bias, by = "language_code") %>%
  with(cor.test(lang_bias.x, lang_bias.y)) %>%
  tidy() %>%
  kable()
estimate statistic p.value parameter conf.low conf.high method alternative
0.7007927 4.167868 0.0005781 18 0.3743697 0.8726815 Pearson’s product-moment correlation two.sided

Correlation between measures - Hand-translations

# Scatter each behavioural measure against the hand-translation language
# bias, with a linear fit per sex, faceted by measure family and type.
d %>%
  mutate(
    # Coarse measure family: used for point shape and as the outer facet.
    type2 = case_when(
      type %in% c(
        "country_gender_D_score_diff",
        "country_means_D_score",
        "country_gender_D_score"
      ) ~ "D-score",
      type %in% c("country_RT_ratio", "country_RT_mean") ~ "RT",
      TRUE ~ "explicit"
    )
  ) %>%
  ggplot(
    aes(x = mean, y = lang_bias, group = sex, color = sex, shape = type2)
  ) +
  geom_point() +
  geom_smooth(method = "lm") +
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  facet_grid(. ~ type2 + type, scales = "free_x", drop = TRUE) +
  theme_bw()

Two effects: (1) the D-score is correlated with language bias (driven by males); (2) the explicit family-male association is correlated with language bias.

# Correlation test between the behavioural measure and language bias, run
# separately for every (type, sex) combination. Test metadata columns are
# dropped and rows are listed from smallest to largest p-value.
d %>%
  group_by(type, sex) %>%
  do(tidy(with(., cor.test(mean, lang_bias)))) %>%
  select(-c(parameter, method, alternative)) %>%
  arrange(p.value) %>%
  kable()
type sex estimate statistic p.value conf.low conf.high
country_gender_D_score m 0.3435225 2.1639976 0.0373728 0.0219463 0.6006810
country_means_D_score NA 0.2873735 1.7749964 0.0845964 -0.0404081 0.5593124
country_gender_D_score_diff NA -0.2438632 -1.4876262 0.1458043 -0.5262962 0.0870328
country_gender_D_score f 0.1533071 0.9178272 0.3649912 -0.1796351 0.4547373
country_RT_mean m -0.1024097 -0.6090661 0.5464140 -0.4127331 0.2292153
country_RT_mean f -0.0613774 -0.3637995 0.7181962 -0.3778813 0.2679709
country_RT_ratio NA -0.0359346 -0.2127295 0.8327721 -0.3558107 0.2914782

Correlation between measures - Google-translations

# Rebuild `d` with the Google-translation language bias in place of the
# hand-translation one; this overwrites the earlier `d`.
# Character columns are converted to factors *before* rows are dropped, so
# the factor levels come from the full joined table.
# Inside filter(), `lang_bias` is the joined column.
d <- IAT_behavior_measures %>%
  left_join(lang_bias_google) %>%
  mutate_if(is.character, as.factor) %>%
  filter(!is.na(lang_bias))

There are 24 languages represented here.

# Scatter each behavioural measure against the Google-translation language
# bias, with a linear fit per sex, faceted by measure family and type.
d %>%
  mutate(
    # Coarse measure family: used for point shape and as the outer facet.
    type2 = case_when(
      type %in% c(
        "country_gender_D_score_diff",
        "country_means_D_score",
        "country_gender_D_score"
      ) ~ "D-score",
      type %in% c("country_RT_ratio", "country_RT_mean") ~ "RT",
      TRUE ~ "explicit"
    )
  ) %>%
  ggplot(
    aes(x = mean, y = lang_bias, group = sex, color = sex, shape = type2)
  ) +
  geom_point() +
  geom_smooth(method = "lm") +
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  facet_grid(. ~ type2 + type, scales = "free_x", drop = TRUE) +
  theme_bw()

# Correlation test between the behavioural measure and the
# Google-translation language bias, run separately for every (type, sex)
# combination. Test metadata columns are dropped and rows are listed from
# smallest to largest p-value.
d %>%
  group_by(type, sex) %>%
  do(tidy(with(., cor.test(mean, lang_bias)))) %>%
  select(-c(parameter, method, alternative)) %>%
  arrange(p.value) %>%
  kable()
type sex estimate statistic p.value conf.low conf.high
country_RT_ratio NA 0.3745995 2.2494730 0.0317270 0.0359084 0.6361034
country_means_D_score NA 0.3057319 1.7878496 0.0835802 -0.0419837 0.5873883
country_gender_D_score f 0.2778738 1.6105637 0.1174119 -0.0723358 0.5670848
country_gender_D_score m 0.2150913 1.2262801 0.2293269 -0.1384405 0.5200019
country_RT_mean f 0.1430089 0.8045093 0.4272300 -0.2106418 0.4635589
country_RT_mean m 0.1375692 0.7733055 0.4451986 -0.2159383 0.4591912
country_gender_D_score_diff NA -0.0576810 -0.3216897 0.7498469 -0.3932036 0.2913984