Comparing behavior vs. language

Measures
Correlation between measures - Hand-translations
Correlation between measures - Google-translations

Here we look at whether IAT scores correlate with language bias for a range of different languages. A number of different IAT measures are examined. Note that this analysis is at the language level (the IAT data are averaged across countries that speak the same language).

Read in behavior and language biases

# Country-to-language lookup table. The two compound language labels
# "Spanish; Castilian" and "Dutch; Flemish" are collapsed to "Spanish" and
# "Dutch"; every other label is kept as read.
countries_langs <- read_csv("../../data/other/countries_lang.csv") %>%
  mutate(
    language_name = case_when(
      language_name == "Spanish; Castilian" ~ "Spanish",
      language_name == "Dutch; Flemish" ~ "Dutch",
      TRUE ~ language_name
    )
  )

# Language-level IAT measures: keep the five measure types of interest,
# attach language information to each row, then average the `mean` column
# within every type x language_code x sex cell.
# No `by` is given to the join, so dplyr matches on every column name the
# two tables share -- presumably `country_name`; TODO confirm.
# summarise() only removes the last grouping variable, so the result is
# still grouped by `type` and `language_code`.
IAT_behavior_measures <- read_csv("../behavior_IAT/IAT_behavior_measures.csv") %>%
  filter(
    type %in% c(
      "country_means_D_score",
      "country_gender_D_score",
      "country_gender_D_score_diff",
      "country_RT_mean",
      "country_RT_ratio"
    )
  ) %>%
  left_join(select(countries_langs, country_name, language_name, language_code)) %>%
  group_by(type, language_code, sex) %>%
  summarise(mean = mean(mean))

# Language bias effect sizes based on the hand translations.
lang_bias <- read_csv("career_effect_sizes.csv")

# Language bias effect sizes based on the Google translations. Column names
# are supplied explicitly, so the first line of the file is read as data.
# Rows with language codes "he", "zu" and "th" are excluded.
lang_bias_google <- read_csv(
  "google_translate/career_effect_sizes_google.csv",
  col_names = c("language_code", "test", "test_name", "lang_bias")
) %>%
  filter(
    language_code != "he" &
      language_code != "zu" &
      language_code != "th"
  )
# Further exclusions, currently commented out:
# ,language_code !=  "ha",language_code !=  "id",language_code !=  "zh"


# Attach the hand-translation language bias to the language-level IAT
# measures. The join key is left implicit (all shared column names).
# Character columns are then passed through as.factor by mutate_if, and
# rows with no language bias value are dropped.
d <- IAT_behavior_measures %>%
  left_join(lang_bias) %>%
  mutate_if(is.character, as.factor) %>%
  filter(!is.na(lang_bias))

There are 22 languages represented here.

Measures

Behavior

# Histogram of the language-level behavioral means, one panel per IAT
# measure type; each panel has its own x-axis range.
d %>%
  ggplot(aes(x = mean)) +
  geom_histogram() +
  facet_wrap(~ type, scales = "free_x") +
  theme_bw()

Language - Hand translations

# Histogram of the hand-translation language bias values.
lang_bias %>%
  ggplot(aes(x = lang_bias)) +
  geom_histogram() +
  theme_bw()

Language - Google translations

# Histogram of the Google-translation language bias values.
lang_bias_google %>%
  ggplot(aes(x = lang_bias)) +
  geom_histogram() +
  theme_bw()

How correlated are the two language measures?

# Correlation test between the two language bias measures, matched on
# language code. After the join, `lang_bias.x` is the Google-translation
# value and `lang_bias.y` the hand-translation value.
left_join(lang_bias_google, lang_bias, by = "language_code") %>%
  with(cor.test(lang_bias.x, lang_bias.y)) %>%
  tidy() %>%
  kable()

estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
0.7007927	4.167868	0.0005781	18	0.3743697	0.8726815	Pearson’s product-moment correlation	two.sided

Correlation between measures - Hand-translations

# Scatter plot of language bias against each behavioral IAT measure, with a
# linear fit per sex and one panel per measure type.
# `type2` is a coarse family label for the measure: "D-score" or "RT".
# The "explicit" fallback catches any other type; with the five types kept
# when IAT_behavior_measures is built, no row currently reaches it.
d %>%
  mutate(
    type2 = case_when(
      type %in% c(
        "country_gender_D_score_diff",
        "country_means_D_score",
        "country_gender_D_score"
      ) ~ "D-score",
      type %in% c("country_RT_ratio", "country_RT_mean") ~ "RT",
      TRUE ~ "explicit"
    )
  ) %>%
  ggplot(aes(x = mean, y = lang_bias, group = sex, color = sex, shape = type2)) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  facet_grid(. ~ type2 + type, scales = "free_x", drop = TRUE) +
  theme_bw()

Two effects: (1) D-score is correlated with language bias (driven by males); (2) explicit family-male association is correlated with language bias.

# Correlation test between the behavioral measure and language bias, run
# separately for every measure type x sex group. The tidy results are
# shown sorted by p-value, without the parameter, method and alternative
# columns.
d %>%
  group_by(type, sex) %>%
  do(tidy(with(., cor.test(mean, lang_bias)))) %>%
  select(-c(parameter, method, alternative)) %>%
  arrange(p.value) %>%
  kable()

type	sex	estimate	statistic	p.value	conf.low	conf.high
country_gender_D_score	m	0.4477692	2.2395419	0.0366428	0.0322487	0.7313172
country_means_D_score	NA	0.3787269	1.8300407	0.0821900	-0.0510297	0.6901379
country_gender_D_score_diff	NA	-0.3476880	-1.6583734	0.1128442	-0.6709440	0.0866176
country_RT_mean	m	-0.2243384	-1.0295130	0.3155291	-0.5901306	0.2178775
country_gender_D_score	f	0.1534496	0.6944725	0.4953750	-0.2867076	0.5401150
country_RT_mean	f	-0.0955694	-0.4293647	0.6722478	-0.4971464	0.3397276
country_RT_ratio	NA	-0.0351527	-0.1573048	0.8765819	-0.4500905	0.3922695

Correlation between measures - Google-translations

# Rebuild `d` with the Google-translation language bias in place of the
# hand-translation values; this overwrites the earlier `d`.
# The join key is left implicit (all shared column names). Character
# columns are passed through as.factor by mutate_if, and rows with no
# language bias value are dropped.
d <- IAT_behavior_measures %>%
  left_join(lang_bias_google) %>%
  mutate_if(is.character, as.factor) %>%
  filter(!is.na(lang_bias))

There are 24 languages represented here.

# Scatter plot of Google-translation language bias against each behavioral
# IAT measure, with a linear fit per sex and one panel per measure type.
# `type2` is a coarse family label for the measure: "D-score" or "RT".
# The "explicit" fallback catches any other type; with the five types kept
# when IAT_behavior_measures is built, no row currently reaches it.
d %>%
  mutate(
    type2 = case_when(
      type %in% c(
        "country_gender_D_score_diff",
        "country_means_D_score",
        "country_gender_D_score"
      ) ~ "D-score",
      type %in% c("country_RT_ratio", "country_RT_mean") ~ "RT",
      TRUE ~ "explicit"
    )
  ) %>%
  ggplot(aes(x = mean, y = lang_bias, group = sex, color = sex, shape = type2)) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  facet_grid(. ~ type2 + type, scales = "free_x", drop = TRUE) +
  theme_bw()

# Correlation test between the behavioral measure and the Google-translation
# language bias, run separately for every measure type x sex group. The
# tidy results are shown sorted by p-value, without the parameter, method
# and alternative columns.
d %>%
  group_by(type, sex) %>%
  do(tidy(with(., cor.test(mean, lang_bias)))) %>%
  select(-c(parameter, method, alternative)) %>%
  arrange(p.value) %>%
  kable()

type	sex	estimate	statistic	p.value	conf.low	conf.high
country_gender_D_score	m	0.4009012	2.0525596	0.0521991	-0.0029769	0.6923324
country_means_D_score	NA	0.3602098	1.8111114	0.0838011	-0.0505291	0.6667262
country_gender_D_score_diff	NA	-0.3143286	-1.5530493	0.1346797	-0.6369594	0.1020019
country_RT_ratio	NA	0.2759421	1.3465644	0.1918307	-0.1434192	0.6112932
country_gender_D_score	f	0.1756703	0.8369825	0.4116025	-0.2450950	0.5407471
country_RT_mean	m	-0.0159656	-0.0748948	0.9409753	-0.4166787	0.3899425
country_RT_mean	f	0.0116495	0.0546448	0.9569146	-0.3935968	0.4131049