Comparing behavior vs. language

Measures
Correlation between measures - Hand-translations
Correlation between measures - Google-translations

Here we look at whether IAT scores correlate with language bias across a range of different languages. A number of different IAT measures are examined. The analysis is at the country level.

Read in behavior and language biases

# Country-to-language lookup table. Two language labels in the CSV carry a
# "; <variant>" suffix; collapse them to the plain language name and leave
# every other label (including missing ones) untouched.
countries_langs <- read_csv("../../data/other/countries_lang.csv") %>%
  mutate(
    language_name = case_when(
      language_name == "Spanish; Castilian" ~ "Spanish",
      language_name == "Dutch; Flemish" ~ "Dutch",
      TRUE ~ language_name
    )
  )

# Country-level IAT behavioral measures, annotated with each country's
# language name and code.
# NOTE(review): the join has no explicit `by`, so dplyr matches on whatever
# columns the two tables share (presumably just country_name -- confirm
# against the CSV header).
# Only the five country-level D-score / RT measure types are retained.
IAT_behavior_measures <- read_csv("../behavior_IAT/IAT_behavior_measures.csv") %>%
  left_join(select(countries_langs, country_name, language_name, language_code)) %>%
  filter(
    type %in% c(
      "country_means_D_score",
      "country_gender_D_score",
      "country_gender_D_score_diff",
      "country_RT_mean",
      "country_RT_ratio"
    )
  )

# Language-bias effect sizes used for the hand-translation analyses.
lang_bias <- read_csv(file = "career_effect_sizes.csv")

# Language-bias effect sizes used for the Google-translation analyses.
# Column names are supplied explicitly, so readr reads the file's first line
# as data rather than as a header.
lang_bias_google <- read_csv(
  "google_translate/career_effect_sizes_google.csv",
  col_names = c("language_code", "test", "test_name", "lang_bias")
) %>%
  # Exclude the codes "he", "zu" and "th"; rows whose code is missing are
  # dropped as well. Exclusions tried earlier and since disabled:
  # "ha", "id", "zh".
  filter(!(language_code == "he" | language_code == "zu" | language_code == "th"))

# Attach the hand-translation language bias to every behavioral-measure row,
# turn all character columns into factors, and keep only rows with a
# non-missing language bias.
# No `by` is given, so the join uses every column the two tables share.
# Inside filter(), `lang_bias` resolves to the joined column, not to the
# `lang_bias` data frame of the same name.
d <- IAT_behavior_measures %>%
  left_join(lang_bias) %>%
  mutate_if(is.character, as.factor) %>%
  filter(!is.na(lang_bias))

There are 22 languages represented here.

Measures

Behavior

# Distribution of the behavioral measures: one histogram panel per measure
# type, each panel with its own x-axis range.
ggplot(data = d, mapping = aes(x = mean)) +
  geom_histogram() +
  facet_wrap(~type, scales = "free_x") +
  theme_bw()

Language - Hand translations

# Distribution of the hand-translation language-bias effect sizes.
ggplot(data = lang_bias, mapping = aes(x = lang_bias)) +
  geom_histogram() +
  theme_bw()

Language - Google translations

# Distribution of the Google-translation language-bias effect sizes
# (after the language-code exclusions applied when the table was loaded).
ggplot(data = lang_bias_google, mapping = aes(x = lang_bias)) +
  geom_histogram() +
  theme_bw()

How correlated are the two language measures?

# Pearson correlation between the two language-bias estimates, paired by
# language code. After the join, `lang_bias.x` holds the Google-translation
# value and `lang_bias.y` the hand-translation value.
lang_bias_google %>%
  left_join(lang_bias, by = "language_code") %>%
  do(tidy(cor.test(.$lang_bias.x, .$lang_bias.y))) %>%
  kable()

estimate	statistic	p.value	parameter	conf.low	conf.high	method	alternative
0.7007927	4.167868	0.0005781	18	0.3743697	0.8726815	Pearson’s product-moment correlation	two.sided

Correlation between measures - Hand-translations

# Scatter each country-level behavioral measure against the hand-translation
# language bias, with a separate linear fit per sex.
d %>%
  mutate(
    # Coarse measure family, used for the point shape and as the outer facet
    # label. Any type outside the two listed families is labelled "explicit".
    type2 = case_when(
      type %in% c(
        "country_gender_D_score_diff",
        "country_means_D_score",
        "country_gender_D_score"
      ) ~ "D-score",
      type %in% c("country_RT_ratio", "country_RT_mean") ~ "RT",
      TRUE ~ "explicit"
    )
  ) %>%
  ggplot(aes(x = mean, y = lang_bias, group = sex, color = sex, shape = type2)) +
  geom_point() +
  geom_smooth(method = "lm") +
  # One column per family/type pair; `drop` is spelled TRUE because the
  # alias `T` is an ordinary variable that can be reassigned.
  facet_grid(. ~ type2 + type, scales = "free_x", drop = TRUE) +
  theme_bw()

Two effects: (1) D-score is correlated with language bias (driven by males); (2) the explicit family-male association is correlated with language bias.

# Correlation between each behavioral measure and the language bias, computed
# separately for every measure type x sex combination and listed by
# ascending p-value. The test's bookkeeping columns are left out of the table.
d %>%
  group_by(type, sex) %>%
  do(tidy(cor.test(.$mean, .$lang_bias))) %>%
  select(-c(parameter, method, alternative)) %>%
  arrange(p.value) %>%
  kable()

type	sex	estimate	statistic	p.value	conf.low	conf.high
country_gender_D_score	m	0.3435225	2.1639976	0.0373728	0.0219463	0.6006810
country_means_D_score	NA	0.2873735	1.7749964	0.0845964	-0.0404081	0.5593124
country_gender_D_score_diff	NA	-0.2438632	-1.4876262	0.1458043	-0.5262962	0.0870328
country_gender_D_score	f	0.1533071	0.9178272	0.3649912	-0.1796351	0.4547373
country_RT_mean	m	-0.1024097	-0.6090661	0.5464140	-0.4127331	0.2292153
country_RT_mean	f	-0.0613774	-0.3637995	0.7181962	-0.3778813	0.2679709
country_RT_ratio	NA	-0.0359346	-0.2127295	0.8327721	-0.3558107	0.2914782

Correlation between measures - Google-translations

# Rebuild `d` with the Google-translation language bias; this overwrites the
# hand-translation version of `d` created earlier in the script.
# No `by` is given, so the join uses every column the two tables share.
# Inside filter(), `lang_bias` resolves to the joined column, so rows without
# a language-bias value are removed.
d <- IAT_behavior_measures %>%
  left_join(lang_bias_google) %>%
  mutate_if(is.character, as.factor) %>%
  filter(!is.na(lang_bias))

There are 24 languages represented here.

# Scatter each country-level behavioral measure against the
# Google-translation language bias, with a separate linear fit per sex.
d %>%
  mutate(
    # Coarse measure family, used for the point shape and as the outer facet
    # label. Any type outside the two listed families is labelled "explicit".
    type2 = case_when(
      type %in% c(
        "country_gender_D_score_diff",
        "country_means_D_score",
        "country_gender_D_score"
      ) ~ "D-score",
      type %in% c("country_RT_ratio", "country_RT_mean") ~ "RT",
      TRUE ~ "explicit"
    )
  ) %>%
  ggplot(aes(x = mean, y = lang_bias, group = sex, color = sex, shape = type2)) +
  geom_point() +
  geom_smooth(method = "lm") +
  # One column per family/type pair; `drop` is spelled TRUE because the
  # alias `T` is an ordinary variable that can be reassigned.
  facet_grid(. ~ type2 + type, scales = "free_x", drop = TRUE) +
  theme_bw()

# Correlation between each behavioral measure and the language bias currently
# held in `d`, computed separately for every measure type x sex combination
# and listed by ascending p-value. The test's bookkeeping columns are left
# out of the table.
d %>%
  group_by(type, sex) %>%
  do(tidy(cor.test(.$mean, .$lang_bias))) %>%
  select(-c(parameter, method, alternative)) %>%
  arrange(p.value) %>%
  kable()

type	sex	estimate	statistic	p.value	conf.low	conf.high
country_RT_ratio	NA	0.3745995	2.2494730	0.0317270	0.0359084	0.6361034
country_means_D_score	NA	0.3057319	1.7878496	0.0835802	-0.0419837	0.5873883
country_gender_D_score	f	0.2778738	1.6105637	0.1174119	-0.0723358	0.5670848
country_gender_D_score	m	0.2150913	1.2262801	0.2293269	-0.1384405	0.5200019
country_RT_mean	f	0.1430089	0.8045093	0.4272300	-0.2106418	0.4635589
country_RT_mean	m	0.1375692	0.7733055	0.4451986	-0.2159383	0.4591912
country_gender_D_score_diff	NA	-0.0576810	-0.3216897	0.7498469	-0.3932036	0.2913984