Study 2 data only. I’m controlling for block order, and also including median country age as a covariate, because that seems to matter from other analyses we’ve done.
Takeaways:
# Smallest per-language sample size admitted to the by-language analyses
# (used as the `n >= MINPARTICIPANTS` filter).
MINPARTICIPANTS <- 8

# Language-IAT effect-size files; judging by the file names these hold the
# full and the two restricted word-list variants.
IAT_lang_full_path <- "genius_effect_sizes_google_full.csv"
IAT_lang_restricted_path <- "genius_effect_sizes_google_restricted.csv"
IAT_lang_restricted_path2 <- "genius_effect_sizes_google_restricted2.csv"

# Lookup table whose `lang` and `language_code` columns are used in later joins.
# NOTE(review): parsed as tab-separated despite the .csv extension -- confirm
# the file really is tab-delimited.
lang_key <- read_tsv(file = "language_name_to_google.csv")
I calculated the language IAT as in Caliskan et al. (2017) for 40 languages in the behavioral data set, using translations from Google Translate. The effect sizes from the restricted and full word lists are highly correlated, so in the following analysis, I only look at the effect sizes calculated from the restricted set.
# Study 2
# Read the Study 2 behavioral IAT file and tidy it to one row per participant:
# lower-cased language and gender, harmonized column names, the language code
# from `lang_key`, and log-transformed age.
IAT_behavioral_path1 <-"../../data/IAT/Gender-Genius/InternationalIAT_LanguageData.csv"
IAT_behavioral_path_raw1 <- read_csv(IAT_behavioral_path1)
IAT_behavioral_tidy1 <- IAT_behavioral_path_raw1 %>%
  mutate(lang1 = tolower(PrimaryLanguage),
         #lang2 = tolower(lang_other_clean),
         Gender = tolower(Gender),
         # seq_len() yields an empty vector for zero rows, whereas 1:n()
         # would yield c(1, 0) and make mutate() fail.
         subid = as.factor(seq_len(n()))) %>%
  rename(iat_score = IATScore,
         gender = Gender,
         age = Age,
         country = Country,
         # ses = SES,
         condition = ConditionC,
         conservatism = Conservatism,
         status = Status,
         children = Children) %>%
  # NOTE(review): `lang2` is not created in this pipeline (its definition is
  # commented out above). Presumably it is only looked up when some `lang1`
  # equals "other"; confirm the raw data supplies a `lang2` column or restore
  # the commented-out line.
  mutate(lang = ifelse(lang1 == "other", lang2, lang1)) %>%
  # region = as.factor(countrycode::countrycode(country, "country.name", "region"))) %>%
  # Explicit key: do not rely on whichever column names happen to overlap.
  left_join(lang_key %>% select(language_code, lang), by = "lang") %>%
  mutate_if(is.character, as.factor) %>%
  mutate(condition = as.factor(condition),
         log_age = log(age)) %>%
  select(subid, iat_score, lang, language_code, gender, condition, log_age, country, conservatism, status, children)
## Study1
# Read the Study 1 behavioral IAT file and tidy it into the same column layout
# as the Study 2 table (same final `select()`).
IAT_behavioral_path2 <- "../../data/IAT/Gender-Genius/IAT_Study1_Combined_Master_Dataset_LanguageData.csv"
IAT_behavioral_path_raw2 <- read_csv(IAT_behavioral_path2)
IAT_behavioral_tidy2 <- IAT_behavioral_path_raw2 %>%
  mutate(lang = tolower(PrimaryLanguage),
         Gender = tolower(Gender),
         subid = as.factor(SubjectID)) %>%
  rename(iat_score = IATScore,
         gender = Gender,
         age = Age,
         country = Country,
         condition = Condition,
         #sexism = Sexism,
         #race = Race,
         #politicalparty = PoliticalParty,
         conservatism = Conservatism,
         status = Status,
         children = Children)%>%
  #income = Income) %>%
  # Explicit key: do not rely on whichever column names happen to overlap.
  left_join(lang_key %>% select(language_code, lang), by = "lang") %>%
  mutate_if(is.character, as.factor) %>%
  mutate(condition = as.factor(condition),
         conservatism = as.factor(conservatism),
         # If `children` was read as text, the mutate_if() above has already
         # turned it into a factor, and as.numeric() on a factor returns the
         # level codes, not the values. Going through as.character() recovers
         # the values and is a no-op for columns that are already numeric.
         children = as.numeric(as.character(children)),
         log_age = log(age)) %>%
  select(subid, iat_score, lang, language_code, gender, condition, log_age, country, conservatism, status, children)
# Only Study 2 enters the analysis. The pooled Study 1 + Study 2 table is kept
# here for reference but not run: its result was overwritten immediately by
# the assignment below, so computing it had no effect.
# IAT_behavioral_tidy <- bind_rows(IAT_behavioral_tidy1, IAT_behavioral_tidy2)
IAT_behavioral_tidy <- IAT_behavioral_tidy1
Let’s residualize out order, age, and gender, as in the career analysis, and add in mean country age and objective bias measures.
# Participant-level nuisance model (gender, log age, condition); the
# residualized scores are later derived from it with add_residuals().
mod <- lm(
  formula = iat_score ~ as.factor(gender) + log_age + as.factor(condition),
  data = IAT_behavioral_tidy
)
# Country-level age table, keyed by `country`. Some country names are recoded
# (new name = old name) -- presumably so they match the spelling used in the
# behavioral data.
AGE_DATA_PATH <- "../7_age_controls/median_country_age_world_factbook.csv"
country_age <- mutate(
  rename(read_csv(AGE_DATA_PATH), country= country_name),
  country = fct_recode(
    country,
    "United States"= "United States of America",
    "United Kingdom" = "UK",
    "Czech Republic" = "Czechia",
    "Tanzania, United Republic of" = "Tanzania",
    "Moldova, Republic of" = "Moldova",
    "Bahamas" = "Bahamas, The"
  )
)
# Country-level `ggi` and `wps` measures, keyed by `country`, keeping only
# countries for which both measures are present.
GENDER_MEASURE_PATH <- "../../analyses/4_gender_measures/data/gender_measures/all_gender_measures2.csv"
objective_country_measures_by_country <- read_csv(GENDER_MEASURE_PATH) %>%
  rename(country = country_name) %>%
  select(ggi, wps, country) %>%
  mutate(
    country = fct_recode(
      country,
      "Vietnam"= "Viet Nam",
      "Macedonia" = "The former Yugoslav Republic of Macedonia",
      "Tanzania, United Republic of" = "United Republic of Tanzania",
      "United Kingdom" = "UK",
      "Moldova, Republic of" = "Republic of Moldova",
      "Venezuela" = "Venezuela, Bolivarian Republic of"
    )
  ) %>%
  # Drop a row as soon as either measure is missing.
  filter(!(is.na(wps) | is.na(ggi)))
# Attach the residuals of `mod` as `iat_resid`, then the country-level age
# and objective-measure tables (joined on their shared columns).
IAT_behavioral_tidy_with_resids <- left_join(
  left_join(
    add_residuals(IAT_behavioral_tidy, mod, "iat_resid"),
    country_age
  ),
  objective_country_measures_by_country
)
There are 505 participants in the data, speaking 45 languages.
# Participants per language, displayed from most to least common.
lang_counts <- IAT_behavioral_tidy_with_resids %>%
  count(lang)
DT::datatable(arrange(lang_counts, desc(n)))
Here are the scores, raw and residualized.
# Distribution of the raw behavioral IAT scores.
# bins = 30 is geom_histogram()'s default, stated explicitly.
ggplot(IAT_behavioral_tidy_with_resids, aes(x = iat_score)) +
  geom_histogram(bins = 30) +
  ggtitle("Behavioral IAT score distributions") +
  theme_classic()

# Distribution of the residualized scores. The title now says so; previously
# both plots carried the same (misspelled) title.
ggplot(IAT_behavioral_tidy_with_resids, aes(x = iat_resid)) +
  geom_histogram(bins = 30) +
  ggtitle("Residualized behavioral IAT score distributions") +
  theme_classic()
I subset to only those languages that have sufficient speakers. Here the value is set to 8. Here are the mean behavioral IATs by language as a function of language IAT. The ranges are 95% CIs.
# Languages with at least MINPARTICIPANTS participants.
targ_langs <- lang_counts %>%
  filter(n >= MINPARTICIPANTS) %>%
  pull(lang)

# Per-language mean and CI bounds of the raw IAT score, as returned by
# multi_boot_standard(), plus the language code.
behavioral_means_tidy <- IAT_behavioral_tidy_with_resids %>%
  filter(lang %in% targ_langs) %>%
  group_by(lang) %>%
  multi_boot_standard(col = "iat_score") %>%
  ungroup() %>%
  rename(behavioral_mean = mean,
         behavioral_ci_lower = ci_lower,
         behavioral_ci_upper = ci_upper) %>%
  # Explicit key instead of an implicit natural join.
  left_join(lang_key %>% select(language_code, lang), by = "lang")

# Participants per language, with the language code attached.
sample_sizes <- count(IAT_behavioral_tidy_with_resids, lang) %>%
  left_join(lang_key %>% select(language_code, lang), by = "lang")

# Per-language mean of the participants' country-level `median_age`.
median_age_by_lang <- IAT_behavioral_tidy_with_resids %>%
  group_by(language_code) %>%
  summarize(median_age = mean(median_age))

# Per-language mean of the objective measures, counting each
# language/country pair once.
mean_objectives_by_lang <- IAT_behavioral_tidy_with_resids %>%
  distinct(language_code, country, .keep_all = TRUE) %>%
  select(country, language_code, wps, ggi) %>%
  group_by(language_code) %>%
  summarize(wps = mean(wps, na.rm = TRUE),
            ggi = mean(ggi, na.rm = TRUE))

# One row per language: behavioral means, language IAT, sample size, age and
# objective measures.
# NOTE(review): `IAT_lang_restricted` is not defined in the visible code --
# presumably it is read from IAT_lang_restricted_path elsewhere; confirm.
full_df <- behavioral_means_tidy %>%
  left_join(IAT_lang_restricted, by = "language_code") %>%
  left_join(sample_sizes, by = "language_code") %>%
  left_join(median_age_by_lang, by = "language_code") %>%
  left_join(mean_objectives_by_lang, by = "language_code")
# Language IAT effect size against mean behavioral IAT, one labelled
# point-range per language, with a linear fit and a dashed zero line.
ggplot(
  data = full_df,
  mapping = aes(x = effect_size_restricted, y = behavioral_mean)
) +
  geom_pointrange(
    aes(
      ymin = behavioral_ci_lower,
      ymax = behavioral_ci_upper,
      color = language_code
    )
  ) +
  geom_text(aes(label = language_code)) +
  geom_smooth(method = "lm") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  labs(x = "Language IAT effect size", y = "Behavioral IAT effect size") +
  theme_classic() +
  theme(legend.position = "none")
Models
# Pearson correlation between mean behavioral IAT and language IAT.
cor.test(
  x = full_df$behavioral_mean,
  y = full_df$effect_size_restricted
)
##
## Pearson's product-moment correlation
##
## data: full_df$behavioral_mean and full_df$effect_size_restricted
## t = 1.188, df = 9, p-value = 0.2652
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2973699 0.7929289
## sample estimates:
## cor
## 0.3681834
# Language-level regression on language IAT and median age, with every
# numeric column of full_df passed through scale() first.
summary(
  lm(
    behavioral_mean ~ effect_size_restricted + median_age,
    data = full_df %>% mutate_if(is.numeric, scale)
  )
)
##
## Call:
## lm(formula = behavioral_mean ~ effect_size_restricted + median_age,
## data = full_df %>% mutate_if(is.numeric, scale))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6212 -0.4251 -0.2460 0.4239 1.2658
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.244e-16 2.053e-01 0.000 1.0000
## effect_size_restricted 8.797e-02 2.318e-01 0.379 0.7142
## median_age 7.564e-01 2.318e-01 3.263 0.0115 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6809 on 8 degrees of freedom
## Multiple R-squared: 0.6291, Adjusted R-squared: 0.5364
## F-statistic: 6.785 on 2 and 8 DF, p-value: 0.01892
# Same language-level regression with `ggi` added as a predictor; numeric
# columns are passed through scale() first.
summary(
  lm(
    behavioral_mean ~ effect_size_restricted + median_age + ggi,
    data = full_df %>% mutate_if(is.numeric, scale)
  )
)
##
## Call:
## lm(formula = behavioral_mean ~ effect_size_restricted + median_age +
## ggi, data = full_df %>% mutate_if(is.numeric, scale))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6215 -0.4314 -0.2316 0.4280 1.2650
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.054e-16 2.195e-01 0.000 1.0000
## effect_size_restricted 9.224e-02 2.807e-01 0.329 0.7521
## median_age 7.661e-01 3.903e-01 1.963 0.0905 .
## ggi -1.414e-02 4.368e-01 -0.032 0.9751
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7278 on 7 degrees of freedom
## Multiple R-squared: 0.6292, Adjusted R-squared: 0.4702
## F-statistic: 3.959 on 3 and 7 DF, p-value: 0.06092
# Recompute the per-language summary on the residualized score (`iat_resid`).
# This overwrites `behavioral_means_tidy` and `full_df` from the raw-score
# analysis.
behavioral_means_tidy <- IAT_behavioral_tidy_with_resids %>%
  filter(lang %in% targ_langs) %>%
  group_by(lang) %>%
  multi_boot_standard(col = "iat_resid") %>%
  ungroup() %>%
  rename(behavioral_mean = mean,
         behavioral_ci_lower = ci_lower,
         behavioral_ci_upper = ci_upper) %>%
  # Explicit key instead of an implicit natural join.
  left_join(lang_key %>% select(language_code, lang), by = "lang")

# NOTE(review): `IAT_lang_restricted` is not defined in the visible code --
# presumably it is read from IAT_lang_restricted_path elsewhere; confirm.
full_df <- behavioral_means_tidy %>%
  left_join(IAT_lang_restricted, by = "language_code") %>%
  left_join(sample_sizes, by = "language_code") %>%
  left_join(median_age_by_lang, by = "language_code") %>%
  left_join(mean_objectives_by_lang, by = "language_code")
# Language IAT effect size against the mean residualized behavioral IAT,
# one labelled point-range per language, with a linear fit and a zero line.
ggplot(
  data = full_df,
  mapping = aes(x = effect_size_restricted, y = behavioral_mean)
) +
  geom_pointrange(
    aes(
      ymin = behavioral_ci_lower,
      ymax = behavioral_ci_upper,
      color = language_code
    )
  ) +
  geom_text(aes(label = language_code)) +
  geom_smooth(method = "lm") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  labs(
    x = "Language IAT effect size",
    y = "Behavioral IAT effect size - residualized"
  ) +
  theme_classic() +
  theme(legend.position = "none")
# Pearson correlation between the residualized behavioral mean and language IAT.
cor.test(
  x = full_df$behavioral_mean,
  y = full_df$effect_size_restricted
)
##
## Pearson's product-moment correlation
##
## data: full_df$behavioral_mean and full_df$effect_size_restricted
## t = 1.0733, df = 9, p-value = 0.3111
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3296283 0.7792635
## sample estimates:
## cor
## 0.336855
# Language-level regression of the residualized behavioral mean on language
# IAT, median age and `ggi`; numeric columns are passed through scale() first.
summary(
  lm(
    behavioral_mean ~ effect_size_restricted + median_age + ggi,
    data = full_df %>% mutate_if(is.numeric, scale)
  )
)
##
## Call:
## lm(formula = behavioral_mean ~ effect_size_restricted + median_age +
## ggi, data = full_df %>% mutate_if(is.numeric, scale))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8239 -0.4224 -0.2897 0.2904 1.4463
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.205e-16 2.453e-01 0.000 1.000
## effect_size_restricted 4.459e-02 3.138e-01 0.142 0.891
## median_age 6.200e-01 4.363e-01 1.421 0.198
## ggi 1.121e-01 4.883e-01 0.230 0.825
##
## Residual standard error: 0.8136 on 7 degrees of freedom
## Multiple R-squared: 0.5366, Adjusted R-squared: 0.3381
## F-statistic: 2.702 on 3 and 7 DF, p-value: 0.1258
# Pearson correlation between language IAT and `ggi`.
cor.test(
  x = full_df$effect_size_restricted,
  y = full_df$ggi
)
##
## Pearson's product-moment correlation
##
## data: full_df$effect_size_restricted and full_df$ggi
## t = 2.0163, df = 9, p-value = 0.07456
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06320374 0.86743295
## sample estimates:
## cor
## 0.5578207
# Pearson correlation between language IAT and `wps`.
cor.test(
  x = full_df$effect_size_restricted,
  y = full_df$wps
)
##
## Pearson's product-moment correlation
##
## data: full_df$effect_size_restricted and full_df$wps
## t = 1.6178, df = 9, p-value = 0.1402
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1750726 0.8363819
## sample estimates:
## cor
## 0.4746511
This is the same analysis as above, but at the participant level using mixed-effect models.
# Participant-level table: language IAT joined on by language code, with the
# continuous variables passed through scale() and `condition` as a factor.
by_participant_df <- mutate(
  left_join(
    IAT_behavioral_tidy_with_resids,
    IAT_lang_restricted,
    by = "language_code"
  ),
  condition = as.factor(condition),
  iat_score = scale(iat_score),
  iat_resid = scale(iat_resid),
  log_age = scale(log_age),
  median_age = scale(median_age),
  effect_size_restricted = scale(effect_size_restricted)
)
# Participant-level scatter of language IAT against behavioral IAT, coloured
# by language code, with a linear fit and a dashed zero line.
ggplot(
  data = by_participant_df,
  mapping = aes(x = effect_size_restricted, y = iat_score)
) +
  geom_point(aes(color = language_code)) +
  geom_smooth(method = "lm") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  labs(x = "Language IAT", y = "Behavioral IAT") +
  theme_classic() +
  theme(legend.position = "none")
In a mixed-effect model with country and language as random intercepts and gender and age as fixed effects, there’s no relationship between language IAT and behavioral IAT when you include all languages. All variables are scaled.
# Mixed model on all languages: language IAT, gender, log age and condition
# as fixed effects; random intercepts for country and language.
summary(
  lme4::lmer(
    iat_score ~ effect_size_restricted+ gender + log_age + condition +
      (1|country) + (1|lang),
    data = by_participant_df
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## iat_score ~ effect_size_restricted + gender + log_age + condition +
## (1 | country) + (1 | lang)
## Data: by_participant_df
##
## REML criterion at convergence: 1381.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.93410 -0.62122 0.03456 0.68683 3.14900
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 0.01404 0.1185
## lang (Intercept) 0.03937 0.1984
## Residual 0.89111 0.9440
## Number of obs: 497, groups: country, 74; lang, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.39919 0.11252 -3.548
## effect_size_restricted -0.04515 0.06999 -0.645
## gendermale 0.23932 0.09427 2.539
## log_age -0.02783 0.04276 -0.651
## condition1 0.56059 0.08627 6.498
##
## Correlation of Fixed Effects:
## (Intr) effc__ gndrml log_ag
## effct_sz_rs 0.013
## gendermale -0.621 0.019
## log_age -0.006 0.002 0.037
## condition1 -0.406 0.007 0.008 -0.027
# Same mixed model on all languages, with a language IAT x gender interaction.
summary(
  lme4::lmer(
    iat_score ~ effect_size_restricted* gender + log_age + condition +
      (1|country) + (1|lang),
    data = by_participant_df
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## iat_score ~ effect_size_restricted * gender + log_age + condition +
## (1 | country) + (1 | lang)
## Data: by_participant_df
##
## REML criterion at convergence: 1383.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.91782 -0.63305 0.02894 0.67509 3.06333
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 0.01352 0.1163
## lang (Intercept) 0.04214 0.2053
## Residual 0.89079 0.9438
## Number of obs: 497, groups: country, 74; lang, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.40120 0.11310 -3.547
## effect_size_restricted 0.01690 0.09816 0.172
## gendermale 0.24369 0.09440 2.581
## log_age -0.02719 0.04276 -0.636
## condition1 0.55538 0.08644 6.425
## effect_size_restricted:gendermale -0.08723 0.09426 -0.925
##
## Correlation of Fixed Effects:
## (Intr) effc__ gndrml log_ag cndtn1
## effct_sz_rs -0.009
## gendermale -0.618 0.051
## log_age -0.006 0.011 0.037
## condition1 -0.401 -0.036 0.005 -0.028
## effct_sz_r: 0.026 -0.692 -0.054 -0.014 0.060
This is true even when you exclude participants whose language has fewer than 8 speakers in the sample.
# Languages with at least MINPARTICIPANTS participants.
targ_langs <- pull(filter(lang_counts, n >= MINPARTICIPANTS), lang)

# Main-effects mixed model restricted to those languages.
summary(
  lme4::lmer(
    iat_score ~ effect_size_restricted+ gender + log_age + condition +
      (1|country) + (1|lang),
    data = by_participant_df %>% filter(lang %in% targ_langs)
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## iat_score ~ effect_size_restricted + gender + log_age + condition +
## (1 | country) + (1 | lang)
## Data: by_participant_df %>% filter(lang %in% targ_langs)
##
## REML criterion at convergence: 1175.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.88721 -0.61025 -0.00123 0.68786 3.00306
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 0.02504 0.1582
## lang (Intercept) 0.02456 0.1567
## Residual 0.87737 0.9367
## Number of obs: 425, groups: country, 53; lang, 11
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.26330 0.12336 -2.134
## effect_size_restricted 0.05143 0.08111 0.634
## gendermale 0.16019 0.09947 1.610
## log_age -0.04323 0.04498 -0.961
## condition1 0.57120 0.09254 6.172
##
## Correlation of Fixed Effects:
## (Intr) effc__ gndrml log_ag
## effct_sz_rs 0.157
## gendermale -0.563 0.014
## log_age -0.028 0.026 0.032
## condition1 -0.381 -0.041 -0.017 -0.021
# Interaction mixed model (language IAT x gender) restricted to the
# languages in `targ_langs`.
summary(
  lme4::lmer(
    iat_score ~ effect_size_restricted*gender + log_age + condition +
      (1|country) + (1|lang),
    data = by_participant_df %>% filter(lang %in% targ_langs)
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## iat_score ~ effect_size_restricted * gender + log_age + condition +
## (1 | country) + (1 | lang)
## Data: by_participant_df %>% filter(lang %in% targ_langs)
##
## REML criterion at convergence: 1175.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.86373 -0.63056 -0.01102 0.66263 2.85882
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 0.02313 0.1521
## lang (Intercept) 0.02960 0.1720
## Residual 0.87450 0.9351
## Number of obs: 425, groups: country, 53; lang, 11
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.25996 0.12541 -2.073
## effect_size_restricted 0.16042 0.11002 1.458
## gendermale 0.16184 0.09932 1.630
## log_age -0.04282 0.04490 -0.954
## condition1 0.56269 0.09255 6.080
## effect_size_restricted:gendermale -0.14930 0.10068 -1.483
##
## Correlation of Fixed Effects:
## (Intr) effc__ gndrml log_ag cndtn1
## effct_sz_rs 0.127
## gendermale -0.552 0.017
## log_age -0.027 0.024 0.032
## condition1 -0.374 -0.067 -0.018 -0.021
## effct_sz_r: 0.004 -0.640 -0.012 -0.008 0.055