Here I’m controlling for block order and combining datasets from exp1 and exp2. I’ve also included median country age as a covariate, because that seems to matter in other analyses we’ve done.
Takeaways:
# Minimum number of participants a language needs before it enters the
# by-language analyses (compared against per-language counts further down).
MINPARTICIPANTS <- 8

# Locations of the language-IAT effect-size files (full / restricted word lists).
IAT_lang_full_path        <- "genius_effect_sizes_google_full.csv"
IAT_lang_restricted_path  <- "genius_effect_sizes_google_restricted.csv"
IAT_lang_restricted_path2 <- "genius_effect_sizes_google_restricted2.csv"

# Lookup table providing the `lang` and `language_code` columns used in later
# joins.
# NOTE(review): parsed as tab-separated despite the .csv extension -- confirm
# the file really is tab-delimited.
lang_key <- read_tsv(file = "language_name_to_google.csv")
I calculated language IAT as in Caliskan et al. (2017) for 40 languages in the behavioral data set, using translations from Google Translate. The effect sizes from the restricted and full word lists are highly correlated, so in the following analysis, I only look at the effect sizes calculated from the restricted set.
# Study 2
# Read the Study 2 behavioral IAT file and reshape it into the participant-level
# layout shared with Study 1 (see the final select()).
IAT_behavioral_path1 <-"../../data/IAT/Gender-Genius/InternationalIAT_LanguageData.csv"
IAT_behavioral_path_raw1 <- read_csv(IAT_behavioral_path1)
IAT_behavioral_tidy1 <- IAT_behavioral_path_raw1 %>%
  mutate(lang1 = tolower(PrimaryLanguage),
         #lang2 = tolower(lang_other_clean),
         Gender = tolower(Gender),
         # seq_len() stays correct for an empty table, where 1:n() gives c(1, 0)
         subid = as.factor(seq_len(n()))) %>%
  rename(iat_score = IATScore,
         gender = Gender,
         age = Age,
         country = Country,
         # ses = SES,
         condition = ConditionC,
         conservatism = Conservatism,
         status = Status,
         children = Children) %>%
  mutate(
    # `lang2` is no longer derived above. The original ifelse() only worked
    # because ifelse() evaluates `yes` lazily, i.e. as long as nobody reported
    # "other"; the first such respondent would abort with
    # "object 'lang2' not found". Use the column if the raw data provides it,
    # otherwise fall back to NA so "other" respondents get a missing language.
    lang2 = if ("lang2" %in% names(.)) lang2 else NA_character_,
    lang = ifelse(lang1 == "other", lang2, lang1)) %>%
  # region = as.factor(countrycode::countrycode(country, "country.name", "region"))) %>%
  # attach the language code for each language name
  left_join(lang_key %>% select(language_code, lang)) %>%
  mutate_if(is.character, as.factor) %>%
  mutate(condition = as.factor(condition),
         log_age = log(age)) %>%
  select(subid, iat_score, lang, language_code, gender, condition, log_age, country, conservatism, status, children)
## Study1
# Read the Study 1 behavioral IAT file and bring it into the same
# participant-level layout as Study 2.
IAT_behavioral_path2 <- "../../data/IAT/Gender-Genius/IAT_Study1_Combined_Master_Dataset_LanguageData.csv"
IAT_behavioral_path_raw2 <- read_csv(file = IAT_behavioral_path2)

IAT_behavioral_tidy2 <- IAT_behavioral_path_raw2 %>%
  # Standardise column names first. Sexism, Race, PoliticalParty and Income
  # are currently not carried over.
  rename(
    iat_score    = IATScore,
    gender       = Gender,
    age          = Age,
    country      = Country,
    condition    = Condition,
    conservatism = Conservatism,
    status       = Status,
    children     = Children
  ) %>%
  # Lower-case the text fields and build the participant id.
  mutate(
    lang   = tolower(PrimaryLanguage),
    gender = tolower(gender),
    subid  = as.factor(SubjectID)
  ) %>%
  # Attach the language code for each language name.
  left_join(select(lang_key, language_code, lang)) %>%
  mutate_if(is.character, as.factor) %>%
  mutate(
    condition    = as.factor(condition),
    conservatism = as.factor(conservatism),
    children     = as.numeric(children),
    log_age      = log(age)
  ) %>%
  select(
    subid, iat_score, lang, language_code, gender, condition,
    log_age, country, conservatism, status, children
  )
# Stack the two studies into one participant-level table (Study 2 rows first).
IAT_behavioral_tidy <- list(IAT_behavioral_tidy1, IAT_behavioral_tidy2) %>%
  bind_rows()
#IAT_behavioral_tidy <- IAT_behavioral_tidy1
Let’s residualize out order, age, and gender, as in career, and add in median country age and objective bias measures.
# add residuals
# Participant-level covariate model: IAT score on gender, log age and
# condition. Its residuals become `iat_resid` below.
mod <- lm(
  iat_score ~ as.factor(gender) + log_age + as.factor(condition),
  data = IAT_behavioral_tidy
)

# Country-level age table; country names are recoded (new name = old name)
# before the table is joined on below.
AGE_DATA_PATH <- "../7_age_controls/median_country_age_world_factbook.csv"
country_age <- read_csv(AGE_DATA_PATH)
country_age <- rename(country_age, country = country_name)
country_age <- mutate(
  country_age,
  country = fct_recode(
    country,
    "United States" = "United States of America",
    "United Kingdom" = "UK",
    "Czech Republic" = "Czechia",
    "Tanzania, United Republic of" = "Tanzania",
    "Moldova, Republic of" = "Moldova",
    "Bahamas" = "Bahamas, The"
  )
)

# Country-level gender measures; only ggi and wps are kept.
GENDER_MEASURE_PATH <- "../../analyses/4_gender_measures/data/gender_measures/all_gender_measures2.csv"
objective_country_measures_by_country <- read_csv(GENDER_MEASURE_PATH)
objective_country_measures_by_country <- rename(
  objective_country_measures_by_country,
  country = country_name
)
objective_country_measures_by_country <- select(
  objective_country_measures_by_country,
  ggi, wps, country
)

# Participant table + model residuals + both country-level tables.
# The joins use the columns the tables have in common.
IAT_behavioral_tidy_with_resids <- add_residuals(
  IAT_behavioral_tidy, mod, "iat_resid"
)
IAT_behavioral_tidy_with_resids <- left_join(
  IAT_behavioral_tidy_with_resids, country_age
)
IAT_behavioral_tidy_with_resids <- left_join(
  IAT_behavioral_tidy_with_resids, objective_country_measures_by_country
)
There are 918 participants in the data, speaking 46 languages.
# Number of participants per language, shown largest first in an
# interactive table.
lang_counts <- IAT_behavioral_tidy_with_resids %>%
  count(lang)

DT::datatable(arrange(lang_counts, desc(n)))
Here are the scores, raw and residualized.
# Distribution of the raw behavioral IAT scores.
ggplot(IAT_behavioral_tidy_with_resids, aes(x = iat_score)) +
  geom_histogram() +
  ggtitle("Behavioral IAT score distributions (raw)") +
  theme_classic()

# Distribution of the residualized scores (iat_resid). The title now says so;
# previously both plots carried the same (misspelled) title.
ggplot(IAT_behavioral_tidy_with_resids, aes(x = iat_resid)) +
  geom_histogram() +
  ggtitle("Behavioral IAT score distributions (residualized)") +
  theme_classic()
I subset to only those languages that have sufficient speakers. Here the value is set to 8. Here are the mean behavioral IATs by language as a function of language IAT. The ranges are 95% CIs.
# Languages with at least MINPARTICIPANTS participants.
targ_langs <- pull(filter(lang_counts, n >= MINPARTICIPANTS), lang)

# Per-language mean of the raw IAT score with its interval
# (mean / ci_lower / ci_upper as returned by multi_boot_standard).
behavioral_means_tidy <- IAT_behavioral_tidy_with_resids %>%
  filter(lang %in% targ_langs) %>%
  group_by(lang) %>%
  multi_boot_standard(col = "iat_score") %>%
  ungroup()
behavioral_means_tidy <- rename(
  behavioral_means_tidy,
  behavioral_mean     = mean,
  behavioral_ci_lower = ci_lower,
  behavioral_ci_upper = ci_upper
)
behavioral_means_tidy <- left_join(
  behavioral_means_tidy,
  select(lang_key, language_code, lang)
)

# Participants per language (all languages), with the language code attached.
sample_sizes <- IAT_behavioral_tidy_with_resids %>%
  count(lang) %>%
  left_join(select(lang_key, language_code, lang))
# Language-level covariates: average the country-level values over all
# participants who speak each language.
#
# na.rm = TRUE: a participant whose country has no entry in the age / gender
# measure tables carries NA after the joins. Without na.rm a single such
# participant turns the whole language's value into NA, which is why most
# languages were dropped from the models that include wps. A language with no
# non-missing value at all yields NaN, which lm() still treats as missing.
median_age_by_lang <- IAT_behavioral_tidy_with_resids %>%
  group_by(language_code) %>%
  summarize(median_age = mean(median_age, na.rm = TRUE))
mean_objectives_by_lang <- IAT_behavioral_tidy_with_resids %>%
  select(language_code, wps, ggi) %>%
  group_by(language_code) %>%
  summarize(wps = mean(wps, na.rm = TRUE),
            ggi = mean(ggi, na.rm = TRUE))
# One row per target language: behavioral means plus, in this order, the
# language IAT effect sizes, sample sizes, age and gender measures, each
# left-joined on language_code.
full_df <- Reduce(
  function(acc, tbl) left_join(acc, tbl, by = "language_code"),
  list(
    IAT_lang_restricted,
    sample_sizes,
    median_age_by_lang,
    mean_objectives_by_lang
  ),
  behavioral_means_tidy
)

# Behavioral mean (with interval) against language IAT effect size, labelled
# by language code, with a linear fit and a dashed reference line at zero.
ggplot(
  full_df,
  aes(x = effect_size_restricted, y = behavioral_mean)
) +
  geom_pointrange(
    aes(
      color = language_code,
      ymin = behavioral_ci_lower,
      ymax = behavioral_ci_upper
    )
  ) +
  geom_text(aes(label = language_code)) +
  geom_smooth(method = "lm") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  labs(
    x = "Language IAT effect size",
    y = "Behavioral IAT effect size"
  ) +
  theme_classic() +
  theme(legend.position = "none")
Models
# Pearson correlation between by-language behavioral means and language IAT.
cor.test(
  x = full_df$behavioral_mean,
  y = full_df$effect_size_restricted
)
##
## Pearson's product-moment correlation
##
## data: full_df$behavioral_mean and full_df$effect_size_restricted
## t = 0.24281, df = 11, p-value = 0.8126
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4980061 0.5998667
## sample estimates:
## cor
## 0.07301368
# By-language regression: behavioral mean on language IAT and median_age.
summary(
  lm(behavioral_mean ~ effect_size_restricted + median_age, data = full_df)
)
##
## Call:
## lm(formula = behavioral_mean ~ effect_size_restricted + median_age,
## data = full_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.13906 -0.05073 -0.02720 0.06562 0.18831
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.474387 0.169597 -2.797 0.01889 *
## effect_size_restricted -0.032584 0.049825 -0.654 0.52788
## median_age 0.020922 0.004697 4.454 0.00123 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1065 on 10 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6667, Adjusted R-squared: 0.6
## F-statistic: 10 on 2 and 10 DF, p-value: 0.004115
# Same by-language regression with wps added as a further covariate.
summary(
  lm(
    behavioral_mean ~ effect_size_restricted + median_age + wps,
    data = full_df
  )
)
##
## Call:
## lm(formula = behavioral_mean ~ effect_size_restricted + median_age +
## wps, data = full_df)
##
## Residuals:
## 4 7 9 11 13
## -0.01948 -0.10272 0.01240 0.02055 0.08923
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.11959 2.72647 0.777 0.579
## effect_size_restricted 0.63331 0.82897 0.764 0.585
## median_age 0.09901 0.06823 1.451 0.384
## wps -7.68377 7.21104 -1.066 0.480
##
## Residual standard error: 0.1395 on 1 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.8804, Adjusted R-squared: 0.5217
## F-statistic: 2.454 on 3 and 1 DF, p-value: 0.4314
# Repeat the by-language summary on the residualized score (iat_resid).
# This overwrites behavioral_means_tidy and full_df from the raw-score pass.
behavioral_means_tidy <- IAT_behavioral_tidy_with_resids %>%
  filter(lang %in% targ_langs) %>%
  group_by(lang) %>%
  multi_boot_standard(col = "iat_resid") %>%
  ungroup()
behavioral_means_tidy <- rename(
  behavioral_means_tidy,
  behavioral_mean     = mean,
  behavioral_ci_lower = ci_lower,
  behavioral_ci_upper = ci_upper
)
behavioral_means_tidy <- left_join(
  behavioral_means_tidy,
  select(lang_key, language_code, lang)
)

# Re-attach the language-level tables, in the same order as before.
full_df <- left_join(behavioral_means_tidy, IAT_lang_restricted, by = "language_code")
full_df <- left_join(full_df, sample_sizes, by = "language_code")
full_df <- left_join(full_df, median_age_by_lang, by = "language_code")
full_df <- left_join(full_df, mean_objectives_by_lang, by = "language_code")

# Residualized behavioral mean (with interval) against language IAT.
ggplot(
  full_df,
  aes(x = effect_size_restricted, y = behavioral_mean)
) +
  geom_pointrange(
    aes(
      color = language_code,
      ymin = behavioral_ci_lower,
      ymax = behavioral_ci_upper
    )
  ) +
  geom_text(aes(label = language_code)) +
  geom_smooth(method = "lm") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  labs(
    x = "Language IAT effect size",
    y = "Behavioral IAT effect size"
  ) +
  theme_classic() +
  theme(legend.position = "none")
# Pearson correlation between residualized by-language means and language IAT.
cor.test(
  x = full_df$behavioral_mean,
  y = full_df$effect_size_restricted
)
##
## Pearson's product-moment correlation
##
## data: full_df$behavioral_mean and full_df$effect_size_restricted
## t = 0.18247, df = 11, p-value = 0.8585
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5115355 0.5881172
## sample estimates:
## cor
## 0.05493243
# By-language regression on the residualized means, controlling for
# median_age and wps.
summary(
  lm(
    behavioral_mean ~ effect_size_restricted + median_age + wps,
    data = full_df
  )
)
##
## Call:
## lm(formula = behavioral_mean ~ effect_size_restricted + median_age +
## wps, data = full_df)
##
## Residuals:
## 4 7 9 11 13
## -0.015625 -0.082407 0.009952 0.016490 0.071590
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.46706 2.18737 1.128 0.462
## effect_size_restricted 0.81180 0.66506 1.221 0.437
## median_age 0.10234 0.05474 1.870 0.313
## wps -8.68139 5.78522 -1.501 0.374
##
## Residual standard error: 0.1119 on 1 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.9021, Adjusted R-squared: 0.6085
## F-statistic: 3.072 on 3 and 1 DF, p-value: 0.3918
This is the same analysis as above, but at the participant level using mixed-effect models.
# Participant-level table for the mixed models: attach the language IAT effect
# size and z-score the continuous variables.
# scale() returns a one-column matrix (with centering/scaling attributes);
# as.numeric() strips that so the data frame holds plain numeric vectors
# instead of matrix columns. The z-scores themselves are unchanged.
by_participant_df <- IAT_behavioral_tidy_with_resids %>%
  left_join(IAT_lang_restricted, by = "language_code") %>%
  mutate(effect_size_restricted = as.numeric(scale(effect_size_restricted)),
         log_age = as.numeric(scale(log_age)),
         iat_score = as.numeric(scale(iat_score)),
         iat_resid = as.numeric(scale(iat_resid)),
         condition = as.factor(condition),
         median_age = as.numeric(scale(median_age)))

# Scaled behavioral IAT against scaled language IAT, one point per participant,
# with a linear fit and a dashed reference line at zero.
ggplot(by_participant_df, aes(x = effect_size_restricted,
                              y = iat_score)) +
  geom_point(aes(color = language_code)) +
  geom_smooth(method = "lm") +
  ylab("Behavioral IAT") +
  xlab("Language IAT") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  theme_classic() +
  theme(legend.position = "none")
In a mixed-effect model with country and language as random intercepts and gender and age as fixed effects, there’s no relationship between language IAT and behavioral IAT when you include all languages. All variables are scaled.
# Participant-level mixed model (all languages): language IAT plus covariates,
# random intercepts for country and language.
# conservatism is turned into a factor upstream; as.numeric() on a factor
# returns the internal level codes rather than the recorded values, so go via
# as.character() first.
# NOTE(review): assumes the conservatism levels are numeric strings -- confirm
# against the raw data.
lme4::lmer(
  iat_score ~ effect_size_restricted + gender + condition + children +
    log_age + median_age + wps + as.numeric(as.character(conservatism)) +
    (1 | country) + (1 | lang),
  by_participant_df
) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## iat_score ~ effect_size_restricted + gender + condition + children +
## log_age + median_age + wps + as.numeric(conservatism) + (1 |
## country) + (1 | lang)
## Data: by_participant_df
##
## REML criterion at convergence: 1095.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.65725 -0.64455 0.00685 0.70369 2.97554
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 0.000000 0.00000
## lang (Intercept) 0.001758 0.04193
## Residual 0.968997 0.98438
## Number of obs: 383, groups: country, 61; lang, 38
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -1.00319 0.70980 -1.413
## effect_size_restricted -0.01931 0.04619 -0.418
## gendermale 0.20312 0.11231 1.809
## condition1 0.59797 0.10115 5.912
## children -0.16573 0.07021 -2.360
## log_age 0.06761 0.07176 0.942
## median_age 0.04612 0.07983 0.578
## wps 0.98571 0.95177 1.036
## as.numeric(conservatism) -0.02201 0.02407 -0.914
##
## Correlation of Fixed Effects:
## (Intr) effc__ gndrml cndtn1 chldrn log_ag medn_g wps
## effct_sz_rs 0.266
## gendermale -0.095 0.043
## condition1 -0.076 0.020 0.020
## children -0.114 -0.062 0.105 -0.020
## log_age 0.092 0.055 -0.005 -0.027 -0.511
## median_age 0.819 0.126 0.018 -0.016 0.034 0.049
## wps -0.976 -0.242 -0.018 -0.002 0.076 -0.107 -0.847
## as.nmrc(cn) -0.184 -0.053 -0.046 0.010 0.003 0.011 0.068 0.051
# Participant-level mixed model (all languages) with a language IAT x gender
# interaction; wps is not included here.
# conservatism is turned into a factor upstream; as.numeric() on a factor
# returns the internal level codes rather than the recorded values, so go via
# as.character() first.
# NOTE(review): assumes the conservatism levels are numeric strings -- confirm
# against the raw data.
lme4::lmer(
  iat_score ~ effect_size_restricted * gender + condition + children +
    log_age + median_age + as.numeric(as.character(conservatism)) +
    (1 | country) + (1 | lang),
  by_participant_df
) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## iat_score ~ effect_size_restricted * gender + condition + children +
## log_age + median_age + as.numeric(conservatism) + (1 | country) +
## (1 | lang)
## Data: by_participant_df
##
## REML criterion at convergence: 2408.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9892 -0.6245 0.0520 0.6634 3.2377
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 0.00000 0.0000
## lang (Intercept) 0.02424 0.1557
## Residual 0.88021 0.9382
## Number of obs: 874, groups: country, 75; lang, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.444316 0.109252 -4.067
## effect_size_restricted -0.018699 0.071149 -0.263
## gendermale 0.329392 0.068250 4.826
## condition1 0.528208 0.064099 8.241
## children -0.042220 0.038789 -1.088
## log_age -0.070011 0.035814 -1.955
## median_age 0.120182 0.037158 3.234
## as.numeric(conservatism) 0.002541 0.016077 0.158
## effect_size_restricted:gendermale -0.034542 0.069046 -0.500
##
## Correlation of Fixed Effects:
## (Intr) effc__ gndrml cndtn1 chldrn log_ag medn_g as.n()
## effct_sz_rs 0.122
## gendermale -0.405 0.069
## condition1 -0.245 0.010 -0.034
## children -0.229 -0.018 0.215 -0.041
## log_age 0.108 0.040 -0.160 -0.029 -0.387
## median_age -0.123 -0.136 0.051 -0.020 -0.060 0.120
## as.nmrc(cn) -0.543 0.016 -0.066 -0.052 -0.055 -0.035 0.149
## effct_sz_r: 0.088 -0.670 -0.082 0.008 -0.019 -0.035 -0.017 -0.027
This is true even when you exclude participants whose language has fewer than 8 speakers in the sample.
# Languages with at least MINPARTICIPANTS participants.
targ_langs <- lang_counts %>%
  filter(n >= MINPARTICIPANTS) %>%
  pull(lang)

# Same main-effects mixed model, restricted to participants speaking one of
# the target languages.
# conservatism is turned into a factor upstream; as.numeric() on a factor
# returns the internal level codes rather than the recorded values, so go via
# as.character() first.
# NOTE(review): assumes the conservatism levels are numeric strings -- confirm
# against the raw data.
lme4::lmer(
  iat_score ~ effect_size_restricted + gender + condition + children +
    log_age + median_age + wps + as.numeric(as.character(conservatism)) +
    (1 | country) + (1 | lang),
  by_participant_df %>% filter(lang %in% targ_langs)
) %>%
  summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## iat_score ~ effect_size_restricted + gender + condition + children +
## log_age + median_age + wps + as.numeric(conservatism) + (1 |
## country) + (1 | lang)
## Data: by_participant_df %>% filter(lang %in% targ_langs)
##
## REML criterion at convergence: 956.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.64044 -0.65148 0.00661 0.70103 3.02795
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 2.217e-15 4.709e-08
## lang (Intercept) 0.000e+00 0.000e+00
## Residual 9.594e-01 9.795e-01
## Number of obs: 335, groups: country, 47; lang, 13
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.902588 0.764881 -1.180
## effect_size_restricted 0.004172 0.048279 0.086
## gendermale 0.181098 0.116884 1.549
## condition1 0.598277 0.107838 5.548
## children -0.164721 0.074562 -2.209
## log_age 0.081932 0.076238 1.075
## median_age 0.102368 0.086879 1.178
## wps 0.899488 1.020635 0.881
## as.numeric(conservatism) -0.020219 0.025424 -0.795
##
## Correlation of Fixed Effects:
## (Intr) effc__ gndrml cndtn1 chldrn log_ag medn_g wps
## effct_sz_rs 0.273
## gendermale -0.071 0.051
## condition1 -0.102 -0.029 -0.009
## children -0.148 -0.048 0.061 -0.030
## log_age 0.125 0.067 0.001 -0.014 -0.526
## median_age 0.828 0.143 0.033 -0.046 -0.002 0.077
## wps -0.977 -0.243 -0.031 0.021 0.119 -0.143 -0.850
## as.nmrc(cn) -0.210 -0.104 -0.043 0.043 0.008 0.002 0.045 0.075
# Target languages only: language IAT x gender interaction with condition,
# log_age, median_age and wps as covariates, and random intercepts for
# country and language.
summary(
  lme4::lmer(
    iat_score ~ effect_size_restricted * gender + condition + log_age +
      median_age + wps + (1 | country) + (1 | lang),
    by_participant_df %>% filter(lang %in% targ_langs)
  )
)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## iat_score ~ effect_size_restricted * gender + condition + log_age +
## median_age + wps + (1 | country) + (1 | lang)
## Data: by_participant_df %>% filter(lang %in% targ_langs)
##
## REML criterion at convergence: 977.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6479 -0.6434 0.0195 0.6822 3.1691
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 0.0000 0.0000
## lang (Intercept) 0.0000 0.0000
## Residual 0.9856 0.9928
## Number of obs: 341, groups: country, 47; lang, 13
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -1.10781 0.75174 -1.474
## effect_size_restricted 0.05934 0.08518 0.697
## gendermale 0.19080 0.12550 1.520
## condition1 0.59664 0.10857 5.496
## log_age -0.01555 0.06561 -0.237
## median_age 0.10968 0.08734 1.256
## wps 0.98006 1.02150 0.959
## effect_size_restricted:gendermale -0.08597 0.10104 -0.851
##
## Correlation of Fixed Effects:
## (Intr) effc__ gndrml cndtn1 log_ag medn_g wps
## effct_sz_rs 0.257
## gendermale -0.123 -0.274
## condition1 -0.123 -0.068 0.014
## log_age 0.062 0.031 0.039 -0.033
## median_age 0.868 0.154 -0.002 -0.069 0.092
## wps -0.986 -0.212 0.009 0.042 -0.101 -0.865
## effct_sz_r: -0.142 -0.823 0.367 0.059 -0.003 -0.087 0.100