# Significance threshold used when starring/highlighting correlations.
ALPHA <- 0.05

# A language is kept only while each test's proportion of missing
# translations stays below this cutoff.
PROP_MISSING_CUTOFF <- 0.2

# Measures that are correlated with one another pairwise.
MEASURES <- c(
  "career_google", "career_hand", "flowers_google",
  "career_behavioral_iat", "wps_index", "weapons_google"
)
Read in four sets of effect sizes: career (hand translations), career (google translations), flowers (google translations), and weapons (google translations).
We need to exclude some languages based on the mean proportion of missing translations for each test. Here, the cutoff for the proportion missing is 0.2. Note that correlations are starred/highlighted at the 0.05 level.
# Read one headerless effect-size CSV and tag every row with the test version
# it belongs to.
#
# path:          location of the CSV file.
# version_label: value stored in the `test_version` column of the result.
#
# Returns the data frame produced by read.csv() with the columns
# wiki_language_code, test_id, test_name and es, plus test_version.
read_effect_sizes <- function(path, version_label) {
  read.csv(
    path,
    col.names = c("wiki_language_code", "test_id", "test_name", "es"),
    header = FALSE, # spelled out: `F` is an ordinary, reassignable variable
    fill = TRUE
  ) %>%
    mutate(test_version = version_label)
}

# Career effect sizes computed from hand translations.
career_hand <- read_effect_sizes(
  "/Users/mollylewis/Documents/research/Projects/IATLANG/writeup/cogsci2018/analysis/study2b/data/career_effect_sizes_hand_translations.csv",
  "career_hand"
)

# Career, flowers and weapons effect sizes computed from Google translations.
career_google <- read_effect_sizes(
  "/Users/mollylewis/Documents/research/Projects/IATLANG/writeup/cogsci2018/analysis/study2b/google_translate_and_names_analysis/data/career_effect_sizes_google_translations.csv",
  "career_google"
)
flowers_google <- read_effect_sizes(
  "../data/flowers_effect_sizes_google.csv",
  "flowers_google"
)
weapons_google <- read_effect_sizes(
  "../data/weapons_effect_sizes_google.csv",
  "weapons_google"
)
# Lookup table read from language_names_to_wiki_codes.csv.
lang_codes <- read_csv("/Users/mollylewis/Documents/research/Projects/IATLANG/writeup/cogsci2018/analysis/study2b/data/language_names_to_wiki_codes.csv")

# One row per wiki_language_code: the weighted mean of es_behavioral_iat
# (weights: normalized_n) and the plain mean of wps_index.
behavioral_wps <- read_csv("/Users/mollylewis/Documents/research/Projects/IATLANG/writeup/cogsci2018/analysis/all/all_measures_df.csv") %>%
  # No `by` is given, so the join uses every column name the two tables share.
  left_join(lang_codes) %>%
  group_by(wiki_language_code) %>%
  summarise(
    career_behavioral_iat = weighted.mean(
      es_behavioral_iat,
      normalized_n,
      # `TRUE`, not `T`: `T` is a variable that can be reassigned. Note that
      # na.rm only strips missing effect sizes, not missing weights.
      na.rm = TRUE
    ),
    # No na.rm here: a missing wps_index within a language yields NA.
    wps_index = mean(wps_index)
  )
# Proportion of missing Google translations per language, with "zh_yue"
# removed and the three per-test proportions averaged into mean_prop_missing.
# Rows are sorted from the highest mean proportion missing to the lowest.
prop_missing_raw <- read_csv("../data/prop_google_translate_missing.csv") %>%
  filter(language_code != "zh_yue") %>%
  # rowwise() makes mean() operate on the three values of a single row.
  rowwise() %>%
  mutate(
    mean_prop_missing = mean(
      c(prop_missing_career, prop_missing_flowers, prop_missing_weapons)
    )
  ) %>%
  # Drop the row-wise grouping so it does not leak into later steps.
  ungroup() %>%
  arrange(-mean_prop_missing)
# Histogram of each language's mean proportion of missing translations.
ggplot(data = prop_missing_raw, mapping = aes(x = mean_prop_missing)) +
  geom_histogram() +
  theme_classic()
# Keep only the languages whose proportion of missing translations is below
# the cutoff on every one of the three tests. filter() combines its
# comma-separated conditions with a logical AND.
prop_missing <- filter(
  prop_missing_raw,
  prop_missing_career < PROP_MISSING_CUTOFF,
  prop_missing_flowers < PROP_MISSING_CUTOFF,
  prop_missing_weapons < PROP_MISSING_CUTOFF
)
# Alternative criteria that are currently disabled:
#   filter(mean_prop_missing < PROP_MISSING_CUTOFF)
#   filter(!(language_code %in% c("vi")))
# Stack the four effect-size tables, drop the test identifier columns, and
# keep only the languages that passed the missing-translation filter.
all_es <- bind_rows(
  career_hand,
  career_google,
  flowers_google,
  weapons_google
) %>%
  select(-c(test_id, test_name)) %>%
  filter(wiki_language_code %in% prop_missing$language_code)
# Overlaid, semi-transparent densities of the effect sizes, one per test
# version.
ggplot(data = all_es, mapping = aes(x = es, fill = test_version)) +
  geom_density(alpha = 0.5) +
  theme_classic()
Weapons and flowers have much less spread than career.
# Reshape to one row per language with one column per test version, then add
# the behavioral IAT and wps_index columns for each language.
all_es_wide <- all_es %>%
  spread(test_version, es) %>%
  # Name the join key explicitly (it is the only column the two tables share)
  # rather than letting dplyr infer it and print a "Joining, by" message.
  left_join(behavioral_wps, by = "wiki_language_code")
# Every unordered pair of distinct measures, one row per pair, with the
# alphabetically smaller name in `test1`.
#
# The columns are named directly in crossing(): two unnamed `MEASURES` inputs
# would both be auto-named "MEASURES", and duplicate column names may be
# rejected by newer tidyr versions.
#
# Keeping the rows with test1 < test2 drops self-pairs and mirrored
# duplicates in one step. It also avoids sorting inside a sequential mutate(),
# where `test1` is overwritten before `test2` is computed from it.
unique_pairs <- tidyr::crossing(test1 = MEASURES, test2 = MEASURES) %>%
  filter(test1 < test2)
# Correlate two columns of `df` and return the tidied test result.
#
# test1, test2: column names (character strings) of the measures to correlate.
# df:           data frame or tibble containing both columns.
#
# Returns the table produced by tidy() on the cor.test() result, with the two
# column names appended as `test1` and `test2`.
get_corr <- function(test1, test2, df) {
  # `[[` extracts a plain vector from data frames and tibbles alike, whereas
  # `df[, 1]` stays a one-column tibble when `df` is a tibble. It also avoids
  # selecting columns through bare character variables.
  cor.test(df[[test1]], df[[test2]]) %>%
    tidy() %>%
    mutate(test1 = test1, test2 = test2)
}
# Correlate every unique pair of measures and render the results as a table,
# starring the pairs whose p-value is below ALPHA.
map2_df(
  unique_pairs$test1,
  unique_pairs$test2,
  get_corr,
  df = all_es_wide
) %>%
  mutate(sig = ifelse(p.value < ALPHA, "*", "")) %>%
  select(test1, test2, estimate, statistic, p.value, parameter, sig) %>%
  kable()
| test1 | test2 | estimate | statistic | p.value | parameter | sig |
|---|---|---|---|---|---|---|
| career_behavioral_iat | career_google | 0.2198718 | 1.0809194 | 0.2909371 | 23 | |
| career_behavioral_iat | career_hand | 0.4862996 | 2.2261570 | 0.0407233 | 16 | * |
| career_behavioral_iat | flowers_google | 0.2170289 | 1.0662478 | 0.2973769 | 23 | |
| career_behavioral_iat | weapons_google | 0.5435156 | 3.1053274 | 0.0049843 | 23 | * |
| career_behavioral_iat | wps_index | 0.4169983 | 2.1519198 | 0.0426391 | 22 | * |
| career_google | career_hand | 0.7110338 | 4.0448036 | 0.0009393 | 16 | * |
| career_google | flowers_google | 0.1572407 | 0.7635988 | 0.4528687 | 23 | |
| career_google | weapons_google | 0.1591782 | 0.7732506 | 0.4472489 | 23 | |
| career_google | wps_index | 0.3590211 | 1.8042482 | 0.0849024 | 22 | |
| career_hand | flowers_google | 0.1634815 | 0.6628436 | 0.5168667 | 16 | |
| career_hand | weapons_google | 0.4630674 | 2.0898354 | 0.0529579 | 16 | |
| career_hand | wps_index | 0.2754849 | 1.1462951 | 0.2685280 | 16 | |
| flowers_google | weapons_google | 0.4098315 | 2.1547536 | 0.0418896 | 23 | * |
| flowers_google | wps_index | 0.4576881 | 2.4144827 | 0.0245196 | 22 | * |
| weapons_google | wps_index | 0.4662374 | 2.4719648 | 0.0216503 | 22 | * |
# Panel function for a scatterplot matrix: small points with a linear-model
# smoother drawn in blue on a minimal theme.
#
# data:    data frame to plot.
# mapping: aesthetic mapping for the panel.
# z:       accepted but not used by this function.
#
# Returns the ggplot object for the panel.
corr_plot <- function(data, mapping, z) {
  ggplot(data = data, mapping = mapping) +
    geom_point(size = .4) +
    geom_smooth(method = lm, color = "blue", alpha = .3) +
    theme_minimal()
}
# Scatterplot matrix of columns 2 through 7; the lower-triangle panels for
# continuous variables are drawn by corr_plot().
GGally::ggpairs(
  data = all_es_wide,
  columns = 2:7,
  lower = list(continuous = corr_plot)
)
# Pairwise correlations between all columns except the first, each computed
# from the rows where both columns are present.
corr_mat <- cor(
  all_es_wide[, -1],
  use = "pairwise.complete.obs"
)

# Matching matrix of p-values from cor.mtest().
p.mat <- cor.mtest(
  all_es_wide[, -1],
  conf.level = 1 - ALPHA,
  use = "pairwise.complete.obs"
)$p

# 100-step red-white-blue ramp, reversed so it runs blue -> white -> red.
cols <- rev(colorRampPalette(c("red", "white", "blue"))(100))

# Upper-triangle heat map with the coefficients printed in black; cells whose
# p-value exceeds ALPHA are left blank.
corrplot(
  corr_mat,
  method = "color",
  col = cols,
  type = "upper",
  order = "original",
  number.cex = .7,
  addCoef.col = "black",
  p.mat = p.mat,
  sig.level = ALPHA,
  insig = "blank",
  tl.col = "black",
  tl.srt = 90,
  diag = FALSE
)
# Linear model of career_behavioral_iat on flowers_google and career_google;
# print the fit summary.
summary(
  lm(career_behavioral_iat ~ flowers_google + career_google,
     data = all_es_wide)
)
##
## Call:
## lm(formula = career_behavioral_iat ~ flowers_google + career_google,
## data = all_es_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.11704 -0.04218 -0.00794 0.02862 0.12723
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.97107 0.07818 12.421 2.04e-11 ***
## flowers_google 0.05796 0.06407 0.905 0.375
## career_google 0.04147 0.04503 0.921 0.367
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07162 on 22 degrees of freedom
## Multiple R-squared: 0.08248, Adjusted R-squared: -0.0009333
## F-statistic: 0.9888 on 2 and 22 DF, p-value: 0.388
# Linear model of career_hand on wps_index and flowers_google; print the fit
# summary.
summary(
  lm(career_hand ~ wps_index + flowers_google, data = all_es_wide)
)
##
## Call:
## lm(formula = career_hand ~ wps_index + flowers_google, data = all_es_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38904 -0.24871 0.08452 0.22509 0.43438
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1299 0.5188 -0.250 0.806
## wps_index 0.6498 0.7171 0.906 0.379
## flowers_google 0.0533 0.3609 0.148 0.885
##
## Residual standard error: 0.2976 on 15 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.07723, Adjusted R-squared: -0.0458
## F-statistic: 0.6277 on 2 and 15 DF, p-value: 0.5473
# Linear model of career_google on wps_index and flowers_google; print the
# fit summary.
summary(
  lm(career_google ~ wps_index + flowers_google, data = all_es_wide)
)
##
## Call:
## lm(formula = career_google ~ wps_index + flowers_google, data = all_es_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7911 -0.2603 0.0159 0.2409 0.6083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.47465 0.53515 -0.887 0.385
## wps_index 1.14123 0.73929 1.544 0.138
## flowers_google 0.01821 0.35128 0.052 0.959
##
## Residual standard error: 0.328 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.129, Adjusted R-squared: 0.04606
## F-statistic: 1.555 on 2 and 21 DF, p-value: 0.2345
# Linear model of career_hand on wps_index and weapons_google; print the fit
# summary.
summary(
  lm(career_hand ~ wps_index + weapons_google, data = all_es_wide)
)
##
## Call:
## lm(formula = career_hand ~ wps_index + weapons_google, data = all_es_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3787 -0.1703 0.0016 0.1536 0.5763
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.7813 0.6097 -1.281 0.219
## wps_index 0.1267 0.6783 0.187 0.854
## weapons_google 0.8398 0.5124 1.639 0.122
##
## Residual standard error: 0.2743 on 15 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.2163, Adjusted R-squared: 0.1118
## F-statistic: 2.069 on 2 and 15 DF, p-value: 0.1608
# Linear model of career_google on wps_index and weapons_google; print the
# fit summary.
summary(
  lm(career_google ~ wps_index + weapons_google, data = all_es_wide)
)
##
## Call:
## lm(formula = career_google ~ wps_index + weapons_google, data = all_es_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78932 -0.19713 0.04308 0.23934 0.56139
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.7799 0.6377 -1.223 0.235
## wps_index 0.8884 0.7322 1.213 0.238
## weapons_google 0.3942 0.4978 0.792 0.437
##
## Residual standard error: 0.3233 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1542, Adjusted R-squared: 0.0736
## F-statistic: 1.914 on 2 and 21 DF, p-value: 0.1724
# Linear model of career_hand on wps_index, weapons_google and
# flowers_google; print the fit summary.
summary(
  lm(career_hand ~ wps_index + weapons_google + flowers_google,
     data = all_es_wide)
)
##
## Call:
## lm(formula = career_hand ~ wps_index + weapons_google + flowers_google,
## data = all_es_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37978 -0.17125 0.00525 0.15528 0.57533
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.77324 0.64114 -1.206 0.248
## wps_index 0.14637 0.75474 0.194 0.849
## weapons_google 0.84521 0.53571 1.578 0.137
## flowers_google -0.02473 0.34778 -0.071 0.944
##
## Residual standard error: 0.2839 on 14 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.2165, Adjusted R-squared: 0.04865
## F-statistic: 1.29 on 3 and 14 DF, p-value: 0.3166
# Linear model of career_google on wps_index, weapons_google and
# flowers_google; print the fit summary.
summary(
  lm(career_google ~ wps_index + weapons_google + flowers_google,
     data = all_es_wide)
)
##
## Call:
## lm(formula = career_google ~ wps_index + weapons_google + flowers_google,
## data = all_es_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78813 -0.19430 0.04103 0.23423 0.56060
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.79331 0.67910 -1.168 0.256
## wps_index 0.86302 0.82836 1.042 0.310
## weapons_google 0.39524 0.51026 0.775 0.448
## flowers_google 0.02567 0.35481 0.072 0.943
##
## Residual standard error: 0.3312 on 20 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1544, Adjusted R-squared: 0.02753
## F-statistic: 1.217 on 3 and 20 DF, p-value: 0.3293