Comparing language IATs

Distribution of proportion missing translations
Distribution of effect sizes by test.
Correlations between effect sizes

# Significance threshold used when starring/highlighting correlations.
ALPHA <- 0.05
# Upper bound on the proportion of missing translations for a language.
PROP_MISSING_CUTOFF <- 0.2

# Names of the measure columns that are correlated pairwise below.
MEASURES <- c(
  "career_google",
  "career_hand",
  "flowers_google",
  "career_behavioral_iat",
  "wps_index",
  "weapons_google"
)

Read in four sets of effect sizes: career (hand translations), career (google translations), flowers (google translations), and weapons (google translations).

We need to exclude some languages based on the mean proportion missing translations for each test. Here, the cutoff for proportion missing is 0.2. Note that correlations are starred/highlighted at the 0.05 level.

# Column names applied to every (header-less) effect-size CSV.
ES_COL_NAMES <- c("wiki_language_code", "test_id", "test_name", "es")

# Read one header-less effect-size CSV and tag every row with the test
# version it belongs to.
#   path    - location of the CSV file.
#   version - label stored in the new `test_version` column.
# Returns the data frame produced by read.csv() plus `test_version`.
read_effect_sizes <- function(path, version) {
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned.
  read.csv(path, col.names = ES_COL_NAMES, header = FALSE, fill = TRUE) %>%
    mutate(test_version = version)
}

career_hand <- read_effect_sizes(
  "/Users/mollylewis/Documents/research/Projects/IATLANG/writeup/cogsci2018/analysis/study2b/data/career_effect_sizes_hand_translations.csv",
  "career_hand"
)

career_google <- read_effect_sizes(
  "/Users/mollylewis/Documents/research/Projects/IATLANG/writeup/cogsci2018/analysis/study2b/google_translate_and_names_analysis/data/career_effect_sizes_google_translations.csv",
  "career_google"
)

flowers_google <- read_effect_sizes(
  "../data/flowers_effect_sizes_google.csv",
  "flowers_google"
)

weapons_google <- read_effect_sizes(
  "../data/weapons_effect_sizes_google.csv",
  "weapons_google"
)

# Lookup table read from language_names_to_wiki_codes.csv.
lang_codes <- read_csv("/Users/mollylewis/Documents/research/Projects/IATLANG/writeup/cogsci2018/analysis/study2b/data/language_names_to_wiki_codes.csv")

# One row per wiki language code with the behavioral IAT effect size and
# the wps index.
# NOTE(review): left_join() has no `by`, so it joins on whatever columns the
# two tables share - confirm the intended key.
behavioral_wps <- read_csv("/Users/mollylewis/Documents/research/Projects/IATLANG/writeup/cogsci2018/analysis/all/all_measures_df.csv") %>%
  left_join(lang_codes) %>%
  group_by(wiki_language_code) %>%
  summarise(
    # Mean of es_behavioral_iat weighted by normalized_n; missing effect
    # sizes are dropped. TRUE is spelled out because `T` can be reassigned.
    career_behavioral_iat = weighted.mean(es_behavioral_iat,
                                          normalized_n,
                                          na.rm = TRUE),
    # No na.rm here: a missing wps_index within a language yields NA.
    wps_index = mean(wps_index)
  )

Distribution of proportion missing translations

# Per-language proportion of missing Google translations, with the mean
# across the three tests, sorted from most to least missing.
prop_missing_raw <- read_csv("../data/prop_google_translate_missing.csv") %>%
  filter(language_code != "zh_yue") %>%
  # rowwise() makes mean() operate on the three values of each row.
  rowwise() %>%
  mutate(mean_prop_missing = mean(c(prop_missing_career,
                                    prop_missing_flowers,
                                    prop_missing_weapons))) %>%
  # Drop the row-wise grouping so it does not leak into every data frame
  # derived from this one.
  ungroup() %>%
  arrange(desc(mean_prop_missing))

# Histogram of the mean proportion of missing translations per language.
prop_missing_raw %>%
  ggplot(aes(mean_prop_missing)) +
  geom_histogram() +
  theme_classic()

Distribution of effect sizes by test.

# Keep a language only when each of the three tests is below the
# missing-translation cutoff (conditions passed to filter() are AND-ed).
# Alternatives left disabled:
#   filter(mean_prop_missing < PROP_MISSING_CUTOFF)
#   filter(!(language_code %in% c("vi")))
prop_missing <- filter(
  prop_missing_raw,
  prop_missing_career < PROP_MISSING_CUTOFF,
  prop_missing_flowers < PROP_MISSING_CUTOFF,
  prop_missing_weapons < PROP_MISSING_CUTOFF
)

# Stack the four effect-size tables, keep only languages that passed the
# missing-translation filter, and drop the test id/name columns.
all_es <- bind_rows(career_hand, career_google,
                    flowers_google, weapons_google) %>%
  filter(wiki_language_code %in% prop_missing$language_code) %>%
  select(-c(test_id, test_name))

# Overlaid, semi-transparent effect-size densities, one per test version.
all_es %>%
  ggplot(aes(x = es, fill = test_version)) +
  geom_density(alpha = 0.5) +
  theme_classic()

Weapons and flowers have much less spread than career.

Correlations between effect sizes

# Reshape to one column of effect sizes per test version, then attach the
# behavioral IAT and wps measures.
all_es_wide <- left_join(
  spread(all_es, key = test_version, value = es),
  behavioral_wps
)

# Every unordered pair of distinct measures, one row each, stored as
# (test1, test2) with test1 sorting before test2.
# Keeping only rows where test1 < test2 drops self-pairs and mirrored
# duplicates in a single step. This avoids sorting inside mutate(), where
# test1 would be overwritten before test2 is computed from it.
unique_pairs <- tidyr::crossing(test1 = MEASURES, test2 = MEASURES) %>%
  filter(test1 < test2)

#' Correlate two named columns of a data frame.
#'
#' @param test1 Name (string) of the first column in `df`.
#' @param test2 Name (string) of the second column in `df`.
#' @param df Data frame containing both columns.
#' @return The `tidy()` summary of `cor.test()` on the two columns, with the
#'   column names added as `test1` and `test2`.
get_corr <- function(test1, test2, df) {
  missing_cols <- setdiff(c(test1, test2), names(df))
  if (length(missing_cols) > 0) {
    stop("Column(s) not found in `df`: ",
         paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }

  # `[[` returns a plain vector for data frames and tibbles alike, which is
  # what cor.test() needs; `df[, 1]` on a tibble would stay a tibble.
  cor.test(df[[test1]], df[[test2]]) %>%
    tidy() %>%
    mutate(test1 = test1,
           test2 = test2)
}

# Correlation test for every measure pair, rendered as a table; pairs with
# p below ALPHA are starred.
with(unique_pairs, map2_df(test1, test2, get_corr, all_es_wide)) %>%
  transmute(test1, test2, estimate, statistic, p.value, parameter,
            sig = ifelse(p.value < ALPHA, "*", "")) %>%
  kable()

test1	test2	estimate	statistic	p.value	parameter	sig
career_behavioral_iat	career_google	0.2198718	1.0809194	0.2909371	23
career_behavioral_iat	career_hand	0.4862996	2.2261570	0.0407233	16	*
career_behavioral_iat	flowers_google	0.2170289	1.0662478	0.2973769	23
career_behavioral_iat	weapons_google	0.5435156	3.1053274	0.0049843	23	*
career_behavioral_iat	wps_index	0.4169983	2.1519198	0.0426391	22	*
career_google	career_hand	0.7110338	4.0448036	0.0009393	16	*
career_google	flowers_google	0.1572407	0.7635988	0.4528687	23
career_google	weapons_google	0.1591782	0.7732506	0.4472489	23
career_google	wps_index	0.3590211	1.8042482	0.0849024	22
career_hand	flowers_google	0.1634815	0.6628436	0.5168667	16
career_hand	weapons_google	0.4630674	2.0898354	0.0529579	16
career_hand	wps_index	0.2754849	1.1462951	0.2685280	16
flowers_google	weapons_google	0.4098315	2.1547536	0.0418896	23	*
flowers_google	wps_index	0.4576881	2.4144827	0.0245196	22	*
weapons_google	wps_index	0.4662374	2.4719648	0.0216503	22	*

# Scatter panel with a fitted lm line, used as a panel function in ggpairs().
#   data    - data frame to plot.
#   mapping - aesthetic mapping for the panel.
#   z       - accepted but not used.
# Returns the ggplot object.
corr_plot <- function(data, mapping, z) {
  ggplot(data = data, mapping = mapping) +
    geom_point(size = .4) +
    geom_smooth(method = lm, color = "blue", alpha = .3) +
    theme_minimal()
}

# Pairs plot over columns 2 to 7 of the wide table; the lower-triangle
# panels are drawn by corr_plot().
GGally::ggpairs(
  data = all_es_wide,
  columns = 2:7,
  lower = list(continuous = corr_plot)
)

# Correlation matrix over every column except the first, using all
# pairwise-complete observations.
corr_mat <- cor(all_es_wide[, -1], use = "pairwise.complete.obs")

# Matching matrix of p-values.
# NOTE(review): `use` is forwarded through cor.mtest(); presumably it has no
# effect on the underlying test - confirm.
p.mat <- cor.mtest(
  all_es_wide[, -1],
  conf.level = 1 - ALPHA,
  use = "pairwise.complete.obs"
)$p

# 100-step red-white-blue ramp, reversed.
cols <- rev(colorRampPalette(c("red", "white", "blue"))(100))

# Upper-triangle heat map with coefficients printed; cells whose p-value is
# not below ALPHA are left blank.
corrplot(
  corr_mat,
  method = "color", type = "upper", order = "original", diag = FALSE,
  col = cols,
  addCoef.col = "black", number.cex = .7,
  p.mat = p.mat, sig.level = ALPHA, insig = "blank",
  tl.col = "black", tl.srt = 90
)

# Behavioral IAT regressed on the flowers and career (Google) effect sizes.
summary(
  lm(career_behavioral_iat ~ flowers_google + career_google,
     data = all_es_wide)
)

## 
## Call:
## lm(formula = career_behavioral_iat ~ flowers_google + career_google, 
##     data = all_es_wide)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.11704 -0.04218 -0.00794  0.02862  0.12723 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.97107    0.07818  12.421 2.04e-11 ***
## flowers_google  0.05796    0.06407   0.905    0.375    
## career_google   0.04147    0.04503   0.921    0.367    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07162 on 22 degrees of freedom
## Multiple R-squared:  0.08248,    Adjusted R-squared:  -0.0009333 
## F-statistic: 0.9888 on 2 and 22 DF,  p-value: 0.388

# Career (hand) effect size regressed on wps_index and flowers (Google).
summary(
  lm(career_hand ~ wps_index + flowers_google,
     data = all_es_wide)
)

## 
## Call:
## lm(formula = career_hand ~ wps_index + flowers_google, data = all_es_wide)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38904 -0.24871  0.08452  0.22509  0.43438 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)     -0.1299     0.5188  -0.250    0.806
## wps_index        0.6498     0.7171   0.906    0.379
## flowers_google   0.0533     0.3609   0.148    0.885
## 
## Residual standard error: 0.2976 on 15 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.07723,    Adjusted R-squared:  -0.0458 
## F-statistic: 0.6277 on 2 and 15 DF,  p-value: 0.5473

# Career (Google) effect size regressed on wps_index and flowers (Google).
summary(
  lm(career_google ~ wps_index + flowers_google,
     data = all_es_wide)
)

## 
## Call:
## lm(formula = career_google ~ wps_index + flowers_google, data = all_es_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7911 -0.2603  0.0159  0.2409  0.6083 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)    -0.47465    0.53515  -0.887    0.385
## wps_index       1.14123    0.73929   1.544    0.138
## flowers_google  0.01821    0.35128   0.052    0.959
## 
## Residual standard error: 0.328 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.129,  Adjusted R-squared:  0.04606 
## F-statistic: 1.555 on 2 and 21 DF,  p-value: 0.2345

# Career (hand) effect size regressed on wps_index and weapons (Google).
summary(
  lm(career_hand ~ wps_index + weapons_google,
     data = all_es_wide)
)

## 
## Call:
## lm(formula = career_hand ~ wps_index + weapons_google, data = all_es_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3787 -0.1703  0.0016  0.1536  0.5763 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)     -0.7813     0.6097  -1.281    0.219
## wps_index        0.1267     0.6783   0.187    0.854
## weapons_google   0.8398     0.5124   1.639    0.122
## 
## Residual standard error: 0.2743 on 15 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.2163, Adjusted R-squared:  0.1118 
## F-statistic: 2.069 on 2 and 15 DF,  p-value: 0.1608

# Career (Google) effect size regressed on wps_index and weapons (Google).
summary(
  lm(career_google ~ wps_index + weapons_google,
     data = all_es_wide)
)

## 
## Call:
## lm(formula = career_google ~ wps_index + weapons_google, data = all_es_wide)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78932 -0.19713  0.04308  0.23934  0.56139 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)     -0.7799     0.6377  -1.223    0.235
## wps_index        0.8884     0.7322   1.213    0.238
## weapons_google   0.3942     0.4978   0.792    0.437
## 
## Residual standard error: 0.3233 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1542, Adjusted R-squared:  0.0736 
## F-statistic: 1.914 on 2 and 21 DF,  p-value: 0.1724

# Career (hand) effect size regressed on wps_index, weapons and flowers.
summary(
  lm(career_hand ~ wps_index + weapons_google + flowers_google,
     data = all_es_wide)
)

## 
## Call:
## lm(formula = career_hand ~ wps_index + weapons_google + flowers_google, 
##     data = all_es_wide)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37978 -0.17125  0.00525  0.15528  0.57533 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)    -0.77324    0.64114  -1.206    0.248
## wps_index       0.14637    0.75474   0.194    0.849
## weapons_google  0.84521    0.53571   1.578    0.137
## flowers_google -0.02473    0.34778  -0.071    0.944
## 
## Residual standard error: 0.2839 on 14 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.2165, Adjusted R-squared:  0.04865 
## F-statistic:  1.29 on 3 and 14 DF,  p-value: 0.3166

# Career (Google) effect size regressed on wps_index, weapons and flowers.
summary(
  lm(career_google ~ wps_index + weapons_google + flowers_google,
     data = all_es_wide)
)

## 
## Call:
## lm(formula = career_google ~ wps_index + weapons_google + flowers_google, 
##     data = all_es_wide)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78813 -0.19430  0.04103  0.23423  0.56060 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)    -0.79331    0.67910  -1.168    0.256
## wps_index       0.86302    0.82836   1.042    0.310
## weapons_google  0.39524    0.51026   0.775    0.448
## flowers_google  0.02567    0.35481   0.072    0.943
## 
## Residual standard error: 0.3312 on 20 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1544, Adjusted R-squared:  0.02753 
## F-statistic: 1.217 on 3 and 20 DF,  p-value: 0.3293

Comparing language IATs

Molly Lewis

2018-02-08

Distribution of proportion missing translations

Distribution of effect sizes by test.

Correlations between effect sizes