# Significance threshold used when starring / blanking correlations.
ALPHA <- 0.05

# Proportion of missing translations compared against when filtering languages.
PROP_MISSING_CUTOFF <- 0.2

# Names of the measures that are correlated pairwise later in the analysis.
MEASURES <- c(
  "career_google",
  "career_hand",
  "flowers_google",
  "career_behavioral_iat",
  "wps_index",
  "weapons_google"
)

Read in four sets of effect sizes: career (hand translations), career (google translations), flowers (google translations), and weapons (google translations).

We need to exclude languages with too many missing translations. A language is kept only if its proportion of missing translations is below the cutoff (0.2) on every test. Note that correlations are starred/highlighted at the 0.05 level.

# Column layout shared by every header-less effect-size CSV read below.
ES_COL_NAMES <- c("wiki_language_code", "test_id", "test_name", "es")

# Read one header-less effect-size CSV and label its rows.
#
# path:    location of the CSV file.
# version: label written to the `test_version` column of every row.
# Returns the data frame produced by read.csv() with `test_version` appended.
read_effect_sizes <- function(path, version) {
  read.csv(path, col.names = ES_COL_NAMES, header = FALSE, fill = TRUE) %>%
    mutate(test_version = version)
}

# NOTE(review): the two career files use absolute, machine-specific paths while
# the flowers/weapons files use paths relative to the working directory.
career_hand <- read_effect_sizes(
  "/Users/mollylewis/Documents/research/Projects/IATLANG/writeup/cogsci2018/analysis/study2b/data/career_effect_sizes_hand_translations.csv",
  "career_hand"
)

career_google <- read_effect_sizes(
  "/Users/mollylewis/Documents/research/Projects/IATLANG/writeup/cogsci2018/analysis/study2b/google_translate_and_names_analysis/data/career_effect_sizes_google_translations.csv",
  "career_google"
)

flowers_google <- read_effect_sizes(
  "../data/flowers_effect_sizes_google.csv",
  "flowers_google"
)

weapons_google <- read_effect_sizes(
  "../data/weapons_effect_sizes_google.csv",
  "weapons_google"
)
# Lookup table that is joined onto the measures table below.
lang_codes <- read_csv("/Users/mollylewis/Documents/research/Projects/IATLANG/writeup/cogsci2018/analysis/study2b/data/language_names_to_wiki_codes.csv")

# One row per wiki language code with two language-level measures:
#   career_behavioral_iat - mean of es_behavioral_iat weighted by normalized_n
#                           (NA effect sizes are dropped before averaging)
#   wps_index             - plain mean of wps_index; no na.rm, so any NA within
#                           a language makes that language's value NA
behavioral_wps <- read_csv("/Users/mollylewis/Documents/research/Projects/IATLANG/writeup/cogsci2018/analysis/all/all_measures_df.csv") %>%
  # No `by` is given, so the join uses every column name the two tables share.
  # NOTE(review): the key column is not visible here - confirm and make explicit.
  left_join(lang_codes) %>%
  group_by(wiki_language_code) %>%
  summarise(
    career_behavioral_iat = weighted.mean(es_behavioral_iat,
                                          normalized_n,
                                          na.rm = TRUE),
    wps_index = mean(wps_index)
  )

Distribution of proportion missing translations

# Proportion of missing Google translations per language, with "zh_yue" removed,
# plus the mean across the three tests; sorted with the worst languages first.
#
# The mean is computed with a vectorised rowMeans() rather than rowwise(), so
# the result is an ordinary (not row-wise grouped) table and later verbs that
# consume it are not silently evaluated row by row.
prop_missing_raw <- read_csv("../data/prop_google_translate_missing.csv") %>%
  filter(language_code != "zh_yue") %>%
  mutate(
    mean_prop_missing = rowMeans(cbind(prop_missing_career,
                                       prop_missing_flowers,
                                       prop_missing_weapons))
  ) %>%
  arrange(desc(mean_prop_missing))

# Histogram of the per-language mean proportion missing.
ggplot(prop_missing_raw, aes(x = mean_prop_missing)) +
  geom_histogram() +
  theme_classic()

Distribution of effect sizes by test.

# Keep only languages whose proportion of missing translations is below the
# cutoff on each of the three tests (career, flowers, weapons).
# Alternatives tried previously and left disabled:
#   filter(mean_prop_missing < PROP_MISSING_CUTOFF)
#   filter(!(language_code %in% c("vi")))
prop_missing <- filter(
  prop_missing_raw,
  prop_missing_career < PROP_MISSING_CUTOFF,
  prop_missing_flowers < PROP_MISSING_CUTOFF,
  prop_missing_weapons < PROP_MISSING_CUTOFF
)

# Stack the four effect-size tables, drop the test identifier columns, and keep
# only the languages that passed the missing-translation filter.
all_es <- bind_rows(career_hand,
                    career_google,
                    flowers_google,
                    weapons_google) %>%
  select(-c(test_id, test_name)) %>%
  filter(wiki_language_code %in% prop_missing$language_code)

# Overlaid density of effect sizes, one fill colour per test version.
ggplot(data = all_es, mapping = aes(x = es, fill = test_version)) +
  geom_density(alpha = 0.5) +
  theme_classic()

Weapons and flowers have much less spread than career.

Correlations between effect sizes

# Reshape to one row per language with one column per test version, then attach
# the behavioral IAT and wps_index measures.
#
# spread() is kept deliberately: it orders the new columns alphabetically, and
# later code selects columns by position.
# The join key is spelled out instead of relying on the implicit natural join;
# wiki_language_code is the only column the two tables have in common.
all_es_wide <- all_es %>%
  spread(test_version, es) %>%
  left_join(behavioral_wps, by = "wiki_language_code")

# Every unordered pair of distinct measures, one row per pair, with
# test1 sorting before test2.
#
# crossing() yields all ordered combinations (sorted, de-duplicated); keeping
# only rows where test1 < test2 drops the self-pairs and the mirrored
# duplicates in one step. The columns are named in the call so the result does
# not depend on how unnamed, identical arguments get auto-named.
unique_pairs <- tidyr::crossing(test1 = MEASURES, test2 = MEASURES) %>%
  dplyr::filter(test1 < test2)

# Correlation test between two columns of a data frame.
#
# test1, test2: column names (character strings) of the two measures in `df`.
# df:           data frame holding both columns.
# Returns the one-row tidy() summary of cor.test() for the two columns, with
# the two column names appended as `test1` and `test2`.
get_corr <- function(test1, test2, df){
  # `[[` looks the columns up by string name and always returns a plain vector,
  # for base data frames and tibbles alike.
  result <- tidy(cor.test(df[[test1]], df[[test2]]))
  result$test1 <- test1
  result$test2 <- test2
  result
}

# Run get_corr() on every measure pair, keep the key statistics, star the
# correlations with p < ALPHA, and render the result as a table.
map2_df(
  .x = unique_pairs$test1,
  .y = unique_pairs$test2,
  .f = get_corr,
  df = all_es_wide
) %>%
  select(test1, test2, estimate, statistic, p.value, parameter) %>%
  mutate(sig = ifelse(p.value < ALPHA, "*", "")) %>%
  kable()
test1 test2 estimate statistic p.value parameter sig
career_behavioral_iat career_google 0.2198718 1.0809194 0.2909371 23
career_behavioral_iat career_hand 0.4862996 2.2261570 0.0407233 16 *
career_behavioral_iat flowers_google 0.2170289 1.0662478 0.2973769 23
career_behavioral_iat weapons_google 0.5435156 3.1053274 0.0049843 23 *
career_behavioral_iat wps_index 0.4169983 2.1519198 0.0426391 22 *
career_google career_hand 0.7110338 4.0448036 0.0009393 16 *
career_google flowers_google 0.1572407 0.7635988 0.4528687 23
career_google weapons_google 0.1591782 0.7732506 0.4472489 23
career_google wps_index 0.3590211 1.8042482 0.0849024 22
career_hand flowers_google 0.1634815 0.6628436 0.5168667 16
career_hand weapons_google 0.4630674 2.0898354 0.0529579 16
career_hand wps_index 0.2754849 1.1462951 0.2685280 16
flowers_google weapons_google 0.4098315 2.1547536 0.0418896 23 *
flowers_google wps_index 0.4576881 2.4144827 0.0245196 22 *
weapons_google wps_index 0.4662374 2.4719648 0.0216503 22 *
# Panel function for a scatter plot with a linear fit.
#
# data:    data frame to plot.
# mapping: aesthetic mapping for the panel.
# z:       accepted but not used by the body.
# Returns a ggplot object: small points, a blue lm smoother with a translucent
# band, and the minimal theme.
corr_plot <- function(data, mapping, z){
  ggplot(data = data, mapping = mapping) +
    geom_point(size = .4) +
    geom_smooth(method = lm, color = "blue", alpha = .3) +
    theme_minimal()
}

# Pairs plot of columns 2 through 7 of all_es_wide; the lower-triangle panels
# for continuous variables are drawn with corr_plot().
GGally::ggpairs(
  data = all_es_wide,
  columns = 2:7,
  lower = list(continuous = corr_plot)
)

# Pairwise correlation matrix of every column except the first.
corr_mat <- cor(all_es_wide[, -1],
                use = "pairwise.complete.obs")

# Matching matrix of p-values. cor.mtest() forwards extra arguments to
# cor.test(), which has no `use` argument and already tests each pair on its
# complete cases, so `use` is not passed here.
p.mat <- cor.mtest(all_es_wide[, -1],
                   conf.level = (1 - ALPHA))$p

# 100-step palette; rev() reverses the red-white-blue ramp.
cols <- rev(colorRampPalette(c("red", "white", "blue"))(100))

# Upper-triangle heat map with coefficients printed; cells whose p-value is
# above ALPHA are left blank.
corrplot(corr_mat, method = "color", col = cols,
         type = "upper", order = "original", number.cex = .7,
         addCoef.col = "black",
         p.mat = p.mat, sig.level = ALPHA, insig = "blank",
         tl.col = "black", tl.srt = 90,
         diag = FALSE)

# Behavioral career IAT regressed on the flowers and career (Google) effects.
summary(lm(career_behavioral_iat ~ flowers_google + career_google,
           data = all_es_wide))
## 
## Call:
## lm(formula = career_behavioral_iat ~ flowers_google + career_google, 
##     data = all_es_wide)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.11704 -0.04218 -0.00794  0.02862  0.12723 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.97107    0.07818  12.421 2.04e-11 ***
## flowers_google  0.05796    0.06407   0.905    0.375    
## career_google   0.04147    0.04503   0.921    0.367    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07162 on 22 degrees of freedom
## Multiple R-squared:  0.08248,    Adjusted R-squared:  -0.0009333 
## F-statistic: 0.9888 on 2 and 22 DF,  p-value: 0.388
# Career (hand translations) effect regressed on wps_index and flowers.
summary(lm(career_hand ~ wps_index + flowers_google,
           data = all_es_wide))
## 
## Call:
## lm(formula = career_hand ~ wps_index + flowers_google, data = all_es_wide)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38904 -0.24871  0.08452  0.22509  0.43438 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)     -0.1299     0.5188  -0.250    0.806
## wps_index        0.6498     0.7171   0.906    0.379
## flowers_google   0.0533     0.3609   0.148    0.885
## 
## Residual standard error: 0.2976 on 15 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.07723,    Adjusted R-squared:  -0.0458 
## F-statistic: 0.6277 on 2 and 15 DF,  p-value: 0.5473
# Career (Google translations) effect regressed on wps_index and flowers.
summary(lm(career_google ~ wps_index + flowers_google,
           data = all_es_wide))
## 
## Call:
## lm(formula = career_google ~ wps_index + flowers_google, data = all_es_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7911 -0.2603  0.0159  0.2409  0.6083 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)    -0.47465    0.53515  -0.887    0.385
## wps_index       1.14123    0.73929   1.544    0.138
## flowers_google  0.01821    0.35128   0.052    0.959
## 
## Residual standard error: 0.328 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.129,  Adjusted R-squared:  0.04606 
## F-statistic: 1.555 on 2 and 21 DF,  p-value: 0.2345
# Career (hand translations) effect regressed on wps_index and weapons.
summary(lm(career_hand ~ wps_index + weapons_google,
           data = all_es_wide))
## 
## Call:
## lm(formula = career_hand ~ wps_index + weapons_google, data = all_es_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3787 -0.1703  0.0016  0.1536  0.5763 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)     -0.7813     0.6097  -1.281    0.219
## wps_index        0.1267     0.6783   0.187    0.854
## weapons_google   0.8398     0.5124   1.639    0.122
## 
## Residual standard error: 0.2743 on 15 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.2163, Adjusted R-squared:  0.1118 
## F-statistic: 2.069 on 2 and 15 DF,  p-value: 0.1608
# Career (Google translations) effect regressed on wps_index and weapons.
summary(lm(career_google ~ wps_index + weapons_google,
           data = all_es_wide))
## 
## Call:
## lm(formula = career_google ~ wps_index + weapons_google, data = all_es_wide)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78932 -0.19713  0.04308  0.23934  0.56139 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)     -0.7799     0.6377  -1.223    0.235
## wps_index        0.8884     0.7322   1.213    0.238
## weapons_google   0.3942     0.4978   0.792    0.437
## 
## Residual standard error: 0.3233 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1542, Adjusted R-squared:  0.0736 
## F-statistic: 1.914 on 2 and 21 DF,  p-value: 0.1724
# Career (hand translations) effect regressed on wps_index, weapons and flowers.
summary(lm(career_hand ~ wps_index + weapons_google + flowers_google,
           data = all_es_wide))
## 
## Call:
## lm(formula = career_hand ~ wps_index + weapons_google + flowers_google, 
##     data = all_es_wide)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37978 -0.17125  0.00525  0.15528  0.57533 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)    -0.77324    0.64114  -1.206    0.248
## wps_index       0.14637    0.75474   0.194    0.849
## weapons_google  0.84521    0.53571   1.578    0.137
## flowers_google -0.02473    0.34778  -0.071    0.944
## 
## Residual standard error: 0.2839 on 14 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.2165, Adjusted R-squared:  0.04865 
## F-statistic:  1.29 on 3 and 14 DF,  p-value: 0.3166
# Career (Google translations) effect regressed on wps_index, weapons and
# flowers.
summary(lm(career_google ~ wps_index + weapons_google + flowers_google,
           data = all_es_wide))
## 
## Call:
## lm(formula = career_google ~ wps_index + weapons_google + flowers_google, 
##     data = all_es_wide)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78813 -0.19430  0.04103  0.23423  0.56060 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)    -0.79331    0.67910  -1.168    0.256
## wps_index       0.86302    0.82836   1.042    0.310
## weapons_google  0.39524    0.51026   0.775    0.448
## flowers_google  0.02567    0.35481   0.072    0.943
## 
## Residual standard error: 0.3312 on 20 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1544, Adjusted R-squared:  0.02753 
## F-statistic: 1.217 on 3 and 20 DF,  p-value: 0.3293