#Read in drawing summary data

# Load every file in the summary-data directory and stack the results into a
# single data frame. `full.names = TRUE` makes list.files() return paths that
# already include the directory, so the directory is written once and no
# separate path-pasting step is needed.
d <- list.files("../../data/summary_data", full.names = TRUE) %>%
  map(read_feather) %>%
  bind_rows()

Items and countries vary along a number of measures. Can we predict this variability?

=> The only predictors included here are first-language status (for countries) and concreteness (for items). There are infinitely many other possible predictors. Which would be theoretically interesting?

Country variability

first language status

# Countries labelled "english" in the first-language comparison.
english_countries <- c(
  "Australia",
  "Canada",
  "United Kingdom",
  "United States"
)

# Label each row "english" or "non_english" according to whether its country
# appears in `english_countries`.
d <- d %>%
  mutate(
    lang = if_else(country %in% english_countries, "english", "non_english")
  )
# Boxplots of each measure (columns 4-8 of `d`, reshaped to long form) split
# by `lang`; one panel per measure, each with its own axis scales.
d %>%
  gather(measure, value, 4:8) %>%
  ggplot(aes(x = lang, y = value, fill = lang)) +
  geom_boxplot() +
  facet_wrap(~measure, scales = "free") +
  # theme() must come after theme_bw() so the legend setting is not reset.
  theme_bw() +
  theme(legend.position = "none")

No difference here

Item variability

concreteness

# Read the cue list, keeping only the cue word and its `conc.bin` column.
cues <- select(
  read_csv("../../data/supplementary_data/extreme_cues.csv"),
  cue, conc.bin
)

# Attach `conc.bin` from `cues` by matching `word` to `cue` (rows of `d` with
# no matching cue get NA), then store it as a factor.
d <- mutate(
  left_join(d, cues, by = c("word" = "cue")),
  conc.bin = as.factor(conc.bin)
)

# Boxplots of each measure (columns 4-8 of `d`, reshaped to long form) split
# by `conc.bin`, using only rows that have a `conc.bin` value; one panel per
# measure, each with its own axis scales.
d %>%
  filter(!is.na(conc.bin)) %>%
  gather(measure, value, 4:8) %>%
  ggplot(aes(x = conc.bin, y = value, fill = conc.bin)) +
  geom_boxplot() +
  facet_wrap(~measure, scales = "free") +
  # theme() must come after theme_bw() so the legend setting is not reset.
  theme_bw() +
  theme(legend.position = "none")

concreteness and first language status

# Reshape columns 4, 6 and 8 of `d` to long form, drop rows lacking either a
# `conc.bin` or a value, and summarise `value` with multi_boot_standard()
# within each measure x conc.bin x lang cell.
summaries <- d %>%
  gather(measure, value, c(4, 6, 8)) %>%
  filter(!is.na(conc.bin), !is.na(value)) %>%
  group_by(measure, conc.bin, lang) %>%
  multi_boot_standard(column = "value", na.rm = TRUE)

# Point-range plot of `mean` with its `summary_ci_lower`/`summary_ci_upper`
# interval for each `lang`, coloured and dodged by `conc.bin`; one panel per
# measure, each with its own axis scales.
summaries %>%
  ggplot(aes(x = lang, y = mean,
             group = conc.bin, color = conc.bin, fill = conc.bin)) +
  geom_pointrange(
    aes(ymin = summary_ci_lower, ymax = summary_ci_upper),
    position = position_dodge(width = 0.9),
    size = 0.4
  ) +
  facet_wrap(~measure, scales = "free") +
  theme_bw()

Abstract words have longer strokes for both English and non-English speakers. Also, the difference in accuracy between low- and high-concreteness words is larger for non-English speakers.