# Read in drawing summary data: load every file in the summary directory
# with read_feather() and stack the results into a single data frame.
d <- bind_rows(
  lapply(
    list.files("../../data/summary_data/"),
    function(f) read_feather(paste0("../../data/summary_data/", f))
  )
)
Items and countries vary along a number of measures. Can we predict this variability?
=> The only predictors included here are first-language status (for countries) and concreteness (for items). There are infinitely many other possible predictors. Which of them would be theoretically interesting?
# Countries labelled "english"; every other country is labelled "non_english".
english_countries <- c("Australia", "Canada", "United Kingdom", "United States")

# Tag each row of `d` with the language status of its country.
d <- mutate(
  d,
  lang = ifelse(country %in% english_countries, "english", "non_english")
)
# Boxplots of each measure (columns 4-8 of `d`, reshaped to long format),
# split by language status, one free-scaled panel per measure.
ggplot(
  gather(d, measure, value, 4:8),
  aes(x = lang, y = value, fill = lang)
) +
  geom_boxplot() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  theme(legend.position = "none")
No difference between English and non-English countries here.
# Read the cue list, keeping only the cue word and its `conc.bin` value.
cues <- select(
  read_csv("../../data/supplementary_data/extreme_cues.csv"),
  cue, conc.bin
)

# Attach `conc.bin` to each row of `d` (matching `word` to `cue`) and treat
# it as a factor. Words without a matching cue get NA.
d <- mutate(
  left_join(d, cues, by = c("word" = "cue")),
  conc.bin = as.factor(conc.bin)
)
# Boxplots of each measure (columns 4-8 of `d`, reshaped to long format),
# split by `conc.bin`; rows with no `conc.bin` are dropped first.
ggplot(
  gather(filter(d, !is.na(conc.bin)), measure, value, 4:8),
  aes(x = conc.bin, y = value, fill = conc.bin)
) +
  geom_boxplot() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  theme(legend.position = "none")
# Summarise columns 4, 6 and 8 of `d` for every combination of measure,
# `conc.bin` and language status. Intermediate steps are kept inside local()
# so that only `summaries` is added to the workspace.
summaries <- local({
  # Keep only rows that have a `conc.bin` value.
  binned <- filter(d, !is.na(conc.bin))
  # Reshape the three selected measure columns to long format.
  long <- gather(binned, measure, value, c(4, 6, 8))
  # Drop missing observations before summarising.
  observed <- filter(long, !is.na(value))
  by_cell <- group_by(observed, measure, conc.bin, lang)
  # NOTE(review): presumably returns a bootstrapped mean with confidence
  # bounds per group (the plot below reads `mean`, `summary_ci_lower` and
  # `summary_ci_upper`) -- confirm against the installed helper's version.
  multi_boot_standard(by_cell, column = "value", na.rm = TRUE)
})
# Plot the per-group means with their interval bounds: language status on the
# x axis, `conc.bin` levels dodged side by side, one free-scaled panel per
# measure.
ggplot(
  summaries,
  aes(x = lang, y = mean, group = conc.bin, color = conc.bin, fill = conc.bin)
) +
  geom_pointrange(
    aes(ymin = summary_ci_lower, ymax = summary_ci_upper),
    size = 0.4,
    position = position_dodge(0.9)
  ) +
  facet_wrap(~measure, scales = "free") +
  theme_bw()
Abstract words have longer strokes for both English and non-English speakers. Also, the difference in accuracy between low- and high-concreteness words is larger for non-English speakers.