In this analysis, I sampled N words from each concreteness quantile ("-ile") of the Brysbaert corpus. Then, for each quantile, I calculated the pairwise distances either between all words within each language (no anchor) or from each word to all words in the most concrete quantile (anchor), and then looked at the correlation of these distances across languages.
The general prediction is that the correlation should increase as concreteness increases.
# ---- Quintile analysis, no anchor ----
# Load the stored correlations from disk. The columns used below are
# `estimate` (the correlation value) and `quintile`.
all_corrs <- read_feather(
  "/Volumes/wilbur_the_great/concreteness_correlations/quintile/pairwise_language_corrs_wiki.feather"
)

# Histogram of the correlation estimates, one panel per quintile.
ggplot(data = all_corrs, mapping = aes(x = estimate)) +
  geom_histogram() +
  facet_grid(. ~ quintile) +
  theme_bw() +
  ggtitle("Distribution of correlations, by quintile")

# Summarise `estimate` within each quintile. The plot below relies on the
# `mean`, `ci_lower` and `ci_upper` columns of this summary.
# NOTE(review): presumably a bootstrapped mean and CI (langcog) -- confirm.
overall_corrs <- multi_boot_standard(
  group_by(all_corrs, quintile),
  col = "estimate"
)

# Per-quintile mean with its interval; x-axis ticks are fixed at 1..5.
ggplot(data = overall_corrs, mapping = aes(x = quintile, y = mean)) +
  geom_pointrange(mapping = aes(ymin = ci_lower, ymax = ci_upper)) +
  scale_x_continuous(breaks = 1:5) +
  theme_bw() +
  ggtitle("Mean correlation by quintile")
# ---- Quintile analysis, anchored ----
# Load the stored anchored correlations from disk; this overwrites
# `all_corrs`. The columns used below are `estimate` and `quintile`.
all_corrs <- read_feather(
  "/Volumes/wilbur_the_great/concreteness_correlations/quintile_anchored/pairwise_word_anchored_corrs.feather"
)

# Histogram of the correlation estimates, one panel per quintile.
ggplot(data = all_corrs, mapping = aes(x = estimate)) +
  geom_histogram() +
  facet_grid(. ~ quintile) +
  theme_bw() +
  ggtitle("Distribution of correlations, by quintile")

# Summarise `estimate` within each quintile. The plot below relies on the
# `mean`, `ci_lower` and `ci_upper` columns of this summary.
# NOTE(review): presumably a bootstrapped mean and CI (langcog) -- confirm.
overall_corrs <- multi_boot_standard(
  group_by(all_corrs, quintile),
  col = "estimate"
)

# Per-quintile mean with its interval, drawn with a reduced size (.2);
# x-axis ticks are fixed at 1..5.
ggplot(data = overall_corrs, mapping = aes(x = quintile, y = mean)) +
  geom_pointrange(
    mapping = aes(ymin = ci_lower, ymax = ci_upper),
    size = .2
  ) +
  scale_x_continuous(breaks = 1:5) +
  theme_bw() +
  ggtitle("Mean correlation by quintile (anchored on concrete)")
# ---- 20-ile analysis, anchored ----
# Load the stored anchored correlations from disk; this overwrites
# `all_corrs`. The grouping column is still named `quintile`, although the
# titles and axis below treat it as a 20-ile index.
all_corrs <- read_feather(
  "/Volumes/wilbur_the_great/concreteness_correlations/pairwise_word_anchored_corrs_20x1000.feather"
)

# Histogram of the correlation estimates, one wrapped panel per 20-ile.
ggplot(data = all_corrs, mapping = aes(x = estimate)) +
  geom_histogram() +
  facet_wrap(~quintile) +
  theme_bw() +
  ggtitle("Distribution of correlations, by 20-ile")

# Summarise `estimate` within each 20-ile. The plot below relies on the
# `mean`, `ci_lower` and `ci_upper` columns of this summary.
# NOTE(review): presumably a bootstrapped mean and CI (langcog) -- confirm.
overall_corrs <- multi_boot_standard(
  group_by(all_corrs, quintile),
  col = "estimate"
)

# Per-20-ile mean with its interval, joined by a line. `group = 1` puts
# every row in a single group so geom_line() connects all the points.
ggplot(
  data = overall_corrs,
  mapping = aes(x = quintile, y = mean, group = 1)
) +
  geom_pointrange(
    mapping = aes(ymin = ci_lower, ymax = ci_upper),
    size = .2
  ) +
  geom_line() +
  scale_x_continuous(breaks = 1:20) +
  xlab("20-ile concreteness") +
  theme_bw() +
  ggtitle("Mean correlation by 20-ile (anchored on concrete)")