# Relative path to the ETS decile-level pairwise correlation file that the
# read_csv() calls in this script load.
ETS_PATH <- file.path(
  "ets",
  "data",
  "lang_pairwise_tile_correlations_ets_decile_qap.csv"
)
# Pearson's r (base R cor function)
# Load the ETS pairwise correlations for the Pearson analysis.
# Column names are supplied explicitly; the third column (named `corr_qap`
# here) is discarded and the fourth is kept as `corr`. `corr` is then passed
# through fisherz() before any averaging happens downstream.
conc_corr_ets <- ETS_PATH %>%
  read_csv(
    col_names = c("tile1", "tile2", "corr_qap", "corr", "lang1", "lang2")
  ) %>%
  select(-corr_qap) %>%
  mutate(
    corpus = "ets",
    corr = fisherz(corr)
  )
## Within deciles
# Summarise `corr` with multi_boot_standard() for every
# (tile1, tile2, corpus) cell, then keep only the diagonal cells where both
# deciles are equal. `lab` carries a display label on the decile-9 row of
# each corpus and is empty elsewhere; `corpus` is recoded to its long name.
conc_corr_ms_same <- conc_corr_ets %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile1 == tile2) %>%
  mutate(
    tile1 = as.factor(tile1),
    # Evaluated before `corpus` is recoded below, so the short codes apply.
    lab = case_when(
      corpus == "ets" & tile2 == 9 ~ "English Second-\nLanguage Corpus",
      corpus == "wiki" & tile2 == 9 ~ "Multilingual\nWikipedia Corpus",
      TRUE ~ ""
    ),
    corpus = fct_recode(
      corpus,
      "English Second-Language Corpus" = "ets",
      "Multilingual Wikipedia Corpus" = "wiki"
    )
  )
# NOTE(review): the back-transform below is disabled, so `mean` and the CI
# columns presumably stay on the fisherz() scale -- confirm this is intended.
# mutate_at(vars(contains("ci"), mean), fisherz2r)
# Within-decile panel: one point + CI range per concreteness decile, coloured
# by decile, with a black linear trend line per corpus.
#
# Changes relative to the previous version:
#   * the unknown aesthetic `line_type` was dropped (ggplot2 has no such
#     aesthetic, so it never had any effect);
#   * `ymax` is spelled out instead of relying on the base-style alias `max`;
#   * the shape mapping uses fct_rev(), which reverses the level order without
#     reversing the rows of the column (rev() misaligns rows as soon as more
#     than one corpus is present);
#   * `guide = FALSE` (deprecated) became `guide = "none"`;
#   * the redundant ylim() was removed -- scale_y_continuous() below already
#     sets the same limits and replaced it anyway.
p1 <- ggplot(
  conc_corr_ms_same,
  aes(x = tile2, y = mean, group = corpus, color = tile2)
) +
  geom_smooth(method = "lm", alpha = .2, color = "black") +
  geom_pointrange(
    aes(ymin = ci_lower, ymax = ci_upper, shape = fct_rev(corpus)),
    size = .7,
    alpha = .7
  ) +
  scale_x_continuous(breaks = 1:10) +
  # The colour only encodes the decile already shown on the x axis, so its
  # legend is suppressed.
  viridis::scale_color_viridis(
    option = "plasma", begin = 1, end = 0,
    guide = "none"
  ) +
  ylab("") +
  ggtitle("Within Concreteness Deciles") +
  # Single y scale: axis drawn on the right, same limits as the sibling panel.
  scale_y_continuous(position = "right", limits = c(.14, .6)) +
  xlab("Concreteness Decile") +
  theme_classic(base_size = 18) +
  guides(shape = guide_legend(title = "Corpus")) +
  theme(
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1),
    legend.position = c(0.65, 0.2),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10),
    legend.background = element_rect(linetype = 1, size = 0.5, colour = 1)
  )
# Summarise `corr` with multi_boot_standard() for every
# (tile1, tile2, corpus) cell and keep the cells with tile2 <= tile1.
# `ratio` is computed while tile1 is still numeric (it is turned into a factor
# right after); the jittered columns are random offsets of tile2 and ratio.
conc_corr_ms_diff <- conc_corr_ets %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile1 >= tile2) %>%
  mutate(
    ratio = tile2 / tile1,
    tile1 = as.factor(tile1),
    tile2_jittered = jitter(tile2, factor = .6),
    ratio_jittered = jitter(ratio, factor = 3),
    corpus = fct_recode(
      corpus,
      "English Second-Language Corpus" = "ets",
      "Multilingual Wikipedia Corpus" = "wiki"
    )
  )
# NOTE(review): back-transform left disabled here as well -- confirm.
# mutate_at(vars(contains("ci"), mean), fisherz2r)
## Across deciles (ratio version)
# Across-decile panel: mean correlation (point + CI range) against the
# tile2 / tile1 ratio, with a black linear trend line per corpus. Points use
# the jittered ratio; the trend line is fitted on the unjittered ratio.
#
# Changes relative to the previous version:
#   * the leading ylab("") was removed -- it was overridden by the real axis
#     title further down;
#   * `ymax` is spelled out instead of the base-style alias `max`;
#   * fct_rev() replaces rev() so the shape mapping reverses the level order
#     without misaligning rows when more than one corpus is present.
#
# NOTE(review): `corr` is fisherz()-transformed at load time and the
# fisherz2r back-transform upstream is commented out, so the plotted values
# may be on the z scale despite the "Pearson's r" axis title -- confirm.
p2 <- ggplot() +
  geom_pointrange(
    data = conc_corr_ms_diff,
    aes(
      x = ratio_jittered, y = mean,
      ymin = ci_lower, ymax = ci_upper,
      shape = fct_rev(corpus)
    ),
    alpha = .5,
    size = .7
  ) +
  geom_smooth(
    data = conc_corr_ms_diff,
    aes(x = ratio, y = mean, group = corpus),
    method = "lm", alpha = .3, color = "black"
  ) +
  ylab("Cross-linguistic word-pairwise \n correlation (Pearson's r)") +
  scale_x_continuous(breaks = c(0, .25, .5, .75, 1), limits = c(0, 1.01)) +
  ggtitle("Across Concreteness Deciles") +
  xlab("Word-Pair Concreteness Ratio") +
  ylim(.14, .6) +
  theme_classic(base_size = 18) +
  theme(
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1),
    legend.position = "none"
  )
# Alternative layout, kept for reference:
# gridExtra::grid.arrange(p2, p1, nrow = 1)
# Uncomment together with dev.off() below to write the figure to disk:
# pdf("../figs/concreteness_plot.pdf", width = 11, height = 5)

# Side-by-side composite: the canvas x range is widened to [0, 2] so that each
# panel fills a 1 x 1 slot -- p2 on the left, p1 on the right.
ggdraw(xlim = c(0, 2)) +
  draw_plot(p2, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(p1, x = 1, y = 0, width = 1, height = 1)

# dev.off()
# Heatmap of the mean `corr` for every (tile1, tile2) decile pair, with the
# rounded value printed in each cell.
#
# Fixes: the axis titles were swapped relative to the mapped variables
# (x is tile1, y is tile2), and the grouping left behind by summarize() is
# now dropped explicitly before plotting.
#
# NOTE(review): `corr` was passed through fisherz() when it was loaded, so
# the averaged values shown here are presumably on the z scale -- confirm.
conc_corr_ets %>%
  group_by(tile1, tile2) %>%
  summarize(corr = mean(corr)) %>%
  ungroup() %>%
  ggplot(aes(x = tile1, y = tile2, fill = corr, label = round(corr, 2))) +
  geom_tile() +
  geom_text() +
  scale_fill_continuous(low = "white", high = "red") +
  xlab("concreteness decile 1") +
  ylab("concreteness decile 2")

## Within deciles
# Rebuilds conc_corr_ms_same (overwriting the table used for p1) with every
# decile pair kept. `same` flags diagonal cells, and the CI columns plus
# `mean` are passed through fisherz2r().
conc_corr_ms_same <- conc_corr_ets %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  mutate(
    same = case_when(
      tile2 == tile1 ~ "same",
      TRUE ~ "diff"
    )
  ) %>%
  mutate_at(
    vars(contains("ci"), mean),
    fisherz2r
  )
# Exploratory view 1: mean against tile1, one colour/group per tile2, point
# shape marking same- vs. different-decile cells; linear fit per group.
conc_corr_ms_same %>%
  ggplot(aes(
    x = tile1,
    y = mean,
    group = tile2,
    color = as.factor(tile2),
    shape = same
  )) +
  geom_smooth(method = "lm", alpha = .2) +
  geom_point(size = 3)

# Exploratory view 2: same mapping, with the points joined by lines instead
# of a fitted trend.
conc_corr_ms_same %>%
  ggplot(aes(
    x = tile1,
    y = mean,
    group = tile2,
    color = as.factor(tile2),
    shape = same
  )) +
  geom_point(size = 3) +
  geom_line()

# Exploratory view 3: only cells with tile1 <= tile2, points joined by lines.
conc_corr_ms_same %>%
  filter(tile2 >= tile1) %>%
  ggplot(aes(
    x = tile1,
    y = mean,
    group = tile2,
    color = as.factor(tile2),
    shape = same
  )) +
  geom_point(size = 3) +
  geom_line()

# Exploratory view 4: same subset, with a linear fit per tile2 group.
conc_corr_ms_same %>%
  filter(tile2 >= tile1) %>%
  ggplot(aes(
    x = tile1,
    y = mean,
    group = tile2,
    color = as.factor(tile2),
    shape = same
  )) +
  geom_smooth(method = "lm", alpha = .2) +
  geom_point(size = 3)

# QAP r (gcor function from sna)
# Reload the same file for the QAP analysis: this time the third column is
# kept as `corr` and the fourth (named `corr_r` here) is discarded.
# This overwrites the conc_corr_ets table built for the Pearson section.
conc_corr_ets <- ETS_PATH %>%
  read_csv(
    col_names = c("tile1", "tile2", "corr", "corr_r", "lang1", "lang2")
  ) %>%
  select(-corr_r) %>%
  mutate(corpus = "ets")
# The Fisher z-transform is disabled for this section:
# mutate(corr = fisherz(corr))
## Within deciles
# Summarise `corr` with multi_boot_standard() for every
# (tile1, tile2, corpus) cell and keep only the diagonal (equal-decile) cells.
# `lab` is a display label on the decile-9 row of each corpus, empty
# elsewhere; `corpus` is recoded to its long name afterwards.
conc_corr_ms_same <- conc_corr_ets %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile1 == tile2) %>%
  mutate(
    tile1 = as.factor(tile1),
    # Evaluated before `corpus` is recoded below, so the short codes apply.
    lab = case_when(
      corpus == "ets" & tile2 == 9 ~ "English Second-\nLanguage Corpus",
      corpus == "wiki" & tile2 == 9 ~ "Multilingual\nWikipedia Corpus",
      TRUE ~ ""
    ),
    corpus = fct_recode(
      corpus,
      "English Second-Language Corpus" = "ets",
      "Multilingual Wikipedia Corpus" = "wiki"
    )
  )
# Disabled back-transform, kept for reference:
# mutate_at(vars(contains("ci"), mean), fisherz2r)
# Within-decile panel (QAP version): one point + CI range per concreteness
# decile, coloured by decile, with a black linear trend line per corpus.
#
# Changes relative to the previous version:
#   * the unknown aesthetic `line_type` was dropped (ggplot2 has no such
#     aesthetic, so it never had any effect);
#   * `ymax` is spelled out instead of relying on the base-style alias `max`;
#   * the shape mapping uses fct_rev(), which reverses the level order without
#     reversing the rows of the column (rev() misaligns rows as soon as more
#     than one corpus is present);
#   * `guide = FALSE` (deprecated) became `guide = "none"`.
p1 <- ggplot(
  conc_corr_ms_same,
  aes(x = tile2, y = mean, group = corpus, color = tile2)
) +
  geom_smooth(method = "lm", alpha = .2, color = "black") +
  geom_pointrange(
    aes(ymin = ci_lower, ymax = ci_upper, shape = fct_rev(corpus)),
    size = .7,
    alpha = .7
  ) +
  scale_x_continuous(breaks = 1:10) +
  # The colour only encodes the decile already shown on the x axis, so its
  # legend is suppressed.
  viridis::scale_color_viridis(
    option = "plasma", begin = 1, end = 0,
    guide = "none"
  ) +
  ylab("") +
  ggtitle("Within Concreteness Deciles") +
  # Axis drawn on the right; limits match the sibling across-decile panel.
  scale_y_continuous(position = "right", limits = c(.14, .6)) +
  xlab("Concreteness Decile") +
  theme_classic(base_size = 18) +
  guides(shape = guide_legend(title = "Corpus")) +
  theme(
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1),
    legend.position = c(0.65, 0.2),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10),
    legend.background = element_rect(linetype = 1, size = 0.5, colour = 1)
  )
# Summarise `corr` with multi_boot_standard() for every
# (tile1, tile2, corpus) cell and keep the cells with tile2 <= tile1.
# `ratio` is computed while tile1 is still numeric (it is turned into a factor
# right after); the jittered columns are random offsets of tile2 and ratio.
conc_corr_ms_diff <- conc_corr_ets %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile1 >= tile2) %>%
  mutate(
    ratio = tile2 / tile1,
    tile1 = as.factor(tile1),
    tile2_jittered = jitter(tile2, factor = .6),
    ratio_jittered = jitter(ratio, factor = 3),
    corpus = fct_recode(
      corpus,
      "English Second-Language Corpus" = "ets",
      "Multilingual Wikipedia Corpus" = "wiki"
    )
  )
# Disabled back-transform, kept for reference:
# mutate_at(vars(contains("ci"), mean), fisherz2r)
## Across deciles (ratio version)
# Across-decile panel (QAP version): mean correlation (point + CI range)
# against the tile2 / tile1 ratio, with a black linear trend line per corpus.
# Points use the jittered ratio; the trend line is fitted on the unjittered
# ratio.
#
# Changes relative to the previous version:
#   * the leading ylab("") was removed -- it was overridden by the real axis
#     title further down;
#   * `ymax` is spelled out instead of the base-style alias `max`;
#   * fct_rev() replaces rev() so the shape mapping reverses the level order
#     without misaligning rows when more than one corpus is present.
p2 <- ggplot() +
  geom_pointrange(
    data = conc_corr_ms_diff,
    aes(
      x = ratio_jittered, y = mean,
      ymin = ci_lower, ymax = ci_upper,
      shape = fct_rev(corpus)
    ),
    alpha = .5,
    size = .7
  ) +
  geom_smooth(
    data = conc_corr_ms_diff,
    aes(x = ratio, y = mean, group = corpus),
    method = "lm", alpha = .3, color = "black"
  ) +
  ylab("Cross-linguistic word-pairwise \n correlation (QAP r)") +
  scale_x_continuous(breaks = c(0, .25, .5, .75, 1), limits = c(0, 1.01)) +
  ggtitle("Across Concreteness Deciles") +
  xlab("Word-Pair Concreteness Ratio") +
  ylim(.14, .6) +
  theme_classic(base_size = 18) +
  theme(
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1),
    legend.position = "none"
  )
# Alternative layout, kept for reference:
# gridExtra::grid.arrange(p2, p1, nrow = 1)
# Uncomment together with dev.off() below to write the figure to disk:
# pdf("../figs/concreteness_plot.pdf", width = 11, height = 5)

# Side-by-side composite: the canvas x range is widened to [0, 2] so that each
# panel fills a 1 x 1 slot -- p2 on the left, p1 on the right.
ggdraw(xlim = c(0, 2)) +
  draw_plot(p2, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(p1, x = 1, y = 0, width = 1, height = 1)

# dev.off()
# Heatmap of the mean `corr` for every (tile1, tile2) decile pair, with the
# rounded value printed in each cell.
#
# Fixes: the axis titles were swapped relative to the mapped variables
# (x is tile1, y is tile2), and the grouping left behind by summarize() is
# now dropped explicitly before plotting.
conc_corr_ets %>%
  group_by(tile1, tile2) %>%
  summarize(corr = mean(corr)) %>%
  ungroup() %>%
  ggplot(aes(x = tile1, y = tile2, fill = corr, label = round(corr, 2))) +
  geom_tile() +
  geom_text() +
  scale_fill_continuous(low = "white", high = "red") +
  xlab("concreteness decile 1") +
  ylab("concreteness decile 2")
