# Path to the ETS pairwise tile-correlation CSV. It is read twice further
# down with read_csv(): once keeping the fourth column (named "corr" /
# "corr_r") and once keeping the third column (named "corr_qap" / "corr").
ETS_PATH <- "ets/data/lang_pairwise_tile_correlations_ets_decile_qap.csv"

Pearson's r (base R cor function)

# Load the ETS pairwise tile correlations. The third column is named
# "corr_qap" and dropped; the fourth column is kept as `corr` and passed
# through fisherz() (presumably the Fisher r-to-z transform -- confirm which
# package supplies it). Every row is tagged with the corpus code "ets".
conc_corr_ets <- ETS_PATH %>%
  read_csv(
    col_names = c("tile1", "tile2", "corr_qap", "corr", "lang1", "lang2")
  ) %>%
  select(-corr_qap) %>%
  mutate(
    corpus = "ets",
    corr = fisherz(corr)
  )
## Within deciles
# Summarise `corr` for every (tile1, tile2, corpus) cell with
# multi_boot_standard(), which supplies the `mean`, `ci_lower` and `ci_upper`
# columns the plots read, then keep only the cells where both deciles match.
# `lab` carries a two-line corpus label on decile 9 and is empty elsewhere.
conc_corr_ms_same <- conc_corr_ets %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile1 == tile2) %>%
  mutate(
    tile1 = as.factor(tile1),
    lab = case_when(
      corpus == "ets" & tile2 == 9 ~ "English Second-\nLanguage Corpus",
      corpus == "wiki" & tile2 == 9 ~ "Multilingual\nWikipedia Corpus",
      TRUE ~ ""
    ),
    # Recode last so the `lab` conditions above still see the raw codes.
    corpus = fct_recode(
      corpus,
      "English Second-Language Corpus" = "ets",
      "Multilingual Wikipedia Corpus" = "wiki"
    )
  )
# Back-transforming the summaries with fisherz2r is currently disabled:
# mutate_at(vars(contains("ci"), mean), fisherz2r)


# Within-decile panel: one point-range per concreteness decile (mean with
# ci_lower / ci_upper from conc_corr_ms_same), coloured by decile, with a
# black linear fit per corpus. The y axis is drawn on the right-hand side.
#
# Changes from the previous version:
#   * dropped `line_type = corpus`: it is not a ggplot2 aesthetic name, so it
#     was silently ignored;
#   * dropped the standalone ylim(.14, .6): the scale_y_continuous() call
#     below sets the same limits and replaced that scale anyway, which only
#     produced a "scale already present" message;
#   * spelled the upper bound as `ymax` instead of the base-style alias `max`;
#   * `guide = "none"` replaces the deprecated `guide = FALSE`.
p1 <- ggplot(
  conc_corr_ms_same,
  aes(x = tile2, y = mean, group = corpus, color = tile2)
) +
  geom_smooth(method = "lm", alpha = .2, color = "black") +
  # NOTE(review): rev(corpus) reverses the row order of the column, not the
  # level order. That is harmless while the column holds a single value, but
  # would mislabel points with several corpora -- confirm the intent.
  geom_pointrange(
    aes(ymin = ci_lower, ymax = ci_upper, shape = rev(corpus)),
    size = .7,
    alpha = .7
  ) +
  scale_x_continuous(breaks = 1:10) +
  viridis::scale_color_viridis(
    option = "plasma",
    begin = 1,
    end = 0,
    guide = "none"
  ) +
  ylab("") +
  ggtitle("Within Concreteness Deciles") +
  scale_y_continuous(position = "right", limits = c(.14, .6)) +
  xlab("Concreteness Decile") +
  theme_classic(base_size = 18) +
  guides(shape = guide_legend(title = "Corpus")) +
  theme(
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1),
    legend.position = c(0.65, 0.2),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10),
    legend.background = element_rect(linetype = 1, size = 0.5, colour = 1)
  )

# Summary for every decile pair with tile2 <= tile1 (the same-decile cells
# are included). `ratio` is tile2 / tile1, computed before tile1 is turned
# into a factor; the *_jittered columns add random noise via jitter(), so
# they differ from run to run unless a seed is set elsewhere.
conc_corr_ms_diff <- conc_corr_ets %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile1 >= tile2) %>%
  mutate(
    ratio = tile2 / tile1,
    tile1 = as.factor(tile1),
    tile2_jittered = jitter(tile2, factor = .6),
    ratio_jittered = jitter(ratio, factor = 3),
    corpus = fct_recode(
      corpus,
      "English Second-Language Corpus" = "ets",
      "Multilingual Wikipedia Corpus" = "wiki"
    )
  )
# Back-transforming the summaries with fisherz2r is currently disabled:
# mutate_at(vars(contains("ci"), mean), fisherz2r)


## Across deciles (ratio version)
# Point-ranges are placed at the jittered concreteness ratio, while the black
# linear fit uses the un-jittered ratio. Both layers read conc_corr_ms_diff,
# so the data is supplied once to ggplot(). The legend is suppressed.
p2 <- ggplot(conc_corr_ms_diff) +
  geom_pointrange(
    aes(
      x = ratio_jittered,
      y = mean,
      ymin = ci_lower,
      ymax = ci_upper,
      shape = rev(corpus)
    ),
    alpha = .5,
    size = .7
  ) +
  geom_smooth(
    aes(x = ratio, y = mean, group = corpus),
    method = "lm",
    alpha = .3,
    color = "black"
  ) +
  ylab("Cross-linguistic word-pairwise \n correlation (Pearson's r)") +
  scale_x_continuous(breaks = c(0, .25, .5, .75, 1), limits = c(0, 1.01)) +
  ggtitle("Across Concreteness Deciles") +
  xlab("Word-Pair Concreteness Ratio") +
  ylim(.14, .6) +
  theme_classic(base_size = 18) +
  theme(
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1),
    legend.position = "none"
  )
        
#gridExtra::grid.arrange(p2, p1, nrow = 1)
#pdf("../figs/concreteness_plot.pdf", width = 11, height = 5)

# Compose the two-panel figure on a canvas two units wide: the
# across-deciles panel (p2) fills the left unit, the within-deciles panel
# (p1) the right unit.
ggdraw(xlim = c(0, 2)) +
  draw_plot(p2, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(p1, x = 1, y = 0, width = 1, height = 1)

#dev.off()
# Heat map of the mean `corr` for every decile pair, with the rounded mean
# printed in each tile.
#
# Fix: the axis titles were swapped. x maps tile1 and y maps tile2, so x is
# now labelled "decile 1" and y "decile 2".
#
# NOTE(review): `corr` was passed through fisherz() when the data was loaded,
# so the values shown here appear to be on the transformed scale -- confirm
# whether a back-transform is wanted before display.
conc_corr_ets %>%
  group_by(tile1, tile2) %>%
  summarize(corr = mean(corr)) %>%
  ggplot(aes(x = tile1, y = tile2, fill = corr, label = round(corr, 2))) +
  geom_tile() +
  geom_text() +
  scale_fill_continuous(low = "white", high = "red") +
  xlab("concreteness decile 1") +
  ylab("concreteness decile 2")

## All decile pairs, flagged as same- vs. different-decile
# Overwrites the earlier `conc_corr_ms_same`. No rows are filtered out here;
# `same` marks whether tile1 and tile2 coincide. The `mean` column and every
# column whose name contains "ci" are passed through fisherz2r.
conc_corr_ms_same <- conc_corr_ets %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  mutate(
    same = case_when(
      tile2 == tile1 ~ "same",
      TRUE ~ "diff"
    )
  ) %>%
  mutate_at(
    vars(contains("ci"), mean),
    fisherz2r
  )

# Exploratory views of the all-pairs summary: mean against tile1, one
# colour/group per tile2, point shape showing same- vs. different-decile.

# All pairs, linear fit per tile2.
ggplot(
  conc_corr_ms_same,
  aes(
    x = tile1,
    y = mean,
    color = as.factor(tile2),
    group = tile2,
    shape = same
  )
) +
  geom_smooth(method = "lm", alpha = .2) +
  geom_point(size = 3)

# All pairs, points joined by lines.
ggplot(
  conc_corr_ms_same,
  aes(
    x = tile1,
    y = mean,
    color = as.factor(tile2),
    group = tile2,
    shape = same
  )
) +
  geom_point(size = 3) +
  geom_line()

# Pairs with tile1 <= tile2 only, points joined by lines.
ggplot(
  filter(conc_corr_ms_same, tile1 <= tile2),
  aes(
    x = tile1,
    y = mean,
    color = as.factor(tile2),
    group = tile2,
    shape = same
  )
) +
  geom_point(size = 3) +
  geom_line()

# Pairs with tile1 <= tile2 only, linear fit per tile2.
ggplot(
  filter(conc_corr_ms_same, tile1 <= tile2),
  aes(
    x = tile1,
    y = mean,
    color = as.factor(tile2),
    group = tile2,
    shape = same
  )
) +
  geom_smooth(method = "lm", alpha = .2) +
  geom_point(size = 3)

QAP r (gcor function from the sna package)

# Reload the ETS pairwise tile correlations, this time keeping the third
# column as `corr` and dropping the fourth (named "corr_r"). Every row is
# tagged with the corpus code "ets". No transform is applied to `corr`:
# mutate(corr = fisherz(corr)) is deliberately left disabled.
conc_corr_ets <- ETS_PATH %>%
  read_csv(
    col_names = c("tile1", "tile2", "corr", "corr_r", "lang1", "lang2")
  ) %>%
  select(-corr_r) %>%
  mutate(corpus = "ets")
## Within deciles
# Summarise `corr` for every (tile1, tile2, corpus) cell with
# multi_boot_standard(), which supplies the `mean`, `ci_lower` and `ci_upper`
# columns the plots read, then keep only the cells where both deciles match.
# `lab` carries a two-line corpus label on decile 9 and is empty elsewhere.
conc_corr_ms_same <- conc_corr_ets %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile1 == tile2) %>%
  mutate(
    tile1 = as.factor(tile1),
    lab = case_when(
      corpus == "ets" & tile2 == 9 ~ "English Second-\nLanguage Corpus",
      corpus == "wiki" & tile2 == 9 ~ "Multilingual\nWikipedia Corpus",
      TRUE ~ ""
    ),
    # Recode last so the `lab` conditions above still see the raw codes.
    corpus = fct_recode(
      corpus,
      "English Second-Language Corpus" = "ets",
      "Multilingual Wikipedia Corpus" = "wiki"
    )
  )
# The fisherz2r step stays disabled:
# mutate_at(vars(contains("ci"), mean), fisherz2r)


# Within-decile panel: one point-range per concreteness decile (mean with
# ci_lower / ci_upper from conc_corr_ms_same), coloured by decile, with a
# black linear fit per corpus. The y axis is drawn on the right-hand side.
#
# Changes from the previous version:
#   * dropped `line_type = corpus`: it is not a ggplot2 aesthetic name, so it
#     was silently ignored;
#   * spelled the upper bound as `ymax` instead of the base-style alias `max`;
#   * `guide = "none"` replaces the deprecated `guide = FALSE`.
p1 <- ggplot(
  conc_corr_ms_same,
  aes(x = tile2, y = mean, group = corpus, color = tile2)
) +
  geom_smooth(method = "lm", alpha = .2, color = "black") +
  # NOTE(review): rev(corpus) reverses the row order of the column, not the
  # level order. That is harmless while the column holds a single value, but
  # would mislabel points with several corpora -- confirm the intent.
  geom_pointrange(
    aes(ymin = ci_lower, ymax = ci_upper, shape = rev(corpus)),
    size = .7,
    alpha = .7
  ) +
  scale_x_continuous(breaks = 1:10) +
  viridis::scale_color_viridis(
    option = "plasma",
    begin = 1,
    end = 0,
    guide = "none"
  ) +
  ylab("") +
  ggtitle("Within Concreteness Deciles") +
  scale_y_continuous(position = "right", limits = c(.14, .6)) +
  xlab("Concreteness Decile") +
  theme_classic(base_size = 18) +
  guides(shape = guide_legend(title = "Corpus")) +
  theme(
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1),
    legend.position = c(0.65, 0.2),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 10),
    legend.background = element_rect(linetype = 1, size = 0.5, colour = 1)
  )

# Summary for every decile pair with tile2 <= tile1 (the same-decile cells
# are included). `ratio` is tile2 / tile1, computed before tile1 is turned
# into a factor; the *_jittered columns add random noise via jitter(), so
# they differ from run to run unless a seed is set elsewhere.
conc_corr_ms_diff <- conc_corr_ets %>%
  group_by(tile1, tile2, corpus) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile1 >= tile2) %>%
  mutate(
    ratio = tile2 / tile1,
    tile1 = as.factor(tile1),
    tile2_jittered = jitter(tile2, factor = .6),
    ratio_jittered = jitter(ratio, factor = 3),
    corpus = fct_recode(
      corpus,
      "English Second-Language Corpus" = "ets",
      "Multilingual Wikipedia Corpus" = "wiki"
    )
  )
# The fisherz2r step stays disabled:
# mutate_at(vars(contains("ci"), mean), fisherz2r)


## Across deciles (ratio version)
# Point-ranges are placed at the jittered concreteness ratio, while the black
# linear fit uses the un-jittered ratio. Both layers read conc_corr_ms_diff,
# so the data is supplied once to ggplot(). The legend is suppressed.
p2 <- ggplot(conc_corr_ms_diff) +
  geom_pointrange(
    aes(
      x = ratio_jittered,
      y = mean,
      ymin = ci_lower,
      ymax = ci_upper,
      shape = rev(corpus)
    ),
    alpha = .5,
    size = .7
  ) +
  geom_smooth(
    aes(x = ratio, y = mean, group = corpus),
    method = "lm",
    alpha = .3,
    color = "black"
  ) +
  ylab("Cross-linguistic word-pairwise \n correlation (QAP r)") +
  scale_x_continuous(breaks = c(0, .25, .5, .75, 1), limits = c(0, 1.01)) +
  ggtitle("Across Concreteness Deciles") +
  xlab("Word-Pair Concreteness Ratio") +
  ylim(.14, .6) +
  theme_classic(base_size = 18) +
  theme(
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1),
    legend.position = "none"
  )
        
#gridExtra::grid.arrange(p2, p1, nrow = 1)
#pdf("../figs/concreteness_plot.pdf", width = 11, height = 5)

# Compose the two-panel figure on a canvas two units wide: the
# across-deciles panel (p2) fills the left unit, the within-deciles panel
# (p1) the right unit.
ggdraw(xlim = c(0, 2)) +
  draw_plot(p2, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(p1, x = 1, y = 0, width = 1, height = 1)

#dev.off()
# Heat map of the mean `corr` for every decile pair, with the rounded mean
# printed in each tile.
#
# Fix: the axis titles were swapped. x maps tile1 and y maps tile2, so x is
# now labelled "decile 1" and y "decile 2".
conc_corr_ets %>%
  group_by(tile1, tile2) %>%
  summarize(corr = mean(corr)) %>%
  ggplot(aes(x = tile1, y = tile2, fill = corr, label = round(corr, 2))) +
  geom_tile() +
  geom_text() +
  scale_fill_continuous(low = "white", high = "red") +
  xlab("concreteness decile 1") +
  ylab("concreteness decile 2")