Each language has an equal number of words (200), but the words differ across languages.
# Read the language-pairwise decile distance table.
cos_dist <- read_csv(file = "data/lang_pairwise_decile_distance_10x200.csv")

# Summarise `mean_cos_dist` within every (tile1, tile2) combination.
conc_corr_ms <- cos_dist %>%
  group_by(concreteness_tile1, concreteness_tile2) %>%
  multi_boot_standard(col = "mean_cos_dist") %>%
  ungroup()

# tile1 goes on the x-axis below, so it is stored as a factor.
conc_corr_ms <- mutate(conc_corr_ms,
                       concreteness_tile1 = as.factor(concreteness_tile1))

# One line per tile2 value; the layer-level `group` takes precedence over
# the plot-level `group = 1`.
ggplot(data = conc_corr_ms,
       mapping = aes(x = concreteness_tile1, y = mean, group = 1)) +
  geom_line(mapping = aes(group = concreteness_tile2,
                          color = as.factor(concreteness_tile2))) +
  theme_classic()
# Same per-(tile1, tile2) summary of `mean_cos_dist`, recomputed so that
# tile2 (rather than tile1) can be the factor on the x-axis.
conc_corr_ms <- cos_dist %>%
  group_by(concreteness_tile1, concreteness_tile2) %>%
  multi_boot_standard(col = "mean_cos_dist") %>%
  ungroup()

conc_corr_ms <- mutate(conc_corr_ms,
                       concreteness_tile2 = as.factor(concreteness_tile2))

# Mirror image of the tile1 plot: x-axis is tile2, one line per tile1 value.
ggplot(data = conc_corr_ms,
       mapping = aes(x = concreteness_tile2, y = mean, group = 1)) +
  geom_line(mapping = aes(group = concreteness_tile1,
                          color = as.factor(concreteness_tile1))) +
  theme_classic()
Each language has a sample of the same words (1000), but a different number of words. I got these 10,000 words by looking at the words that were most common across models.
# Read the pairwise tile correlations; column names are supplied here via
# `col_names` rather than taken from the file.
# NOTE(review): this is an absolute, machine-specific path -- the chunk will
# not run elsewhere until it is replaced with a project-relative path.
conc_corr <- read_csv("/Users/mollylewis/Documents/research/Projects/1_in_progress/L2ETS/studies/study2/analyses/5_concreteness/analysis_v4/data/lang_pairwise_tile_correlations.csv",
                      col_names = c("tile1", "tile2", "corr", "lang1", "lang2"))

# Summarise `corr` for every (tile1, tile2) pair, keeping only pairs with
# tile2 below tile1. An earlier throwaway assignment of `conc_corr_ms` that
# was immediately overwritten (costing a full extra bootstrap pass) has been
# removed.
conc_corr_ms <- conc_corr %>%
  group_by(tile1, tile2) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile2 <= tile1) %>%
  mutate(tile1 = as.factor(tile1)) %>%
  # NOTE(review): labels say "quintile" although the tiles appear to run to
  # 10; the strings are left untouched in case later code matches on them.
  # After the final filter every remaining row is "different quintile".
  mutate(group_type = ifelse(tile1 == tile2, "same quintile", "different quintile"),
         # Make sure level "10" sorts last on the x-axis.
         tile1 = fct_relevel(tile1, "10", after = Inf)) %>%
  # Drop the diagonal (same-tile pairs).
  filter(tile1 != tile2)

# x-axis is tile1, one line per tile2 value; the layer-level `group`
# overrides the plot-level `group = 1`.
ggplot(conc_corr_ms, aes(x = tile1, y = mean, group = 1)) +
  geom_line(aes(group = tile2,
                color = as.factor(tile2))) +
  theme_classic()
# Summarise `corr` for every (tile1, tile2) pair, keeping only pairs with
# tile2 below tile1, this time with tile2 as the factor for the x-axis.
# An earlier throwaway assignment of `conc_corr_ms` that was immediately
# overwritten (costing a full extra bootstrap pass) has been removed.
conc_corr_ms <- conc_corr %>%
  group_by(tile1, tile2) %>%
  multi_boot_standard(col = "corr") %>%
  ungroup() %>%
  filter(tile2 <= tile1) %>%
  mutate(tile2 = as.factor(tile2)) %>%
  # NOTE(review): labels say "quintile" although the tiles appear to run to
  # 10; the strings are left untouched in case later code matches on them.
  # After the final filter every remaining row is "different quintile".
  mutate(group_type = ifelse(tile1 == tile2, "same quintile", "different quintile"),
         # Make sure level "10" sorts last among the factor levels.
         tile2 = fct_relevel(tile2, "10", after = Inf)) %>%
  # Drop the diagonal (same-tile pairs).
  filter(tile1 != tile2)

# x-axis is tile2, one line per tile1 value; the layer-level `group`
# overrides the plot-level `group = 1`.
ggplot(conc_corr_ms, aes(x = tile2, y = mean, group = 1)) +
  geom_line(aes(group = tile1,
                color = as.factor(tile1))) +
  theme_classic()
N words per language/decile actually in the analysis in V4: