# Essay metadata; its `score` column is summarised here and reused for the
# score histogram further down.
SCORE_METADATA <- here("data/raw/models/all_model/merged_metadata.csv")
meta_data <- read_csv(SCORE_METADATA)

# Overall mean and SD of `score`, rendered as a table with 2 decimals.
kable(
  summarize(meta_data, mean_score = mean(score), sd_score = sd(score)),
  digits = 2
)
mean_score | sd_score |
---|---|
3.51 | 0.91 |
# Language-name lookup table; joined onto the distance summaries below to
# obtain the `lang_name3` axis labels.
LANG_NAME_PATH <- here("data/processed/lang_names/ets_to_google_langcodes_complete.csv")
langs_clean <- LANG_NAME_PATH %>%
  read_csv()
# Sampled essay distances. Column names are supplied explicitly, so the file
# is presumably headerless -- TODO confirm.
DISTANCE_INFILE_ALL <- here("analyses/01_distinctiveness/data/sampled_essay_distances_all.csv")
mean_language_distances_all <- mutate_if(
  read_csv(
    DISTANCE_INFILE_ALL,
    col_names = c(
      "language", "score_group", "distance_type",
      "distance", "n", "sample_id"
    )
  ),
  is.character,
  as.factor
)

# Put the two distance types side by side and reduce them to a single ratio
# (same_language / diff_language) per remaining row.
# NOTE(review): the plot axis label reads "between / within", whereas this
# ratio is same / diff -- confirm which direction is intended.
dists_wide_all <- mean_language_distances_all %>%
  select(-n) %>%
  spread(distance_type, distance) %>%
  mutate(same_diff_distance = same_language / diff_language) %>%
  select(-c(diff_language, same_language))
# Bootstrapped mean of the distance ratio for each language (1000 resamples);
# the plot below reads its `mean`, `ci_lower` and `ci_upper` columns.
lang_means_all <- tidyboot_mean(
  group_by(dists_wide_all, language),
  column = same_diff_distance,
  nboot = 1000
)
# Bar chart of the bootstrapped ratio per language with CI line ranges.
# Values are shifted down by 1 and the axis relabelled so that bars start at
# a ratio of 1.0.
lang_means_all %>%
  left_join(
    # Upper-case the lookup key so it matches the `language` values.
    mutate(langs_clean, ETS_lang_name = toupper(ETS_lang_name)),
    by = c("language" = "ETS_lang_name")
  ) %>%
  ggplot(aes(x = lang_name3, y = mean - 1)) +
  geom_col(position = "dodge") +
  geom_linerange(
    aes(ymin = ci_lower - 1, ymax = ci_upper - 1),
    position = position_dodge(width = 1)
  ) +
  scale_y_continuous(
    name = "Semantic Distinctiveness \n(between / within cosine distance)",
    breaks = c(0, .2, .4, .6),
    labels = c("1.0", "1.2", "1.4", "1.6"),
    limits = c(0, .6)
  ) + # rescale so y-axis starts at 1
  # No fill aesthetic is mapped in this plot, so the fill scale and legend
  # settings below have no visible effect here.
  scale_fill_manual(values = c("#fb9a99", "#a6cee3"), name = "Essay Score") +
  xlab("Language") +
  theme_classic(base_size = 20) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = c(.1, 0.8),
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1),
    legend.background = element_blank()
  )
Descriptives
# Average the ratio over runs, leaving one value per language.
language_means <- summarize(
  group_by(dists_wide_all, language),
  same_diff_distance = mean(same_diff_distance)
)

# Mean and SD of those per-language values across languages, to 2 decimals.
language_means %>%
  summarize(mean = mean(same_diff_distance), sd = sd(same_diff_distance)) %>%
  mutate(mean = round(mean, 2), sd = round(sd, 2)) %>%
  kable()
mean | sd |
---|---|
1.26 | 0.09 |
Is the mean ratio across languages > 1?
# Reference value: a ratio of 1 means same- and different-language distances
# are equal.
MU <- 1

# One-sample t-test of the per-language means against MU, tidied into a
# one-row table with the estimate and statistic rounded to 2 decimals.
language_means$same_diff_distance %>%
  t.test(mu = MU) %>%
  tidy() %>%
  mutate(estimate = round(estimate, 2), statistic = round(statistic, 2)) %>%
  kable()
estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
---|---|---|---|---|---|---|---|
1.26 | 17.09 | 0 | 34 | 1.225575 | 1.286447 | One Sample t-test | two.sided |
# Repeat of the one-sample t-test against MU run just above (same inputs,
# same output table).
language_means %>%
  pull(same_diff_distance) %>%
  t.test(mu = MU) %>%
  tidy() %>%
  mutate(estimate = round(estimate, 2), statistic = round(statistic, 2)) %>%
  kable()
estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
---|---|---|---|---|---|---|---|
1.26 | 17.09 | 0 | 34 | 1.225575 | 1.286447 | One Sample t-test | two.sided |
Null hypothesis: the per-language means are drawn from a normal distribution
# One-sample Kolmogorov-Smirnov test of the per-language means against a
# normal distribution whose mean and SD are taken from the same sample.
# NOTE(review): ?ks.test states the reference parameters should be
# pre-specified, not estimated from the data, so this p-value is only
# approximate -- consider shapiro.test() instead.
ks.test(language_means$same_diff_distance, "pnorm",
mean=mean(language_means$same_diff_distance),
sd=sd(language_means$same_diff_distance))
##
## One-sample Kolmogorov-Smirnov test
##
## data: language_means$same_diff_distance
## D = 0.090512, p-value = 0.9119
## alternative hypothesis: two-sided
Null hypothesis: the per-language means are symmetric about 1 (location equals MU)
# Non-parametric check of the same question as the t-test: one-sample
# Wilcoxon signed-rank test of the per-language means against MU.
wilcox.test(language_means$same_diff_distance, mu = MU)
##
## Wilcoxon signed rank test
##
## data: language_means$same_diff_distance
## V = 630, p-value = 5.821e-11
## alternative hypothesis: true location is not equal to 1
# Output location for the combined figure written by pdf() below.
FIG_PATH <- here("analyses/01_distinctiveness/results/distinctiveness_fig.pdf")
# Input: sampled essay distances for the high/low score-group analysis.
DISTANCE_INFILE <- here("analyses/01_distinctiveness/data/sampled_essay_distances_high_low.csv")
Main plot
# Sampled essay distances for the high/low analysis. Column names are supplied
# explicitly, so the file is presumably headerless -- TODO confirm.
mean_language_distances <- read_csv(
  DISTANCE_INFILE,
  col_names = c(
    "language", "score_group", "distance_type",
    "distance", "n", "sample_id"
  )
)

# Put the two distance types side by side and reduce them to the ratio
# same_language / diff_language. The result stays grouped by score_group;
# later steps set their own grouping.
dists_wide <- mean_language_distances %>%
  select(-n) %>%
  group_by(score_group) %>%
  spread(distance_type, distance) %>%
  mutate(same_diff_distance = same_language / diff_language) %>%
  select(-c(diff_language, same_language))
# Bootstrapped mean of the ratio for every language x score-group cell
# (1000 resamples); supplies `mean`, `ci_lower`, `ci_upper` for the main plot.
lang_means <- tidyboot_mean(
  group_by(dists_wide, language, score_group),
  column = same_diff_distance,
  nboot = 1000
)
# Main figure: dodged bars of the bootstrapped ratio per language, one bar per
# score group, with CI line ranges. Values are shifted down by 1 and the axis
# relabelled so that bars start at a ratio of 1.0.
main_plot <- lang_means %>%
  left_join(
    # Upper-case the lookup key so it matches the `language` values.
    mutate(langs_clean, ETS_lang_name = toupper(ETS_lang_name)),
    by = c("language" = "ETS_lang_name")
  ) %>%
  ggplot(
    aes(x = lang_name3, y = mean - 1, group = score_group, fill = score_group)
  ) +
  geom_col(position = "dodge") +
  geom_linerange(
    aes(ymin = ci_lower - 1, ymax = ci_upper - 1),
    position = position_dodge(width = 1)
  ) +
  scale_y_continuous(
    name = "Semantic Distinctiveness \n(between / within cosine distance)",
    breaks = c(0, .2, .4, .6),
    labels = c("1.0", "1.2", "1.4", "1.6"),
    limits = c(0, .6)
  ) + # rescale so y-axis starts at 1
  scale_fill_manual(values = c("#fb9a99", "#a6cee3"), name = "Essay Score") +
  xlab("Language") +
  theme_classic(base_size = 20) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = c(.1, 0.8),
    axis.line = element_line(size = 1.2),
    axis.ticks = element_line(size = 1),
    legend.background = element_blank()
  )
Histogram inset
# Small histogram of essay scores (bins of width 1), used as the inset of the
# main figure.
inset_plot <- meta_data %>%
  ggplot(aes(x = score)) +
  geom_histogram(binwidth = 1) +
  labs(x = "Essay Score", y = "N Essays") +
  theme_classic(base_size = 10)
# Draw the main plot on a blank canvas, then overlay the score histogram in
# the upper-right corner (positions are fractions of the canvas).
plot_with_inset <- ggdraw() +
  draw_plot(main_plot) +
  draw_plot(
    inset_plot,
    x = 0.83, y = .75,
    width = .14, height = .2
  )

# Display the combined figure.
plot_with_inset
# Write the combined figure to FIG_PATH as a 14-inch-wide PDF.
# The plot is print()ed explicitly: a bare `plot_with_inset` only draws via
# top-level auto-printing, so the PDF would be blank if this code were
# source()d or moved inside a function.
pdf(FIG_PATH, width = 14)
print(plot_with_inset)
dev.off()
# Average the ratio within every language x score-group cell. summarize()
# drops the last grouping level, so the result stays grouped by language and
# fct_rev() reverses the score_group level order within each language.
language_means_hl <- summarize(
  group_by(dists_wide, language, score_group),
  same_diff_distance = mean(same_diff_distance)
) %>%
  mutate(score_group = fct_rev(score_group))

# Mean and SD across languages for each score group, to 2 decimals.
language_means_hl %>%
  group_by(score_group) %>%
  summarize(mean = mean(same_diff_distance), sd = sd(same_diff_distance)) %>%
  mutate(mean = round(mean, 2), sd = round(sd, 2)) %>%
  kable()
score_group | mean | sd |
---|---|---|
low | 1.27 | 0.08 |
high | 1.21 | 0.09 |
# Paired t-test of low- vs. high-score distinctiveness across languages.
# The data are reshaped to one row per language so each low value is paired
# with the high value of the SAME language; the formula interface with
# `paired` pairs by row order and is rejected by recent R releases.
# Languages missing either group become NA and are dropped by t.test().
paired_means_hl <- language_means_hl %>%
  ungroup() %>%
  spread(score_group, same_diff_distance)

# Estimate is mean(low - high), matching the original level order.
t.test(paired_means_hl$low, paired_means_hl$high, paired = TRUE) %>%
  tidy() %>%
  mutate_at(vars(estimate, statistic), round, 2) %>%
  kable()
estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
---|---|---|---|---|---|---|---|
0.06 | 5.73 | 1.9e-06 | 34 | 0.0401324 | 0.0842251 | Paired t-test | two.sided |
Null hypothesis: each group's per-language means are drawn from a normal distribution
# Per-language mean ratios for each score group as plain numeric vectors.
# Both come from the same table in the same row order, which the paired test
# below relies on.
low_data <- pull(
  filter(language_means_hl, score_group == "low"),
  same_diff_distance
)
high_data <- pull(
  filter(language_means_hl, score_group == "high"),
  same_diff_distance
)
# Kolmogorov-Smirnov normality check for the low-score group.
# NOTE(review): the reference mean/SD are estimated from the same sample, so
# the p-value is only approximate (see ?ks.test).
ks.test(low_data, "pnorm", mean=mean(low_data), sd=sd(low_data))
##
## One-sample Kolmogorov-Smirnov test
##
## data: low_data
## D = 0.13227, p-value = 0.5296
## alternative hypothesis: two-sided
# Kolmogorov-Smirnov normality check for the high-score group.
# NOTE(review): the reference mean/SD are estimated from the same sample, so
# the p-value is only approximate (see ?ks.test).
ks.test(high_data, "pnorm", mean=mean(high_data), sd=sd(high_data))
##
## One-sample Kolmogorov-Smirnov test
##
## data: high_data
## D = 0.10694, p-value = 0.7791
## alternative hypothesis: two-sided
Null hypothesis: the paired low − high differences are symmetric about 0 (no location shift)
# Non-parametric counterpart to the paired t-test: Wilcoxon signed-rank test
# on the low/high pairs. Pairing is positional, so low_data and high_data
# must list languages in the same order.
wilcox.test(low_data, high_data, paired=TRUE)
##
## Wilcoxon signed rank test
##
## data: low_data and high_data
## V = 574, p-value = 3.647e-06
## alternative hypothesis: true location shift is not equal to 0