# 1. Outcome shares per Test and per value of one grouping column
#
# Counts the rows in every (Test, <group_id>) cell, sums `Is Answer Correct`
# to obtain the number of correct answers, and returns the result in long
# format: one row per cell and outcome, with that outcome's share of the
# cell total in `Proportion`.
#
# Arguments
#   df        data frame with the columns `Test`, `Is Answer Correct` and
#             the column named by `group_id`
#   group_id  name of the grouping column, given as a string
#
# Value
#   A table with the columns Test, <group_id>, total, correct, incorrect,
#   Outcome (factor with levels "Incorrect", "Correct") and Proportion.
#   <group_id> is returned as a factor.
summarise_group_all <- function(df, group_id) {
  grp <- sym(group_id)

  # One row per (Test, group) cell with its raw counts
  cell_counts <- df %>%
    group_by(Test, !!grp) %>%
    summarise(
      total = n(),
      correct = sum(`Is Answer Correct`),
      incorrect = total - correct,
      .groups = "drop"
    )

  # Turn the counts into shares and stack them: incorrect first, then correct
  cell_shares <- cell_counts %>%
    mutate(
      pct_corr = correct / total,
      pct_inc = incorrect / total
    ) %>%
    pivot_longer(
      c(pct_inc, pct_corr),
      names_to = "Outcome",
      values_to = "Proportion"
    )

  cell_shares %>%
    mutate(
      # Map the helper column names onto the display labels
      Outcome = factor(
        Outcome,
        levels = c("pct_inc", "pct_corr"),
        labels = c("Incorrect", "Correct")
      ),
      # Freeze the IDs in their order of first appearance in this table
      !!grp := factor(!!grp, levels = unique(!!grp))
    )
}
# 2. Build the combined facet plot + annotate n + neat theme
#
# Draws one horizontal 100 % bar per value of `group_id`, split into the
# incorrect and correct share, with one facet per `Test`. Each segment is
# labelled with its percentage, each bar with its row count ("n = ..."),
# and the caption lists every "<id>: <description>" pair.
#
# Arguments
#   df          data frame with the columns `Test`, `Is Answer Correct` and
#               the two columns named by `group_id` and `group_desc`
#   group_id    name (string) of the column holding the group IDs shown on
#               the y axis
#   group_desc  name (string) of the column describing each ID; NA values
#               become "Other"; only used for the caption
#   plot_title  title of the plot
#
# Value
#   A ggplot object.
build_combined_group_plot <- function(df, group_id, group_desc, plot_title) {
  # Fail early with a readable message instead of an obscure tidy-eval error
  needed_cols <- c("Test", "Is Answer Correct", group_id, group_desc)
  absent_cols <- setdiff(needed_cols, colnames(df))
  if (length(absent_cols) > 0) {
    stop(
      "`df` is missing required column(s): ",
      paste(absent_cols, collapse = ", "),
      call. = FALSE
    )
  }

  grp <- sym(group_id)
  desc <- sym(group_desc)

  # 1. Replace NA descriptions with "Other"
  df2 <- df %>%
    mutate(!!desc := replace_na(!!desc, "Other"))

  # 2. Summarise across both Test types (shared helper, no duplicated logic)
  sum_df <- summarise_group_all(df2, group_id)

  # 3. Build the caption: "<id>: <description>" pairs joined by " | "
  cap <- df2 %>%
    distinct(!!grp, !!desc) %>%
    arrange(!!grp) %>%
    transmute(pair = paste0(!!grp, ": ", !!desc)) %>%
    pull(pair) %>%
    str_c(collapse = " | ")

  # 4. Label data for n: one row per bar rather than one per segment
  lab_df <- sum_df %>%
    distinct(Test, !!grp, total) %>%
    mutate(label = paste0("n = ", scales::comma(total)))

  # 5. Plot
  ggplot(sum_df, aes(x = Proportion, y = !!grp, fill = Outcome)) +
    geom_col(position = "fill", colour = "grey90") +
    # "n = ..." placed just right of the full bar, hence its own data and aes
    geom_text(
      data = lab_df,
      inherit.aes = FALSE,
      aes(x = 1.01, y = !!grp, label = label),
      hjust = 0, size = 3
    ) +
    # Percentage centred inside each bar segment
    geom_text(
      aes(label = scales::percent(Proportion, accuracy = 1)),
      position = position_fill(vjust = 0.5),
      colour = "white",
      size = 3
    ) +
    facet_wrap(~Test, scales = "free_y") +
    # x range beyond 1 plus clip = "off" leaves room for the n labels
    coord_cartesian(xlim = c(0, 1.12), clip = "off") +
    scale_x_continuous(
      labels = scales::percent_format(accuracy = 1),
      breaks = seq(0, 1, .25),
      minor_breaks = seq(0, 1, .05),
      expand = c(0.01, 0.01)
    ) +
    # Show only the part of each ID after its last underscore
    scale_y_discrete(labels = \(x) str_remove(x, "^.*_")) +
    scale_fill_manual(
      values = c("Incorrect" = "#d95f02", "Correct" = "#1b9e77")
    ) +
    labs(
      title = plot_title,
      x = NULL,
      y = str_replace_all(group_id, "_", " "),
      fill = NULL,
      caption = cap
    ) +
    theme_minimal(base_size = 12) +
    theme(
      axis.ticks = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank(),
      legend.position = "bottom",
      legend.direction = "horizontal",
      axis.text.y = element_text(
        hjust = 1,
        margin = margin(r = -6)
      ),
      plot.margin = margin(5.5, 25, 5.5, 5.5),
      plot.caption = element_text(hjust = 0)
    )
}
# 3. One plot per grouping column of results_MNA_df
plot_error_type <- build_combined_group_plot(
  df = results_MNA_df,
  group_id = "Error_type_label",
  group_desc = "Error_type_desc",
  plot_title = "Performance by error type"
)

plot_error_position <- build_combined_group_plot(
  df = results_MNA_df,
  group_id = "Error_position_label",
  group_desc = "Error_position_desc",
  plot_title = "Performance by error position"
)

plot_range <- build_combined_group_plot(
  df = results_MNA_df,
  group_id = "Range_label",
  group_desc = "Range_desc",
  plot_title = "Performance by range"
)

plot_cross_decade <- build_combined_group_plot(
  df = results_MNA_df,
  group_id = "Cross_decade",
  group_desc = "Cross_decade_desc",
  plot_title = "Performance by cross decade"
)

# Show the error-type plot when this line is run at top level
plot_error_type