df <- read.csv("ican-icar-2025-v1.csv")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ readr 2.2.0
## ✔ ggplot2 4.0.2 ✔ stringr 1.6.0
## ✔ lubridate 1.9.5 ✔ tibble 3.3.1
## ✔ purrr 1.2.1 ✔ tidyr 1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(ggplotify)
library(ggrepel)
library(tidyverse)
# Graph of Mathematics Minimum Proficiency Levels (Age 10)
df |>
filter(
CountryName %in% c("Kenya", "Mali", "Mozambique", "Senegal", "Tanzania", "Uganda"),
ch02 == 10
) |>
drop_na(MPLMath, HHWeightProvided) |>
group_by(CountryName) |>
summarise(
pct = sum(MPLMath * HHWeightProvided) / sum(HHWeightProvided)
) |>
mutate(CountryName = fct_reorder(CountryName, pct)) |>
ggplot(aes(x = pct, y = CountryName)) +
geom_col() +
scale_x_continuous(labels = scales::percent) +
theme_minimal() +
theme(
axis.title = element_blank(),
panel.grid = element_blank()
)

african_countries <- c("Kenya", "Mali", "Mozambique", "Senegal", "Tanzania", "Uganda")
plot_data <- df |>
filter(
CountryName %in% african_countries,
ch02 == 10
) |>
drop_na(MPLMath, MPLReading, HHWeightProvided) |>
group_by(CountryName) |>
summarise(
math = sum(MPLMath * HHWeightProvided) / sum(HHWeightProvided),
reading = sum(MPLReading * HHWeightProvided) / sum(HHWeightProvided)
) |>
pivot_longer(
cols = c(math, reading),
names_to = "subject",
values_to = "pct"
) |>
mutate(
subject = recode(subject,
math = "Mathematics",
reading = "Reading")
)
order_df <- plot_data |>
filter(subject == "Mathematics") |>
arrange(pct)
plot_data |>
mutate(CountryName = factor(CountryName, levels = order_df$CountryName)) |>
ggplot(aes(x = pct, y = CountryName, fill = subject)) +
geom_col(position = position_dodge(width = 0.7), width = 0.6) +
geom_text(
aes(label = paste0(round(pct * 100), "%")),
position = position_dodge(width = 0.7),
hjust = -0.2,
fontface = "bold",
size = 3.5
) +
scale_fill_manual(
values = c("Mathematics" = "#2171B5", "Reading" = "#238B45"),
name = NULL
) +
scale_x_continuous(
labels = scales::percent,
expand = expansion(mult = c(0, 0.15))
) +
labs(
title = "Numeracy and reading proficiency across African countries",
subtitle = "Weighted % of 10-year-old children meeting minimum proficiency"
) +
theme_minimal() +
theme(
axis.title = element_blank(),
panel.grid = element_blank(),
plot.title = element_text(face = "bold"),
legend.position = "top"
)

math_lang_summary <- df %>%
filter(
CountryName %in% c("Kenya", "Mali", "Mozambique", "Senegal", "Tanzania", "Uganda"),
ch02 == 10,
!is.na(HHWeightProvided)
) %>%
group_by(CountryName) %>%
summarise(
Mathematics = weighted.mean(MPLMath, w = HHWeightProvided, na.rm = TRUE),
Reading = weighted.mean(MPLReading, w = HHWeightProvided, na.rm = TRUE),
.groups = "drop"
) %>%
pivot_longer(
cols = c(Mathematics, Reading),
names_to = "subject",
values_to = "proficiency_rate"
)
country_order <- math_lang_summary %>%
filter(subject == "Mathematics") %>%
arrange(proficiency_rate) %>%
pull(CountryName)
math_lang_summary <- math_lang_summary %>%
mutate(CountryName = factor(CountryName, levels = country_order))
ggplot(math_lang_summary,
aes(x = proficiency_rate, y = CountryName, fill = subject)) +
geom_col(
data = ~ dplyr::filter(.x, CountryName != "Kenya"),
position = position_dodge(width = 0.7),
width = 0.6,
alpha = 0.35
) +
geom_col(
data = ~ dplyr::filter(.x, CountryName == "Kenya"),
position = position_dodge(width = 0.7),
width = 0.6,
alpha = 1
) +
geom_text(
aes(label = paste0(round(proficiency_rate * 100), "%")),
position = position_dodge(width = 0.7),
hjust = -0.2,
size = 3.5,
fontface = "bold"
) +
scale_fill_manual(
values = c("Mathematics" = "#2171B5", "Reading" = "#238B45"),
name = NULL
) +
scale_x_continuous(
labels = percent_format(accuracy = 1),
limits = c(0, 1),
expand = expansion(mult = c(0, 0.15))
) +
labs(
title = "Numeracy and reading proficiency among 10-year-olds",
subtitle = "Weighted proportion meeting MPL",
caption = "Source: ICAN-ICAR 2025"
) +
theme_minimal(base_size = 11) +
theme(
legend.position = "top",
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank()
)

african_countries <- c("Kenya", "Mali", "Mozambique", "Senegal", "Tanzania", "Uganda")
plot_data <- df |>
filter(
CountryName %in% african_countries,
ch02 == 10
) |>
drop_na(MPLMath, MPLReading, HHWeightProvided) |>
group_by(CountryName) |>
summarise(
math = sum(MPLMath * HHWeightProvided) / sum(HHWeightProvided),
reading = sum(MPLReading * HHWeightProvided) / sum(HHWeightProvided)
) |>
pivot_longer(
cols = c(math, reading),
names_to = "subject",
values_to = "pct"
) |>
mutate(
subject = recode(subject,
math = "Mathematics",
reading = "Reading")
)
# Order countries by Mathematics (lowest to highest)
order_df <- plot_data |>
filter(subject == "Mathematics") |>
arrange(pct)
plot_data |>
mutate(CountryName = factor(CountryName, levels = order_df$CountryName)) |>
ggplot(aes(x = pct, y = CountryName, fill = subject)) +
geom_col(position = position_dodge(width = 0.7), width = 0.6) +
geom_text(
aes(label = paste0(round(pct * 100), "%")),
position = position_dodge(width = 0.7),
hjust = -0.2,
fontface = "bold",
size = 3.5
) +
scale_fill_manual(
values = c("Mathematics" = "#2171B5", "Reading" = "#238B45"),
name = NULL
) +
scale_x_continuous(
labels = scales::percent,
expand = expansion(mult = c(0, 0.15))
) +
labs(
title = "Numeracy and reading proficiency across African countries",
subtitle = "Weighted % of 10-year-old children meeting minimum proficiency"
) +
theme_minimal() +
theme(
axis.title = element_blank(),
panel.grid = element_blank(),
plot.title = element_text(face = "bold"),
legend.position = "top"
)

math_lang_summary <- df %>%
filter(
CountryName %in% c("Kenya", "Mali", "Mozambique", "Senegal", "Tanzania", "Uganda"),
ch02 == 10,
!is.na(HHWeightProvided)
) %>%
group_by(CountryName) %>%
summarise(
Mathematics = weighted.mean(MPLMath, w = HHWeightProvided, na.rm = TRUE),
Reading = weighted.mean(MPLReading, w = HHWeightProvided, na.rm = TRUE),
.groups = "drop"
) %>%
pivot_longer(
cols = c(Mathematics, Reading),
names_to = "subject",
values_to = "proficiency_rate"
)
# Order countries by Mathematics
country_order <- math_lang_summary %>%
filter(subject == "Mathematics") %>%
arrange(proficiency_rate) %>%
pull(CountryName)
math_lang_summary <- math_lang_summary %>%
mutate(CountryName = factor(CountryName, levels = country_order))
ggplot(math_lang_summary,
aes(x = proficiency_rate, y = CountryName, fill = subject)) +
# Faded bars (non-Kenya)
geom_col(
data = ~ dplyr::filter(.x, CountryName != "Kenya"),
position = position_dodge(width = 0.7),
width = 0.6,
alpha = 0.35
) +
# Highlighted Kenya bars
geom_col(
data = ~ dplyr::filter(.x, CountryName == "Kenya"),
position = position_dodge(width = 0.7),
width = 0.6,
alpha = 1
) +
geom_text(
aes(label = paste0(round(proficiency_rate * 100), "%")),
position = position_dodge(width = 0.7),
hjust = -0.2,
size = 3.5,
fontface = "bold"
) +
scale_fill_manual(
values = c("Mathematics" = "#2171B5", "Reading" = "#238B45"),
name = NULL
) +
scale_x_continuous(
labels = percent_format(accuracy = 1),
limits = c(0, 1),
expand = expansion(mult = c(0, 0.15))
) +
labs(
title = "Numeracy and reading proficiency among 10-year-olds",
subtitle = "Weighted proportion of children meeting Minimum Proficiency Level, age 10",
x = "Proportion meeting MPL",
y = NULL,
caption = "Source: ICAN-ICAR 2025. African countries only. Estimates are weighted proportions."
) +
theme_minimal(base_size = 11) +
theme(
legend.position = "top",
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank()
)

plot_data <- df %>%
filter(
CountryName %in% c("Kenya", "Tanzania", "Uganda"),
ch06a >= 1 & ch06a <= 9,
!is.na(MPLMath), !is.na(MPLReading), !is.na(HHWeightProvided)
) %>%
group_by(CountryName, ch06a) %>%
summarise(
Mathematics = weighted.mean(MPLMath, w = HHWeightProvided),
Reading = weighted.mean(MPLReading, w = HHWeightProvided),
.groups = "drop"
) %>%
pivot_longer(
cols = c(Mathematics, Reading),
names_to = "subject",
values_to = "proportion"
)
ggplot(plot_data,
aes(x = ch06a, y = proportion,
colour = CountryName,
linetype = subject)) +
geom_line(linewidth = 0.8) +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
scale_x_continuous(breaks = 1:9) +
theme_minimal() +
theme(
axis.title = element_blank()
)

# Data prep
plot_data <- df %>%
filter(
CountryName %in% c("Kenya", "Tanzania", "Uganda"),
ch06a >= 1 & ch06a <= 9,
!is.na(MPLMath), !is.na(MPLReading), !is.na(HHWeightProvided)
) %>%
group_by(CountryName, ch06a) %>%
summarise(
Mathematics = weighted.mean(MPLMath, w = HHWeightProvided),
Reading = weighted.mean(MPLReading, w = HHWeightProvided),
.groups = "drop"
) %>%
pivot_longer(
cols = c(Mathematics, Reading),
names_to = "subject",
values_to = "proportion"
) %>%
mutate(
line_colour = if_else(subject == "Reading", "#C0392B", "#BDC3C7"),
line_type = case_when(
CountryName == "Uganda" ~ "solid",
CountryName == "Kenya" ~ "dotted",
CountryName == "Tanzania" ~ "dashed"
),
line_width = if_else(CountryName == "Uganda" & subject == "Reading", 1.4, 0.7),
line_alpha = if_else(CountryName == "Uganda" & subject == "Reading", 1, 0.55),
line_label = paste(CountryName, subject)
)
# End-of-line labels
end_labels <- plot_data %>% filter(ch06a == 9)
# Grade 5 gap
grade5_reading <- plot_data %>%
filter(
ch06a == 5,
subject == "Reading",
CountryName %in% c("Uganda", "Kenya")
) %>%
select(CountryName, proportion) %>%
pivot_wider(names_from = CountryName, values_from = proportion)
gap_pct <- round(abs(grade5_reading$Kenya - grade5_reading$Uganda) * 100)
uganda_g5 <- grade5_reading$Uganda
kenya_g5 <- grade5_reading$Kenya
uganda_reading <- ggplot(
plot_data,
aes(x = ch06a, y = proportion,
colour = line_colour,
linetype = line_type,
linewidth = line_width,
alpha = line_alpha,
group = line_label)
) +
geom_line() +
# Direct labels (always visible, not faded)
geom_text_repel(
data = end_labels,
inherit.aes = FALSE,
aes(x = ch06a, y = proportion, label = line_label),
colour = "black",
hjust = 0,
direction = "y",
nudge_x = 0.6,
xlim = c(9.5, 13),
size = 3,
fontface = "bold",
segment.size = 0.3,
force = 2
) +
# Bracket
annotate("segment",
x = 5, xend = 5,
y = uganda_g5, yend = kenya_g5,
colour = "#C0392B", linewidth = 0.5) +
annotate("segment",
x = 4.85, xend = 5.15,
y = uganda_g5, yend = uganda_g5,
colour = "#C0392B", linewidth = 0.5) +
annotate("segment",
x = 4.85, xend = 5.15,
y = kenya_g5, yend = kenya_g5,
colour = "#C0392B", linewidth = 0.5) +
annotate("text",
x = 5.3,
y = (uganda_g5 + kenya_g5) / 2,
label = paste0(
gap_pct,
" percentage point gap between Uganda and Kenya Reading at Grade 5"
),
colour = "#C0392B",
fontface = "italic",
size = 2.8,
hjust = 0) +
scale_colour_identity() +
scale_linetype_identity() +
scale_linewidth_identity() +
scale_alpha_identity() +
scale_x_continuous(
breaks = 1:9,
limits = c(1, 13),
expand = expansion(mult = c(0.02, 0))
) +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
labs(
title = "Uganda’s reading gap: a pattern the data raises but cannot explain",
subtitle = "Maths proficiency is broadly similar across Kenya, Tanzania and Uganda. Yet by Grade 9, Uganda’s reading rate is much lower than that of its neighbours — raising questions about what drives this gap.",
x = "Grade",
y = "Proportion meeting Minimum Proficiency Level",
caption = "Source: ICAN-ICAR 2025. Estimates are weighted proportions."
) +
coord_cartesian(clip = "off") +
theme_minimal(base_size = 11) +
theme(
legend.position = "none",
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(size = 9, colour = "grey40"),
plot.caption = element_text(size = 8, colour = "grey60"),
panel.grid.minor = element_blank(),
plot.margin = margin(10, 80, 10, 10) # space for labels
)
uganda_reading

# --- Overall Kenya average ---
kenya_overall_rate <- df %>%
filter(CountryName == "Kenya",
!is.na(MPLMath), !is.na(HHWeightProvided)) %>%
summarise(rate = weighted.mean(MPLMath, w = HHWeightProvided)) %>%
pull(rate)
# --- Gender ---
kenya_gender <- df %>%
filter(CountryName == "Kenya",
!is.na(MPLMath), !is.na(HHWeightProvided), !is.na(ch03)) %>%
mutate(label = if_else(ch03 == 1, "Female", "Male")) %>%
group_by(label) %>%
summarise(
prof_rate = weighted.mean(MPLMath, w = HHWeightProvided),
headcount = sum(HHWeightProvided),
.groups = "drop"
) %>%
mutate(category = "Gender")
# --- Location ---
kenya_location <- df %>%
filter(CountryName == "Kenya",
!is.na(MPLMath), !is.na(HHWeightProvided),
Location %in% c("Rural", "Urban")) %>%
group_by(label = Location) %>%
summarise(
prof_rate = weighted.mean(MPLMath, w = HHWeightProvided),
headcount = sum(HHWeightProvided),
.groups = "drop"
) %>%
mutate(category = "Location")
# --- Enrolment ---
kenya_enrol <- df %>%
filter(CountryName == "Kenya",
!is.na(MPLMath), !is.na(HHWeightProvided),
!is.na(EnrolmentStatus)) %>%
group_by(label = EnrolmentStatus) %>%
summarise(
prof_rate = weighted.mean(MPLMath, w = HHWeightProvided),
headcount = sum(HHWeightProvided),
.groups = "drop"
) %>%
mutate(category = "Enrolment")
# --- Grade ---
kenya_grade <- df %>%
filter(CountryName == "Kenya",
!is.na(MPLMath), !is.na(HHWeightProvided),
!is.na(ch06a), ch06a >= 0, ch06a <= 10) %>%
mutate(label = if_else(ch06a == 0, "Pre-Primary", paste0("Grade ", ch06a))) %>%
group_by(label, ch06a) %>%
summarise(
prof_rate = weighted.mean(MPLMath, w = HHWeightProvided),
headcount = sum(HHWeightProvided),
.groups = "drop"
) %>%
arrange(ch06a) %>%
mutate(category = "Grade") %>%
select(-ch06a)
# --- Age group ---
kenya_age <- df %>%
filter(CountryName == "Kenya",
!is.na(MPLMath), !is.na(HHWeightProvided),
!is.na(ch02)) %>%
mutate(label = case_when(
ch02 >= 5 & ch02 <= 7 ~ "5-7 yrs",
ch02 >= 8 & ch02 <= 10 ~ "8-10 yrs",
ch02 >= 11 & ch02 <= 13 ~ "11-13 yrs",
ch02 >= 14 & ch02 <= 16 ~ "14-16 yrs"
)) %>%
filter(!is.na(label)) %>%
group_by(label) %>%
summarise(
prof_rate = weighted.mean(MPLMath, w = HHWeightProvided),
headcount = sum(HHWeightProvided),
.groups = "drop"
) %>%
mutate(
label = factor(label, levels = c("5-7 yrs", "8-10 yrs", "11-13 yrs", "14-16 yrs"))
) %>%
arrange(label) %>%
mutate(
label = as.character(label),
category = "Age Group"
)
# --- Combine ---
kenya_plot_data <- bind_rows(
kenya_gender, kenya_location, kenya_enrol,
kenya_grade, kenya_age
)
kenya_plot_data$label <- factor(
kenya_plot_data$label,
levels = unique(kenya_plot_data$label)
)
# --- Domain structure ---
kenya_domain_breaks <- kenya_plot_data %>%
mutate(x_pos = as.numeric(label)) %>%
group_by(category) %>%
summarise(x_end = max(x_pos), .groups = "drop") %>%
filter(x_end < max(as.numeric(kenya_plot_data$label)))
kenya_domain_labels <- kenya_plot_data %>%
mutate(x_pos = as.numeric(label)) %>%
group_by(category) %>%
summarise(x_mid = mean(x_pos), .groups = "drop")
# --- Plot ---
kenya_bubble <- ggplot(
kenya_plot_data,
aes(x = as.numeric(label), y = prof_rate)
) +
geom_vline(
xintercept = kenya_domain_breaks$x_end + 0.5,
colour = "grey80",
linewidth = 0.4
) +
geom_hline(
yintercept = kenya_overall_rate,
linetype = "dashed",
colour = "grey40",
linewidth = 0.6
) +
annotate(
"text",
x = 1,
y = kenya_overall_rate + 0.03,
label = paste0("Kenya average: ", round(kenya_overall_rate * 100), "%"),
hjust = 0,
size = 3.2,
colour = "grey30",
fontface = "italic"
) +
annotate(
"text",
x = kenya_domain_labels$x_mid,
y = 1.08,
label = kenya_domain_labels$category,
fontface = "bold",
colour = "grey40",
size = 3
) +
geom_point(
aes(size = headcount, fill = category),
shape = 21,
colour = "white",
stroke = 0.8,
alpha = 0.85
) +
geom_text(
aes(label = paste0(round(prof_rate * 100), "%")),
colour = "white",
fontface = "bold",
size = 2.8
) +
scale_fill_manual(
values = c(
"Gender" = "#2171B5",
"Location" = "#E6AB02",
"Enrolment" = "#7570B3",
"Grade" = "#1B9E77",
"Age Group" = "#D95F02"
),
name = "Domain",
guide = guide_legend(order = 1)
) +
scale_size_continuous(
range = c(4, 20),
labels = comma,
name = "Number of children (weighted)",
guide = guide_legend(
override.aes = list(fill = "grey70", colour = "white"),
order = 2
)
) +
scale_x_continuous(
breaks = seq_along(levels(kenya_plot_data$label)),
labels = levels(kenya_plot_data$label),
expand = expansion(add = 0.6)
) +
scale_y_continuous(
labels = percent_format(accuracy = 1),
limits = c(0, 1.12),
breaks = seq(0, 1, 0.2)
) +
coord_cartesian(clip = "off") +
labs(
title = "Numeracy proficiency by child characteristics — Kenya",
subtitle = "Bubble size = weighted total number of children. Dashed line = Kenya average.",
x = NULL,
y = NULL,
caption = "Source: ICAN-ICAR 2025, Kenya. Estimates are weighted proportions."
) +
theme_minimal(base_size = 11) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_line(colour = "grey90", linewidth = 0.3),
legend.position = "bottom",
legend.box = "horizontal"
)
kenya_bubble

# --- Overall Mali rate ---
mali_overall_rate <- df %>%
filter(CountryName == "Mali",
!is.na(MPLMath), !is.na(HHWeightProvided)) %>%
summarise(rate = weighted.mean(MPLMath, w = HHWeightProvided)) %>%
pull(rate)
# --- Build all groups (same as yours) ---
mali_gender <- df %>%
filter(CountryName == "Mali",
!is.na(MPLMath), !is.na(HHWeightProvided), !is.na(ch03)) %>%
mutate(label = if_else(ch03 == 1, "Female", "Male")) %>%
group_by(label) %>%
summarise(
prof_rate = weighted.mean(MPLMath, w = HHWeightProvided),
headcount = sum(HHWeightProvided),
.groups = "drop"
) %>%
mutate(category = "Gender")
mali_location <- df %>%
filter(CountryName == "Mali",
!is.na(MPLMath), !is.na(HHWeightProvided),
Location %in% c("Rural", "Urban")) %>%
group_by(label = Location) %>%
summarise(
prof_rate = weighted.mean(MPLMath, w = HHWeightProvided),
headcount = sum(HHWeightProvided),
.groups = "drop"
) %>%
mutate(category = "Location")
mali_enrol <- df %>%
filter(CountryName == "Mali",
!is.na(MPLMath), !is.na(HHWeightProvided),
!is.na(EnrolmentStatus)) %>%
group_by(label = EnrolmentStatus) %>%
summarise(
prof_rate = weighted.mean(MPLMath, w = HHWeightProvided),
headcount = sum(HHWeightProvided),
.groups = "drop"
) %>%
mutate(category = "Enrolment")
mali_grade <- df %>%
filter(CountryName == "Mali",
!is.na(MPLMath), !is.na(HHWeightProvided),
!is.na(ch06a), ch06a >= 0, ch06a <= 10) %>%
mutate(label = if_else(ch06a == 0, "Pre-Primary", paste0("Grade ", ch06a))) %>%
group_by(label, ch06a) %>%
summarise(
prof_rate = weighted.mean(MPLMath, w = HHWeightProvided),
headcount = sum(HHWeightProvided),
.groups = "drop"
) %>%
arrange(ch06a) %>%
mutate(category = "Grade") %>%
select(-ch06a)
mali_age <- df %>%
filter(CountryName == "Mali",
!is.na(MPLMath), !is.na(HHWeightProvided),
!is.na(ch02)) %>%
mutate(label = case_when(
ch02 >= 5 & ch02 <= 7 ~ "5-7 yrs",
ch02 >= 8 & ch02 <= 10 ~ "8-10 yrs",
ch02 >= 11 & ch02 <= 13 ~ "11-13 yrs",
ch02 >= 14 & ch02 <= 16 ~ "14-16 yrs"
)) %>%
filter(!is.na(label)) %>%
group_by(label) %>%
summarise(
prof_rate = weighted.mean(MPLMath, w = HHWeightProvided),
headcount = sum(HHWeightProvided),
.groups = "drop"
) %>%
mutate(
label = factor(label, levels = c("5-7 yrs", "8-10 yrs", "11-13 yrs", "14-16 yrs"))
) %>%
arrange(label) %>%
mutate(label = as.character(label),
category = "Age Group")
# --- Combine (FIXED ordering) ---
mali_plot_data <- bind_rows(
mali_gender, mali_location, mali_enrol,
mali_grade, mali_age
)
mali_plot_data$label <- factor(
mali_plot_data$label,
levels = unique(mali_plot_data$label)
)
# --- Domain structure ---
mali_domain_breaks <- mali_plot_data %>%
mutate(x_pos = as.numeric(label)) %>%
group_by(category) %>%
summarise(x_end = max(x_pos), .groups = "drop") %>%
filter(x_end < max(as.numeric(mali_plot_data$label)))
mali_domain_labels <- mali_plot_data %>%
mutate(x_pos = as.numeric(label)) %>%
group_by(category) %>%
summarise(x_mid = mean(x_pos), .groups = "drop")
# --- Plot ---
mali_bubble <- ggplot(
mali_plot_data,
aes(x = as.numeric(label), y = prof_rate)
) +
geom_vline(
xintercept = mali_domain_breaks$x_end + 0.5,
colour = "grey80",
linewidth = 0.4
) +
geom_hline(
yintercept = mali_overall_rate,
linetype = "dashed",
colour = "grey40",
linewidth = 0.6
) +
annotate(
"text",
x = 1,
y = mali_overall_rate + 0.03,
label = paste0("Mali average: ", round(mali_overall_rate * 100), "%"),
hjust = 0,
size = 3.2,
colour = "grey30",
fontface = "italic"
) +
annotate(
"text",
x = mali_domain_labels$x_mid,
y = 1.08,
label = mali_domain_labels$category,
fontface = "bold",
colour = "grey40",
size = 3
) +
geom_point(
aes(size = headcount, fill = category),
shape = 21,
colour = "white",
stroke = 0.8,
alpha = 0.85
) +
geom_text(
aes(label = paste0(round(prof_rate * 100), "%")),
colour = "white",
fontface = "bold",
size = 2.8
) +
scale_fill_manual(
values = c(
"Gender" = "#2171B5",
"Location" = "#E6AB02",
"Enrolment" = "#7570B3",
"Grade" = "#1B9E77",
"Age Group" = "#D95F02"
),
name = "Domain",
guide = guide_legend(order = 1)
) +
scale_size_continuous(
range = c(4, 20),
labels = comma,
name = "Number of children (weighted)",
guide = guide_legend(
override.aes = list(fill = "grey70", colour = "white"),
order = 2
)
) +
scale_x_continuous(
breaks = seq_along(levels(mali_plot_data$label)),
labels = levels(mali_plot_data$label),
expand = expansion(add = 0.6)
) +
scale_y_continuous(
labels = percent_format(accuracy = 1),
limits = c(0, 1.12),
breaks = seq(0, 1, 0.2)
) +
coord_cartesian(clip = "off") +
labs(
title = "Numeracy proficiency by child characteristics — Mali",
subtitle = "Bubble size = weighted total number of children. Dashed line = Mali average.",
x = NULL,
y = NULL,
caption = "Source: ICAN-ICAR 2025, Mali. Estimates are weighted proportions."
) +
theme_minimal(base_size = 11) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_line(colour = "grey90", linewidth = 0.3),
legend.position = "bottom",
legend.box = "horizontal"
)
mali_bubble
