library(tidyverse)
# Load the source data. The summaries further down read the `year`,
# `overall_score` and `region` columns from this data frame.
dataset <- read.csv(file = "dataset.csv")
# This line chart shows how the average statistical capacity score changed
# globally from 2016 to 2023. The score rose from 58.6 to 69.7, an 11-point
# gain, indicating steady progress in countries' ability to collect and use
# data.
# Mean overall score per year, from 2016 onward. Rows without a score are
# dropped before averaging, and each mean is rounded to one decimal place.
yearly_avg <- dataset %>%
  filter(year >= 2016, !is.na(overall_score)) %>%
  group_by(year) %>%
  summarise(avg_score = round(mean(overall_score), digits = 1))
# Line chart of the yearly average score. Every point is labelled with its
# value rounded to a whole number, and the y-axis is fixed to 50-80.
ggplot(data = yearly_avg, mapping = aes(x = year, y = avg_score)) +
  geom_line(linewidth = 1.5, color = "#1E90C0") +
  geom_point(size = 3, color = "#1E90C0") +
  geom_text(
    mapping = aes(label = round(avg_score)),
    color = "#1A2B3C",
    size = 3.5,
    vjust = -1.2
  ) +
  scale_x_continuous(breaks = 2016:2023) +
  scale_y_continuous(breaks = seq(50, 80, by = 5), limits = c(50, 80)) +
  labs(
    title = "Global Statistical Capacity Has Steadily Increased Over Time",
    caption = "Source: World Bank | Average overall score across all countries, 2016-2023",
    x = "Year",
    y = "Average Statistical Capacity Score (0-100)"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(color = "#0A2342", size = 14, face = "bold"),
    plot.caption = element_text(color = "gray50", face = "italic", hjust = 0),
    axis.title = element_text(color = "#1A2B3C", size = 11),
    axis.text = element_text(color = "#1A2B3C", size = 10),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

# Write the chart built above (ggplot2's "last plot") to disk.
ggsave(
  filename = "slide5_trend.png",
  plot = last_plot(),
  width = 8,
  height = 4.5,
  dpi = 300
)
# This bar chart compares two phases of growth. The blue bars (2016-2019) show
# slower progress (+4.5 points), while the green bars (2020-2023) show faster
# growth (+6.6 points), suggesting a structural shift in global data capacity.
# Tag every year with its growth phase: years up to and including 2019 fall in
# the first phase, all later years in the second.
yearly_avg <- mutate(
  yearly_avg,
  phase = ifelse(
    year <= 2019,
    "2016-2019 (Slow Growth)",
    "2020-2023 (Accelerated Growth)"
  )
)
# Bar chart of the yearly average score, one bar per year, filled by growth
# phase, then saved to slide6_acceleration.png.
#
# The y-axis is deliberately zoomed to 50-75 so year-to-year differences are
# visible. The zoom is done with coord_cartesian() instead of scale limits plus
# `oob = scales::squish`: squish rewrites out-of-range values (it moved every
# bar's base from 0 to 50 and would silently draw any average above 75 as 75),
# whereas coord_cartesian() only changes the visible window and leaves the
# plotted data untouched.
ggplot(yearly_avg, aes(x = factor(year), y = avg_score, fill = phase)) +
  geom_col() +
  scale_fill_manual(values = c(
    "2016-2019 (Slow Growth)" = "#1E90C0",
    "2020-2023 (Accelerated Growth)" = "#028A5B"
  )) +
  scale_y_continuous(breaks = seq(50, 75, 5)) +
  coord_cartesian(ylim = c(50, 75)) +
  labs(
    title = "Growth Accelerated Sharply After 2019 - A Structural Shift in Progress",
    x = "Year",
    y = "Average Statistical Capacity Score (0-100)",
    fill = NULL,
    caption = "Source: World Bank | Blue = 2016-2019 (slow phase) | Green = 2020-2023 (accelerated phase)"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 13, color = "#0A2342"),
    axis.title = element_text(size = 11, color = "#1A2B3C"),
    axis.text = element_text(size = 10, color = "#1A2B3C"),
    legend.position = "bottom",
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.caption = element_text(face = "italic", color = "gray50", hjust = 0)
  )

# ggsave() writes the most recently built plot, i.e. the chart above.
ggsave("slide6_acceleration.png", width = 8, height = 4.5, dpi = 300)
# This horizontal bar chart compares average statistical capacity scores across
# 7 world regions in 2023. North America and Europe lead with scores above 84,
# while Sub-Saharan Africa scores just 60. The 33-point gap between the top and
# bottom regions highlights a persistent inequality in data readiness across
# regions.
# Mean 2023 score per region (rounded to one decimal place), plus a tier:
#   below 65      -> "Lagging regions"
#   80 or above   -> "High performers"
#   anything else -> "Mid-tier regions"
regional_avg <- dataset %>%
  filter(year == 2023, !is.na(overall_score)) %>%
  group_by(region) %>%
  summarise(avg_score = round(mean(overall_score), digits = 1)) %>%
  mutate(
    tier = case_when(
      avg_score < 65 ~ "Lagging regions",
      avg_score >= 80 ~ "High performers",
      TRUE ~ "Mid-tier regions"
    )
  )
# Horizontal bar chart of the 2023 regional averages, sorted by score and
# filled by tier, then saved to slide7_regions.png. Each bar is labelled with
# its score rounded to a whole number.
ggplot(regional_avg, aes(x = avg_score,
                         y = reorder(region, avg_score),
                         fill = tier)) +
  geom_col() +
  geom_text(aes(label = round(avg_score, 0)),
            hjust = -0.3, size = 3.5, color = "#1A2B3C") +
  scale_fill_manual(
    values = c(
      "High performers" = "#028A5B",
      "Mid-tier regions" = "#1E90C0",
      "Lagging regions" = "#E07B39"
    ),
    # `tier` is a character column, so the legend would otherwise be sorted
    # alphabetically (High, Lagging, Mid). Explicit breaks put the keys in
    # rank order to match the top-to-bottom ordering of the bars.
    breaks = c("High performers", "Mid-tier regions", "Lagging regions")
  ) +
  # The upper limit of 105 leaves room for the score labels right of the bars.
  scale_x_continuous(limits = c(0, 105), breaks = seq(0, 100, 10)) +
  labs(
    title = "Some Regions Improved Faster - Sub-Saharan Africa Still Lags Behind",
    x = "Average Statistical Capacity Score (0-100)",
    y = NULL,
    fill = NULL,
    # NOTE(review): the 33-point figure is hard-coded; re-check it against
    # `regional_avg` whenever the underlying data changes.
    caption = "Source: World Bank | 2023 data\nA 33-point gap separates the top and bottom regions."
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 13, color = "#0A2342"),
    axis.title.x = element_text(size = 11, color = "#1A2B3C"),
    axis.text = element_text(size = 10, color = "#1A2B3C"),
    legend.position = "right",
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    plot.caption = element_text(face = "italic", color = "gray50", hjust = 0)
  )

# ggsave() writes the most recently built plot, i.e. the chart above.
ggsave("slide7_regions.png", width = 8, height = 4.5, dpi = 300)