# Install whichever of `pkgs` cannot be found in the current library paths.
#
# pkgs: character vector of package names (may be empty).
# Returns NULL invisibly; called only for the side effect of installing.
install_if_missing <- function(pkgs) {
  # system.file() returns "" when a package cannot be located. It is the
  # lightweight check recommended in place of installed.packages(), which
  # reads metadata for every installed package just to answer this question.
  is_available <- vapply(
    pkgs,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1)
  )
  miss <- pkgs[!is_available]
  if (length(miss) > 0) {
    install.packages(miss, quiet = TRUE)
  }
  invisible(NULL)
}
# Install any of the listed packages that are not yet available.
install_if_missing(c("revealjs","tidyverse","ggplot2","plotly","gapminder","scales","glue","knitr"))
# Attach the packages used by the chunks below, silencing their startup banners.
suppressPackageStartupMessages({
library(tidyverse)
library(ggplot2)
library(plotly)
library(gapminder)
library(scales)
library(glue)
})
# Chunk defaults: keep messages and warnings out of the rendered slides.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
# Working copy of the gapminder country-year panel (1952–2007).
# Columns: country, continent, year, lifeExp, pop, gdpPercap.
df <- gapminder

# First and last year present in the data; reused by later chunks.
year_min <- min(df[["year"]])
year_max <- max(df[["year"]])
# Population-weighted regional averages, so that small countries do not
# dominate a continent's figure.
cont_ts <- df %>%
  # Keep the per-country weights under their own name. Inside summarise() a
  # newly created `pop` immediately masks the original column, so weighting by
  # `pop` after `pop = sum(pop)` would use the group total and reduce the
  # "weighted mean" to a plain sum of the values.
  # as.numeric() also guards against integer overflow: if `pop` is integer
  # (as in the gapminder package data), a continental total above
  # .Machine$integer.max makes sum() return NA.
  mutate(pop_w = as.numeric(pop)) %>%
  group_by(continent, year) %>%
  summarise(
    pop = sum(pop_w),
    lifeExp_pw = sum(lifeExp * pop_w) / sum(pop_w),
    gdpPercap_pw = sum(gdpPercap * pop_w) / sum(pop_w),
    .groups = "drop"
  )
# Cross-section of all countries in the most recent year
df_latest <- filter(df, year == year_max)
# First value recorded for `target` year within a group; NA when the group
# has no row for that year.
first_value_in_year <- function(values, years, target) {
  values[years == target][1]
}

# Per-country change in life expectancy between the first and last year,
# sorted from largest to smallest gain.
improve <- df %>%
  group_by(country, continent) %>%
  summarise(
    lifeExp_start = first_value_in_year(lifeExp, year, year_min),
    lifeExp_end = first_value_in_year(lifeExp, year, year_max),
    delta = lifeExp_end - lifeExp_start,
    .groups = "drop"
  ) %>%
  # Drop countries missing either endpoint
  filter(!(is.na(lifeExp_start) | is.na(lifeExp_end))) %>%
  arrange(desc(delta))
# Countries whose individual trajectories are traced on a later slide
focus_countries <- c(
  "China", "India", "Brazil",
  "United States", "Nigeria", "Ethiopia"
)
journey <- filter(df, country %in% focus_countries)
Open data: Gapminder (via the gapminder R package)
Variables: lifeExp (years), gdpPercap (USD), pop (population), country, continent, year
Regional lines are population-weighted (reflect where people live)
Visuals: interactive (plotly) + one animation
# One-row overview of the dataset: distinct countries and continents,
# the year span, and the total row count.
tibble(
  Countries = n_distinct(df$country),
  Continents = n_distinct(df$continent),
  Years = paste(year_min, year_max, sep = "–"),
  Rows = nrow(df)
) %>%
  knitr::kable()
| Countries | Continents | Years | Rows |
|---|---|---|---|
| 142 | 5 | 1952–2007 | 1704 |
Bubble size represents population (pre-scaled for clarity).
X-axis on log scale: compresses very high incomes to reveal structure.
Expect a positive association (richer → longer lives), with regional clustering.
# Latest-year rows with finite, strictly positive values, plus the
# pre-computed point size and hover text used by the bubble chart.
df_latest2 <- df %>%
  filter(year == year_max) %>%
  filter(is.finite(gdpPercap), gdpPercap > 0) %>%
  filter(is.finite(lifeExp), lifeExp > 0, pop > 0) %>%
  # Point size: sqrt(pop) rescaled into the range 4 to 20
  mutate(bubble_px = scales::rescale(sqrt(pop), to = c(4, 20))) %>%
  # Hover text; continuation lines stay unindented so the string is unchanged
  mutate(
    tooltip = glue("{country}
Life expectancy: {number(lifeExp, 0.1)} yrs
GDP per capita: ${number(gdpPercap, 1)}
Population: {number(pop/1e6, 0.1)}M")
  )
# Bubble chart of life expectancy against income for the latest year,
# converted to an interactive plotly widget that shows only the custom
# tooltip text on hover.
p3 <- ggplot(
  data = df_latest2,
  mapping = aes(
    x = gdpPercap,
    y = lifeExp,
    color = continent,
    size = bubble_px,
    text = tooltip
  )
) +
  geom_point(alpha = 0.9) +
  scale_x_log10(labels = label_number(big.mark = ",")) +
  # Use the pre-computed bubble_px values directly as point sizes
  scale_size_identity() +
  labs(
    title = glue("Health vs Wealth — {year_max}"),
    x = "GDP per capita (log scale)",
    y = "Life expectancy (years)",
    color = NULL
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "top")

p3 %>% plotly::ggplotly(tooltip = "text")
Countries generally move up (longer lives) and right (higher incomes) over time.
Not all move the same: pace and volatility differ by region.
Animation rendered as a GIF for reliability.
# Render the animation once and cache it as slide4.gif; when the file already
# exists the expensive render is skipped and the GIF is simply embedded.
if (!file.exists("slide4.gif")) {
  # Use the file's shared installer instead of a second, local copy of it.
  install_if_missing(c("gganimate", "gifski", "png"))
  library(gganimate)

  # Keep only strictly positive values (the x-axis is logarithmic)
  df_anim <- df %>% filter(gdpPercap > 0, lifeExp > 0, pop > 0)

  p <- ggplot(
    df_anim,
    aes(gdpPercap, lifeExp, size = pop, colour = continent, group = country)
  ) +
    geom_point(alpha = 0.85, show.legend = FALSE) +
    scale_x_log10(labels = scales::label_number(big.mark = ",")) +
    scale_size(range = c(1.5, 10)) +
    labs(
      # Built from the data's own year range so the title cannot drift from it.
      # paste0() keeps this a plain string; only the subtitle carries a
      # {frame_time} placeholder for gganimate to fill in per frame.
      title = paste0(
        "From ", year_min, " to ", year_max,
        ": most countries got richer and healthier"
      ),
      subtitle = "Year: {frame_time}",
      x = "GDP per capita (log scale)",
      y = "Life expectancy (years)"
    ) +
    theme_minimal(base_size = 14)

  # One frame sequence over `year`, linearly interpolated between data years
  anim <- p + transition_time(year) + ease_aes("linear")

  animate(
    anim,
    duration = 12, fps = 20, width = 900, height = 520,
    renderer = gifski_renderer("slide4.gif")
  )
}
knitr::include_graphics("slide4.gif")
Every region improved markedly in life expectancy.
The gap between regions narrowed, but still remains.
Population-weighting prevents small countries from skewing the trend.
# Interactive line chart: population-weighted life expectancy per continent
cont_ts %>%
  plot_ly(
    x = ~year,
    y = ~lifeExp_pw,
    color = ~continent,
    colors = "Dark2",
    type = "scatter",
    mode = "lines+markers",
    hovertemplate = "<b>%{fullData.name}</b><br>%{x}: %{y:.1f} years<extra></extra>"
  ) %>%
  layout(
    title = "Life expectancy rose in every region",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Years"),
    # Horizontal legend placed above the plotting area
    legend = list(orientation = "h", x = 0, y = 1.1),
    margin = list(l = 60, r = 20, t = 60, b = 50)
  )
Incomes rose almost everywhere, but regional gaps persist.
Log scale shows proportional changes more clearly.
Rapid catch-up is visible in parts of Asia after 1990.
# Interactive line chart: population-weighted GDP per capita per continent,
# drawn on a logarithmic y-axis
cont_ts %>%
  plot_ly(
    x = ~year,
    y = ~gdpPercap_pw,
    color = ~continent,
    colors = "Dark2",
    type = "scatter",
    mode = "lines+markers",
    hovertemplate = "<b>%{fullData.name}</b><br>%{x}: $%{y:.0f}<extra></extra>"
  ) %>%
  layout(
    title = "Incomes rose too, but gaps remain",
    xaxis = list(title = "Year"),
    yaxis = list(title = "GDP per capita (log scale)", type = "log"),
    # Horizontal legend placed above the plotting area
    legend = list(orientation = "h", x = 0, y = 1.1),
    margin = list(l = 60, r = 20, t = 60, b = 50)
  )
Distributions show spread within each region (not just the average).
Africa and the Americas have wide dispersion; Europe is tighter.
Combining a box plot with a violin shows both the range and the central tendency.
# Violin plot (with embedded box and mean line) of latest-year life
# expectancy across countries, one violin per continent
df_latest %>%
  plot_ly(
    x = ~continent,
    y = ~lifeExp,
    color = ~continent,
    colors = "Set2",
    type = "violin",
    box = list(visible = TRUE),
    meanline = list(visible = TRUE),
    hovertemplate = "<b>%{x}</b><br>Life expectancy: %{y:.1f} years<extra></extra>"
  ) %>%
  layout(
    title = glue("Across-country inequality within regions — {year_max}"),
    xaxis = list(title = "Continent"),
    yaxis = list(title = "Life expectancy (years)"),
    showlegend = FALSE,
    margin = list(l = 60, r = 20, t = 60, b = 50)
  )
Big gains are possible even at lower incomes (public health advances).
Ranks by improvement in life expectancy from `r year_min` to `r year_max`.
Highlights the scale of change (years gained).
# Number of countries shown in the ranking
top_n <- 15

# Horizontal bar chart of the largest life-expectancy gains; `improve` is
# already sorted by descending delta, so the first rows are the top gainers.
improve %>%
  slice_head(n = top_n) %>%
  plot_ly(
    x = ~delta,
    y = ~reorder(country, delta),
    color = ~continent,
    colors = "Set2",
    type = "bar",
    orientation = "h",
    hovertemplate = "<b>%{y}</b><br>Gain in life expectancy: %{x:.1f} years<extra></extra>"
  ) %>%
  layout(
    title = glue("Top {top_n} life expectancy gains, {year_min} → {year_max}"),
    xaxis = list(title = "Years gained"),
    yaxis = list(title = ""),
    legend = list(orientation = "h"),
    # Wide left margin leaves room for the country labels
    margin = list(l = 120, r = 20, t = 60, b = 40)
  )
Trajectories reveal different development paths (e.g., fast catch-up vs. steady growth).
Some countries improved life expectancy before large income gains.
Comparing across continents shows diverse timelines.
# Connected scatter of income against life expectancy for the focus
# countries, one line per country, with a custom hover label per point.
plot_ly() %>%
  add_trace(
    data = journey,
    x = ~gdpPercap, y = ~lifeExp, color = ~country, colors = "Set1",
    type = "scatter", mode = "lines+markers",
    # Continuation lines stay unindented so the hover string is unchanged
    text = ~glue("{country} — {year}
Life expectancy: {number(lifeExp, 0.1)} yrs
GDP per capita: ${number(gdpPercap, 1)}"),
    hoverinfo = "text"
  ) %>%
  layout(
    # Year span taken from the data, matching the other slide titles,
    # instead of a hard-coded "1952–2007"
    title = glue("Selected country trajectories ({year_min}–{year_max})"),
    xaxis = list(title = "GDP per capita (log scale)", type = "log"),
    yaxis = list(title = "Life expectancy (years)"),
    legend = list(orientation = "h", x = 0, y = 1.1),
    margin = list(l = 60, r = 20, t = 60, b = 50)
  )
Gapminder (R package): https://cran.r-project.org/package=gapminder
Gapminder project: https://www.gapminder.org/data/
R packages: tidyverse, plotly, ggplot2, revealjs, gganimate, gifski