Setup: packages and data

#' Install any packages from `pkgs` that are not yet available.
#'
#' @param pkgs Character vector of package names.
#' @return Invisibly, the names of the packages that were missing (and for
#'   which installation was attempted); `character(0)` when none were.
install_if_missing <- function(pkgs) {
  # system.file() is the lookup ?installed.packages recommends for testing
  # whether a named package is present: it avoids scanning the whole library
  # and does not load any namespace.
  is_present <- vapply(
    pkgs,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1)
  )
  miss <- pkgs[!is_present]
  if (length(miss) > 0) {
    install.packages(miss, quiet = TRUE)
  }
  invisible(miss)
}
install_if_missing(c("revealjs","tidyverse","ggplot2","plotly","gapminder","scales","glue","knitr"))

suppressPackageStartupMessages({
  library(tidyverse)
  library(ggplot2)
  library(plotly)
  library(gapminder)
  library(scales)
  library(glue)
})

knitr::opts_chunk$set(message = FALSE, warning = FALSE)

# Country-by-year panel from the gapminder package (1952–2007).
# Columns: country, continent, year, lifeExp, pop, gdpPercap.
df <- gapminder

# First and last year present in the panel
year_min <- min(df[["year"]])
year_max <- max(df[["year"]])

# Population-weighted regional averages, so each continent-year value
# reflects where people live instead of counting every country equally.
#
# Two details matter inside summarise():
#   * a summary column is visible to later expressions as soon as it is
#     created, so the total is kept under the temporary name `pop_total`
#     while the weighted means still need the per-country `pop` column; it is
#     renamed to `pop` only at the end;
#   * gapminder stores `pop` as integer and continental totals can exceed
#     .Machine$integer.max, so it is converted to double before summing.
cont_ts <- df %>%
  group_by(continent, year) %>%
  summarise(
    pop_total = sum(as.numeric(pop)),
    lifeExp_pw = weighted.mean(lifeExp, pop),
    gdpPercap_pw = weighted.mean(gdpPercap, pop),
    .groups = "drop"
  ) %>%
  rename(pop = pop_total)

# Cross-section of all countries in the most recent year of the panel
df_latest <- filter(df, year == year_max)

# Change in life expectancy per country between the first and last year of
# the panel. year_min / year_max are looked up at run time, not hard-coded.
improve <- df %>%
  group_by(country, continent) %>%
  summarise(
    # match() gives the first matching row, or NA when the year is absent,
    # in which case the indexed value is NA as well
    lifeExp_start = lifeExp[match(year_min, year)],
    lifeExp_end   = lifeExp[match(year_max, year)],
    .groups = "drop"
  ) %>%
  mutate(delta = lifeExp_end - lifeExp_start) %>%
  # keep only countries observed in both years
  filter(!(is.na(lifeExp_start) | is.na(lifeExp_end))) %>%
  # largest gains first
  arrange(desc(delta))

# Countries whose individual health-vs-wealth paths are traced
focus_countries <- c(
  "China", "India", "Brazil",
  "United States", "Nigeria", "Ethiopia"
)
journey <- filter(df, country %in% focus_countries)

Data and method

Open data: Gapminder (via the gapminder R package)

Variables: lifeExp (life expectancy at birth, years), gdpPercap (GDP per capita, inflation-adjusted US$), pop (population), country, continent, year

Regional lines are population-weighted (reflect where people live)

Visuals: interactive (plotly) + one animation

# One-row overview of the dataset: coverage and size
tibble(
  Countries = n_distinct(df[["country"]]),
  Continents = n_distinct(df[["continent"]]),
  Years = paste0(year_min, "–", year_max),
  Rows = nrow(df)
) %>%
  knitr::kable()
Countries Continents Years Rows
142 5 1952–2007 1704

Health vs Wealth — latest snapshot (2007)

Bubble size represents population (pre-scaled for clarity).

X-axis on log scale: compresses very high incomes to reveal structure.

Expect a positive association (richer → longer lives), with regional clustering.

# Latest-year snapshot for the bubble chart, restricted to rows that can be
# drawn on a log x-axis (finite, strictly positive values).
df_latest2 <- df %>%
  filter(year == year_max, is.finite(gdpPercap), gdpPercap > 0,
         is.finite(lifeExp), lifeExp > 0, pop > 0) %>%
  mutate(
    # Point size is a diameter, so rescaling sqrt(pop) makes bubble area
    # roughly track population. The 4-20 range is in ggplot2 size units (not
    # pixels) and is meant to be used as-is via scale_size_identity().
    bubble_px = scales::rescale(sqrt(pop), to = c(4, 20)),
    # Hover text; GDP uses a comma thousands separator to match the axis
    # labels (number() defaults to a space).
    tooltip = glue("{country}
Life expectancy: {number(lifeExp, accuracy = 0.1)} yrs
GDP per capita: ${number(gdpPercap, accuracy = 1, big.mark = ',')}
Population: {number(pop / 1e6, accuracy = 0.1)}M")
  )

# Bubble chart for the latest year: income (log x) against life expectancy,
# coloured by continent. The `text` aesthetic is not drawn by ggplot2; it is
# carried along only so ggplotly() can use it as the tooltip.
p3 <- ggplot(
  data = df_latest2,
  mapping = aes(
    x = gdpPercap,
    y = lifeExp,
    colour = continent,
    size = bubble_px,
    text = tooltip
  )
) +
  geom_point(alpha = 0.9) +
  scale_x_log10(labels = label_number(big.mark = ",")) +
  # sizes were computed up front, so use them unchanged
  scale_size_identity() +
  labs(
    x = "GDP per capita (log scale)",
    y = "Life expectancy (years)",
    colour = NULL,
    title = glue("Health vs Wealth — {year_max}")
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "top")

# Convert to an interactive widget showing only the custom tooltip on hover
ggplotly(p3, tooltip = "text")

Animated journey (1952 → 2007)

Countries generally move up (longer lives) and right (higher incomes) over time.

Not all move the same: pace and volatility differ by region.

Animation rendered as a GIF for reliability.

# Render the animation once and cache it as slide4.gif; later knits (and
# RPubs) just embed the existing file. Delete the GIF to force a rebuild.
if (!file.exists("slide4.gif")) {
  # Animation-only dependencies, installed through the same helper used for
  # the main package set rather than a second ad-hoc installer.
  install_if_missing(c("gganimate", "gifski", "png"))
  library(gganimate)

  # Log x-axis and size scale need strictly positive values
  df_anim <- df %>% filter(gdpPercap > 0, lifeExp > 0, pop > 0)

  # group = country keeps each point tied to the same country across frames
  p <- ggplot(
    df_anim,
    aes(gdpPercap, lifeExp, size = pop, colour = continent, group = country)
  ) +
    geom_point(alpha = 0.85, show.legend = FALSE) +
    scale_x_log10(labels = scales::label_number(big.mark = ",")) +
    scale_size(range = c(1.5, 10)) +
    labs(
      title = "From 1952 to 2007: most countries got richer and healthier",
      # {frame_time} is filled in by gganimate for every frame
      subtitle = "Year: {frame_time}",
      x = "GDP per capita (log scale)",
      y = "Life expectancy (years)"
    ) +
    theme_minimal(base_size = 14)

  anim <- p + transition_time(year) + ease_aes("linear")

  # 12 s at 20 fps; the gifski renderer writes the result to slide4.gif
  animate(
    anim,
    duration = 12, fps = 20, width = 900, height = 520,
    renderer = gifski_renderer("slide4.gif")
  )
}
knitr::include_graphics("slide4.gif")

Regions — life expectancy over time (population-weighted)

Every region improved markedly in life expectancy.

The gap between regions narrowed, but still remains.

Population-weighting prevents small countries from skewing the trend.

# Population-weighted life expectancy over time, one line per continent
plotly::layout(
  plot_ly(
    data = cont_ts,
    x = ~year,
    y = ~lifeExp_pw,
    color = ~continent,
    colors = "Dark2",
    type = "scatter",
    mode = "lines+markers",
    # fullData.name is the trace name; the empty <extra> tag hides the
    # secondary hover box
    hovertemplate = "<b>%{fullData.name}</b><br>%{x}: %{y:.1f} years<extra></extra>"
  ),
  title = "Life expectancy rose in every region",
  xaxis = list(title = "Year"),
  yaxis = list(title = "Years"),
  # horizontal legend placed above the plotting area
  legend = list(orientation = "h", x = 0, y = 1.1),
  margin = list(l = 60, r = 20, t = 60, b = 50)
)

Regions — income per person over time (population-weighted, log)

Incomes rose almost everywhere, but regional gaps persist.

Log scale shows proportional changes more clearly.

Rapid catch-up is visible in parts of Asia after 1990.

# Population-weighted GDP per capita over time, one line per continent,
# drawn on a logarithmic y-axis
plotly::layout(
  plot_ly(
    data = cont_ts,
    x = ~year,
    y = ~gdpPercap_pw,
    color = ~continent,
    colors = "Dark2",
    type = "scatter",
    mode = "lines+markers",
    # fullData.name is the trace name; the empty <extra> tag hides the
    # secondary hover box
    hovertemplate = "<b>%{fullData.name}</b><br>%{x}: $%{y:.0f}<extra></extra>"
  ),
  title = "Incomes rose too, but gaps remain",
  xaxis = list(title = "Year"),
  yaxis = list(title = "GDP per capita (log scale)", type = "log"),
  # horizontal legend placed above the plotting area
  legend = list(orientation = "h", x = 0, y = 1.1),
  margin = list(l = 60, r = 20, t = 60, b = 50)
)

Within-region inequality — life expectancy (2007)

Distributions show spread within each region (not just the average).

Africa and the Americas have wide dispersion; Europe is tighter.

Box+violin helps see both range and central tendency.

# Distribution of country-level life expectancy within each continent for the
# latest year: violin outline with an embedded box plot and mean line
plotly::layout(
  plot_ly(
    data = df_latest,
    x = ~continent,
    y = ~lifeExp,
    color = ~continent,
    colors = "Set2",
    type = "violin",
    box = list(visible = TRUE),
    meanline = list(visible = TRUE),
    hovertemplate = "<b>%{x}</b><br>Life expectancy: %{y:.1f} years<extra></extra>"
  ),
  title = glue("Across-country inequality within regions — {year_max}"),
  xaxis = list(title = "Continent"),
  yaxis = list(title = "Life expectancy (years)"),
  # the x-axis already names each continent, so the legend is hidden
  showlegend = FALSE,
  margin = list(l = 60, r = 20, t = 60, b = 50)
)

Who improved most? (first → last year)

Big gains are possible even at lower incomes (public health advances).

Ranks by improvement in life expectancy from `r year_min` to `r year_max`.

Highlights the scale of change (years gained).

# Number of countries shown in the ranking
top_n <- 15

# Horizontal bars of the largest life-expectancy gains. `improve` is already
# sorted by descending delta, so its first rows are the top gainers;
# reorder() sorts the y categories by delta.
plotly::layout(
  plot_ly(
    data = slice_head(improve, n = top_n),
    x = ~delta,
    y = ~reorder(country, delta),
    color = ~continent,
    colors = "Set2",
    type = "bar",
    orientation = "h",
    hovertemplate = "<b>%{y}</b><br>Gain in life expectancy: %{x:.1f} years<extra></extra>"
  ),
  title = glue("Top {top_n} life expectancy gains, {year_min} → {year_max}"),
  xaxis = list(title = "Years gained"),
  yaxis = list(title = ""),
  legend = list(orientation = "h"),
  # wide left margin leaves room for country names
  margin = list(l = 120, r = 20, t = 60, b = 40)
)

Country journeys — health vs wealth paths

Trajectories reveal different development paths (e.g., fast catch-up vs. steady growth).

Some countries improved life expectancy before large income gains.

Comparing across continents shows diverse timelines.

# Health-vs-wealth path of each focus country: one connected line per
# country, with a custom hover label per point
plotly::layout(
  add_trace(
    plot_ly(),
    data = journey,
    x = ~gdpPercap,
    y = ~lifeExp,
    color = ~country,
    colors = "Set1",
    type = "scatter",
    mode = "lines+markers",
    text = ~glue("{country} — {year}
Life expectancy: {number(lifeExp, 0.1)} yrs
GDP per capita: ${number(gdpPercap, 1)}"),
    # show only the custom text on hover
    hoverinfo = "text"
  ),
  title = "Selected country trajectories (1952–2007)",
  xaxis = list(title = "GDP per capita (log scale)", type = "log"),
  yaxis = list(title = "Life expectancy (years)"),
  # horizontal legend placed above the plotting area
  legend = list(orientation = "h", x = 0, y = 1.1),
  margin = list(l = 60, r = 20, t = 60, b = 50)
)

References (open/public data)

Gapminder (R package): https://cran.r-project.org/package=gapminder

Gapminder project: https://www.gapminder.org/data/

R packages: tidyverse, ggplot2, plotly, gapminder, scales, glue, knitr, revealjs, gganimate, gifski