Week 3: Challenge

Which socioeconomic demographics will shape the population landscape in East Asia & Pacific by 2050, and how do they compare to today?

# Load packages
library(dplyr)
library(tidyverse)
library(readr)
# Import data
population_estimates <- read_csv("Population-EstimatesCSV.csv")

# Data Tidying / Transformation
pivot <- population_estimates |> 
  pivot_longer(cols = `1960`:`2050`, names_to = "Year", values_to = "Population") |> 
  mutate(Year = as.integer(Year))

What is the most populated demographic in East Asia & Pacific in 2025?

pop_2025 <- pivot |> 
 filter(Year == 2025, `Country Name` == "East Asia & Pacific") |> 
   group_by(`Indicator Name`) |> 
  summarize(Population = sum(Population, na.rm = TRUE)) |> 
  arrange(desc(Population))

pop_2025

## # A tibble: 189 × 2
##    `Indicator Name`                    Population
##    <chr>                                    <dbl>
##  1 Population, total                   2389693167
##  2 Population ages 15-64, total        1636918278
##  3 Urban population                    1539106338
##  4 Population, male                    1204549156
##  5 Population, female                  1185144011
##  6 Rural population                     850586829
##  7 Population ages 15-64, male          836993472
##  8 Population ages 15-64, female        799924803
##  9 Population ages 0-14, total          420166878
## 10 Population ages 65 and above, total  332608011
## # ℹ 179 more rows

print(paste("The most populated demographic in East Asia & Pacific is", pop_2025[2,1]))

## [1] "The most populated demographic in East Asia & Pacific is Population ages 15-64, total"

What demographic in East Asia & Pacific is projected to grow the most by 2050?

pop_growth <- pivot |> 
  filter(Year %in% c(2025, 2050), `Country Name` == "East Asia & Pacific") |> 
  group_by(`Indicator Name`, Year) |> 
  summarize(Population = sum(Population, na.rm = TRUE)) |> 
  pivot_wider(names_from = Year, values_from = Population) |> 
  mutate(CAGR = (`2050` / `2025`)^(1/25) -1) |> 
  arrange(desc(CAGR))

pop_growth

## # A tibble: 189 × 4
## # Groups:   Indicator Name [189]
##    `Indicator Name`                                         `2025` `2050`   CAGR
##    <chr>                                                     <dbl>  <dbl>  <dbl>
##  1 Population ages 80 and above, male (% of male populatio… 2.04e0 6.80e0 0.0492
##  2 Population ages 80 and above, male                       2.46e7 7.76e7 0.0470
##  3 Population ages 80 and older, female (% of female popul… 3.41e0 1.00e1 0.0441
##  4 Population ages 80 and above, female                     4.04e7 1.14e8 0.0424
##  5 Population ages 75-79, male (% of male population)       2.27e0 5.11e0 0.0329
##  6 Population ages 75-79, male                              2.74e7 5.83e7 0.0307
##  7 Age dependency ratio, old                                2.03e1 4.28e1 0.0303
##  8 Population ages 75-79, female (% of female population)   2.93e0 6.05e0 0.0294
##  9 Population ages 75-79, female                            3.48e7 6.89e7 0.0278
## 10 Population ages 65 and above, male (% of male populatio… 1.23e1 2.36e1 0.0265
## # ℹ 179 more rows

print(paste("The demographic in East Asia & Pacific projected to grow the most by 2050 is", pop_growth[2,1]))

## [1] "The demographic in East Asia & Pacific projected to grow the most by 2050 is Population ages 80 and above, male"

library(ggplot2)

pop_growth |> 
  filter(`Indicator Name` %in% c("Population ages 80 and above, male", "Population ages 80 and above, female", "Population ages 75-79, male", "Population ages 75-79, female")) |> 
  ggplot(aes(reorder(`Indicator Name`, CAGR), CAGR, fill = `Indicator Name`)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Projected Population CAGR (2025-2050) in East Asia & Pacific",
    x = "Demographic",
    y = "CAGR (Annual Growth Rate)"
  )