Init
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
readxl,
patchwork
)
theme_set(theme_bw())
options(
digits = 3
)
#multithreading
#library(future)
#plan(multisession(workers = 8))
Functions
Data
#country list:
countries_iso = c(
"CAN",
"FRA",
"DEU",
"ITA",
"JPN",
"ESP",
"GBR",
"USA",
"DNK",
"RUS",
"CHN",
"BRA",
"IND"
)
#UN population data
un_pop_raw = read_excel("data/WPP2024_POP_F02_1_POPULATION_5-YEAR_AGE_GROUPS_BOTH_SEXES.xlsx", range = "A17:AF22000", guess_max = 10000) %>% df_legalize_names()
#subset to what we want, long form
un_pop = un_pop_raw %>%
rename(
ISO = ISO3_Alpha_code
) %>%
filter(
Year %in% (1990:2023),
!is.na(ISO)
) %>%
select(ISO, Year, x20_24:x100plus) %>%
pivot_longer(
cols = x20_24:x100plus,
names_to = "age_group",
values_to = "population_count"
) %>%
mutate(
age_group = str_remove(age_group, "x"),
age_group = str_remove(age_group, "plus"),
age_group = str_replace_all(age_group, "_", "-"),
population_count = as.numeric(population_count)
)
#age incomes for correction
age_income = structure(list(Age_group = c("0-4", "5-9", "10-14", "15-19",
"20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54",
"55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85-89",
"90-94", "95-99", "100"), income = c(0L, 0L, 0L, 0L, 18798L,
48463L, 48463L, 79259L, 79259L, 98597L, 98597L, 99836L, 99836L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-21L))
age_income$income_r = age_income$income / age_income$income[5]
age_income
#world bank GDPs
wb_gdp_raw = read_csv("data/gdp-per-capita-worldbank/gdp-per-capita-worldbank.csv", guess_max = 10000) %>% df_legalize_names()
## Rows: 7063 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, GDP per capita, PPP (constant 2021 international $)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
wb_gdp = wb_gdp_raw %>%
rename(
ISO = Code,
GDP = GDP_per_capita_PPP_constant_2021_international
) %>%
filter(
Year %in% c(1990:2023),
!is.na(ISO),
)
#only keep countries with data from 1990
wb_gdp = wb_gdp %>%
group_by(ISO) %>%
filter(
n() == length(1990:2023)
) %>%
ungroup()
#compute correction factors per year and country
age_correction_factor = un_pop %>%
left_join(
age_income,
by = c("age_group" = "Age_group")
) %>%
group_by(ISO, Year) %>%
summarise(
correction_factor = sum(population_count * income_r) / sum(population_count),
.groups = "drop"
)
#what is the correction factor for USA in 1990?
age_correction_factor %>%
filter(
ISO == "USA",
Year == 1990
)
#set USA 1990 correction factor to 1
age_correction_factor$correction_factor_USA = age_correction_factor$correction_factor / age_correction_factor$correction_factor[age_correction_factor$ISO == "USA" & age_correction_factor$Year == 1990]
#join to GDPs and adjust
wb_gdp = wb_gdp %>%
left_join(
age_correction_factor,
by = c("ISO", "Year")
) %>%
mutate(
GDP_adj = GDP / correction_factor_USA
)
#GDP index as of 2000
wb_gdp = wb_gdp %>%
group_by(ISO) %>%
mutate(
GDP_index = GDP/ GDP[Year == 1990],
GDP_adj_index = GDP_adj / GDP_adj[Year == 1990],
) %>%
ungroup()
Analysis
#plot adjustment factor for select countries
wb_gdp %>%
filter(
ISO %in% countries_iso
) %>%
GG_lines("Year", "correction_factor_USA", "ISO", points = F) +
labs(
y = "Adjustment factor",
title = "GDP adjustment factor for age structure",
subtitle = "USA 1990 = 1"
)

GG_save("figs/gdp_adjustment_factor.png")
#plot GDP index for select countries
#one without india and china and one with
p1 = wb_gdp %>%
filter(
ISO %in% countries_iso,
!ISO %in% c("IND", "CHN")
) %>%
GG_lines("Year", "GDP_adj_index", "ISO", points = F) +
#y as percent
scale_y_continuous(labels = scales::percent_format(accuracy = 1))
p2 = wb_gdp %>%
filter(
ISO %in% countries_iso
) %>%
GG_lines("Year", "GDP_adj_index", "ISO", points = F) +
#y as percent
scale_y_continuous(labels = scales::percent_format(accuracy = 1))
#and regular GDP index
p3 = wb_gdp %>%
filter(
ISO %in% countries_iso,
!ISO %in% c("IND", "CHN")
) %>%
GG_lines("Year", "GDP_index", "ISO", points = F) +
#y as percent
scale_y_continuous(labels = scales::percent_format(accuracy = 1))
p4 = wb_gdp %>%
filter(
ISO %in% countries_iso
) %>%
GG_lines("Year", "GDP_index", "ISO", points = F) +
#y as percent
scale_y_continuous(labels = scales::percent_format(accuracy = 1))
#combine
p1 + p2 + p3 + p4 +
plot_annotation(
title = "GDP per capita index",
subtitle = "USA 1990 = 1",
caption = "Top: adjusted for age structure\nBottom: unadjusted"
) +
plot_layout(ncol = 2)
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/gdp_index.png")
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 11 unlabeled data points (too many overlaps). Consider increasing max.overlaps