Init

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  readxl,
  patchwork
)

theme_set(theme_bw())

options(
    digits = 3
)

#multithreading
#library(future)
#plan(multisession(workers = 8))

Functions

Data

#country list:
countries_iso = c(
  "CAN",
  "FRA",
  "DEU",
  "ITA",
  "JPN",
  "ESP",
  "GBR",
  "USA",
  "DNK",
  "RUS",
  "CHN",
  "BRA",
  "IND"
)

#UN population data
un_pop_raw = read_excel("data/WPP2024_POP_F02_1_POPULATION_5-YEAR_AGE_GROUPS_BOTH_SEXES.xlsx", range = "A17:AF22000", guess_max = 10000) %>% df_legalize_names()

#subset to what we want, long form
un_pop = un_pop_raw %>% 
  rename(
    ISO = ISO3_Alpha_code
  ) %>% 
  filter(
    Year %in% (1990:2023),
    !is.na(ISO)
  ) %>% 
  select(ISO, Year, x20_24:x100plus) %>% 
  pivot_longer(
    cols = x20_24:x100plus,
    names_to = "age_group",
    values_to = "population_count"
  ) %>% 
  mutate(
    age_group = str_remove(age_group, "x"),
    age_group = str_remove(age_group, "plus"),
    age_group = str_replace_all(age_group, "_", "-"),
    population_count = as.numeric(population_count)
  )

#age incomes for correction
age_income = structure(list(Age_group = c("0-4", "5-9", "10-14", "15-19", 
"20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", 
"55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85-89", 
"90-94", "95-99", "100"), income = c(0L, 0L, 0L, 0L, 18798L, 
48463L, 48463L, 79259L, 79259L, 98597L, 98597L, 99836L, 99836L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA, 
-21L))

age_income$income_r = age_income$income / age_income$income[5]
age_income

#world bank GDPs
wb_gdp_raw = read_csv("data/gdp-per-capita-worldbank/gdp-per-capita-worldbank.csv", guess_max = 10000) %>% df_legalize_names()

## Rows: 7063 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, GDP per capita, PPP (constant 2021 international $)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

wb_gdp = wb_gdp_raw %>% 
  rename(
    ISO = Code,
    GDP = GDP_per_capita_PPP_constant_2021_international
  ) %>%
  filter(
    Year %in% c(1990:2023),
    !is.na(ISO),
  )

#only keep countries with data from 1990
wb_gdp = wb_gdp %>% 
  group_by(ISO) %>% 
  filter(
    n() == length(1990:2023)
  ) %>% 
  ungroup()

#compute correction factors per year and country
age_correction_factor = un_pop %>% 
  left_join(
    age_income,
    by = c("age_group" = "Age_group")
  ) %>% 
  group_by(ISO, Year) %>% 
  summarise(
    correction_factor = sum(population_count * income_r) / sum(population_count),
    .groups = "drop"
  )

#what is the correction factor for USA in 1990?
age_correction_factor %>% 
  filter(
    ISO == "USA",
    Year == 1990
  )

#set USA 1990 correction factor to 1
age_correction_factor$correction_factor_USA = age_correction_factor$correction_factor / age_correction_factor$correction_factor[age_correction_factor$ISO == "USA" & age_correction_factor$Year == 1990]

#join to GDPs and adjust
wb_gdp = wb_gdp %>% 
  left_join(
    age_correction_factor,
    by = c("ISO", "Year")
  ) %>% 
  mutate(
    GDP_adj = GDP / correction_factor_USA
  )

#GDP index as of 2000
wb_gdp = wb_gdp %>% 
  group_by(ISO) %>%
  mutate(
    GDP_index = GDP/ GDP[Year == 1990],
    GDP_adj_index = GDP_adj / GDP_adj[Year == 1990],
  ) %>% 
  ungroup()

Analysis

#plot adjustment factor for select countries
wb_gdp %>% 
  filter(
    ISO %in% countries_iso
  ) %>% 
  GG_lines("Year", "correction_factor_USA", "ISO", points = F) +
  labs(
    y = "Adjustment factor",
    title = "GDP adjustment factor for age structure",
    subtitle = "USA 1990 = 1"
  )

GG_save("figs/gdp_adjustment_factor.png")

#plot GDP index for select countries
#one without india and china and one with
p1 = wb_gdp %>% 
  filter(
    ISO %in% countries_iso,
    !ISO %in% c("IND", "CHN")
  ) %>% 
  GG_lines("Year", "GDP_adj_index", "ISO", points = F) +
  #y as percent
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))

p2 = wb_gdp %>%
  filter(
    ISO %in% countries_iso
  ) %>% 
  GG_lines("Year", "GDP_adj_index", "ISO", points = F) +
    #y as percent
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))

#and regular GDP index
p3 = wb_gdp %>% 
  filter(
    ISO %in% countries_iso,
    !ISO %in% c("IND", "CHN")
  ) %>% 
  GG_lines("Year", "GDP_index", "ISO", points = F) +
    #y as percent
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))

p4 = wb_gdp %>%
  filter(
    ISO %in% countries_iso
  ) %>% 
  GG_lines("Year", "GDP_index", "ISO", points = F) +
    #y as percent
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))

#combine
p1 + p2 + p3 + p4 +
  plot_annotation(
    title = "GDP per capita index",
    subtitle = "USA 1990 = 1",
    caption = "Top: adjusted for age structure\nBottom: unadjusted"
  ) +
  plot_layout(ncol = 2)

## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/gdp_index.png")

## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 11 unlabeled data points (too many overlaps). Consider increasing max.overlaps

Meta

#versions
write_sessioninfo()

## R version 4.5.0 (2025-04-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.3.0       readxl_1.4.5          kirkegaard_2025-05-02
##  [4] psych_2.5.3           assertthat_0.2.1      weights_1.0.4        
##  [7] Hmisc_5.2-3           magrittr_2.0.3        lubridate_1.9.4      
## [10] forcats_1.0.0         stringr_1.5.1         dplyr_1.1.4          
## [13] purrr_1.0.4           readr_2.1.5           tidyr_1.3.1          
## [16] tibble_3.2.1          ggplot2_3.5.2         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1  farver_2.1.2      fastmap_1.2.0     digest_0.6.37    
##  [5] rpart_4.1.24      timechange_0.3.0  lifecycle_1.0.4   cluster_2.1.8.1  
##  [9] survival_3.8-3    gdata_3.0.1       compiler_4.5.0    rlang_1.1.6      
## [13] sass_0.4.10       tools_4.5.0       yaml_2.3.10       data.table_1.17.0
## [17] knitr_1.50        labeling_0.4.3    htmlwidgets_1.6.4 bit_4.6.0        
## [21] mnormt_2.1.1      withr_3.0.2       foreign_0.8-90    nnet_7.3-20      
## [25] grid_4.5.0        jomo_2.7-6        colorspace_2.1-1  mice_3.17.0      
## [29] scales_1.3.0      gtools_3.9.5      iterators_1.0.14  MASS_7.3-65      
## [33] cli_3.6.4         crayon_1.5.3      rmarkdown_2.29    ragg_1.4.0       
## [37] reformulas_0.4.0  generics_0.1.3    rstudioapi_0.17.1 tzdb_0.5.0       
## [41] minqa_1.2.8       cachem_1.1.0      splines_4.5.0     parallel_4.5.0   
## [45] cellranger_1.1.0  base64enc_0.1-3   vctrs_0.6.5       boot_1.3-31      
## [49] glmnet_4.1-8      Matrix_1.7-3      jsonlite_2.0.0    hms_1.1.3        
## [53] ggrepel_0.9.6     bit64_4.6.0-1     mitml_0.4-5       Formula_1.2-5    
## [57] htmlTable_2.4.3   systemfonts_1.2.2 foreach_1.5.2     jquerylib_0.1.4  
## [61] rematch_2.0.0     glue_1.8.0        nloptr_2.2.1      pan_1.9          
## [65] codetools_0.2-19  stringi_1.8.7     shape_1.4.6.1     gtable_0.3.6     
## [69] lme4_1.1-37       munsell_0.5.1     pillar_1.10.2     htmltools_0.5.8.1
## [73] R6_2.6.1          textshaping_1.0.0 Rdpack_2.6.4      vroom_1.6.5      
## [77] evaluate_1.0.3    lattice_0.22-5    rbibutils_2.3     backports_1.5.0  
## [81] broom_1.0.8       bslib_0.9.0       Rcpp_1.0.14       gridExtra_2.3    
## [85] nlme_3.1-168      checkmate_2.3.2   xfun_0.52         pkgconfig_2.0.3

#write data to file for reuse
# d %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}

GDP per what

02 May, 2025

Init

Functions

Data

Analysis

Meta