Init

# Global options: print numbers with 2 significant digits and use treatment
# contrasts for both entries of the `contrasts` option.
options(
  contrasts = rep("contr.treatment", times = 2),
  digits = 2
)

#packages
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
# additional packages used below: readxl for the Excel inputs, patchwork for
# combining plots
# NOTE(review): load_packages() is not defined in this file; presumably it comes
# from kirkegaard and attaches the listed packages — confirm
load_packages(
  readxl,
  patchwork
)

# ggplot2: make theme_bw() the default theme for all subsequent plots
theme_set(theme_bw())

Functions

Data

# outdated data from OWID
# fertility = read_csv("data/children-born-per-woman.csv") %>%
#   df_legalize_names()

# WPP2022 demographic indicators workbook, first sheet.
# read_excel() matches `col_types` by position, not by name, and recycles a
# single entry to every column. The former c("ISO3 Alpha-code" = "text")
# therefore already read ALL columns as text; this is now stated explicitly.
# Numeric columns are converted with as.numeric() where they are used.
fertility <- read_excel(
  "data/WPP2022_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT_REV1.xlsx",
  sheet = 1,
  range = "A17:BM20613",
  col_types = "text"
) %>%
  df_legalize_names()

# GDP per capita (PPP) workbook, sheet "Data"; year columns become x<year>
# after df_legalize_names() (x2021 is used below)
GDPpc <- read_excel(
  "data/API_NY.GDP.PCAP.PP.CD_DS2_en_excel_v2_186.xls",
  sheet = "Data",
  range = "A4:BO270"
) %>%
  df_legalize_names()

# OECD membership list (columns Code, Name, Accession)
oecd <- read_csv("data/oecd.csv") %>%
  df_legalize_names()
## Rows: 38 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Code, Name
## dbl (1): Accession
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Assemble the analysis table keyed by ISO code. Full joins are used so that a
# unit present in only one of the sources is still kept.
d <- fertility %>%
  # fertility for 2021 (newest), only entities that have an ISO code
  filter(Year == 2021, !is.na(ISO3_Alpha_code)) %>%
  select(
    ISO = ISO3_Alpha_code,
    Total_Fertility_Rate_live_births_per_woman,
    Total_Population_as_of_1_January_thousands
  ) %>%
  # GDP per capita, PPP, 2021
  full_join(
    GDPpc %>% select(ISO = Country_Code, GDPpcPPP2021 = x2021),
    by = "ISO"
  ) %>%
  # OECD accession value per country code
  full_join(
    oecd %>% select(ISO = Code, OECD_member_since = Accession),
    by = "ISO"
  )

# sanity checks on the join key: no missing ISO codes and no duplicated ISO
# codes (a duplicate would mean one source contributed several rows for the
# same unit, silently multiplying rows in the joins)
assert_that(
  !anyNA(d$ISO),
  anyDuplicated(d$ISO) == 0
)
## [1] TRUE
# reverse lookup: translate ISO codes back to country names.
# Codes without a match are reported as "No match"; they presumably come back
# as NA, which is what the filter on `country` right after relies on.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
d$country <- pu_translate(d$ISO, reverse = TRUE)
## No match: XKX
## No match: AFE
## No match: AFW
## No match: ARB
## No match: CEB
## No match: CHI
## No match: CSS
## No match: EAP
## No match: EAR
## No match: EAS
## No match: ECA
## No match: ECS
## No match: EMU
## No match: EUU
## No match: FCS
## No match: HIC
## No match: HPC
## No match: IBD
## No match: IBT
## No match: IDA
## No match: IDB
## No match: IDX
## No match: INX
## No match: LAC
## No match: LCN
## No match: LDC
## No match: LIC
## No match: LMC
## No match: LMY
## No match: LTE
## No match: MEA
## No match: MIC
## No match: MNA
## No match: NAC
## No match: OED
## No match: OSS
## No match: PRE
## No match: PSS
## No match: PST
## No match: SAS
## No match: SSA
## No match: SSF
## No match: SST
## No match: TEA
## No match: TEC
## No match: TLA
## No match: TMN
## No match: TSA
## No match: TSS
## No match: UMC
## No match: WLD
# Keep only units that received a country name (drops non-country aggregates),
# flag OECD members, and coerce the fertility and population columns to numeric.
# as.numeric() warns when an entry cannot be parsed and yields NA for it.
d <- d %>%
  filter(!is.na(country)) %>%
  mutate(
    # OECD dummy: TRUE when an accession value is present
    OECD = !is.na(OECD_member_since),
    Total_Fertility_Rate_live_births_per_woman =
      Total_Fertility_Rate_live_births_per_woman %>% as.numeric(),
    # source column is expressed in thousands of people
    population2021 =
      as.numeric(Total_Population_as_of_1_January_thousands) * 1000
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Total_Fertility_Rate_live_births_per_woman =
##   Total_Fertility_Rate_live_births_per_woman %>% as.numeric()`.
## Caused by warning in `Total_Fertility_Rate_live_births_per_woman %>% as.numeric()`:
## ! NAs introduced by coercion
# Flag "European" countries by ISO code.
# NOTE(review): the list also contains AUS, CAN, NZL and USA, so the flag seems
# to mean European plus European-settled countries — confirm the intended scope.
d <- d %>%
  mutate(
    European = ISO %in% c(
      "AUT", "BEL", "BGR", "BLR", "CHE", "CYP", "CZE", "DEU", "DNK", "ESP",
      "EST", "FIN", "FRA", "GBR", "GRC", "HRV", "HUN", "IRL", "ISL", "ITA",
      "LVA", "LTU", "LUX", "MDA", "MCO", "MNE", "NLD", "NOR", "POL", "PRT",
      "ROU", "RUS", "SMR", "SRB", "SVK", "SVN", "SWE", "UKR",
      "AUS", "CAN", "NZL", "USA"
    )
  )

Analysis

# OECD members only: fertility rate against GDP per capita (log10 x axis),
# points labelled by country
p_oecd <- d %>%
  filter(OECD) %>%
  GG_scatter(
    "GDPpcPPP2021",
    "Total_Fertility_Rate_live_births_per_woman",
    case_names = "country"
  ) +
  scale_x_log10("GDP per capita, PPP, 2021, World Bank") +
  labs(
    y = "Children per woman, 2021 estimate, UN",
    title = "OECD data"
  )

# display the plot
p_oecd
## `geom_smooth()` using formula = 'y ~ x'

# All countries: same scatterplot, with points coloured by OECD membership
p_world <- d %>%
  GG_scatter(
    "GDPpcPPP2021",
    "Total_Fertility_Rate_live_births_per_woman",
    case_names = "country",
    color = "OECD"
  ) +
  scale_x_log10("GDP per capita, PPP, 2021, World Bank") +
  labs(
    y = "Children per woman, 2021 estimate, UN",
    title = "World-wide data"
  )

# display the plot
p_world
## `geom_smooth()` using formula = 'y ~ x'

# combine both plots into one composite figure (adding two ggplot objects with
# patchwork attached)
p_world + p_oecd
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# NOTE(review): GG_save() is given only a path; presumably it saves the most
# recently displayed plot (here the composite) — confirm
GG_save("figs/wealth cor fertility, OECD vs. world.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Countries flagged as European with more than 4 million inhabitants in 2021:
# fertility rate against GDP per capita (log10 x axis)
p_european <- d %>%
  filter(European, population2021 > 4e6) %>%
  GG_scatter(
    "GDPpcPPP2021",
    "Total_Fertility_Rate_live_births_per_woman",
    case_names = "country"
  ) +
  scale_x_log10("GDP per capita, PPP, 2021, World Bank") +
  labs(
    y = "Children per woman, 2021 estimate, UN",
    title = "European countries data"
  )

# display the plot
p_european
## `geom_smooth()` using formula = 'y ~ x'

# NOTE(review): only a path is passed; presumably this saves the most recently
# displayed plot (p_european) — confirm
GG_save("figs/wealth cor fertility, European.png")
## `geom_smooth()` using formula = 'y ~ x'

Meta

# print session details (R version, platform, locale, attached and loaded
# package versions) so the run can be reproduced
write_sessioninfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_DK.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_DK.UTF-8        LC_COLLATE=en_DK.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_DK.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.1.2       readxl_1.4.2          kirkegaard_2023-04-30
##  [4] psych_2.3.3           assertthat_0.2.1      weights_1.0.4        
##  [7] Hmisc_5.1-0           magrittr_2.0.3        lubridate_1.9.2      
## [10] forcats_1.0.0         stringr_1.5.0         dplyr_1.1.2          
## [13] purrr_1.0.1           readr_2.1.4           tidyr_1.3.0          
## [16] tibble_3.2.1          ggplot2_3.4.2         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.0  farver_2.1.1      fastmap_1.1.1     digest_0.6.31    
##  [5] rpart_4.1.23      timechange_0.2.0  lifecycle_1.0.3   cluster_2.1.6    
##  [9] survival_3.5-7    gdata_2.19.0      compiler_4.3.2    rlang_1.1.1      
## [13] sass_0.4.6        tools_4.3.2       utf8_1.2.3        yaml_2.3.7       
## [17] data.table_1.14.8 knitr_1.43        labeling_0.4.2    htmlwidgets_1.6.2
## [21] bit_4.0.5         mnormt_2.1.1      withr_2.5.0       foreign_0.8-86   
## [25] nnet_7.3-19       grid_4.3.2        fansi_1.0.4       jomo_2.7-6       
## [29] colorspace_2.1-0  mice_3.16.0       scales_1.2.1      gtools_3.9.4     
## [33] iterators_1.0.14  MASS_7.3-60       cli_3.6.1         crayon_1.5.2     
## [37] rmarkdown_2.22    ragg_1.2.5        generics_0.1.3    rstudioapi_0.14  
## [41] tzdb_0.4.0        minqa_1.2.5       cachem_1.0.8      splines_4.3.2    
## [45] parallel_4.3.2    cellranger_1.1.0  base64enc_0.1-3   vctrs_0.6.3      
## [49] boot_1.3-28       glmnet_4.1-7      Matrix_1.6-3      jsonlite_1.8.5   
## [53] hms_1.1.3         bit64_4.0.5       mitml_0.4-5       Formula_1.2-5    
## [57] htmlTable_2.4.1   systemfonts_1.0.4 foreach_1.5.2     jquerylib_0.1.4  
## [61] rematch_1.0.1     glue_1.6.2        nloptr_2.0.3      pan_1.6          
## [65] codetools_0.2-19  stringi_1.7.12    gtable_0.3.3      shape_1.4.6      
## [69] lme4_1.1-33       munsell_0.5.0     pillar_1.9.0      htmltools_0.5.5  
## [73] R6_2.5.1          textshaping_0.3.6 vroom_1.6.3       evaluate_0.21    
## [77] lattice_0.22-5    highr_0.10        backports_1.4.1   broom_1.0.5      
## [81] bslib_0.5.0       Rcpp_1.0.10       gridExtra_2.3     nlme_3.1-163     
## [85] checkmate_2.2.0   mgcv_1.9-1        xfun_0.39         pkgconfig_2.0.3
# Upload to OSF.
# Deliberately disabled (guard is FALSE) to avoid uploading the data in case
# they freak out again; flip the guard manually to publish.
# NOTE(review): "data" is still part of the upload list below — remove it if
# the data must stay unpublished once the guard is enabled.
if (FALSE) {
  library(osfr)

  # authenticate with the token stored in the user's config directory
  osf_auth(readr::read_lines("~/.config/osf_token"))

  # the project we will use
  osf_proj <- osf_retrieve_node("https://osf.io/XXX/")

  # upload files, overwriting existing ones (versioning)
  # The former trailing comma inside c() left an empty argument, which makes
  # c() fail with "argument is empty" as soon as this code actually runs.
  osf_upload(
    osf_proj,
    conflicts = "overwrite",
    path = c(
      "figs",
      "data",
      "notebook.html",
      "notebook.Rmd"
    )
  )
}