Init
#global options
options(
digits = 2,
contrasts = c("contr.treatment", "contr.treatment")
)
#packages
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
readxl,
patchwork
)
#ggplot2
theme_set(theme_bw())
Functions
Data
#outdated data from OWID
# fertility = read_csv("data/children-born-per-woman.csv") %>%
# df_legalize_names()
fertility = read_excel("data/WPP2022_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT_REV1.xlsx",
col_types = c(
"ISO3 Alpha-code" = "text"
),
sheet = 1,
range = "A17:BM20613") %>%
df_legalize_names()
GDPpc = read_excel("data/API_NY.GDP.PCAP.PP.CD_DS2_en_excel_v2_186.xls", sheet = "Data", range = "A4:BO270") %>%
df_legalize_names()
oecd = read_csv("data/oecd.csv") %>%
df_legalize_names()
## Rows: 38 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Code, Name
## dbl (1): Accession
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#join parts we need
d = full_join(
#fertility for 2021, newest, only entities with ISO
fertility %>%
filter(Year == 2021, !is.na(ISO3_Alpha_code)) %>%
rename(ISO = ISO3_Alpha_code) %>%
select(ISO, Total_Fertility_Rate_live_births_per_woman, Total_Population_as_of_1_January_thousands),
#GDP pc PPP
GDPpc %>%
select(Country_Code, x2021) %>%
rename(
ISO = Country_Code,
GDPpcPPP2021 = x2021
),
by = "ISO"
) %>%
full_join(
oecd %>% select(
Code, Accession
) %>%
rename(
OECD_member_since = Accession,
ISO = Code
),
by = "ISO"
)
#no dups
assert_that(!anyNA(d$ISO))
## [1] TRUE
#reverse ISO
d$country = pu_translate(d$ISO, reverse = T)
## No match: XKX
## No match: AFE
## No match: AFW
## No match: ARB
## No match: CEB
## No match: CHI
## No match: CSS
## No match: EAP
## No match: EAR
## No match: EAS
## No match: ECA
## No match: ECS
## No match: EMU
## No match: EUU
## No match: FCS
## No match: HIC
## No match: HPC
## No match: IBD
## No match: IBT
## No match: IDA
## No match: IDB
## No match: IDX
## No match: INX
## No match: LAC
## No match: LCN
## No match: LDC
## No match: LIC
## No match: LMC
## No match: LMY
## No match: LTE
## No match: MEA
## No match: MIC
## No match: MNA
## No match: NAC
## No match: OED
## No match: OSS
## No match: PRE
## No match: PSS
## No match: PST
## No match: SAS
## No match: SSA
## No match: SSF
## No match: SST
## No match: TEA
## No match: TEC
## No match: TLA
## No match: TMN
## No match: TSA
## No match: TSS
## No match: UMC
## No match: WLD
#remove those we dont have a name for (non-country units)
d %<>% filter(!is.na(country))
#add OECD dummy
d$OECD = !is.na(d$OECD_member_since)
#numericals
d %<>% mutate(
Total_Fertility_Rate_live_births_per_woman = Total_Fertility_Rate_live_births_per_woman %>% as.numeric(),
population2021 = as.numeric(Total_Population_as_of_1_January_thousands) * 1000
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Total_Fertility_Rate_live_births_per_woman =
## Total_Fertility_Rate_live_births_per_woman %>% as.numeric()`.
## Caused by warning in `Total_Fertility_Rate_live_births_per_woman %>% as.numeric()`:
## ! NAs introduced by coercion
#European countries
d$European = d$ISO %in% c("AUT", "BEL", "BGR", "BLR", "CHE", "CYP", "CZE", "DEU", "DNK", "ESP", "EST", "FIN", "FRA", "GBR", "GRC", "HRV", "HUN", "IRL", "ISL", "ITA", "LVA", "LTU", "LUX", "MDA", "MCO", "MNE", "NLD", "NOR", "POL", "PRT", "ROU", "RUS", "SMR", "SRB", "SVK", "SVN", "SWE", "UKR", "AUS", "CAN", "NZL", "USA")
Analysis
p_oecd = d %>% filter(OECD) %>%
GG_scatter("GDPpcPPP2021", "Total_Fertility_Rate_live_births_per_woman", case_names = "country") +
ylab("Children per woman, 2021 estimate, UN") +
scale_x_log10("GDP per capita, PPP, 2021, World Bank") +
ggtitle("OECD data")
p_oecd
## `geom_smooth()` using formula = 'y ~ x'

p_world = d %>%
GG_scatter("GDPpcPPP2021", "Total_Fertility_Rate_live_births_per_woman", case_names = "country", color = "OECD") +
ylab("Children per woman, 2021 estimate, UN") +
scale_x_log10("GDP per capita, PPP, 2021, World Bank") +
ggtitle("World-wide data")
p_world
## `geom_smooth()` using formula = 'y ~ x'

p_world + p_oecd
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/wealth cor fertility, OECD vs. world.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
p_european = d %>% filter(European, population2021 > 4e6) %>%
GG_scatter("GDPpcPPP2021", "Total_Fertility_Rate_live_births_per_woman", case_names = "country") +
ylab("Children per woman, 2021 estimate, UN") +
scale_x_log10("GDP per capita, PPP, 2021, World Bank") +
ggtitle("European countries data")
p_european
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/wealth cor fertility, European.png")
## `geom_smooth()` using formula = 'y ~ x'