Init
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
haven,
readxl,
googlesheets4
)
theme_set(theme_bw())
options(
digits = 3
)
Data
#from OWID, but you can also use the same UN data we have loaded
fert = read_csv("data/children-per-woman-un.csv")
## Rows: 18722 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, Fertility rate - Sex: all - Age: all - Variant: estimates
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# NIQ = read_excel("data/hlopostharm.xlsx")
gs4_deauth()
NIQ2 = read_sheet("https://docs.google.com/spreadsheets/d/1cReoeIZLlbxOrN4_wnj52Q6UWFepXSJGi3Gj5m6ZAZg/edit?gid=0#gid=0")
## ✔ Reading from "National IQ datasets".
## ✔ Range 'Sheet1'.
#UN data
pop = read_excel("data/WPP2024_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT.xlsx", skip = 16, guess_max = 10000) %>%
filter(!is.na(`ISO3 Alpha-code`)) %>%
df_legalize_names() %>%
select(ISO3_Alpha_code, Year, Total_Population_as_of_1_January_thousands) %>%
rename(
ISO = ISO3_Alpha_code,
Population = Total_Population_as_of_1_January_thousands
) %>% mutate(
Population = as.numeric(Population) * 1000
)
#future estimates
pop_future = read_excel("data/WPP2024_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT.xlsx", skip = 16, guess_max = 10000, sheet = 2) %>%
filter(!is.na(`ISO3 Alpha-code`)) %>%
df_legalize_names() %>%
select(ISO3_Alpha_code, Year, Total_Population_as_of_1_January_thousands) %>%
rename(
ISO = ISO3_Alpha_code,
Population = Total_Population_as_of_1_January_thousands
) %>% mutate(
Population = as.numeric(Population) * 1000
)
#join
pop = bind_rows(
pop,
pop_future
)
#join
d = inner_join(
fert %>% filter(Year == 2022) %>% df_legalize_names() %>% select(-Entity) %>% rename(ISO = Code),
NIQ2 %>% df_legalize_names() %>% select(ISO, IQ_6datasets)
) %>%
mutate(
name = pu_translate(ISO, reverse = T)
) %>%
rename(
IQ = IQ_6datasets,
fertility = Fertility_rate_Sex_all_Age_all_Variant_estimates
)
## Joining with `by = join_by(ISO)`
#mean world IQ by year
pop_long = inner_join(
pop,
NIQ2 %>% df_legalize_names() %>% select(ISO, IQ_6datasets)
)
## Joining with `by = join_by(ISO)`
world_mean = pop_long %>%
group_by(Year) %>%
summarise(
world_mean_IQ = wtd_mean(IQ_6datasets, Population)
)
Analysis
#fertility vs IQ
GG_scatter(d, "IQ", "fertility", case_names = "name") +
geom_smooth() +
scale_x_continuous(breaks = seq(0, 200, 5)) +
scale_y_continuous(breaks = seq(0, 10, 0.5)) +
labs(
x = "National IQ (6 dataset average)",
y = "Fertility rate (2022, UN)",
title = "National IQ and fertility rate"
)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

GG_save("figures/IQ_fertility.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#test for heteroscedasticity
d$resid_spline = mgcv::gam(fertility ~ s(IQ), data = d) %>% resid()
test_HS(resid = d$resid_spline, d$IQ)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
#world IQ decline
world_mean %>%
ggplot(aes(x = Year, y = world_mean_IQ, linetype = Year > 2023)) +
geom_line() +
geom_vline(xintercept = 2023, linetype = 2) +
labs(
x = "Year",
y = "World mean IQ",
title = "World mean IQ by year",
linetype = "Forecast"
)

GG_save("figures/world_mean_IQ.png")