Init
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
googlesheets4,
rms,
lme4,
ggeffects,
broom, broom.mixed,
splines,
flextable
)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:purrr':
##
## compose
theme_set(theme_bw())
options(
digits = 3
)
#multithreading
#library(future)
#plan(multisession(workers = 8))
Functions
Data
d1 = read_csv("data/womens-educational-attainment-vs-fertility/womens-educational-attainment-vs-fertility.csv") %>%
df_legalize_names() %>%
rename(
TFR = Fertility_rate_Sex_all_Age_all_Variant_estimates,
female_ed = Combined_average_years_of_education_for_15_64_years_female_youth_and_adults,
ISO = Code
)
## Rows: 60209 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Entity, Code, World regions according to OWID
## dbl (4): Year, Fertility rate - Sex: all - Age: all - Variant: estimates, Co...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#regions
gs4_deauth()
d_regions = read_sheet("https://docs.google.com/spreadsheets/d/1ToJWNbwYY--w0_nh05slEv4ZCko4a-XZc1rAkymTxAA/edit?gid=0#gid=0") %>%
df_legalize_names() %>%
rename(
ISO = ISO3
)
## ✔ Reading from "Countries regional codings".
## ✔ Range 'Sheet1'.
#join
d = inner_join(
d1,
d_regions %>% select(ISO, Region1, Region2, Region3)
)
## Joining with `by = join_by(ISO)`
Analysis
#simple plot
d %>%
GG_scatter("female_ed", "TFR") +
geom_smooth() +
labs(
title = "Total fertility rate as function of female education",
subtitle = "OLS with restricted cubic spline (all data mixed)",
y = "Total fertility rate",
x = "Average years of education for 15-64 years females"
)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

GG_save("figs/ols spline scatter.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#linear models with regional controls
list(
ols(TFR ~ female_ed, data = d),
ols(TFR ~ female_ed + Year, data = d),
ols(TFR ~ female_ed + Year + Region1, data = d),
ols(TFR ~ female_ed + Year + Region2, data = d),
ols(TFR ~ female_ed + Year + Region3, data = d),
ols(TFR ~ rcs(female_ed) + Year + Region3, data = d)
) %>% summarize_models(collapse_factors = T)
## number of knots in rcs defaulting to 5
ols(TFR ~ rcs(female_ed) + Year + Region3, data = d) %>%
ggaverage(terms = c("female_ed")) %>%
plot() +
labs(
title = "Total fertility rate as function of female education, year, and regions",
subtitle = "OLS with restricted cubic spline",
y = "Total fertility rate",
x = "Average years of education for 15-64 years females"
)
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5

GG_save("figs/ols spline.png")
#multi-level model with country fixed effects
#using lmer
multi_fit = lmer(TFR ~ female_ed + Year + (1 + female_ed + Year | ISO), data = d)
## boundary (singular) fit: see help('isSingular')
multi_fit %>% summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: TFR ~ female_ed + Year + (1 + female_ed + Year | ISO)
## Data: d
##
## REML criterion at convergence: 3054
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.416 -0.598 0.057 0.586 4.246
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ISO (Intercept) 1.90e-01 0.43605
## female_ed 1.30e-01 0.35994 0.11
## Year 2.19e-06 0.00148 0.11 -0.98
## Residual 1.87e-01 0.43292
## Number of obs: 1720, groups: ISO, 145
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 18.87473 2.98454 6.32
## female_ed -0.40687 0.03510 -11.59
## Year -0.00645 0.00155 -4.17
##
## Correlation of Fixed Effects:
## (Intr) feml_d
## female_ed 0.380
## Year -0.996 -0.456
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
multi_fit %>% fixef()
## (Intercept) female_ed Year
## 18.87473 -0.40687 -0.00645
multi_fit %>% tidy()
multi_fit %>%
ggaverage(terms = c("female_ed")) %>%
plot() +
labs(
title = "Total fertility rate as function of female education and year",
subtitle = "LMER multilevel model with random slopes and intercepts",
y = "Total fertility rate",
x = "Average years of education for 15-64 years females"
)

GG_save("figs/multilevel linear.png")
multi_fit %>%
ggaverage(terms = c("Year")) %>%
plot()

#spline for education
multi_fit2 = lmer(TFR ~ ns(female_ed, df = 3) + Year + (1 + female_ed + Year | ISO), data = d)
## boundary (singular) fit: see help('isSingular')
multi_fit2 %>% summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: TFR ~ ns(female_ed, df = 3) + Year + (1 + female_ed + Year | ISO)
## Data: d
##
## REML criterion at convergence: 2815
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.020 -0.535 0.018 0.574 4.487
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ISO (Intercept) 2.76e-01 0.52548
## female_ed 1.63e-01 0.40392 0.29
## Year 2.92e-06 0.00171 -0.11 -0.98
## Residual 1.56e-01 0.39445
## Number of obs: 1720, groups: ISO, 145
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 31.01556 3.21211 9.66
## ns(female_ed, df = 3)1 -4.33907 0.29123 -14.90
## ns(female_ed, df = 3)2 -4.09410 0.60990 -6.71
## ns(female_ed, df = 3)3 -3.50209 0.56431 -6.21
## Year -0.01253 0.00168 -7.45
##
## Correlation of Fixed Effects:
## (Intr) n(_,d=3)1 n(_,d=3)2 n(_,d=3)3
## ns(f_,d=3)1 0.324
## ns(f_,d=3)2 0.465 0.881
## ns(f_,d=3)3 0.376 0.832 0.960
## Year -0.995 -0.403 -0.541 -0.451
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
multi_fit2 %>% fixef()
## (Intercept) ns(female_ed, df = 3)1 ns(female_ed, df = 3)2
## 31.0156 -4.3391 -4.0941
## ns(female_ed, df = 3)3 Year
## -3.5021 -0.0125
multi_fit2 %>% tidy()
multi_fit2 %>%
ggaverage(terms = c("female_ed [all]")) %>%
plot() +
labs(
title = "Total fertility rate as function of female education and year",
subtitle = "LMER multilevel model with random slopes and intercepts, and natural spline for education",
y = "Total fertility rate",
x = "Average years of education for 15-64 years females"
)

GG_save("figs/multilevel splines.png")
multi_fit2 %>%
ggaverage(terms = c("Year")) %>%
plot()
## Model contains splines or polynomial terms. Consider using `terms="Year
## [all]"` to get smooth plots. See also package-vignette 'Adjusted
## Predictions at Specific Values'.
