Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  googlesheets4,
  rms,
  lme4,
  ggeffects,
  broom, broom.mixed,
  splines,
  flextable
)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Attaching package: 'flextable'
## 
## The following object is masked from 'package:purrr':
## 
##     compose
theme_set(theme_bw())

options(
    digits = 3
)

#multithreading
#library(future)
#plan(multisession(workers = 8))

Functions

Data

d1 = read_csv("data/womens-educational-attainment-vs-fertility/womens-educational-attainment-vs-fertility.csv") %>% 
  df_legalize_names() %>% 
  rename(
    TFR = Fertility_rate_Sex_all_Age_all_Variant_estimates,
    female_ed = Combined_average_years_of_education_for_15_64_years_female_youth_and_adults,
    ISO = Code
  )
## Rows: 60209 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Entity, Code, World regions according to OWID
## dbl (4): Year, Fertility rate - Sex: all - Age: all - Variant: estimates, Co...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#regions
gs4_deauth()
d_regions = read_sheet("https://docs.google.com/spreadsheets/d/1ToJWNbwYY--w0_nh05slEv4ZCko4a-XZc1rAkymTxAA/edit?gid=0#gid=0") %>% 
  df_legalize_names() %>% 
  rename(
    ISO = ISO3
  )
## ✔ Reading from "Countries regional codings".
## ✔ Range 'Sheet1'.
#join
d = inner_join(
  d1,
  d_regions %>% select(ISO, Region1, Region2, Region3)
)
## Joining with `by = join_by(ISO)`

Analysis

#simple plot
d %>% 
  GG_scatter("female_ed", "TFR") +
  geom_smooth() +
  labs(    
    title = "Total fertility rate as function of female education",
    subtitle = "OLS with restricted cubic spline (all data mixed)",
    y = "Total fertility rate",
    x = "Average years of education for 15-64 years females"
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

GG_save("figs/ols spline scatter.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#linear models with regional controls
list(
  ols(TFR ~ female_ed, data = d),
  ols(TFR ~ female_ed + Year, data = d),
  ols(TFR ~ female_ed + Year + Region1, data = d),
  ols(TFR ~ female_ed + Year + Region2, data = d),
  ols(TFR ~ female_ed + Year + Region3, data = d),
  ols(TFR ~ rcs(female_ed) + Year + Region3, data = d)
) %>% summarize_models(collapse_factors = T)
## number of knots in rcs defaulting to 5
ols(TFR ~ rcs(female_ed) + Year + Region3, data = d) %>% 
  ggaverage(terms = c("female_ed")) %>%
  plot() +
  labs(
    title = "Total fertility rate as function of female education, year, and regions",
    subtitle = "OLS with restricted cubic spline",
    y = "Total fertility rate",
    x = "Average years of education for 15-64 years females"
  )
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5
## number of knots in rcs defaulting to 5

GG_save("figs/ols spline.png")

#multi-level model with country fixed effects
#using lmer
multi_fit = lmer(TFR ~ female_ed + Year + (1 + female_ed + Year | ISO), data = d)
## boundary (singular) fit: see help('isSingular')
multi_fit %>% summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: TFR ~ female_ed + Year + (1 + female_ed + Year | ISO)
##    Data: d
## 
## REML criterion at convergence: 3054
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.416 -0.598  0.057  0.586  4.246 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr       
##  ISO      (Intercept) 1.90e-01 0.43605             
##           female_ed   1.30e-01 0.35994   0.11      
##           Year        2.19e-06 0.00148   0.11 -0.98
##  Residual             1.87e-01 0.43292             
## Number of obs: 1720, groups:  ISO, 145
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 18.87473    2.98454    6.32
## female_ed   -0.40687    0.03510  -11.59
## Year        -0.00645    0.00155   -4.17
## 
## Correlation of Fixed Effects:
##           (Intr) feml_d
## female_ed  0.380       
## Year      -0.996 -0.456
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
multi_fit %>% fixef()
## (Intercept)   female_ed        Year 
##    18.87473    -0.40687    -0.00645
multi_fit %>% tidy()
multi_fit %>% 
  ggaverage(terms = c("female_ed")) %>% 
  plot() +
  labs(
    title = "Total fertility rate as function of female education and year",
    subtitle = "LMER multilevel model with random slopes and intercepts",
    y = "Total fertility rate",
    x = "Average years of education for 15-64 years females"
  )

GG_save("figs/multilevel linear.png")

multi_fit %>% 
  ggaverage(terms = c("Year")) %>% 
  plot()

#spline for education
multi_fit2 = lmer(TFR ~ ns(female_ed, df = 3) + Year + (1 + female_ed + Year | ISO), data = d)
## boundary (singular) fit: see help('isSingular')
multi_fit2 %>% summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: TFR ~ ns(female_ed, df = 3) + Year + (1 + female_ed + Year |      ISO)
##    Data: d
## 
## REML criterion at convergence: 2815
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.020 -0.535  0.018  0.574  4.487 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr       
##  ISO      (Intercept) 2.76e-01 0.52548             
##           female_ed   1.63e-01 0.40392   0.29      
##           Year        2.92e-06 0.00171  -0.11 -0.98
##  Residual             1.56e-01 0.39445             
## Number of obs: 1720, groups:  ISO, 145
## 
## Fixed effects:
##                        Estimate Std. Error t value
## (Intercept)            31.01556    3.21211    9.66
## ns(female_ed, df = 3)1 -4.33907    0.29123  -14.90
## ns(female_ed, df = 3)2 -4.09410    0.60990   -6.71
## ns(female_ed, df = 3)3 -3.50209    0.56431   -6.21
## Year                   -0.01253    0.00168   -7.45
## 
## Correlation of Fixed Effects:
##             (Intr) n(_,d=3)1 n(_,d=3)2 n(_,d=3)3
## ns(f_,d=3)1  0.324                              
## ns(f_,d=3)2  0.465  0.881                       
## ns(f_,d=3)3  0.376  0.832     0.960             
## Year        -0.995 -0.403    -0.541    -0.451   
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
multi_fit2 %>% fixef()
##            (Intercept) ns(female_ed, df = 3)1 ns(female_ed, df = 3)2 
##                31.0156                -4.3391                -4.0941 
## ns(female_ed, df = 3)3                   Year 
##                -3.5021                -0.0125
multi_fit2 %>% tidy()
multi_fit2 %>% 
  ggaverage(terms = c("female_ed  [all]")) %>% 
  plot() +
  labs(
    title = "Total fertility rate as function of female education and year",
    subtitle = "LMER multilevel model with random slopes and intercepts, and natural spline for education",
    y = "Total fertility rate",
    x = "Average years of education for 15-64 years females"
  )

GG_save("figs/multilevel splines.png")

multi_fit2 %>% 
  ggaverage(terms = c("Year")) %>% 
  plot()
## Model contains splines or polynomial terms. Consider using `terms="Year
##   [all]"` to get smooth plots. See also package-vignette 'Adjusted
##   Predictions at Specific Values'.

Meta

#versions
write_sessioninfo()
## R version 4.5.0 (2025-04-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] splines   stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] flextable_0.9.7       broom.mixed_0.2.9.6   broom_1.0.8          
##  [4] ggeffects_2.2.1       lme4_1.1-37           Matrix_1.7-3         
##  [7] rms_8.0-0             googlesheets4_1.1.1   kirkegaard_2025-05-09
## [10] psych_2.5.3           assertthat_0.2.1      weights_1.0.4        
## [13] Hmisc_5.2-3           magrittr_2.0.3        lubridate_1.9.4      
## [16] forcats_1.0.0         stringr_1.5.1         dplyr_1.1.4          
## [19] purrr_1.0.4           readr_2.1.5           tidyr_1.3.1          
## [22] tibble_3.2.1          ggplot2_3.5.2         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.17.1       jsonlite_2.0.0          shape_1.4.6.1          
##   [4] datawizard_1.0.2        TH.data_1.1-3           jomo_2.7-6             
##   [7] farver_2.1.2            nloptr_2.2.1            rmarkdown_2.29         
##  [10] fs_1.6.6                ragg_1.4.0              vctrs_0.6.5            
##  [13] minqa_1.2.8             askpass_1.2.1           base64enc_0.1-3        
##  [16] htmltools_0.5.8.1       polspline_1.1.25        haven_2.5.4            
##  [19] curl_6.2.2              cellranger_1.1.0        Formula_1.2-5          
##  [22] mitml_0.4-5             sass_0.4.10             parallelly_1.43.0      
##  [25] bslib_0.9.0             htmlwidgets_1.6.4       plyr_1.8.9             
##  [28] sandwich_3.1-1          zoo_1.8-14              cachem_1.1.0           
##  [31] uuid_1.2-1              lifecycle_1.0.4         iterators_1.0.14       
##  [34] pkgconfig_2.0.3         R6_2.6.1                fastmap_1.2.0          
##  [37] rbibutils_2.3           future_1.40.0           digest_0.6.37          
##  [40] colorspace_2.1-1        furrr_0.3.1             textshaping_1.0.0      
##  [43] labeling_0.4.3          timechange_0.3.0        gdata_3.0.1            
##  [46] mgcv_1.9-1              httr_1.4.7              compiler_4.5.0         
##  [49] gargle_1.5.2            bit64_4.6.0-1           fontquiver_0.2.1       
##  [52] withr_3.0.2             htmlTable_2.4.3         backports_1.5.0        
##  [55] pan_1.9                 MASS_7.3-65             quantreg_6.1           
##  [58] openssl_2.3.2           gtools_3.9.5            tools_4.5.0            
##  [61] foreign_0.8-90          googledrive_2.1.1       zip_2.3.2              
##  [64] nnet_7.3-20             glue_1.8.0              nlme_3.1-168           
##  [67] grid_4.5.0              checkmate_2.3.2         cluster_2.1.8.1        
##  [70] generics_0.1.3          gtable_0.3.6            tzdb_0.5.0             
##  [73] data.table_1.17.0       hms_1.1.3               xml2_1.3.8             
##  [76] foreach_1.5.2           pillar_1.10.2           vroom_1.6.5            
##  [79] lattice_0.22-5          survival_3.8-3          bit_4.6.0              
##  [82] SparseM_1.84-2          tidyselect_1.2.1        fontLiberation_0.1.0   
##  [85] knitr_1.50              fontBitstreamVera_0.1.1 reformulas_0.4.0       
##  [88] gridExtra_2.3           xfun_0.52               stringi_1.8.7          
##  [91] yaml_2.3.10             boot_1.3-31             evaluate_1.0.3         
##  [94] codetools_0.2-19        officer_0.6.8           gdtools_0.4.2          
##  [97] cli_3.6.4               rpart_4.1.24            systemfonts_1.2.2      
## [100] Rdpack_2.6.4            munsell_0.5.1           jquerylib_0.1.4        
## [103] Rcpp_1.0.14             globals_0.17.0          parallel_4.5.0         
## [106] MatrixModels_0.5-4      marginaleffects_0.28.0  listenv_0.9.1          
## [109] glmnet_4.1-8            mvtnorm_1.3-3           scales_1.3.0           
## [112] crayon_1.5.3            insight_1.3.1           rlang_1.1.6            
## [115] multcomp_1.4-28         mnormt_2.1.1            mice_3.17.0
#write data to file for reuse
d %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}