Data

Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  rms,
  ggeffects,
  patchwork,
  Polychrome,
  viridis
)
## Loading required package: viridisLite
# theme_set(theme_bw())

options(
    digits = 3
)

Data

d = read_csv("data/life-expectancy.csv") %>% 
  df_legalize_names()
## Rows: 20449 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, Life expectancy at birth (historical)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#1950-2019 USSR
USSR = c("SVK", "SVN", "RUS", "CZE", "EST", "LVA", "LTU", "ALB", "POL", "MNE", "SRB", "HUN", "ROU", "BGR", "BLR", "UKR")
d1950 = d %>% filter(Year >= 1950, Year <= 2019, Code %in% USSR) %>% mutate(ISO = fct_drop(Code), life_expect = Life_expectancy_at_birth_historical)

#models
models = list(
  linear = ols(life_expect ~ Year + Entity, data = d1950),
  lsp = ols(life_expect ~ lsp(Year, c(1970, 1990)) + Entity, data = d1950),
  rcs = ols(life_expect ~ rcs(Year) + Entity, data = d1950),
  rcs_all = ols(life_expect ~ rcs(Year) * Entity, data = d1950)
)

models %>% summarize_models(collapse_factors = "Entity", collapse_nonlinear = T) %>% filter(!str_detect(`Predictor/Model`, "\\*"))
#compare LR
lrtest(models$linear, models$lsp)
## 
## Model 1: life_expect ~ Year + Entity
## Model 2: life_expect ~ lsp(Year, c(1970, 1990)) + Entity
## 
## L.R. Chisq       d.f.          P 
##        195          2          0
lrtest(models$linear, models$rcs)
## 
## Model 1: life_expect ~ Year + Entity
## Model 2: life_expect ~ rcs(Year) + Entity
## 
## L.R. Chisq       d.f.          P 
##        236          3          0
lrtest(models$lsp, models$rcs)
## 
## Model 1: life_expect ~ lsp(Year, c(1970, 1990)) + Entity
## Model 2: life_expect ~ rcs(Year) + Entity
## 
## L.R. Chisq       d.f.          P 
##   4.10e+01   1.00e+00   1.53e-10
lrtest(models$rcs, models$rcs_all)
## 
## Model 1: life_expect ~ rcs(Year) + Entity
## Model 2: life_expect ~ rcs(Year) * Entity
## 
## L.R. Chisq       d.f.          P 
##       2699         60          0
#plot models
n_countries = length(unique(d1950$Entity))
# colors = createPalette(n_countries, c("#ff0000", "#00ff00", "#0000ff"))
colors = viridis_pal(option = "D")(n_countries)
colors %>% length()
## [1] 16
#plots
p1 = ggpredict(models$linear, terms = c("Year", "Entity")) %>% 
  plot(add.data = T, colors = colors) +
  geom_vline(xintercept = 1990, linetype = "dashed") +
  geom_text(label = "Fall of USSR", x = 1985, y  = 80, mapping = aes(color = NULL, group = 1, fill = NULL)) +
  scale_color_discrete("Country") +
  guides(color = guide_legend(override.aes = aes(label = "", alpha = 0))) +
  scale_x_continuous(breaks = seq(1950, 3000, by = 10)) +
  scale_y_continuous("Life expectancy") +
  ggtitle("Actual communism is bad for your health") +
  theme_set(theme_bw())
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p2 = ggpredict(models$lsp, terms = c("Year", "Entity")) %>% 
  plot(add.data = T, colors = colors) +
  geom_vline(xintercept = 1990, linetype = "dashed") +
  geom_text(label = "Fall of USSR", x = 1985, y  = 80, mapping = aes(color = NULL, group = 1, fill = NULL)) +
  scale_color_discrete("Country") +
  guides(color = guide_legend(override.aes = aes(label = "", alpha = 0))) +
  scale_x_continuous(breaks = seq(1950, 3000, by = 10)) +
  scale_y_continuous("Life expectancy") +
  ggtitle("Actual communism is bad for your health") +
  theme_set(theme_bw())
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p3 = ggpredict(models$rcs, terms = c("Year", "Entity")) %>% 
  plot(add.data = T, colors = colors) +
  geom_vline(xintercept = 1990, linetype = "dashed") +
  geom_text(label = "Fall of USSR", x = 1985, y  = 80, mapping = aes(color = NULL, group = 1, fill = NULL)) +
  scale_color_discrete("Country") +
  guides(color = guide_legend(override.aes = aes(label = "", alpha = 0))) +
  scale_x_continuous(breaks = seq(1950, 3000, by = 10)) +
  scale_y_continuous("Life expectancy") +
  ggtitle("Actual communism is bad for your health") +
  theme_set(theme_bw())
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p4 = ggpredict(models$rcs_all, terms = c("Year", "Entity")) %>% 
  plot(add.data = T, colors = colors) +
  geom_vline(xintercept = 1990, linetype = "dashed") +
  geom_text(label = "Fall of USSR", x = 1985, y  = 80, mapping = aes(color = NULL, group = 1, fill = NULL)) +
  scale_color_discrete("Country") +
  guides(color = guide_legend(override.aes = aes(label = "", alpha = 0))) +
  scale_x_continuous(breaks = seq(1950, 3000, by = 10)) +
  scale_y_continuous("Life expectancy") +
  ggtitle("Actual communism is bad for your health") +
  theme_set(theme_bw())
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p1

GG_save("figs/linear_fit.png")

p2

GG_save("figs/linear_segments_fit.png")

p3

GG_save("figs/spline_fit.png")

p4

GG_save("figs/spline_fit_all.png")

Meta

#versions
write_sessioninfo()
## R version 4.3.0 (2023-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] viridis_0.6.2         viridisLite_0.4.1     Polychrome_1.5.1     
##  [4] patchwork_1.1.2       ggeffects_1.2.1       rms_6.6-0            
##  [7] kirkegaard_2023-05-01 psych_2.3.3           assertthat_0.2.1     
## [10] weights_1.0.4         Hmisc_5.0-1           magrittr_2.0.3       
## [13] lubridate_1.9.2       forcats_1.0.0         stringr_1.5.0        
## [16] dplyr_1.1.2           purrr_1.0.1           readr_2.1.4          
## [19] tidyr_1.3.0           tibble_3.2.1          ggplot2_3.4.2        
## [22] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] mnormt_2.1.1         gridExtra_2.3        sandwich_3.0-2      
##  [4] rlang_1.1.0          multcomp_1.4-23      polspline_1.1.22    
##  [7] compiler_4.3.0       gdata_2.18.0.1       systemfonts_1.0.4   
## [10] vctrs_0.6.2          quantreg_5.95        rvest_1.0.3         
## [13] crayon_1.5.2         pkgconfig_2.0.3      fastmap_1.1.1       
## [16] backports_1.4.1      labeling_0.4.2       utf8_1.2.3          
## [19] rmarkdown_2.21       tzdb_0.3.0           haven_2.5.2         
## [22] nloptr_2.0.3         ragg_1.2.5           bit_4.0.5           
## [25] MatrixModels_0.5-1   xfun_0.39            cachem_1.0.7        
## [28] jsonlite_1.8.4       highr_0.10           broom_1.0.4         
## [31] parallel_4.3.0       cluster_2.1.4        R6_2.5.1            
## [34] bslib_0.4.2          stringi_1.7.12       boot_1.3-28         
## [37] rpart_4.1.19         jquerylib_0.1.4      Rcpp_1.0.10         
## [40] knitr_1.42           zoo_1.8-12           base64enc_0.1-3     
## [43] Matrix_1.5-1         splines_4.3.0        nnet_7.3-18         
## [46] timechange_0.2.0     tidyselect_1.2.0     rstudioapi_0.14     
## [49] yaml_2.3.7           codetools_0.2-19     plyr_1.8.8          
## [52] lattice_0.20-45      withr_2.5.0          evaluate_0.20       
## [55] foreign_0.8-82       survival_3.5-3       xml2_1.3.4          
## [58] pillar_1.9.0         mice_3.15.0          checkmate_2.2.0     
## [61] insight_0.19.1       generics_0.1.3       vroom_1.6.1         
## [64] hms_1.1.3            munsell_0.5.0        scales_1.2.1        
## [67] minqa_1.2.5          gtools_3.9.4         glue_1.6.2          
## [70] scatterplot3d_0.3-43 tools_4.3.0          data.table_1.14.8   
## [73] lme4_1.1-33          SparseM_1.81         webshot_0.5.4       
## [76] mvtnorm_1.1-3        grid_4.3.0           colorspace_2.1-0    
## [79] nlme_3.1-162         htmlTable_2.4.1      Formula_1.2-5       
## [82] cli_3.6.1            textshaping_0.3.6    kableExtra_1.3.4    
## [85] fansi_1.0.4          svglite_2.1.1        gtable_0.3.3        
## [88] sass_0.4.5           digest_0.6.31        TH.data_1.1-2       
## [91] farver_2.1.1         htmlwidgets_1.6.2    htmltools_0.5.5     
## [94] lifecycle_1.0.3      httr_1.4.5           bit64_4.0.5         
## [97] MASS_7.3-58.3