library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
rms,
ggeffects,
patchwork,
Polychrome,
viridis
)
## Loading required package: viridisLite
# theme_set(theme_bw())
options(
digits = 3
)
d = read_csv("data/life-expectancy.csv") %>%
df_legalize_names()
## Rows: 20449 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, Life expectancy at birth (historical)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#1950-2019 USSR
USSR = c("SVK", "SVN", "RUS", "CZE", "EST", "LVA", "LTU", "ALB", "POL", "MNE", "SRB", "HUN", "ROU", "BGR", "BLR", "UKR")
d1950 = d %>% filter(Year >= 1950, Year <= 2019, Code %in% USSR) %>% mutate(ISO = fct_drop(Code), life_expect = Life_expectancy_at_birth_historical)
#models
models = list(
linear = ols(life_expect ~ Year + Entity, data = d1950),
lsp = ols(life_expect ~ lsp(Year, c(1970, 1990)) + Entity, data = d1950),
rcs = ols(life_expect ~ rcs(Year) + Entity, data = d1950),
rcs_all = ols(life_expect ~ rcs(Year) * Entity, data = d1950)
)
models %>% summarize_models(collapse_factors = "Entity", collapse_nonlinear = T) %>% filter(!str_detect(`Predictor/Model`, "\\*"))
#compare LR
lrtest(models$linear, models$lsp)
##
## Model 1: life_expect ~ Year + Entity
## Model 2: life_expect ~ lsp(Year, c(1970, 1990)) + Entity
##
## L.R. Chisq d.f. P
## 195 2 0
lrtest(models$linear, models$rcs)
##
## Model 1: life_expect ~ Year + Entity
## Model 2: life_expect ~ rcs(Year) + Entity
##
## L.R. Chisq d.f. P
## 236 3 0
lrtest(models$lsp, models$rcs)
##
## Model 1: life_expect ~ lsp(Year, c(1970, 1990)) + Entity
## Model 2: life_expect ~ rcs(Year) + Entity
##
## L.R. Chisq d.f. P
## 4.10e+01 1.00e+00 1.53e-10
lrtest(models$rcs, models$rcs_all)
##
## Model 1: life_expect ~ rcs(Year) + Entity
## Model 2: life_expect ~ rcs(Year) * Entity
##
## L.R. Chisq d.f. P
## 2699 60 0
#plot models
n_countries = length(unique(d1950$Entity))
# colors = createPalette(n_countries, c("#ff0000", "#00ff00", "#0000ff"))
colors = viridis_pal(option = "D")(n_countries)
colors %>% length()
## [1] 16
#plots
p1 = ggpredict(models$linear, terms = c("Year", "Entity")) %>%
plot(add.data = T, colors = colors) +
geom_vline(xintercept = 1990, linetype = "dashed") +
geom_text(label = "Fall of USSR", x = 1985, y = 80, mapping = aes(color = NULL, group = 1, fill = NULL)) +
scale_color_discrete("Country") +
guides(color = guide_legend(override.aes = aes(label = "", alpha = 0))) +
scale_x_continuous(breaks = seq(1950, 3000, by = 10)) +
scale_y_continuous("Life expectancy") +
ggtitle("Actual communism is bad for your health") +
theme_set(theme_bw())
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p2 = ggpredict(models$lsp, terms = c("Year", "Entity")) %>%
plot(add.data = T, colors = colors) +
geom_vline(xintercept = 1990, linetype = "dashed") +
geom_text(label = "Fall of USSR", x = 1985, y = 80, mapping = aes(color = NULL, group = 1, fill = NULL)) +
scale_color_discrete("Country") +
guides(color = guide_legend(override.aes = aes(label = "", alpha = 0))) +
scale_x_continuous(breaks = seq(1950, 3000, by = 10)) +
scale_y_continuous("Life expectancy") +
ggtitle("Actual communism is bad for your health") +
theme_set(theme_bw())
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p3 = ggpredict(models$rcs, terms = c("Year", "Entity")) %>%
plot(add.data = T, colors = colors) +
geom_vline(xintercept = 1990, linetype = "dashed") +
geom_text(label = "Fall of USSR", x = 1985, y = 80, mapping = aes(color = NULL, group = 1, fill = NULL)) +
scale_color_discrete("Country") +
guides(color = guide_legend(override.aes = aes(label = "", alpha = 0))) +
scale_x_continuous(breaks = seq(1950, 3000, by = 10)) +
scale_y_continuous("Life expectancy") +
ggtitle("Actual communism is bad for your health") +
theme_set(theme_bw())
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p4 = ggpredict(models$rcs_all, terms = c("Year", "Entity")) %>%
plot(add.data = T, colors = colors) +
geom_vline(xintercept = 1990, linetype = "dashed") +
geom_text(label = "Fall of USSR", x = 1985, y = 80, mapping = aes(color = NULL, group = 1, fill = NULL)) +
scale_color_discrete("Country") +
guides(color = guide_legend(override.aes = aes(label = "", alpha = 0))) +
scale_x_continuous(breaks = seq(1950, 3000, by = 10)) +
scale_y_continuous("Life expectancy") +
ggtitle("Actual communism is bad for your health") +
theme_set(theme_bw())
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p1
GG_save("figs/linear_fit.png")
p2
GG_save("figs/linear_segments_fit.png")
p3
GG_save("figs/spline_fit.png")
p4
GG_save("figs/spline_fit_all.png")
#versions
write_sessioninfo()
## R version 4.3.0 (2023-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] viridis_0.6.2 viridisLite_0.4.1 Polychrome_1.5.1
## [4] patchwork_1.1.2 ggeffects_1.2.1 rms_6.6-0
## [7] kirkegaard_2023-05-01 psych_2.3.3 assertthat_0.2.1
## [10] weights_1.0.4 Hmisc_5.0-1 magrittr_2.0.3
## [13] lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0
## [16] dplyr_1.1.2 purrr_1.0.1 readr_2.1.4
## [19] tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.2
## [22] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] mnormt_2.1.1 gridExtra_2.3 sandwich_3.0-2
## [4] rlang_1.1.0 multcomp_1.4-23 polspline_1.1.22
## [7] compiler_4.3.0 gdata_2.18.0.1 systemfonts_1.0.4
## [10] vctrs_0.6.2 quantreg_5.95 rvest_1.0.3
## [13] crayon_1.5.2 pkgconfig_2.0.3 fastmap_1.1.1
## [16] backports_1.4.1 labeling_0.4.2 utf8_1.2.3
## [19] rmarkdown_2.21 tzdb_0.3.0 haven_2.5.2
## [22] nloptr_2.0.3 ragg_1.2.5 bit_4.0.5
## [25] MatrixModels_0.5-1 xfun_0.39 cachem_1.0.7
## [28] jsonlite_1.8.4 highr_0.10 broom_1.0.4
## [31] parallel_4.3.0 cluster_2.1.4 R6_2.5.1
## [34] bslib_0.4.2 stringi_1.7.12 boot_1.3-28
## [37] rpart_4.1.19 jquerylib_0.1.4 Rcpp_1.0.10
## [40] knitr_1.42 zoo_1.8-12 base64enc_0.1-3
## [43] Matrix_1.5-1 splines_4.3.0 nnet_7.3-18
## [46] timechange_0.2.0 tidyselect_1.2.0 rstudioapi_0.14
## [49] yaml_2.3.7 codetools_0.2-19 plyr_1.8.8
## [52] lattice_0.20-45 withr_2.5.0 evaluate_0.20
## [55] foreign_0.8-82 survival_3.5-3 xml2_1.3.4
## [58] pillar_1.9.0 mice_3.15.0 checkmate_2.2.0
## [61] insight_0.19.1 generics_0.1.3 vroom_1.6.1
## [64] hms_1.1.3 munsell_0.5.0 scales_1.2.1
## [67] minqa_1.2.5 gtools_3.9.4 glue_1.6.2
## [70] scatterplot3d_0.3-43 tools_4.3.0 data.table_1.14.8
## [73] lme4_1.1-33 SparseM_1.81 webshot_0.5.4
## [76] mvtnorm_1.1-3 grid_4.3.0 colorspace_2.1-0
## [79] nlme_3.1-162 htmlTable_2.4.1 Formula_1.2-5
## [82] cli_3.6.1 textshaping_0.3.6 kableExtra_1.3.4
## [85] fansi_1.0.4 svglite_2.1.1 gtable_0.3.3
## [88] sass_0.4.5 digest_0.6.31 TH.data_1.1-2
## [91] farver_2.1.1 htmlwidgets_1.6.2 htmltools_0.5.5
## [94] lifecycle_1.0.3 httr_1.4.5 bit64_4.0.5
## [97] MASS_7.3-58.3