options(digits = 2)
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
googlesheets4,
ggrepel,
rms,
DT
)
theme_set(theme_bw())
gs4_deauth()
d = read_sheet("https://docs.google.com/spreadsheets/d/1wo-5tx1zkXGqlouD3jkWLBe8Cs7qpBtYxXw5UCzs4Bc/edit#gid=0") %>%
df_legalize_names()
## ✔ Reading from
## "Assisted reproductive technology in Europe: results generated from European registers by ESHRE".
## ✔ Range 'data'.
d$ART = d$ARTpct %>% map_dbl(function(x) {
if (is.null(x)) return(NA_real_)
as.numeric(x)
})
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
#ISO
d$ISO = d$Country %>%
mapvalues(c("The UK"), c("UK")) %>%
pu_translate()
## No exact match: Switzerlandb
## No exact match: Czech Republic
## No exact match: The Netherlands
## No exact match: The Nederlands
## No exact match: Bosnia–Herzegovina
## No exact match: Bosnia-Herzegovina, Federation part
## No exact match: Armemia
## No exact match: North-Macedonia
## Best fuzzy match found: Switzerlandb -> Switzerland with distance 1.00
## Best fuzzy match found: Czech Republic -> Czech Republic with distance 1.00
## Best fuzzy match found: The Netherlands -> The Netherlands with distance 1.00
## Best fuzzy match found: The Nederlands -> The Netherlands with distance 2.00
## Best fuzzy match found: Bosnia–Herzegovina -> Bosnia-Herzegovina with distance 1.00
## Best fuzzy match found: Bosnia-Herzegovina, Federation part -> Bosnia-Herzegovina with distance 17.00
## Best fuzzy match found: Armemia -> Armenia with distance 1.00
## Best fuzzy match found: North-Macedonia -> North Macedonia with distance 1.00
d$Country_clean = d$ISO %>% pu_translate(reverse = T)
What is the yearly growth rate within countries?
#across all data
ols(ART ~ Year, data = d)
## Frequencies of Missing Values Due to Each Variable
## ART Year
## 113 21
##
## Linear Regression Model
##
## ols(formula = ART ~ Year, data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 354 LR chi2 24.07 R2 0.066
## sigma1.5626 d.f. 1 R2 adj 0.063
## d.f. 352 Pr(> chi2) 0.0000 g 0.472
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.2051 -1.0178 -0.2375 1.0420 5.9949
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -133.6025 27.3989 -4.88 <0.0001
## Year 0.0678 0.0136 4.98 <0.0001
#fixed effects
ols(ART ~ Year + Country_clean, data = d)
## Frequencies of Missing Values Due to Each Variable
## ART Year Country_clean
## 113 21 21
##
## Linear Regression Model
##
## ols(formula = ART ~ Year + Country_clean, data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 354 LR chi2 705.72 R2 0.864
## sigma0.6337 d.f. 41 R2 adj 0.846
## d.f. 312 Pr(> chi2) 0.0000 g 1.684
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.946649 -0.260273 -0.005097 0.234202 3.414014
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -239.1345 13.4268 -17.81 <0.0001
## Year 0.1193 0.0067 17.84 <0.0001
## Country_clean=Armenia 0.3528 0.7127 0.50 0.6209
## Country_clean=Austria 2.2466 0.6636 3.39 0.0008
## Country_clean=Belarus -0.4925 0.6981 -0.71 0.4810
## Country_clean=Belgium 3.0853 0.6518 4.73 <0.0001
## Country_clean=Bosnia & Herzegovina -1.0932 0.8987 -1.22 0.2248
## Country_clean=Bulgaria 1.0665 0.7788 1.37 0.1718
## Country_clean=Croatia 1.4548 0.7085 2.05 0.0409
## Country_clean=Czech Republic 3.9063 0.6699 5.83 <0.0001
## Country_clean=Denmark 4.3231 0.6482 6.67 <0.0001
## Country_clean=Estonia 3.2749 0.6706 4.88 <0.0001
## Country_clean=Finland 2.7300 0.6488 4.21 <0.0001
## Country_clean=France 1.4850 0.6504 2.28 0.0231
## Country_clean=Germany 1.3997 0.6683 2.09 0.0370
## Country_clean=Greece 4.4375 0.7796 5.69 <0.0001
## Country_clean=Hungary 1.4947 0.7319 2.04 0.0420
## Country_clean=Iceland 3.1762 0.6482 4.90 <0.0001
## Country_clean=Ireland -0.8125 0.7796 -1.04 0.2981
## Country_clean=Italy 0.9648 0.6593 1.46 0.1444
## Country_clean=Kazakhstan 0.7542 0.7355 1.03 0.3060
## Country_clean=Latvia 0.3983 0.7763 0.51 0.6083
## Country_clean=Lithuania -0.9923 0.7125 -1.39 0.1647
## Country_clean=Luxembourg 1.7682 0.7362 2.40 0.0169
## Country_clean=Macedonia 1.4289 0.6653 2.15 0.0325
## Country_clean=Malta -0.4362 0.6881 -0.63 0.5266
## Country_clean=Moldova 0.1187 0.7095 0.17 0.8672
## Country_clean=Montenegro 0.7848 0.6663 1.18 0.2398
## Country_clean=Netherlands 1.8106 0.6729 2.69 0.0075
## Country_clean=Norway 2.5357 0.6545 3.87 0.0001
## Country_clean=Poland 0.0991 0.6977 0.14 0.8872
## Country_clean=Portugal 1.1384 0.6615 1.72 0.0862
## Country_clean=Romania -0.7028 0.7120 -0.99 0.3243
## Country_clean=Russia 0.3875 0.6981 0.56 0.5792
## Country_clean=Serbia -1.2077 0.7122 -1.70 0.0909
## Country_clean=Slovenia 3.9353 0.6518 6.04 <0.0001
## Country_clean=Spain 6.2675 0.6981 8.98 <0.0001
## Country_clean=Sweden 2.9239 0.6495 4.50 <0.0001
## Country_clean=Switzerland 1.2416 0.6578 1.89 0.0600
## Country_clean=Turkey 0.1017 0.7763 0.13 0.8959
## Country_clean=Ukraine 0.5778 0.7127 0.81 0.4181
## Country_clean=United Kingdom 1.4912 0.6497 2.30 0.0224
#standard approach
d %>%
filter(!is.na(Country_clean)) %>%
ggplot(aes(Year, ART/100, color = Country_clean)) +
geom_path() +
scale_x_continuous(breaks = seq(0, 10000, by = 2)) +
scale_y_continuous("Babies born with help from any assisted reproductive technology", labels = scales::percent) +
scale_color_discrete("") +
theme(legend.position="bottom") +
ggtitle("Use of assisted reproductive technology",
"Data from European Society of Human Reproduction and Embryology (ESHRE)")
## Warning: Removed 58 rows containing missing values (`geom_path()`).
#labels at end of line
d_last_year_data_for_country = plyr::ddply(d, c("Country_clean"), function(dd) {
# browser()
#last year of data for this country
dd %>% filter(!is.na(ART)) %>% arrange(Year) %>% last()
})
d %>%
filter(!is.na(Country_clean), !is.na(ART)) %>%
ggplot(aes(Year, ART/100, color = Country_clean)) +
geom_line() +
geom_point(size = 3) +
geom_text_repel(data = d_last_year_data_for_country, aes(label = Country_clean), nudge_x = 1) +
coord_cartesian(xlim = c(min(d$Year, na.rm = T), max(d$Year, na.rm = T))) +
scale_x_continuous(breaks = seq(0, 10000, by = 2)) +
scale_y_continuous("% of babies born with ART", labels = scales::percent) +
scale_color_discrete(guide = F) +
ggtitle("Use of assisted reproductive technology",
"Data from European Society of Human Reproduction and Embryology (ESHRE)")
## Warning: Removed 2 rows containing missing values (`geom_text_repel()`).
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
GG_save("figs/timeline.png")
## Warning: Removed 2 rows containing missing values (`geom_text_repel()`).
d %>% select(Country_clean, ISO, Year, ART) %>% DT::datatable()
write_sessioninfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DT_0.28 rms_6.7-0 ggrepel_0.9.3
## [4] googlesheets4_1.1.1 kirkegaard_2023-08-04 psych_2.3.6
## [7] assertthat_0.2.1 weights_1.0.4 Hmisc_5.1-0
## [10] magrittr_2.0.3 lubridate_1.9.2 forcats_1.0.0
## [13] stringr_1.5.0 dplyr_1.1.2 purrr_1.0.1
## [16] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
## [19] ggplot2_3.4.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] mnormt_2.1.1 gridExtra_2.3 readxl_1.4.3
## [4] sandwich_3.0-2 rlang_1.1.1 multcomp_1.4-25
## [7] polspline_1.1.23 compiler_4.3.1 gdata_2.19.0
## [10] systemfonts_1.0.4 vctrs_0.6.3 quantreg_5.95
## [13] crayon_1.5.2 pkgconfig_2.0.3 shape_1.4.6
## [16] fastmap_1.1.1 ellipsis_0.3.2 backports_1.4.1
## [19] labeling_0.4.2 utf8_1.2.3 rmarkdown_2.23
## [22] tzdb_0.4.0 nloptr_2.0.3 ragg_1.2.5
## [25] MatrixModels_0.5-2 xfun_0.39 glmnet_4.1-7
## [28] jomo_2.7-6 cachem_1.0.8 jsonlite_1.8.7
## [31] highr_0.10 pan_1.8 broom_1.0.5
## [34] parallel_4.3.1 cluster_2.1.4 R6_2.5.1
## [37] bslib_0.5.0 stringi_1.7.12 boot_1.3-28
## [40] rpart_4.1.19 jquerylib_0.1.4 cellranger_1.1.0
## [43] Rcpp_1.0.11 iterators_1.0.14 knitr_1.43
## [46] zoo_1.8-12 base64enc_0.1-3 Matrix_1.6-0
## [49] splines_4.3.1 nnet_7.3-19 timechange_0.2.0
## [52] tidyselect_1.2.0 stringdist_0.9.10 rstudioapi_0.15.0
## [55] yaml_2.3.7 codetools_0.2-19 curl_5.0.1
## [58] plyr_1.8.8 lattice_0.21-8 withr_2.5.0
## [61] evaluate_0.21 foreign_0.8-82 survival_3.5-5
## [64] pillar_1.9.0 mice_3.16.0 checkmate_2.2.0
## [67] foreach_1.5.2 generics_0.1.3 hms_1.1.3
## [70] munsell_0.5.0 scales_1.2.1 minqa_1.2.5
## [73] gtools_3.9.4 glue_1.6.2 tools_4.3.1
## [76] data.table_1.14.8 lme4_1.1-34 SparseM_1.81
## [79] fs_1.6.2 mvtnorm_1.2-2 grid_4.3.1
## [82] crosstalk_1.2.0 colorspace_2.1-0 nlme_3.1-162
## [85] htmlTable_2.4.1 googledrive_2.1.1 Formula_1.2-5
## [88] cli_3.6.1 textshaping_0.3.6 fansi_1.0.4
## [91] gargle_1.5.1 gtable_0.3.3 sass_0.4.6
## [94] digest_0.6.33 TH.data_1.1-2 farver_2.1.1
## [97] htmlwidgets_1.6.2 htmltools_0.5.5 lifecycle_1.0.3
## [100] httr_1.4.6 mitml_0.4-5 MASS_7.3-60