Download the data from the supplementary materials.
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
readxl,
patchwork
)
theme_set(theme_bw())
options(
digits = 3
)
reich = read_excel("data/media-2.xlsx", sheet = 5) %>%
df_legalize_names()
sib = reich %>%
filter(
source == "Howe et al. 2022 (PMID: 35534559)"
) %>%
select(
phenotype_name,
gamma_beta_with_HLA, gamma_z_beta_with_HLA, gamma_sign_beta_with_HLA, gamma_z_sign_beta_with_HLA, rs, rs_z,
note_Howe_et_al_2022
) %>%
pivot_longer(
cols = c(gamma_beta_with_HLA, gamma_z_beta_with_HLA, gamma_sign_beta_with_HLA, gamma_z_sign_beta_with_HLA, rs, rs_z),
names_to = "metric",
values_to = "value"
) %>%
pivot_wider(
names_from = note_Howe_et_al_2022,
values_from = value
) %>%
df_legalize_names()
#plots
#gamma
p_sib_gamma = sib %>%
filter(metric == "gamma_beta_with_HLA") %>%
GG_scatter("Population_estimate", "Sibling_estimate", case_names = "phenotype_name") +
labs(
x = "Population estimate",
y = "Sibling estimate",
title = "Sibling vs. population estimates of gamma"
)
#gamma z
p_sib_gamma_z = sib %>%
filter(metric == "gamma_z_beta_with_HLA") %>%
GG_scatter("Population_estimate", "Sibling_estimate", case_names = "phenotype_name") +
labs(
x = "Population estimate",
y = "Sibling estimate",
title = "Sibling vs. population estimates of gamma z"
)
#gamma sign
p_sib_gamma_sign = sib %>%
filter(metric == "gamma_sign_beta_with_HLA") %>%
GG_scatter("Population_estimate", "Sibling_estimate", case_names = "phenotype_name") +
labs(
x = "Population estimate",
y = "Sibling estimate",
title = "Sibling vs. population estimates of gamma sign"
)
#gamma z sign
p_sib_gamma_z_sign = sib %>%
filter(metric == "gamma_z_sign_beta_with_HLA") %>%
GG_scatter("Population_estimate", "Sibling_estimate", case_names = "phenotype_name") +
labs(
x = "Population estimate",
y = "Sibling estimate",
title = "Sibling vs. population estimates of gamma sign z"
)
#rs
p_sib_rs = sib %>%
filter(metric == "rs") %>%
GG_scatter("Population_estimate", "Sibling_estimate", case_names = "phenotype_name") +
labs(
x = "Population estimate",
y = "Sibling estimate",
title = "Sibling vs. population estimates of rs"
)
#rs z
p_sib_rs_z = sib %>%
filter(metric == "rs_z") %>%
GG_scatter("Population_estimate", "Sibling_estimate", case_names = "phenotype_name") +
labs(
x = "Population estimate",
y = "Sibling estimate",
title = "Sibling vs. population estimates of rs z"
)
#plot togeher
p_sib = patchwork::wrap_plots(p_sib_gamma, p_sib_gamma_z, p_sib_gamma_sign, p_sib_gamma_z_sign, p_sib_rs, p_sib_rs_z, nrow = 3)
p_sib
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/sibling GWAS vs. population estimates.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#the average correlation of the metrics
sib_cors = df_to_ldf(sib, by = "metric", remove_by = F) %>%
map_dfr(\(x) {
cor.test(x$Population_estimate, x$Sibling_estimate) %>%
broom::tidy() %>%
mutate(
metric = x$metric[1]
)
})
sib_cors$estimate %>%
describe2()
#versions
write_sessioninfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.2.0 readxl_1.4.3 kirkegaard_2024-09-17
## [4] psych_2.4.6.26 assertthat_0.2.1 weights_1.0.4
## [7] Hmisc_5.1-3 magrittr_2.0.3 lubridate_1.9.3
## [10] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [13] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [16] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 farver_2.1.2 fastmap_1.2.0 digest_0.6.37
## [5] rpart_4.1.23 timechange_0.3.0 lifecycle_1.0.4 cluster_2.1.6
## [9] survival_3.7-0 gdata_3.0.0 compiler_4.4.1 rlang_1.1.4
## [13] sass_0.4.9 tools_4.4.1 utf8_1.2.4 yaml_2.3.10
## [17] data.table_1.16.0 knitr_1.48 labeling_0.4.3 htmlwidgets_1.6.4
## [21] mnormt_2.1.1 withr_3.0.1 foreign_0.8-86 nnet_7.3-19
## [25] grid_4.4.1 fansi_1.0.6 jomo_2.7-6 colorspace_2.1-1
## [29] mice_3.16.0 scales_1.3.0 gtools_3.9.5 iterators_1.0.14
## [33] MASS_7.3-61 cli_3.6.3 rmarkdown_2.28 ragg_1.3.2
## [37] generics_0.1.3 rstudioapi_0.16.0 tzdb_0.4.0 minqa_1.2.8
## [41] cachem_1.1.0 splines_4.4.1 parallel_4.4.1 cellranger_1.1.0
## [45] base64enc_0.1-3 vctrs_0.6.5 boot_1.3-30 glmnet_4.1-8
## [49] Matrix_1.6-5 jsonlite_1.8.8 hms_1.1.3 mitml_0.4-5
## [53] Formula_1.2-5 htmlTable_2.4.3 systemfonts_1.1.0 foreach_1.5.2
## [57] jquerylib_0.1.4 glue_1.7.0 nloptr_2.1.1 pan_1.9
## [61] codetools_0.2-19 stringi_1.8.4 gtable_0.3.5 shape_1.4.6.1
## [65] lme4_1.1-35.5 munsell_0.5.1 pillar_1.9.0 htmltools_0.5.8.1
## [69] R6_2.5.1 textshaping_0.4.0 evaluate_0.24.0 lattice_0.22-5
## [73] highr_0.11 backports_1.5.0 broom_1.0.6 bslib_0.8.0
## [77] Rcpp_1.0.13 gridExtra_2.3 nlme_3.1-165 checkmate_2.3.2
## [81] mgcv_1.9-1 xfun_0.47 pkgconfig_2.0.3
#write data to file for reuse
# d %>% write_rds("data/data_for_reuse.rds")
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/XXX/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}