Study

https://www.biorxiv.org/content/10.1101/2024.09.14.613021v1

Download the data from the supplementary materials.

Init

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  readxl,
  patchwork
)

theme_set(theme_bw())

options(
    digits = 3
)

Data

reich = read_excel("data/media-2.xlsx", sheet = 5) %>% 
  df_legalize_names()

Analysis

Sibling vs. between

sib = reich %>% 
  filter(
    source == "Howe et al. 2022 (PMID: 35534559)"
  ) %>% 
  select(
    phenotype_name,
    gamma_beta_with_HLA, gamma_z_beta_with_HLA, gamma_sign_beta_with_HLA, gamma_z_sign_beta_with_HLA, rs, rs_z,
    note_Howe_et_al_2022
  ) %>% 
  pivot_longer(
    cols = c(gamma_beta_with_HLA, gamma_z_beta_with_HLA, gamma_sign_beta_with_HLA, gamma_z_sign_beta_with_HLA, rs, rs_z),
    names_to = "metric",
    values_to = "value"
  ) %>% 
  pivot_wider(
    names_from = note_Howe_et_al_2022,
    values_from = value
  ) %>% 
  df_legalize_names()

#plots
#gamma
p_sib_gamma = sib %>% 
  filter(metric == "gamma_beta_with_HLA") %>%
  GG_scatter("Population_estimate", "Sibling_estimate", case_names = "phenotype_name") +
  labs(
    x = "Population estimate",
    y = "Sibling estimate",
    title = "Sibling vs. population estimates of gamma"
  )

#gamma z
p_sib_gamma_z = sib %>% 
  filter(metric == "gamma_z_beta_with_HLA") %>%
  GG_scatter("Population_estimate", "Sibling_estimate", case_names = "phenotype_name") +
  labs(
    x = "Population estimate",
    y = "Sibling estimate",
    title = "Sibling vs. population estimates of gamma z"
  )

#gamma sign
p_sib_gamma_sign = sib %>% 
  filter(metric == "gamma_sign_beta_with_HLA") %>%
  GG_scatter("Population_estimate", "Sibling_estimate", case_names = "phenotype_name") +
  labs(
    x = "Population estimate",
    y = "Sibling estimate",
    title = "Sibling vs. population estimates of gamma sign"
  )

#gamma z sign
p_sib_gamma_z_sign = sib %>% 
  filter(metric == "gamma_z_sign_beta_with_HLA") %>%
  GG_scatter("Population_estimate", "Sibling_estimate", case_names = "phenotype_name") +
  labs(
    x = "Population estimate",
    y = "Sibling estimate",
    title = "Sibling vs. population estimates of gamma sign z"
  )

#rs
p_sib_rs = sib %>% 
  filter(metric == "rs") %>%
  GG_scatter("Population_estimate", "Sibling_estimate", case_names = "phenotype_name") +
  labs(
    x = "Population estimate",
    y = "Sibling estimate",
    title = "Sibling vs. population estimates of rs"
  )

#rs z
p_sib_rs_z = sib %>% 
  filter(metric == "rs_z") %>%
  GG_scatter("Population_estimate", "Sibling_estimate", case_names = "phenotype_name") +
  labs(
    x = "Population estimate",
    y = "Sibling estimate",
    title = "Sibling vs. population estimates of rs z"
  )

#plot togeher
p_sib = patchwork::wrap_plots(p_sib_gamma, p_sib_gamma_z, p_sib_gamma_sign, p_sib_gamma_z_sign, p_sib_rs, p_sib_rs_z, nrow = 3)

p_sib

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/sibling GWAS vs. population estimates.png")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#the average correlation of the metrics
sib_cors = df_to_ldf(sib, by = "metric", remove_by = F) %>% 
  map_dfr(\(x) {
    cor.test(x$Population_estimate, x$Sibling_estimate) %>% 
      broom::tidy() %>% 
      mutate(
        metric = x$metric[1]
      )
  })

sib_cors$estimate %>% 
  describe2()

Meta

#versions
write_sessioninfo()

## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.2.0       readxl_1.4.3          kirkegaard_2024-09-17
##  [4] psych_2.4.6.26        assertthat_0.2.1      weights_1.0.4        
##  [7] Hmisc_5.1-3           magrittr_2.0.3        lubridate_1.9.3      
## [10] forcats_1.0.0         stringr_1.5.1         dplyr_1.1.4          
## [13] purrr_1.0.2           readr_2.1.5           tidyr_1.3.1          
## [16] tibble_3.2.1          ggplot2_3.5.1         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1  farver_2.1.2      fastmap_1.2.0     digest_0.6.37    
##  [5] rpart_4.1.23      timechange_0.3.0  lifecycle_1.0.4   cluster_2.1.6    
##  [9] survival_3.7-0    gdata_3.0.0       compiler_4.4.1    rlang_1.1.4      
## [13] sass_0.4.9        tools_4.4.1       utf8_1.2.4        yaml_2.3.10      
## [17] data.table_1.16.0 knitr_1.48        labeling_0.4.3    htmlwidgets_1.6.4
## [21] mnormt_2.1.1      withr_3.0.1       foreign_0.8-86    nnet_7.3-19      
## [25] grid_4.4.1        fansi_1.0.6       jomo_2.7-6        colorspace_2.1-1 
## [29] mice_3.16.0       scales_1.3.0      gtools_3.9.5      iterators_1.0.14 
## [33] MASS_7.3-61       cli_3.6.3         rmarkdown_2.28    ragg_1.3.2       
## [37] generics_0.1.3    rstudioapi_0.16.0 tzdb_0.4.0        minqa_1.2.8      
## [41] cachem_1.1.0      splines_4.4.1     parallel_4.4.1    cellranger_1.1.0 
## [45] base64enc_0.1-3   vctrs_0.6.5       boot_1.3-30       glmnet_4.1-8     
## [49] Matrix_1.6-5      jsonlite_1.8.8    hms_1.1.3         mitml_0.4-5      
## [53] Formula_1.2-5     htmlTable_2.4.3   systemfonts_1.1.0 foreach_1.5.2    
## [57] jquerylib_0.1.4   glue_1.7.0        nloptr_2.1.1      pan_1.9          
## [61] codetools_0.2-19  stringi_1.8.4     gtable_0.3.5      shape_1.4.6.1    
## [65] lme4_1.1-35.5     munsell_0.5.1     pillar_1.9.0      htmltools_0.5.8.1
## [69] R6_2.5.1          textshaping_0.4.0 evaluate_0.24.0   lattice_0.22-5   
## [73] highr_0.11        backports_1.5.0   broom_1.0.6       bslib_0.8.0      
## [77] Rcpp_1.0.13       gridExtra_2.3     nlme_3.1-165      checkmate_2.3.2  
## [81] mgcv_1.9-1        xfun_0.47         pkgconfig_2.0.3

#write data to file for reuse
# d %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}

Reanalysis of Reich 2024 ancient genomes selection paper (Akbari et al)

24 September, 2024

Study

Init

Data

Analysis

Sibling vs. between

Meta