options(digits = 2)
library(pacman)
p_load(kirkegaard, DT)
#vcf of snps
tg_snps = read_vcf("data/tg_snps.vcf")
#tg meta
tg_meta = read_tsv(str_glue("data/tg_samples.tsv")) %>% df_legalize_names()
## Parsed with column specification:
## cols(
## `Sample name` = col_character(),
## Sex = col_character(),
## `Biosample ID` = col_character(),
## `Population code` = col_character(),
## `Population name` = col_character(),
## `Superpopulation code` = col_character(),
## `Superpopulation name` = col_character(),
## `Data collections` = col_character()
## )
tg_meta %<>% filter(Sample_name %in% tg_snps$id)
#join
d = full_join(tg_snps, tg_meta, by = c("id" = "Sample_name"))
## Warning: Column `id`/`Sample_name` has different attributes on LHS and RHS of
## join
freqs_superpops = plyr::ddply(d, "Superpopulation_name", function(dd) {
#frequency of each snp
snps = names(dd) %>% str_subset("^rs")
snp_freqs = map(snps, ~wtd_mean(dd[[.]] / 2))
names(snp_freqs) = snps
#data frame
snp_freqs %>% as.list %>% as_tibble
})
freqs_superpops %>% df_round(3)
freqs_pops = plyr::ddply(d, "Population_name", function(dd) {
#frequency of each snp
snps = names(dd) %>% str_subset("^rs")
snp_freqs = map(snps, ~wtd_mean(dd[[.]] / 2))
names(snp_freqs) = snps
#data frame
snp_freqs %>% as.list %>% as_tibble
})
freqs_pops %>% df_round(3)
write_tsv(freqs_pops, "data/out/freqs_pops.tsv")
write_tsv(freqs_superpops, "data/out/freqs_superpops.tsv")
write_sessioninfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DT_0.12 kirkegaard_2018.05 metafor_2.1-0 Matrix_1.2-18
## [5] psych_1.9.12.31 magrittr_1.5 assertthat_0.2.1 weights_1.0.1
## [9] mice_3.8.0 gdata_2.18.0 Hmisc_4.3-1 Formula_1.2-3
## [13] survival_3.1-8 lattice_0.20-38 forcats_0.4.0 stringr_1.4.0
## [17] dplyr_0.8.4 purrr_0.3.3 readr_1.3.1 tidyr_1.0.2
## [21] tibble_2.1.3 ggplot2_3.2.1 tidyverse_1.3.0 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-144 fs_1.3.1 lubridate_1.7.4
## [4] RColorBrewer_1.1-2 httr_1.4.1 tools_3.6.2
## [7] backports_1.1.5 R6_2.4.1 rpart_4.1-15
## [10] DBI_1.1.0 lazyeval_0.2.2 colorspace_1.4-1
## [13] nnet_7.3-12 withr_2.1.2 tidyselect_1.0.0
## [16] gridExtra_2.3 mnormt_1.5-6 compiler_3.6.2
## [19] cli_2.0.2 rvest_0.3.5 htmlTable_1.13.3
## [22] xml2_1.2.2 scales_1.1.0 checkmate_2.0.0
## [25] digest_0.6.25 foreign_0.8-75 rmarkdown_2.1
## [28] base64enc_0.1-3 jpeg_0.1-8.1 pkgconfig_2.0.3
## [31] htmltools_0.4.0 dbplyr_1.4.2 htmlwidgets_1.5.1
## [34] rlang_0.4.4 readxl_1.3.1 rstudioapi_0.11
## [37] generics_0.0.2 jsonlite_1.6.1 gtools_3.8.1
## [40] acepack_1.4.1 Rcpp_1.0.3 munsell_0.5.0
## [43] fansi_0.4.1 lifecycle_0.1.0 stringi_1.4.6
## [46] yaml_2.2.1 plyr_1.8.5 grid_3.6.2
## [49] parallel_3.6.2 crayon_1.3.4 haven_2.2.0
## [52] splines_3.6.2 hms_0.5.3 knitr_1.28
## [55] pillar_1.4.3 reprex_0.3.0 glue_1.3.1
## [58] evaluate_0.14 latticeExtra_0.6-29 data.table_1.12.8
## [61] modelr_0.1.6 png_0.1-7 vctrs_0.2.3
## [64] cellranger_1.1.0 gtable_0.3.0 xfun_0.12
## [67] broom_0.5.5 cluster_2.1.0 ellipsis_0.3.0