library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
)
theme_set(theme_bw())
options(
digits = 3,
scipen = 5
)
#settings
n_loci = 10000
x_prop = 0.034
n_loci_x = round(n_loci*x_prop)
n_loci_nonx = n_loci - n_loci_x
n_cases_per_sex = 2000
sex = rep(c("female", "male"), each = 2000)
sex_F = (sex == "female")
#simulate data
simulate_data = function() {
#loci effects
loci_freqs = runif(n = n_loci, min = .01, max = .99)
#build matrix
alleles = matrix(NA, nrow = n_cases_per_sex * 2, ncol = n_loci)
#loop across variants to add
for (i in 1:n_loci) {
#is X?
if (i <= n_loci_x) {
#make male and female values separately
alleles[sex_F, i] = rbinom(n_cases_per_sex, size = 2, prob = loci_freqs[i]) / 2
alleles[!sex_F, i] = rbinom(n_cases_per_sex, size = 1, prob = loci_freqs[i])
} else {
alleles[, i] = rbinom(n_cases_per_sex, size = 2, prob = loci_freqs[i])
}
}
alleles
}
#make data
set.seed(1)
d1 = simulate_data()
#get scores, assuming equal effects of all loci
d1_scores = rowSums(d1)
d1_x_scores = rowSums(d1[, 1:n_loci_x])
#summary stats
(d1_x_scores_desc = describe2(d1_x_scores, group = sex))
## New names:
## • `` -> `...1`
(d1_scores_desc = describe2(d1_scores, group = sex))
## New names:
## • `` -> `...1`
#effect sizes in SD ratios
d1_x_scores_desc$sd[2] / d1_x_scores_desc$sd[1]
## [1] 1.46
d1_scores_desc$sd[2] / d1_scores_desc$sd[1]
## [1] 1.01
#test for variance equivalence
var.test(d1_x_scores[sex_F], d1_x_scores[!sex_F])
##
## F test to compare two variances
##
## data: d1_x_scores[sex_F] and d1_x_scores[!sex_F]
## F = 0.5, num df = 1999, denom df = 1999, p-value <2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.431 0.514
## sample estimates:
## ratio of variances
## 0.471
var.test(d1_scores[sex_F], d1_scores[!sex_F])
##
## F test to compare two variances
##
## data: d1_scores[sex_F] and d1_scores[!sex_F]
## F = 1, num df = 1999, denom df = 1999, p-value = 0.7
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.90 1.07
## sample estimates:
## ratio of variances
## 0.983
#figs
tibble(
PGS_X = d1_x_scores,
sex = sex
) %>%
GG_denhist("PGS_X", "sex") +
scale_x_continuous("Polygenic score of variants on X chromosome")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_save("figs/X_PGS_dist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
tibble(
PGS = d1_scores,
sex = sex
) %>%
GG_denhist("PGS", "sex") +
scale_x_continuous("Polygenic score of all variants")
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_save("figs/PGS_dist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#repeat experiment 100 times
set.seed(1)
sim_results = map_df(1:100, function(sim) {
# browser()
message(sim)
#make data
d1 = simulate_data()
#get scores, assuming equal effects of all loci
d1_scores = rowSums(d1)
d1_x_scores = rowSums(d1[, 1:n_loci_x])
tibble(
sim = sim,
sd_ratio_x = sd(d1_x_scores[!sex_F]) / sd(d1_x_scores[sex_F]),
sd_ratio = sd(d1_scores[!sex_F]) / sd(d1_scores[sex_F]),
var_ratio_test_x = var.test(d1_x_scores[sex_F], d1_x_scores[!sex_F])$p.value,
var_ratio_test = var.test(d1_scores[sex_F], d1_scores[!sex_F])$p.value
)
})
## 1
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23
## 24
## 25
## 26
## 27
## 28
## 29
## 30
## 31
## 32
## 33
## 34
## 35
## 36
## 37
## 38
## 39
## 40
## 41
## 42
## 43
## 44
## 45
## 46
## 47
## 48
## 49
## 50
## 51
## 52
## 53
## 54
## 55
## 56
## 57
## 58
## 59
## 60
## 61
## 62
## 63
## 64
## 65
## 66
## 67
## 68
## 69
## 70
## 71
## 72
## 73
## 74
## 75
## 76
## 77
## 78
## 79
## 80
## 81
## 82
## 83
## 84
## 85
## 86
## 87
## 88
## 89
## 90
## 91
## 92
## 93
## 94
## 95
## 96
## 97
## 98
## 99
## 100
#overall results
describe2(sim_results[-1]) %>% df_round(3)
#versions
write_sessioninfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kirkegaard_2023-08-04 psych_2.3.6 assertthat_0.2.1
## [4] weights_1.0.4 Hmisc_5.1-0 magrittr_2.0.3
## [7] lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0
## [10] dplyr_1.1.2 purrr_1.0.1 readr_2.1.4
## [13] tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.2
## [16] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.0 farver_2.1.1 fastmap_1.1.1 digest_0.6.33
## [5] rpart_4.1.19 timechange_0.2.0 lifecycle_1.0.3 cluster_2.1.4
## [9] survival_3.5-5 gdata_2.19.0 compiler_4.3.1 rlang_1.1.1
## [13] sass_0.4.6 tools_4.3.1 utf8_1.2.3 yaml_2.3.7
## [17] data.table_1.14.8 knitr_1.43 labeling_0.4.2 htmlwidgets_1.6.2
## [21] mnormt_2.1.1 plyr_1.8.8 withr_2.5.0 foreign_0.8-82
## [25] nnet_7.3-19 grid_4.3.1 fansi_1.0.4 jomo_2.7-6
## [29] colorspace_2.1-0 mice_3.16.0 scales_1.2.1 gtools_3.9.4
## [33] iterators_1.0.14 MASS_7.3-60 cli_3.6.1 rmarkdown_2.23
## [37] ragg_1.2.5 generics_0.1.3 rstudioapi_0.15.0 tzdb_0.4.0
## [41] minqa_1.2.5 cachem_1.0.8 splines_4.3.1 parallel_4.3.1
## [45] base64enc_0.1-3 vctrs_0.6.3 boot_1.3-28 glmnet_4.1-7
## [49] Matrix_1.6-0 jsonlite_1.8.7 hms_1.1.3 mitml_0.4-5
## [53] Formula_1.2-5 htmlTable_2.4.1 systemfonts_1.0.4 foreach_1.5.2
## [57] jquerylib_0.1.4 glue_1.6.2 nloptr_2.0.3 pan_1.8
## [61] codetools_0.2-19 stringi_1.7.12 gtable_0.3.3 shape_1.4.6
## [65] lme4_1.1-34 munsell_0.5.0 pillar_1.9.0 htmltools_0.5.5
## [69] R6_2.5.1 textshaping_0.3.6 evaluate_0.21 lattice_0.21-8
## [73] highr_0.10 backports_1.4.1 broom_1.0.5 bslib_0.5.0
## [77] Rcpp_1.0.11 gridExtra_2.3 nlme_3.1-162 checkmate_2.2.0
## [81] xfun_0.39 pkgconfig_2.0.3
#write data to file for reuse
# d %>% write_rds("data/data_for_reuse.rds")
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/XXX/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}