Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
    
)

theme_set(theme_bw())

options(
    digits = 3
)

Analysis

#make a predictor that is more valid for some groups than others
set.seed(1)
error_sds = seq(1, 3, length.out = 10)
group_means = rnorm(10, sd = 1)
group_means[1] = 0
group_n = 1000

#simulate data
d_sim = map_df(1:10, function(group_i) {
  tibble(
    group = group_i,
    true_PGS = rnorm(group_n, mean = group_means[group_i]),
    phenotype = true_PGS + rnorm(group_n, sd = 1),
    obs_PGS = (true_PGS + rnorm(group_n, sd = error_sds[group_i]))
  )
}) %>% mutate(
  group = factor(group)
)

#scatter by group
d_sim %>% 
  ggplot(aes(obs_PGS, phenotype, color = group)) +
  geom_point(alpha = 0.2) +
  geom_smooth() +
  xlab("Observed polygenic score")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

GG_save("PGS_validity_by_group.png")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#correlations by group
d_sim_means = d_sim %>% 
  group_by(group) %>% 
  summarise(
    cor_obs_PGS_phenotype = cor(obs_PGS, phenotype),
    true_PGS = mean(true_PGS),
    phenotype = mean(phenotype),
    obs_PGS = mean(obs_PGS)
  )

d_sim_means %>% 
  GG_scatter("obs_PGS", "phenotype", case_names = "group") +
  xlab("Observed polygenic score mean") +
  ylab("Phenotypic mean")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("group means.png")
## `geom_smooth()` using formula = 'y ~ x'

Meta

#versions
write_sessioninfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kirkegaard_2023-08-04 psych_2.3.6           assertthat_0.2.1     
##  [4] weights_1.0.4         Hmisc_5.1-0           magrittr_2.0.3       
##  [7] lubridate_1.9.2       forcats_1.0.0         stringr_1.5.0        
## [10] dplyr_1.1.2           purrr_1.0.1           readr_2.1.4          
## [13] tidyr_1.3.0           tibble_3.2.1          ggplot2_3.4.2        
## [16] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.0  farver_2.1.1      fastmap_1.1.1     digest_0.6.33    
##  [5] rpart_4.1.19      timechange_0.2.0  lifecycle_1.0.3   cluster_2.1.4    
##  [9] survival_3.5-5    gdata_2.19.0      compiler_4.3.1    rlang_1.1.1      
## [13] sass_0.4.6        tools_4.3.1       utf8_1.2.3        yaml_2.3.7       
## [17] data.table_1.14.8 knitr_1.43        labeling_0.4.2    htmlwidgets_1.6.2
## [21] mnormt_2.1.1      withr_2.5.0       foreign_0.8-82    nnet_7.3-19      
## [25] grid_4.3.1        fansi_1.0.4       jomo_2.7-6        colorspace_2.1-0 
## [29] mice_3.16.0       scales_1.2.1      gtools_3.9.4      iterators_1.0.14 
## [33] MASS_7.3-60       cli_3.6.1         rmarkdown_2.23    ragg_1.2.5       
## [37] generics_0.1.3    rstudioapi_0.15.0 tzdb_0.4.0        minqa_1.2.5      
## [41] cachem_1.0.8      splines_4.3.1     parallel_4.3.1    base64enc_0.1-3  
## [45] vctrs_0.6.3       boot_1.3-28       glmnet_4.1-7      Matrix_1.6-0     
## [49] jsonlite_1.8.7    hms_1.1.3         mitml_0.4-5       Formula_1.2-5    
## [53] htmlTable_2.4.1   systemfonts_1.0.4 foreach_1.5.2     jquerylib_0.1.4  
## [57] glue_1.6.2        nloptr_2.0.3      pan_1.8           codetools_0.2-19 
## [61] stringi_1.7.12    gtable_0.3.3      shape_1.4.6       lme4_1.1-34      
## [65] munsell_0.5.0     pillar_1.9.0      htmltools_0.5.5   R6_2.5.1         
## [69] textshaping_0.3.6 evaluate_0.21     lattice_0.21-8    highr_0.10       
## [73] backports_1.4.1   broom_1.0.5       bslib_0.5.0       Rcpp_1.0.11      
## [77] gridExtra_2.3     nlme_3.1-162      checkmate_2.2.0   mgcv_1.9-0       
## [81] xfun_0.39         pkgconfig_2.0.3