Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## Loading required package: weights
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: assertthat
## 
## Attaching package: 'assertthat'
## The following object is masked from 'package:tibble':
## 
##     has_name
## Loading required package: psych
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## Attaching package: 'kirkegaard'
## The following object is masked from 'package:psych':
## 
##     rescale
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric
## The following object is masked from 'package:base':
## 
##     +

Simulation showing the adjustment method

We simulate datasets where:

  • Reliability of the outcome, y, varies at random. This is controlling by sampling the error standard deviation from uniform distribution [0, 1].
  • Slopes for two predictors, x1 and x2, vary at random. Both sampled from uniform distribution [0, 2].
  • The two predictors are potentially correlated, based on a value drawn from uniform distribution [0, 1].
  • Variables are standardized so to use standardized coefficients.
  • True standardized slopes are computed before adding the error to y.
  • Reliability is calculated and compared with the observed reliability.
  • The betas are adjusted using x/sqrt(theoretical reliability), where x is the beta.
  • Ratios between corrected and true betas are computed, as well as for observed/expected reliability.
  • Sample size is always 10000. The simulation is repeated 1000 times.
set.seed(1)

#run many times, simulate random amounts of error in Y
sim_results = map_df(1:1000, function(i) {
  # browser()
  #randomly choose error sd
  error_sd = runif(1, min = 0, max = 1)
  
  #reliability
  rel = 1 / (1 + error_sd^2)
  
  #simulate potentially correlated predictors
  pred_cor = runif(1)
  x_vals = MASS::mvrnorm(10000, mu = rep(0, 2), Sigma = matrix(c(1, pred_cor, pred_cor, 1), nrow = 2))
  
  #data
  sim = tibble(
    x1 = x_vals[, 1],
    x2 = x_vals[, 2],
    y = (runif(1)*x1 + runif(1)*x2 + rnorm(10000, sd = runif(1, min = 0, max = 2))) %>% standardize(),
    y_ob = (y + rnorm(1000, sd = error_sd)) %>% standardize()
  )
  
  #observed rel
  rel_ob = cor(sim$y, sim$y_ob)^2
  
  #fits
  fit_true = lm(y ~ x1 + x2, data = sim)
  fit_ob = lm(y_ob ~ x1 + x2, data = sim)
  coef_true = coef(fit_true)
  coef_ob = coef(fit_ob)
  
  #results
  tibble(
    error_sd = error_sd,
    rel = rel,
    rel_ob = rel_ob,
    rel_ratio = rel_ob/rel,
    pred_cor = pred_cor,
    true_coef_x1 = coef_true[2],
    true_coef_x2 = coef_true[3],
    coef_x1 = coef_ob[2],
    coef_x2 = coef_ob[3],
    coef_x1_cor = coef_ob[2] / sqrt(rel),
    coef_x2_cor = coef_ob[3] / sqrt(rel),
    coef_x1_ratio = coef_x1_cor / true_coef_x1,
    coef_x2_ratio = coef_x2_cor / true_coef_x2
  )
})

describe2(sim_results)
#plot
GG_scatter(sim_results, "true_coef_x1", "coef_x1_cor")
## `geom_smooth()` using formula 'y ~ x'

Results show that reliability adjustment works perfectly well under these circumstances.

Meta

sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kirkegaard_2022-05-06 psych_2.2.3           assertthat_0.2.1     
##  [4] weights_1.0.4         Hmisc_4.7-0           Formula_1.2-4        
##  [7] survival_3.2-13       lattice_0.20-45       magrittr_2.0.3       
## [10] forcats_0.5.1         stringr_1.4.0         dplyr_1.0.9          
## [13] purrr_0.3.4           readr_2.1.2           tidyr_1.2.0          
## [16] tibble_3.1.7          ggplot2_3.3.6         tidyverse_1.3.1      
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-157        fs_1.5.2            lubridate_1.8.0    
##  [4] RColorBrewer_1.1-3  httr_1.4.3          tools_4.2.0        
##  [7] backports_1.4.1     bslib_0.3.1         utf8_1.2.2         
## [10] R6_2.5.1            rpart_4.1.16        mgcv_1.8-40        
## [13] DBI_1.1.2           colorspace_2.0-3    nnet_7.3-17        
## [16] withr_2.5.0         mnormt_2.0.2        tidyselect_1.1.2   
## [19] gridExtra_2.3       compiler_4.2.0      cli_3.3.0          
## [22] rvest_1.0.2         htmlTable_2.4.0     mice_3.14.0        
## [25] xml2_1.3.3          labeling_0.4.2      sass_0.4.1         
## [28] scales_1.2.0        checkmate_2.1.0     digest_0.6.29      
## [31] foreign_0.8-82      minqa_1.2.4         rmarkdown_2.14     
## [34] base64enc_0.1-3     jpeg_0.1-9          pkgconfig_2.0.3    
## [37] htmltools_0.5.2     lme4_1.1-29         highr_0.9          
## [40] dbplyr_2.1.1        fastmap_1.1.0       htmlwidgets_1.5.4  
## [43] rlang_1.0.2         readxl_1.4.0        rstudioapi_0.13    
## [46] farver_2.1.0        jquerylib_0.1.4     generics_0.1.2     
## [49] jsonlite_1.8.0      gtools_3.9.2        Matrix_1.4-1       
## [52] Rcpp_1.0.8.3        munsell_0.5.0       fansi_1.0.3        
## [55] lifecycle_1.0.1     stringi_1.7.6       yaml_2.3.5         
## [58] MASS_7.3-57         grid_4.2.0          parallel_4.2.0     
## [61] gdata_2.18.0        crayon_1.5.1        haven_2.5.0        
## [64] splines_4.2.0       hms_1.1.1           tmvnsim_1.0-2      
## [67] knitr_1.39          pillar_1.7.0        boot_1.3-28        
## [70] reprex_2.0.1        glue_1.6.2          evaluate_0.15      
## [73] latticeExtra_0.6-29 data.table_1.14.2   modelr_0.1.8       
## [76] nloptr_2.0.1        png_0.1-7           vctrs_0.4.1        
## [79] tzdb_0.3.0          cellranger_1.1.0    gtable_0.3.0       
## [82] xfun_0.30           broom_0.8.0         cluster_2.1.3      
## [85] ellipsis_0.3.2