Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  MASS,
  broom,
  furrr,
  future,
  googlesheets4,
  conflicted
)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: future
theme_set(theme_bw())

options(
    digits = 3
)

n_samples = 200

plan(sequential)
plan(multisession)
plan()
## multisession:
## - args: function (..., workers = availableCores(), lazy = FALSE, rscript_libs = .libPaths(), envir = parent.frame())
## - tweaked: FALSE
## - call: plan(multisession)
conflicts_prefer(dplyr::select)
## [conflicted] Will prefer dplyr::select over any other package.
conflicts_prefer(dplyr::filter)
## [conflicted] Will prefer dplyr::filter over any other package.
conflicts_prefer(kirkegaard::`+`)
## [conflicted] Will prefer kirkegaard::`+` over any other package.

Analysis

Function

simulator = function(x1_beta = 0.5, x2_beta = 0.2, x1x2_beta = 0.1, x1x2_cor = 0, n_samples, skip_reg = F) {
  # browser()
  #make correlated IVs
  x1x2 = MASS::mvrnorm(
    n = n_samples,
    mu = c(0, 0),
    Sigma = matrix(c(1, x1x2_cor, x1x2_cor, 1), nrow = 2)
    )
  
  #make other variables
  d1 = tibble(
    x1 = x1x2[, 1],
    x2 = x1x2[, 2],
    y = x1*x1_beta + x2 * x2_beta + x1*x2*x1x2_beta + rnorm(n_samples),
    y_z = standardize(y),
    x1_y = x1*y_z,
    x2_y = x2*y_z
  )
  
  #ols
  ols = lm(y ~ x1*x2, data = d1) %>% summary() %>% tidy()
  
  #cpem correlations
  cpem1_r = d1 %$% cor.test(x2, x1_y)
  cpem2_r = d1 %$% cor.test(x1, x2_y)
  
  #save results
  y = tibble(
    #pars
    x1_beta = x1_beta,
    x2_beta = x2_beta,
    x1x2_beta = x1x2_beta,
    x1x2_cor = x1x2_cor,
    n_samples = n_samples,
    
    #results
    ols_beta = ols$estimate[4],
    ols_p = ols$p.value[4],
    cpem1_r_beta = cpem1_r[["estimate"]],
    cpem1_r_p = cpem1_r[["p.value"]],
    cpem2_r_beta = cpem2_r[["estimate"]],
    cpem2_r_p = cpem2_r[["p.value"]]
    

  )
  
  #these are the same as intercept free model
  if (!skip_reg) {
    cpem1 = d1 %$% lm(x2 ~ 0 + x1_y) %>% summary() %>% tidy()
    cpem2 = d1 %$% lm(x1 ~ 0 + x2_y) %>% summary() %>% tidy()
    cpem1_rev = d1 %$% lm(x1_y ~ 0 + x2) %>% summary() %>% tidy()
    cpem2_rev = d1 %$% lm(x2_y ~ 0 + x1) %>% summary() %>% tidy()
    
    y$cpem1_reg_beta = cpem1$estimate
    y$cpem1_reg_p = cpem1$p.value
    y$cpem2_reg_beta = cpem2$estimate
    y$cpem2_reg_p = cpem2$p.value
    
    y$cpem1_reg_rev_beta = cpem1_rev$estimate
    y$cpem1_reg_rev_p = cpem1_rev$p.value
    y$cpem2_reg_rev_beta = cpem2_rev$estimate
    y$cpem2_reg_rev_p = cpem2_rev$p.value
  }
  
  y
}

#examples
set.seed(1)
simulator(n = 200)
simulator(n = 200)
simulator(n = 500)
simulator(n = 1000)
simulator(n = 2000)
simulator(n = 5000)
simulator(n = 10000)
set.seed(1)
simulator(n = 10e6) %>% select(matches("_beta"))

Simulations

#simulation par grid
sim2_pars = expand_grid(
  n = c(200, 500, 1000, 2000, 5000, 10000),
  x1x2_cor = c(0, 0.2, 0.5, 0.8),
  x1x2_beta = c(0, 0.1, 0.2, 0.3),
  x1_beta = c(0.2, 0.5, 0.8),
  x2_beta = c(-0.2, 0, 0.2),
  
  run = seq(100)
)

set.seed(1)
results2 = future_map_dfr(1:nrow(sim2_pars), ~simulator(n = sim2_pars$n[.], 
                                                        x1x2_cor = sim2_pars$x1x2_cor[.], 
                                                        x1x2_beta = sim2_pars$x1x2_beta[.],
                                                        x1_beta = sim2_pars$x1_beta[.],
                                                        x2_beta = sim2_pars$x2_beta[.]
                                                        ), .options = furrr_options(seed = TRUE))

#long form
results2_long = results2 %>% 
  pivot_longer(cols = c(ols_beta:cpem2_reg_rev_p)) %>% 
  mutate(
    parameter = str_detect(name, "_beta") %>% if_else(true = "beta", false = "p"),
    method = str_match(name, "(.+)_") %>% .[, 2]
  )

#bias
#as function of sample size
results2_long %>% 
  filter(parameter == "beta") %>% 
  mutate(bias = value - x1x2_beta) %>% 
  GG_group_means(var = "bias", groupvar = "n_samples", subgroupvar = "method")

GG_save("figs/bias_function_of_n.png")

#as function of interaction size
results2_long %>% 
  filter(parameter == "beta") %>% 
  mutate(bias = value - x1x2_beta) %>% 
  GG_group_means(var = "bias", groupvar = "x1x2_beta", subgroupvar = "method") +
  xlab("True interaction size")

GG_save("figs/bias_function_of_x1x2.png")

#as function of x1
results2_long %>% 
  filter(parameter == "beta") %>% 
  mutate(bias = value - x1x2_beta) %>% 
  GG_group_means(var = "bias", groupvar = "x1_beta", subgroupvar = "method") +
  xlab("Linear effect of IV1 (x1)")

GG_save("figs/bias_function_of_x1.png")

#as function of x2
results2_long %>% 
  filter(parameter == "beta") %>% 
  mutate(bias = value - x1x2_beta) %>% 
  GG_group_means(var = "bias", groupvar = "x2_beta", subgroupvar = "method") +
  xlab("Linear effect of IV2 (x2)")

GG_save("figs/bias_function_of_x2.png")

#power
results2_long %>% 
  filter(parameter == "p", x1x2_beta != 0) %>% 
  mutate(sig = value < 0.05) %>% 
  GG_group_means(var = "sig", groupvar = "n_samples", subgroupvar = "method") +
  scale_y_continuous("Power at alpha = 0.05", labels = scales::percent)
## Proportion variable detected, using `prop.test()`

GG_save("figs/power_function_of_n.png")

#false positives
results2_long %>% 
  filter(parameter == "p", x1x2_beta == 0) %>% 
  mutate(sig = value < 0.05) %>% 
  GG_group_means(var = "sig", groupvar = "n_samples", subgroupvar = "method") +
  scale_y_continuous("Power at alpha = 0.05", labels = scales::percent)
## Proportion variable detected, using `prop.test()`

GG_save("figs/fp_function_of_n.png")

results2_long %>% 
  filter(parameter == "p", x1x2_beta == 0) %>% 
  ggplot(aes(value, fill = method)) +
  geom_histogram(position = "dodge", boundary = 0, bins = 25) +
  scale_y_continuous("p value") +
  geom_hline(yintercept = nrow(sim2_pars)/4/25, linetype = "dotted") +
  xlab("p value")

GG_save("figs/p_null_hist.png")

Citations

gs4_deauth()
citations = read_sheet("https://docs.google.com/spreadsheets/d/1oL9cD7SJBK0jNEIGPRUIloHz38KCNZjR2qo44CRriEU/edit#gid=0")
## ✔ Reading from "Gorsuch 2005 CPEM".
## ✔ Range 'Sheet1'.
nrow(citations)
## [1] 43
sum(citations$Woodley)
## [1] 18
sum(citations$Figeruedo)
## [1] 16
sum(citations$Woodley | citations$Figeruedo)
## [1] 23
sum(citations$Woodley | citations$Figeruedo) / nrow(citations)
## [1] 0.535

Meta

#versions
write_sessioninfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] conflicted_1.2.0      googlesheets4_1.1.1   furrr_0.3.1          
##  [4] future_1.33.0         broom_1.0.5           MASS_7.3-60          
##  [7] kirkegaard_2023-08-04 psych_2.3.6           assertthat_0.2.1     
## [10] weights_1.0.4         Hmisc_5.1-0           magrittr_2.0.3       
## [13] lubridate_1.9.2       forcats_1.0.0         stringr_1.5.0        
## [16] dplyr_1.1.2           purrr_1.0.1           readr_2.1.4          
## [19] tidyr_1.3.0           tibble_3.2.1          ggplot2_3.4.2        
## [22] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] mnormt_2.1.1      gridExtra_2.3     rlang_1.1.1       compiler_4.3.1   
##  [5] gdata_2.19.0      systemfonts_1.0.4 vctrs_0.6.3       pkgconfig_2.0.3  
##  [9] shape_1.4.6       fastmap_1.1.1     backports_1.4.1   labeling_0.4.2   
## [13] utf8_1.2.3        rmarkdown_2.23    tzdb_0.4.0        nloptr_2.0.3     
## [17] ragg_1.2.5        xfun_0.39         glmnet_4.1-7      jomo_2.7-6       
## [21] cachem_1.0.8      jsonlite_1.8.7    highr_0.10        pan_1.8          
## [25] parallel_4.3.1    cluster_2.1.4     R6_2.5.1          bslib_0.5.0      
## [29] stringi_1.7.12    parallelly_1.36.0 boot_1.3-28       rpart_4.1.19     
## [33] jquerylib_0.1.4   cellranger_1.1.0  Rcpp_1.0.11       iterators_1.0.14 
## [37] knitr_1.43        base64enc_0.1-3   Matrix_1.6-0      splines_4.3.1    
## [41] nnet_7.3-19       timechange_0.2.0  tidyselect_1.2.0  rstudioapi_0.15.0
## [45] yaml_2.3.7        codetools_0.2-19  curl_5.0.1        listenv_0.9.0    
## [49] lattice_0.21-8    plyr_1.8.8        withr_2.5.0       evaluate_0.21    
## [53] foreign_0.8-82    survival_3.5-5    pillar_1.9.0      mice_3.16.0      
## [57] checkmate_2.2.0   foreach_1.5.2     generics_0.1.3    hms_1.1.3        
## [61] munsell_0.5.0     scales_1.2.1      minqa_1.2.5       gtools_3.9.4     
## [65] globals_0.16.2    glue_1.6.2        tools_4.3.1       data.table_1.14.8
## [69] lme4_1.1-34       fs_1.6.2          grid_4.3.1        colorspace_2.1-0 
## [73] nlme_3.1-162      htmlTable_2.4.1   googledrive_2.1.1 Formula_1.2-5    
## [77] cli_3.6.1         textshaping_0.3.6 fansi_1.0.4       gargle_1.5.1     
## [81] gtable_0.3.3      sass_0.4.6        digest_0.6.33     htmlwidgets_1.6.2
## [85] farver_2.1.1      memoise_2.0.1     htmltools_0.5.5   lifecycle_1.0.3  
## [89] httr_1.4.6        mitml_0.4-5
#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}