library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
MASS,
broom,
furrr,
future,
googlesheets4,
conflicted
)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: future
theme_set(theme_bw())
options(
digits = 3
)
n_samples = 200
plan(sequential)
plan(multisession)
plan()
## multisession:
## - args: function (..., workers = availableCores(), lazy = FALSE, rscript_libs = .libPaths(), envir = parent.frame())
## - tweaked: FALSE
## - call: plan(multisession)
conflicts_prefer(dplyr::select)
## [conflicted] Will prefer dplyr::select over any other package.
conflicts_prefer(dplyr::filter)
## [conflicted] Will prefer dplyr::filter over any other package.
conflicts_prefer(kirkegaard::`+`)
## [conflicted] Will prefer kirkegaard::`+` over any other package.
simulator = function(x1_beta = 0.5, x2_beta = 0.2, x1x2_beta = 0.1, x1x2_cor = 0, n_samples, skip_reg = F) {
# browser()
#make correlated IVs
x1x2 = MASS::mvrnorm(
n = n_samples,
mu = c(0, 0),
Sigma = matrix(c(1, x1x2_cor, x1x2_cor, 1), nrow = 2)
)
#make other variables
d1 = tibble(
x1 = x1x2[, 1],
x2 = x1x2[, 2],
y = x1*x1_beta + x2 * x2_beta + x1*x2*x1x2_beta + rnorm(n_samples),
y_z = standardize(y),
x1_y = x1*y_z,
x2_y = x2*y_z
)
#ols
ols = lm(y ~ x1*x2, data = d1) %>% summary() %>% tidy()
#cpem correlations
cpem1_r = d1 %$% cor.test(x2, x1_y)
cpem2_r = d1 %$% cor.test(x1, x2_y)
#save results
y = tibble(
#pars
x1_beta = x1_beta,
x2_beta = x2_beta,
x1x2_beta = x1x2_beta,
x1x2_cor = x1x2_cor,
n_samples = n_samples,
#results
ols_beta = ols$estimate[4],
ols_p = ols$p.value[4],
cpem1_r_beta = cpem1_r[["estimate"]],
cpem1_r_p = cpem1_r[["p.value"]],
cpem2_r_beta = cpem2_r[["estimate"]],
cpem2_r_p = cpem2_r[["p.value"]]
)
#these are the same as intercept free model
if (!skip_reg) {
cpem1 = d1 %$% lm(x2 ~ 0 + x1_y) %>% summary() %>% tidy()
cpem2 = d1 %$% lm(x1 ~ 0 + x2_y) %>% summary() %>% tidy()
cpem1_rev = d1 %$% lm(x1_y ~ 0 + x2) %>% summary() %>% tidy()
cpem2_rev = d1 %$% lm(x2_y ~ 0 + x1) %>% summary() %>% tidy()
y$cpem1_reg_beta = cpem1$estimate
y$cpem1_reg_p = cpem1$p.value
y$cpem2_reg_beta = cpem2$estimate
y$cpem2_reg_p = cpem2$p.value
y$cpem1_reg_rev_beta = cpem1_rev$estimate
y$cpem1_reg_rev_p = cpem1_rev$p.value
y$cpem2_reg_rev_beta = cpem2_rev$estimate
y$cpem2_reg_rev_p = cpem2_rev$p.value
}
y
}
#examples
set.seed(1)
simulator(n = 200)
simulator(n = 200)
simulator(n = 500)
simulator(n = 1000)
simulator(n = 2000)
simulator(n = 5000)
simulator(n = 10000)
set.seed(1)
simulator(n = 10e6) %>% select(matches("_beta"))
#simulation par grid
sim2_pars = expand_grid(
n = c(200, 500, 1000, 2000, 5000, 10000),
x1x2_cor = c(0, 0.2, 0.5, 0.8),
x1x2_beta = c(0, 0.1, 0.2, 0.3),
x1_beta = c(0.2, 0.5, 0.8),
x2_beta = c(-0.2, 0, 0.2),
run = seq(100)
)
set.seed(1)
results2 = future_map_dfr(1:nrow(sim2_pars), ~simulator(n = sim2_pars$n[.],
x1x2_cor = sim2_pars$x1x2_cor[.],
x1x2_beta = sim2_pars$x1x2_beta[.],
x1_beta = sim2_pars$x1_beta[.],
x2_beta = sim2_pars$x2_beta[.]
), .options = furrr_options(seed = TRUE))
#long form
results2_long = results2 %>%
pivot_longer(cols = c(ols_beta:cpem2_reg_rev_p)) %>%
mutate(
parameter = str_detect(name, "_beta") %>% if_else(true = "beta", false = "p"),
method = str_match(name, "(.+)_") %>% .[, 2]
)
#bias
#as function of sample size
results2_long %>%
filter(parameter == "beta") %>%
mutate(bias = value - x1x2_beta) %>%
GG_group_means(var = "bias", groupvar = "n_samples", subgroupvar = "method")
GG_save("figs/bias_function_of_n.png")
#as function of interaction size
results2_long %>%
filter(parameter == "beta") %>%
mutate(bias = value - x1x2_beta) %>%
GG_group_means(var = "bias", groupvar = "x1x2_beta", subgroupvar = "method") +
xlab("True interaction size")
GG_save("figs/bias_function_of_x1x2.png")
#as function of x1
results2_long %>%
filter(parameter == "beta") %>%
mutate(bias = value - x1x2_beta) %>%
GG_group_means(var = "bias", groupvar = "x1_beta", subgroupvar = "method") +
xlab("Linear effect of IV1 (x1)")
GG_save("figs/bias_function_of_x1.png")
#as function of x2
results2_long %>%
filter(parameter == "beta") %>%
mutate(bias = value - x1x2_beta) %>%
GG_group_means(var = "bias", groupvar = "x2_beta", subgroupvar = "method") +
xlab("Linear effect of IV2 (x2)")
GG_save("figs/bias_function_of_x2.png")
#power
results2_long %>%
filter(parameter == "p", x1x2_beta != 0) %>%
mutate(sig = value < 0.05) %>%
GG_group_means(var = "sig", groupvar = "n_samples", subgroupvar = "method") +
scale_y_continuous("Power at alpha = 0.05", labels = scales::percent)
## Proportion variable detected, using `prop.test()`
GG_save("figs/power_function_of_n.png")
#false positives
results2_long %>%
filter(parameter == "p", x1x2_beta == 0) %>%
mutate(sig = value < 0.05) %>%
GG_group_means(var = "sig", groupvar = "n_samples", subgroupvar = "method") +
scale_y_continuous("Power at alpha = 0.05", labels = scales::percent)
## Proportion variable detected, using `prop.test()`
GG_save("figs/fp_function_of_n.png")
results2_long %>%
filter(parameter == "p", x1x2_beta == 0) %>%
ggplot(aes(value, fill = method)) +
geom_histogram(position = "dodge", boundary = 0, bins = 25) +
scale_y_continuous("p value") +
geom_hline(yintercept = nrow(sim2_pars)/4/25, linetype = "dotted") +
xlab("p value")
GG_save("figs/p_null_hist.png")
gs4_deauth()
citations = read_sheet("https://docs.google.com/spreadsheets/d/1oL9cD7SJBK0jNEIGPRUIloHz38KCNZjR2qo44CRriEU/edit#gid=0")
## ✔ Reading from "Gorsuch 2005 CPEM".
## ✔ Range 'Sheet1'.
nrow(citations)
## [1] 43
sum(citations$Woodley)
## [1] 18
sum(citations$Figeruedo)
## [1] 16
sum(citations$Woodley | citations$Figeruedo)
## [1] 23
sum(citations$Woodley | citations$Figeruedo) / nrow(citations)
## [1] 0.535
#versions
write_sessioninfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] conflicted_1.2.0 googlesheets4_1.1.1 furrr_0.3.1
## [4] future_1.33.0 broom_1.0.5 MASS_7.3-60
## [7] kirkegaard_2023-08-04 psych_2.3.6 assertthat_0.2.1
## [10] weights_1.0.4 Hmisc_5.1-0 magrittr_2.0.3
## [13] lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0
## [16] dplyr_1.1.2 purrr_1.0.1 readr_2.1.4
## [19] tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.2
## [22] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] mnormt_2.1.1 gridExtra_2.3 rlang_1.1.1 compiler_4.3.1
## [5] gdata_2.19.0 systemfonts_1.0.4 vctrs_0.6.3 pkgconfig_2.0.3
## [9] shape_1.4.6 fastmap_1.1.1 backports_1.4.1 labeling_0.4.2
## [13] utf8_1.2.3 rmarkdown_2.23 tzdb_0.4.0 nloptr_2.0.3
## [17] ragg_1.2.5 xfun_0.39 glmnet_4.1-7 jomo_2.7-6
## [21] cachem_1.0.8 jsonlite_1.8.7 highr_0.10 pan_1.8
## [25] parallel_4.3.1 cluster_2.1.4 R6_2.5.1 bslib_0.5.0
## [29] stringi_1.7.12 parallelly_1.36.0 boot_1.3-28 rpart_4.1.19
## [33] jquerylib_0.1.4 cellranger_1.1.0 Rcpp_1.0.11 iterators_1.0.14
## [37] knitr_1.43 base64enc_0.1-3 Matrix_1.6-0 splines_4.3.1
## [41] nnet_7.3-19 timechange_0.2.0 tidyselect_1.2.0 rstudioapi_0.15.0
## [45] yaml_2.3.7 codetools_0.2-19 curl_5.0.1 listenv_0.9.0
## [49] lattice_0.21-8 plyr_1.8.8 withr_2.5.0 evaluate_0.21
## [53] foreign_0.8-82 survival_3.5-5 pillar_1.9.0 mice_3.16.0
## [57] checkmate_2.2.0 foreach_1.5.2 generics_0.1.3 hms_1.1.3
## [61] munsell_0.5.0 scales_1.2.1 minqa_1.2.5 gtools_3.9.4
## [65] globals_0.16.2 glue_1.6.2 tools_4.3.1 data.table_1.14.8
## [69] lme4_1.1-34 fs_1.6.2 grid_4.3.1 colorspace_2.1-0
## [73] nlme_3.1-162 htmlTable_2.4.1 googledrive_2.1.1 Formula_1.2-5
## [77] cli_3.6.1 textshaping_0.3.6 fansi_1.0.4 gargle_1.5.1
## [81] gtable_0.3.3 sass_0.4.6 digest_0.6.33 htmlwidgets_1.6.2
## [85] farver_2.1.1 memoise_2.0.1 htmltools_0.5.5 lifecycle_1.0.3
## [89] httr_1.4.6 mitml_0.4-5
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/XXX/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}