library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
n_per_group = 20000
theme_set(theme_bw())
set.seed(1)
d1 = MASS::mvrnorm(
n = n_per_group,
mu = rep(0, 2),
Sigma = matrix(c(1, .5, .5, 1), nrow = 2)
) %>% set_colnames(c("Y", "X")) %>% as_tibble()
#add noise variables
for (noise_sd in seq(0.5, 4, by = 0.5)) {
d1[[str_glue("X_plus_noise_sd_{noise_sd}")]] = d1$X + rnorm(n_per_group, mean = 0, sd = noise_sd)
}
#cors
d1_cors = wtd.cors(d1)[-1, 1]
d1 %>%
pivot_longer(cols = -Y) %>%
mutate(
name = name %>% factor(levels = unique(name) %>% gtools::mixedsort())
) %>%
ggplot(aes(value, Y)) +
geom_point(alpha = .10) +
geom_smooth(method = "lm", se = F) +
geom_text(data = tibble(
value = -15,
Y = 4,
text = str_glue("r = {d1_cors %>% str_round(2)}"),
name = names(d1_cors)
), mapping = aes(label = text)) +
facet_wrap("name")
## `geom_smooth()` using formula = 'y ~ x'
GG_save("cors with noise.png")
## `geom_smooth()` using formula = 'y ~ x'
set.seed(1)
d = bind_rows(
tibble(
group = "normies",
IQ = rnorm(n_per_group, mean = 100, sd = 15)
),
tibble(
group = "smarties",
IQ = rnorm(n_per_group, mean = 115, sd = 15)
)
)
#add noise variants
for (error_sd in seq(5, 40, by =5)) {
d[[str_glue("IQ_plus_error_sd_{error_sd}")]] = d$IQ + rnorm(2*n_per_group, mean = 0, sd = error_sd)
}
#cors
d[-1] %>% wtd.cors() %>% .[1, ]
## IQ IQ_plus_error_sd_5 IQ_plus_error_sd_10 IQ_plus_error_sd_15
## 1.0000000 0.9581904 0.8597598 0.7442668
## IQ_plus_error_sd_20 IQ_plus_error_sd_25 IQ_plus_error_sd_30 IQ_plus_error_sd_35
## 0.6434069 0.5612995 0.4939757 0.4297645
## IQ_plus_error_sd_40
## 0.3895300
#group means
d %>%
pivot_longer(-group) %>%
mutate(
name = name %>% factor(levels = gtools::mixedsort(unique(name)))
) %>%
GG_group_means("value", groupvar = "name", subgroupvar = "group", type = "boxplot") +
scale_y_continuous("IQ") +
scale_x_discrete(guide = guide_axis(n.dodge = 2))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
d %>%
pivot_longer(-group) %>%
mutate(
name = name %>% factor(levels = gtools::mixedsort(unique(name)))
) %>%
ggplot(aes(name, value, fill = group)) +
geom_boxplot() +
scale_y_continuous("IQ") +
scale_x_discrete(guide = guide_axis(n.dodge = 2))
GG_save("group diffs IQ plus noise.png")
cohen.d(d %>% select(-group),
group = d$group) %>% .$cohen.d %>% .[, 2]
## IQ IQ_plus_error_sd_5 IQ_plus_error_sd_10 IQ_plus_error_sd_15
## 1.0039964 0.9567370 0.8426912 0.7046468
## IQ_plus_error_sd_20 IQ_plus_error_sd_25 IQ_plus_error_sd_30 IQ_plus_error_sd_35
## 0.5983834 0.5076792 0.4720276 0.4020456
## IQ_plus_error_sd_40
## 0.3622504
tibble(
variable = names(d[-1]),
IQ_gap = describe2(d[-1], group = d$group) %>% {
filter(., group == "smarties") %>% pull(mean) -
filter(., group == "normies") %>% pull(mean)
},
total_IQ_SD = d[-1] %>% map_dbl(sd),
IQ_SD_normies = d %>% filter(group == "normies") %>% select(-group) %>% map_dbl(sd),
Cohen_d = cohen.d(d %>% select(-group),
group = d$group) %>% .$cohen.d %>% .[, 2],
r_IQ_variable = wtd.cors(d[-1])[, 1],
Cohen_d_adjusted = map2_dbl(Cohen_d, r_IQ_variable^2, adj_d_reliability)
) %>%
df_round()
#versions
write_sessioninfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kirkegaard_2023-11-12 psych_2.3.6 assertthat_0.2.1
## [4] weights_1.0.4 Hmisc_5.1-0 magrittr_2.0.3
## [7] lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0
## [10] dplyr_1.1.2 purrr_1.0.1 readr_2.1.4
## [13] tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.2
## [16] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.0 farver_2.1.1 fastmap_1.1.1 digest_0.6.33
## [5] rpart_4.1.21 timechange_0.2.0 lifecycle_1.0.3 cluster_2.1.5
## [9] survival_3.5-7 gdata_2.19.0 compiler_4.3.2 rlang_1.1.1
## [13] sass_0.4.6 tools_4.3.2 utf8_1.2.3 yaml_2.3.7
## [17] data.table_1.14.8 knitr_1.43 labeling_0.4.2 htmlwidgets_1.6.2
## [21] mnormt_2.1.1 plyr_1.8.8 withr_2.5.0 foreign_0.8-86
## [25] nnet_7.3-19 grid_4.3.2 fansi_1.0.4 jomo_2.7-6
## [29] colorspace_2.1-0 mice_3.16.0 scales_1.2.1 gtools_3.9.4
## [33] iterators_1.0.14 MASS_7.3-60 cli_3.6.1 rmarkdown_2.23
## [37] ragg_1.2.5 generics_0.1.3 rstudioapi_0.15.0 tzdb_0.4.0
## [41] minqa_1.2.5 cachem_1.0.8 splines_4.3.2 parallel_4.3.2
## [45] base64enc_0.1-3 vctrs_0.6.3 boot_1.3-28 glmnet_4.1-7
## [49] Matrix_1.6-0 jsonlite_1.8.7 hms_1.1.3 mitml_0.4-5
## [53] Formula_1.2-5 htmlTable_2.4.1 systemfonts_1.0.4 foreach_1.5.2
## [57] jquerylib_0.1.4 glue_1.6.2 nloptr_2.0.3 pan_1.8
## [61] codetools_0.2-19 stringi_1.7.12 gtable_0.3.3 shape_1.4.6
## [65] lme4_1.1-34 munsell_0.5.0 pillar_1.9.0 htmltools_0.5.5
## [69] R6_2.5.1 textshaping_0.3.6 evaluate_0.21 lattice_0.22-5
## [73] highr_0.10 backports_1.4.1 broom_1.0.5 bslib_0.5.0
## [77] Rcpp_1.0.11 gridExtra_2.3 nlme_3.1-163 checkmate_2.2.0
## [81] mgcv_1.9-0 xfun_0.39 pkgconfig_2.0.3
#write data to file for reuse
# d %>% write_rds("data/data_for_reuse.rds")
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/XXX/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}