Init
Simulations
- Continuous variables
- Group differences
Meta

Init

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

n_per_group = 20000


theme_set(theme_bw())

Simulations

Continuous variables

set.seed(1)
d1 = MASS::mvrnorm(
  n = n_per_group,
  mu = rep(0, 2),
  Sigma = matrix(c(1, .5, .5, 1), nrow = 2)
) %>% set_colnames(c("Y", "X")) %>% as_tibble()

#add noise variables
for (noise_sd in seq(0.5, 4, by = 0.5)) {
  d1[[str_glue("X_plus_noise_sd_{noise_sd}")]] = d1$X + rnorm(n_per_group, mean = 0, sd = noise_sd)
}

#cors
d1_cors = wtd.cors(d1)[-1, 1]

d1 %>% 
  pivot_longer(cols = -Y) %>% 
  mutate(
    name = name %>% factor(levels = unique(name) %>% gtools::mixedsort())
  ) %>% 
  ggplot(aes(value, Y)) +
  geom_point(alpha = .10) +
  geom_smooth(method = "lm", se = F) +
  geom_text(data = tibble(
    value = -15,
    Y = 4,
    text = str_glue("r = {d1_cors %>% str_round(2)}"),
    name = names(d1_cors)
  ), mapping = aes(label = text)) +
  facet_wrap("name")

## `geom_smooth()` using formula = 'y ~ x'

GG_save("cors with noise.png")

## `geom_smooth()` using formula = 'y ~ x'

Group differences

set.seed(1)
d = bind_rows(
  tibble(
    group = "normies",
    IQ = rnorm(n_per_group, mean = 100, sd = 15)
  ),
  tibble(
    group = "smarties",
    IQ = rnorm(n_per_group, mean = 115, sd = 15)
  )
)

#add noise variants
for (error_sd in seq(5, 40, by =5)) {
  d[[str_glue("IQ_plus_error_sd_{error_sd}")]] = d$IQ + rnorm(2*n_per_group, mean = 0, sd = error_sd)
}

#cors
d[-1] %>% wtd.cors() %>% .[1, ]

##                  IQ  IQ_plus_error_sd_5 IQ_plus_error_sd_10 IQ_plus_error_sd_15 
##           1.0000000           0.9581904           0.8597598           0.7442668 
## IQ_plus_error_sd_20 IQ_plus_error_sd_25 IQ_plus_error_sd_30 IQ_plus_error_sd_35 
##           0.6434069           0.5612995           0.4939757           0.4297645 
## IQ_plus_error_sd_40 
##           0.3895300

#group means
d %>% 
  pivot_longer(-group) %>% 
  mutate(
    name = name %>% factor(levels = gtools::mixedsort(unique(name)))
  ) %>% 
  GG_group_means("value", groupvar = "name", subgroupvar = "group", type = "boxplot") +
  scale_y_continuous("IQ") +
  scale_x_discrete(guide = guide_axis(n.dodge = 2))

## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

d %>% 
  pivot_longer(-group) %>% 
  mutate(
    name = name %>% factor(levels = gtools::mixedsort(unique(name)))
  ) %>% 
  ggplot(aes(name, value, fill = group)) +
  geom_boxplot() +
  scale_y_continuous("IQ") +
  scale_x_discrete(guide = guide_axis(n.dodge = 2))

GG_save("group diffs IQ plus noise.png")

cohen.d(d %>% select(-group),
           group = d$group) %>% .$cohen.d %>% .[, 2]

##                  IQ  IQ_plus_error_sd_5 IQ_plus_error_sd_10 IQ_plus_error_sd_15 
##           1.0039964           0.9567370           0.8426912           0.7046468 
## IQ_plus_error_sd_20 IQ_plus_error_sd_25 IQ_plus_error_sd_30 IQ_plus_error_sd_35 
##           0.5983834           0.5076792           0.4720276           0.4020456 
## IQ_plus_error_sd_40 
##           0.3622504

tibble(
  variable = names(d[-1]),
  IQ_gap = describe2(d[-1], group = d$group) %>% {
    filter(., group == "smarties") %>% pull(mean) -
    filter(., group == "normies") %>% pull(mean)
  },
  total_IQ_SD = d[-1] %>% map_dbl(sd),
  IQ_SD_normies = d %>% filter(group == "normies") %>% select(-group) %>% map_dbl(sd),
  Cohen_d = cohen.d(d %>% select(-group),
           group = d$group) %>% .$cohen.d %>% .[, 2],
  r_IQ_variable = wtd.cors(d[-1])[, 1],
  Cohen_d_adjusted = map2_dbl(Cohen_d, r_IQ_variable^2, adj_d_reliability)
) %>% 
  df_round()

Meta

#versions
write_sessioninfo()

## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kirkegaard_2023-11-12 psych_2.3.6           assertthat_0.2.1     
##  [4] weights_1.0.4         Hmisc_5.1-0           magrittr_2.0.3       
##  [7] lubridate_1.9.2       forcats_1.0.0         stringr_1.5.0        
## [10] dplyr_1.1.2           purrr_1.0.1           readr_2.1.4          
## [13] tidyr_1.3.0           tibble_3.2.1          ggplot2_3.4.2        
## [16] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.0  farver_2.1.1      fastmap_1.1.1     digest_0.6.33    
##  [5] rpart_4.1.21      timechange_0.2.0  lifecycle_1.0.3   cluster_2.1.5    
##  [9] survival_3.5-7    gdata_2.19.0      compiler_4.3.2    rlang_1.1.1      
## [13] sass_0.4.6        tools_4.3.2       utf8_1.2.3        yaml_2.3.7       
## [17] data.table_1.14.8 knitr_1.43        labeling_0.4.2    htmlwidgets_1.6.2
## [21] mnormt_2.1.1      plyr_1.8.8        withr_2.5.0       foreign_0.8-86   
## [25] nnet_7.3-19       grid_4.3.2        fansi_1.0.4       jomo_2.7-6       
## [29] colorspace_2.1-0  mice_3.16.0       scales_1.2.1      gtools_3.9.4     
## [33] iterators_1.0.14  MASS_7.3-60       cli_3.6.1         rmarkdown_2.23   
## [37] ragg_1.2.5        generics_0.1.3    rstudioapi_0.15.0 tzdb_0.4.0       
## [41] minqa_1.2.5       cachem_1.0.8      splines_4.3.2     parallel_4.3.2   
## [45] base64enc_0.1-3   vctrs_0.6.3       boot_1.3-28       glmnet_4.1-7     
## [49] Matrix_1.6-0      jsonlite_1.8.7    hms_1.1.3         mitml_0.4-5      
## [53] Formula_1.2-5     htmlTable_2.4.1   systemfonts_1.0.4 foreach_1.5.2    
## [57] jquerylib_0.1.4   glue_1.6.2        nloptr_2.0.3      pan_1.8          
## [61] codetools_0.2-19  stringi_1.7.12    gtable_0.3.3      shape_1.4.6      
## [65] lme4_1.1-34       munsell_0.5.0     pillar_1.9.0      htmltools_0.5.5  
## [69] R6_2.5.1          textshaping_0.3.6 evaluate_0.21     lattice_0.22-5   
## [73] highr_0.10        backports_1.4.1   broom_1.0.5       bslib_0.5.0      
## [77] Rcpp_1.0.11       gridExtra_2.3     nlme_3.1-163      checkmate_2.2.0  
## [81] mgcv_1.9-0        xfun_0.39         pkgconfig_2.0.3

#write data to file for reuse
# d %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}

Measurement error and group differences

13 December, 2023

Init

Simulations

Continuous variables

Group differences

Meta