library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1.9000 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
## Loading required package: robustbase
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
patchwork
)
theme_set(theme_bw())
options(
digits = 3
)
#multithreading
#library(future)
#plan(multisession(workers = 8))
#GPT pro
make_mating_pool <- function(n, seed = 1,
names = c("man", "woman")) {
stopifnot(length(n) == 1, n > 1, is.finite(n))
stopifnot(length(names) == 2)
if (!is.null(seed)) set.seed(seed)
men <- as.numeric(scale(rnorm(n)))
women <- as.numeric(scale(rnorm(n)))
out <- data.frame(men, women)
colnames(out) <- names
out
}
#pairsing function
pair_assortative <- function(df, r,
man_col = names(df)[1],
woman_col = names(df)[2],
tol = 1e-3, maxit = 60,
seed = NULL) {
stopifnot(is.data.frame(df))
stopifnot(length(r) == 1, is.finite(r), r >= -1, r <= 1)
if (!is.null(seed)) set.seed(seed)
x <- df[[man_col]]
y <- df[[woman_col]]
n <- length(x)
stopifnot(length(y) == n, n > 1)
# Men order used to define who matches whom (rank order)
ox <- order(x)
corr_for_alpha <- function(alpha) {
ry <- rank(y, ties.method = "random")
rr <- rank(runif(n), ties.method = "random")
score <- alpha * ry + (1 - alpha) * rr
oy <- order(score)
if (r < 0) oy <- rev(oy)
cor(x[ox], y[oy])
}
target <- abs(r)
c0 <- abs(corr_for_alpha(0))
c1 <- abs(corr_for_alpha(1))
if (target <= c0 + tol) {
alpha_hat <- 0
} else if (target >= c1 - tol) {
alpha_hat <- 1
warning(sprintf(
"Requested |r|=%.3f exceeds/equals max achievable |r|=%.3f for this finite pool; using alpha=1.",
target, c1
), call. = FALSE)
} else {
f <- function(a) abs(corr_for_alpha(a)) - target
alpha_hat <- uniroot(f, interval = c(0, 1), tol = tol, maxiter = maxit)$root
}
# Final matching
ry <- rank(y, ties.method = "random")
rr <- rank(runif(n), ties.method = "random")
score <- alpha_hat * ry + (1 - alpha_hat) * rr
oy <- order(score)
if (r < 0) oy <- rev(oy)
# IMPORTANT: map paired women back to original men rows
new_y <- y
new_y[ox] <- y[oy]
out <- df
out[[woman_col]] <- new_y
attr(out, "alpha") <- alpha_hat
attr(out, "achieved_r") <- cor(out[[man_col]], out[[woman_col]])
attr(out, "woman_order") <- oy
attr(out, "men_order") <- ox
out
}
#compute metrics
pair_stats <- function(df,
man_col = names(df)[1],
woman_col = names(df)[2]) {
stopifnot(is.data.frame(df))
x <- df[[man_col]]
y <- df[[woman_col]]
stopifnot(length(x) == length(y), length(x) > 1)
data.frame(
r = cor(x, y, use = "complete.obs"),
mean_abs_diff = mean(abs(x - y), na.rm = TRUE)
)
}
#small sample to illstrate the principle
d1 = make_mating_pool(n=50, seed = 2)
d1_random = pair_assortative(d1, r = 0)
d1_random %>% describe2()
#plot the pairsings
p1 = d1_random %>%
mutate(
pair = 1:n()
) %>%
pivot_longer(-pair) %>%
ggplot(aes(name, value)) +
geom_point(aes(color = name)) +
geom_line(aes(group = pair), alpha = 0.2, linetype = 2) +
labs(
color = "Sex",
y = "Trait value (z)",
x = "Sex",
title = "Random mating",
subtitle = "r = 0.0"
)
#metrics
d1_random %>% pair_stats()
#match them up more realistically
d1_05 = pair_assortative(d1, r = 0.5)
p2 = d1_05 %>%
mutate(
pair = 1:n()
) %>%
pivot_longer(-pair) %>%
ggplot(aes(name, value)) +
geom_point(aes(color = name)) +
geom_line(aes(group = pair), alpha = 0.2, linetype = 2) +
labs(
color = "Sex",
y = "Trait value (z)",
x = "Sex",
title = "Realistic assortative mating",
subtitle = "r = 0.5"
)
#metrics
d1_05 %>% pair_stats()
#pair as perfect as possible
d1_10 = pair_assortative(d1, r = 1)
## Warning: Requested |r|=1.000 exceeds/equals max achievable |r|=0.985 for this
## finite pool; using alpha=1.
p3 = d1_10 %>%
mutate(
pair = 1:n()
) %>%
pivot_longer(-pair) %>%
ggplot(aes(name, value)) +
geom_point(aes(color = name)) +
geom_line(aes(group = pair), alpha = 0.2, linetype = 2) +
labs(
color = "Sex",
y = "Trait value (z)",
x = "Sex",
title = "Perfect assortative mating",
subtitle = "r = 1.0"
)
#metrics
d1_10 %>% pair_stats()
#combine plots
patchwork::wrap_plots(p1, p2, p3, nrow = 1, axes = "collect", guides = "collect")
GG_save("figs/pairings.png")
#larger sample size
d2 = make_mating_pool(n=1000, seed = 2)
d2_05 = pair_assortative(d2, r = 0.5)
d2_10 = pair_assortative(d2, r = 1)
## Warning: Requested |r|=1.000 exceeds/equals max achievable |r|=0.998 for this
## finite pool; using alpha=1.
d2_05 %>% pair_stats()
p5 = d2_05 %>%
mutate(
max_z = pmax(man, woman),
abs_d = abs(man - woman)
) %>%
GG_scatter("max_z", "abs_d") +
labs(
x = "Highest mate value for trait",
y = "Trait value absolute difference"
)
GG_save(plot = p5, "figs/abd_d_by_max_z.png")
## `geom_smooth()` using formula = 'y ~ x'
p4 = d2 %>%
mutate(
max_z = pmax(man, woman),
abs_d = abs(man - woman)
) %>%
GG_scatter("max_z", "abs_d") +
labs(
x = "Highest mate value for trait",
y = "Trait value absolute difference"
)
p6 = d2_10 %>%
mutate(
max_z = pmax(man, woman),
abs_d = abs(man - woman)
) %>%
GG_scatter("max_z", "abs_d") +
labs(
x = "Highest mate value for trait",
y = "Trait value absolute difference"
)
#combine plots
patchwork::wrap_plots(p4, p5, p6, nrow = 1, guides = "collect", axes = "collect")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/abd_d_by_max_z_3.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
d2_05 %>%
GG_scatter("man", "woman") +
labs(
title = "Pairs' trait values under assortative mating",
x = "Man's value",
y = "Woman's value"
)
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/xz_am.png")
## `geom_smooth()` using formula = 'y ~ x'
#versions
write_sessioninfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Brussels
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.3.2.9000 kirkegaard_2025-11-19 robustbase_0.99-6
## [4] psych_2.5.6 assertthat_0.2.1 weights_1.1.2
## [7] magrittr_2.0.4 lubridate_1.9.4 forcats_1.0.1
## [10] stringr_1.6.0 dplyr_1.1.4 purrr_1.2.0
## [13] readr_2.1.5 tidyr_1.3.1 tibble_3.3.0
## [16] ggplot2_4.0.1.9000 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 farver_2.1.2 S7_0.2.1 fastmap_1.2.0
## [5] digest_0.6.39 rpart_4.1.24 timechange_0.3.0 lifecycle_1.0.4
## [9] cluster_2.1.8.1 survival_3.8-3 gdata_3.0.1 compiler_4.5.2
## [13] rlang_1.1.6.9000 Hmisc_5.2-4 sass_0.4.10 tools_4.5.2
## [17] yaml_2.3.10 data.table_1.17.8 knitr_1.50 labeling_0.4.3
## [21] htmlwidgets_1.6.4 mnormt_2.1.1 RColorBrewer_1.1-3 withr_3.0.2
## [25] foreign_0.8-90 nnet_7.3-20 grid_4.5.2 jomo_2.7-6
## [29] colorspace_2.1-2 mice_3.18.0 scales_1.4.0 gtools_3.9.5
## [33] iterators_1.0.14 MASS_7.3-65 cli_3.6.5 rmarkdown_2.30
## [37] ragg_1.5.0 reformulas_0.4.1 generics_0.1.4 rstudioapi_0.17.1
## [41] tzdb_0.5.0 minqa_1.2.8 cachem_1.1.0 splines_4.5.2
## [45] parallel_4.5.2 base64enc_0.1-3 vctrs_0.6.5 boot_1.3-32
## [49] glmnet_4.1-10 Matrix_1.7-4 jsonlite_2.0.0 hms_1.1.3
## [53] mitml_0.4-5 Formula_1.2-5 htmlTable_2.4.3 systemfonts_1.3.1
## [57] foreach_1.5.2 jquerylib_0.1.4 glue_1.8.0 DEoptimR_1.1-4
## [61] nloptr_2.2.1 pan_1.9 codetools_0.2-20 stringi_1.8.7
## [65] gtable_0.3.6 shape_1.4.6.1 lme4_1.1-37 pillar_1.11.1
## [69] htmltools_0.5.8.1 R6_2.6.1 textshaping_1.0.4 Rdpack_2.6.4
## [73] evaluate_1.0.5 lattice_0.22-7 rbibutils_2.3 backports_1.5.0
## [77] broom_1.0.11 bslib_0.9.0 Rcpp_1.1.0 gridExtra_2.3
## [81] nlme_3.1-168 checkmate_2.3.3 mgcv_1.9-3 xfun_0.53
## [85] pkgconfig_2.0.3
#write data to file for reuse
# d %>% write_rds("data/data_for_reuse.rds")
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/XXX/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}