suppressPackageStartupMessages(library(magrittr))
suppressPackageStartupMessages(library(tidyverse))
Generate data
n <- 5 # observations
d <- 100 # dimensions
eps <- .5 # standard deviation
n_draws <- 10 # number of draws i.e. samples from from d-dim gaussian
n_biases <- 10 # number of shifts i.e. vectors to shift each sample
n_scalings <- 20 # number of scalings i.e. scaling factors for each shift
scaling_max <- 3
scalings <- scaling_max * seq(n_scalings) / n_scalings # scaling factors for each shift
Function to compute vector of cross-correlation values
corvec <- function(m) {cm <- cor(m); cm[upper.tri(cm)]}
Generate control distribution
a <- matrix(rnorm(n*d*10), n*10, d) * eps # sample from d-dim Gaussian
amean <- colMeans(a)
asd <- apply(a, 2, sd)
Generate treatment distributions and compute metrics
- Generate
n_draws x n_biases x n_scalings sets of samples from a d-dim Gaussian, with n samples per set.
- Compute median replicate correlation for each set
- Compute distance from control for each set - this is just the scaling value
df <-
map_df(1:n_draws,
function(draw) {
b <-
scale(matrix(rnorm(n * d), n, d)) # sample from d-dim gaussion
map_df(1:n_biases,
function(bias) {
bshift <- rnorm(d) # create shift vector
bshift <- d * bshift / norm(as.matrix(bshift))
map_df(scalings,
function(bshift_scale) {
# scale the shift vector and then add to sample
bi <- scale(b, center = -bshift * bshift_scale, scale = FALSE)
# normalize the samples wrt control
bn <- scale(bi, center = amean, scale = asd)
data.frame(draw,
bias,
bshift_scale, # distance from control
medrepcor = median(corvec(t(bn))) # median replicate correlation
)
})
})
})
Plot data
Plot accross all shifts and draws
ggplot(df, aes(bshift_scale, medrepcor, group = interaction(draw, bias))) +
geom_line(alpha = 0.1) +
xlab("Distance from control") +
ylab("Median replicate correlation")

Plot accross all shifts grouped by draws
df %>%
filter(draw %in% sample(1:n_draws, min(12, n_draws))) %>%
ggplot(aes(bshift_scale, medrepcor, group = bias)) +
geom_line(alpha = 0.1) +
xlab("Distance from control") +
ylab("Median replicate correlation") +
facet_wrap(~draw, labeller = "label_both")

Plot accross all draws grouped by shifts
df %>%
filter(bias %in% sample(1:n_biases, min(12, n_biases))) %>%
ggplot(aes(bshift_scale, medrepcor, group = draw)) +
geom_line(alpha = 0.1) +
xlab("Distance from control") +
ylab("Median replicate correlation") +
facet_wrap(~bias, labeller = "label_both")

Generate data for 2 replicates and scale the shift
d <- 10 # dimensions
b0 <- matrix(rnorm(2 * d), 2, d)
bshift <- rnorm(d)
bshift <- bshift / norm(as.matrix(bshift))
norm(as.matrix(bshift))
[1] 1
bd <- map_df(seq(20),
function(bshift_scale) {
bi <- sweep(b0, 2, bshift * bshift_scale, "+")
bind_rows(
as.data.frame(t(b0)) %>% mutate(type = "orig"),
as.data.frame(t(bi)) %>% mutate(type = "shifted")
) %>%
mutate(bshift_scale = bshift_scale)
})
Plot data
ggplot(bd,
aes(V1, V2, color = type)) +
geom_point() + facet_wrap(~bshift_scale, labeller = "label_both")

bd %>%
filter(type == "shifted") %>%
group_by(bshift_scale) %>%
summarise(corval = cor(V1, V2), .groups = "keep") %>%
ggplot(aes(bshift_scale, corval)) + geom_line() +
xlab("Distance from control") +
ylab("Median replicate correlation")

unit_vec <- function(vec){
return(vec/(sqrt(sum(vec**2,na.rm=TRUE))))
}
d <- 1000
x <- rnorm(d)
y <- rnorm(d)
df <-
map_df(1:1000,
function(i) {
b <- unit_vec(rnorm(d)) * sqrt(d)
data.frame(corval_xy = cor(x+b, y+b), corval_bmeanxy = cor(b, (x+y)/2))
}
)
ggplot(df, aes(corval_xy, corval_bmeanxy)) + geom_point()

LS0tCnRpdGxlOiAiUGVhcnNvbiBzaW1pbGFyaXR5IGFuZCBkaXN0YW5jZSBmcm9tIGNvbnRyb2wiCm91dHB1dDogCiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICB0b2NfZGVwdGg6IDMKICAgIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogICAgdGhlbWU6IGx1bWVuCi0tLQoKCmBgYHtyfQpzdXBwcmVzc1BhY2thZ2VTdGFydHVwTWVzc2FnZXMobGlicmFyeShtYWdyaXR0cikpCnN1cHByZXNzUGFja2FnZVN0YXJ0dXBNZXNzYWdlcyhsaWJyYXJ5KHRpZHl2ZXJzZSkpCmBgYAoKIyBHZW5lcmF0ZSBkYXRhCgpgYGB7cn0KbiA8LSA1ICMgb2JzZXJ2YXRpb25zCmQgPC0gMTAwICMgZGltZW5zaW9ucwplcHMgPC0gLjUgIyBzdGFuZGFyZCBkZXZpYXRpb24KYGBgCgpgYGB7cn0Kbl9kcmF3cyA8LSAxMCAjIG51bWJlciBvZiBkcmF3cyBpLmUuIHNhbXBsZXMgZnJvbSBmcm9tIGQtZGltIGdhdXNzaWFuCm5fYmlhc2VzIDwtIDEwICMgbnVtYmVyIG9mIHNoaWZ0cyBpLmUuIHZlY3RvcnMgdG8gc2hpZnQgZWFjaCBzYW1wbGUKbl9zY2FsaW5ncyA8LSAyMCAjIG51bWJlciBvZiBzY2FsaW5ncyBpLmUuIHNjYWxpbmcgZmFjdG9ycyBmb3IgZWFjaCBzaGlmdApzY2FsaW5nX21heCA8LSAzCnNjYWxpbmdzIDwtIHNjYWxpbmdfbWF4ICogc2VxKG5fc2NhbGluZ3MpIC8gbl9zY2FsaW5ncyAjIHNjYWxpbmcgZmFjdG9ycyBmb3IgZWFjaCBzaGlmdApgYGAKCkZ1bmN0aW9uIHRvIGNvbXB1dGUgdmVjdG9yIG9mIGNyb3NzLWNvcnJlbGF0aW9uIHZhbHVlcwoKYGBge3J9CmNvcnZlYyA8LSBmdW5jdGlvbihtKSB7Y20gPC0gY29yKG0pOyBjbVt1cHBlci50cmkoY20pXX0KYGBgCgojIyBHZW5lcmF0ZSBjb250cm9sIGRpc3RyaWJ1dGlvbgoKYGBge3J9CmEgPC0gbWF0cml4KHJub3JtKG4qZCoxMCksIG4qMTAsIGQpICogZXBzICMgc2FtcGxlIGZyb20gZC1kaW0gR2F1c3NpYW4KCmFtZWFuIDwtIGNvbE1lYW5zKGEpCgphc2QgPC0gYXBwbHkoYSwgMiwgc2QpCgpgYGAKCiMjIEdlbmVyYXRlIHRyZWF0bWVudCBkaXN0cmlidXRpb25zIGFuZCBjb21wdXRlIG1ldHJpY3MKCi0gR2VuZXJhdGUgYG5fZHJhd3NgIHggYG5fYmlhc2VzYCB4IGBuX3NjYWxpbmdzYCBzZXRzIG9mIHNhbXBsZXMgZnJvbSBhIGQtZGltIEdhdXNzaWFuLCB3aXRoIGBuYCBzYW1wbGVzIHBlciBzZXQuCi0gQ29tcHV0ZSBtZWRpYW4gcmVwbGljYXRlIGNvcnJlbGF0aW9uIGZvciBlYWNoIHNldAotIENvbXB1dGUgZGlzdGFuY2UgZnJvbSBjb250cm9sIGZvciBlYWNoIHNldCAtIHRoaXMgaXMganVzdCB0aGUgc2NhbGluZyB2YWx1ZQoKYGBge3J9CgpkZiA8LQogIG1hcF9kZigxOm5fZHJhd3MsCiAgICAgICAgIGZ1bmN0aW9uKGRyYXcpIHsKICAgICAgICAgICBiIDwtCiAgICAgICAgICAgICBzY2FsZShtYXRyaXgocm5vcm0obiAqIGQpLCBuLCBkKSkgIyBzYW1wbGUgZnJvbSBkLWRpbSBnYXVzc2lvbgoKICAgICAgICAgICBtYXBfZGYoMTpuX2JpYXNlcywKICAgICAgICAgICAgICAgICAgZnVuY3Rpb24oYmlhcykgewogICAgICAgICAgICAgICAgICAgIGJzaGlmdCA8LSBybm9ybShkKSAjIGNyZWF0ZSBzaGlmdCB2ZWN0b3IKCiAgICAgICAgICAgICAgICAgICAgYnNoaWZ0IDwtIGQgKiBic2hpZnQgLyBub3JtKGFzLm1hdHJpeChic2hpZnQpKQogICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgIG1hcF9kZihzY2FsaW5ncywKICAgICAgICAgICAgICAgICAgICAgICAgICAgZnVuY3Rpb24oYnNoaWZ0X3NjYWxlKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyBzY2FsZSB0aGUgc2hpZnQgdmVjdG9yIGFuZCB0aGVuIGFkZCB0byBzYW1wbGUKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBiaSA8LSBzY2FsZShiLCBjZW50ZXIgPSAtYnNoaWZ0ICogYnNoaWZ0X3NjYWxlLCBzY2FsZSA9IEZBTFNFKSAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIG5vcm1hbGl6ZSB0aGUgc2FtcGxlcyB3cnQgY29udHJvbAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIGJuIDwtIHNjYWxlKGJpLCBjZW50ZXIgPSBhbWVhbiwgc2NhbGUgPSBhc2QpCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZGF0YS5mcmFtZShkcmF3LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYmlhcywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGJzaGlmdF9zY2FsZSwgIyBkaXN0YW5jZSBmcm9tIGNvbnRyb2wKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lZHJlcGNvciA9IG1lZGlhbihjb3J2ZWModChibikpKSAjIG1lZGlhbiByZXBsaWNhdGUgY29ycmVsYXRpb24KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICkKICAgICAgICAgICAgICAgICAgICAgICAgICAgfSkKICAgICAgICAgICAgICAgICAgfSkKICAgICAgICAgfSkKYGBgCgojIFBsb3QgZGF0YQoKIyMgUGxvdCBhY2Nyb3NzIGFsbCBzaGlmdHMgYW5kIGRyYXdzCgpgYGB7cn0KZ2dwbG90KGRmLCBhZXMoYnNoaWZ0X3NjYWxlLCBtZWRyZXBjb3IsIGdyb3VwID0gaW50ZXJhY3Rpb24oZHJhdywgYmlhcykpKSArIAogIGdlb21fbGluZShhbHBoYSA9IDAuMSkgKyAKICB4bGFiKCJEaXN0YW5jZSBmcm9tIGNvbnRyb2wiKSArIAogIHlsYWIoIk1lZGlhbiByZXBsaWNhdGUgY29ycmVsYXRpb24iKQpgYGAKIyMgUGxvdCBhY2Nyb3NzIGFsbCBzaGlmdHMgZ3JvdXBlZCBieSBkcmF3cwoKYGBge3J9CmRmICU+JQogIGZpbHRlcihkcmF3ICVpbiUgc2FtcGxlKDE6bl9kcmF3cywgbWluKDEyLCBuX2RyYXdzKSkpICU+JQogIGdncGxvdChhZXMoYnNoaWZ0X3NjYWxlLCBtZWRyZXBjb3IsIGdyb3VwID0gYmlhcykpICsgCiAgZ2VvbV9saW5lKGFscGhhID0gMC4xKSArIAogIHhsYWIoIkRpc3RhbmNlIGZyb20gY29udHJvbCIpICsgCiAgeWxhYigiTWVkaWFuIHJlcGxpY2F0ZSBjb3JyZWxhdGlvbiIpICsgCiAgZmFjZXRfd3JhcCh+ZHJhdywgbGFiZWxsZXIgPSAibGFiZWxfYm90aCIpCmBgYAojIyBQbG90IGFjY3Jvc3MgYWxsIGRyYXdzIGdyb3VwZWQgYnkgc2hpZnRzCgpgYGB7cn0KZGYgJT4lCiAgZmlsdGVyKGJpYXMgJWluJSBzYW1wbGUoMTpuX2JpYXNlcywgbWluKDEyLCBuX2JpYXNlcykpKSAlPiUKICBnZ3Bsb3QoYWVzKGJzaGlmdF9zY2FsZSwgbWVkcmVwY29yLCBncm91cCA9IGRyYXcpKSArIAogIGdlb21fbGluZShhbHBoYSA9IDAuMSkgKyAKICB4bGFiKCJEaXN0YW5jZSBmcm9tIGNvbnRyb2wiKSArIAogIHlsYWIoIk1lZGlhbiByZXBsaWNhdGUgY29ycmVsYXRpb24iKSArIAogIGZhY2V0X3dyYXAofmJpYXMsIGxhYmVsbGVyID0gImxhYmVsX2JvdGgiKQpgYGAKCiMgR2VuZXJhdGUgZGF0YSBmb3IgMiByZXBsaWNhdGVzIGFuZCBzY2FsZSB0aGUgc2hpZnQKCmBgYHtyfQpkIDwtIDEwICMgZGltZW5zaW9ucwoKYjAgPC0gbWF0cml4KHJub3JtKDIgKiBkKSwgMiwgZCkgCgpic2hpZnQgPC0gcm5vcm0oZCkKCmJzaGlmdCA8LSBic2hpZnQgLyBub3JtKGFzLm1hdHJpeChic2hpZnQpKQoKbm9ybShhcy5tYXRyaXgoYnNoaWZ0KSkKYGBgCgoKYGBge3J9CmJkIDwtIG1hcF9kZihzZXEoMjApLAogICAgICAgICAgICAgZnVuY3Rpb24oYnNoaWZ0X3NjYWxlKSB7CiAgICAgICAgICAgICAgIGJpIDwtIHN3ZWVwKGIwLCAyLCBic2hpZnQgKiBic2hpZnRfc2NhbGUsICIrIikKICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgIGJpbmRfcm93cygKICAgICAgICAgICAgICAgICBhcy5kYXRhLmZyYW1lKHQoYjApKSAlPiUgbXV0YXRlKHR5cGUgPSAib3JpZyIpLAogICAgICAgICAgICAgICAgIGFzLmRhdGEuZnJhbWUodChiaSkpICU+JSBtdXRhdGUodHlwZSA9ICJzaGlmdGVkIikKICAgICAgICAgICAgICAgKSAlPiUKICAgICAgICAgICAgICAgICBtdXRhdGUoYnNoaWZ0X3NjYWxlID0gYnNoaWZ0X3NjYWxlKQogICAgICAgICAgICAgfSkKCmBgYAoKIyBQbG90IGRhdGEKCmBgYHtyfQpnZ3Bsb3QoYmQsIAogICAgICAgYWVzKFYxLCBWMiwgY29sb3IgPSB0eXBlKSkgKyAKICBnZW9tX3BvaW50KCkgKyBmYWNldF93cmFwKH5ic2hpZnRfc2NhbGUsIGxhYmVsbGVyID0gImxhYmVsX2JvdGgiKQpgYGAKCmBgYHtyfQpiZCAlPiUgCiAgZmlsdGVyKHR5cGUgPT0gInNoaWZ0ZWQiKSAlPiUKICBncm91cF9ieShic2hpZnRfc2NhbGUpICU+JSAKICBzdW1tYXJpc2UoY29ydmFsID0gY29yKFYxLCBWMiksIC5ncm91cHMgPSAia2VlcCIpICU+JQogIGdncGxvdChhZXMoYnNoaWZ0X3NjYWxlLCBjb3J2YWwpKSArIGdlb21fbGluZSgpICsKICB4bGFiKCJEaXN0YW5jZSBmcm9tIGNvbnRyb2wiKSArIAogIHlsYWIoIk1lZGlhbiByZXBsaWNhdGUgY29ycmVsYXRpb24iKQoKYGBgCmBgYHtyfQp1bml0X3ZlYyA8LSBmdW5jdGlvbih2ZWMpewogICByZXR1cm4odmVjLyhzcXJ0KHN1bSh2ZWMqKjIsbmEucm09VFJVRSkpKSkKfQoKZCA8LSAxMDAwCnggPC0gcm5vcm0oZCkKeSA8LSBybm9ybShkKQoKZGYgPC0gCm1hcF9kZigxOjEwMDAsIAogICAgICAgZnVuY3Rpb24oaSkgewogICAgICAgICBiIDwtIHVuaXRfdmVjKHJub3JtKGQpKSAqIHNxcnQoZCkKICAgICAgICAgZGF0YS5mcmFtZShjb3J2YWxfeHkgPSBjb3IoeCtiLCB5K2IpLCBjb3J2YWxfYm1lYW54eSA9IGNvcihiLCAoeCt5KS8yKSkKICAgICAgIH0KICAgICAgICkKCmdncGxvdChkZiwgYWVzKGNvcnZhbF94eSwgY29ydmFsX2JtZWFueHkpKSArIGdlb21fcG9pbnQoKQpgYGAKCgo=