suppressPackageStartupMessages(library(magrittr))
suppressPackageStartupMessages(library(tidyverse))

1 Generate data

n <- 5 # observations
d <- 100 # dimensions
eps <- .5 # standard deviation
n_draws <- 10 # number of draws i.e. samples from from d-dim gaussian
n_biases <- 10 # number of shifts i.e. vectors to shift each sample
n_scalings <- 20 # number of scalings i.e. scaling factors for each shift
scaling_max <- 3
scalings <- scaling_max * seq(n_scalings) / n_scalings # scaling factors for each shift

Function to compute vector of cross-correlation values

corvec <- function(m) {cm <- cor(m); cm[upper.tri(cm)]}

1.1 Generate control distribution

a <- matrix(rnorm(n*d*10), n*10, d) * eps # sample from d-dim Gaussian

amean <- colMeans(a)

asd <- apply(a, 2, sd)

1.2 Generate treatment distributions and compute metrics

  • Generate n_draws x n_biases x n_scalings sets of samples from a d-dim Gaussian, with n samples per set.
  • Compute median replicate correlation for each set
  • Compute distance from control for each set - this is just the scaling value

df <-
  map_df(1:n_draws,
         function(draw) {
           b <-
             scale(matrix(rnorm(n * d), n, d)) # sample from d-dim gaussion

           map_df(1:n_biases,
                  function(bias) {
                    bshift <- rnorm(d) # create shift vector

                    bshift <- d * bshift / norm(as.matrix(bshift))
                    
                    map_df(scalings,
                           function(bshift_scale) {
                             # scale the shift vector and then add to sample
                             bi <- scale(b, center = -bshift * bshift_scale, scale = FALSE) 
                             
                             # normalize the samples wrt control
                             bn <- scale(bi, center = amean, scale = asd)
                             
                             data.frame(draw,
                                        bias,
                                        bshift_scale, # distance from control
                                        medrepcor = median(corvec(t(bn))) # median replicate correlation
                                        )
                           })
                  })
         })

2 Plot data

2.1 Plot accross all shifts and draws

ggplot(df, aes(bshift_scale, medrepcor, group = interaction(draw, bias))) + 
  geom_line(alpha = 0.1) + 
  xlab("Distance from control") + 
  ylab("Median replicate correlation")

2.2 Plot accross all shifts grouped by draws

df %>%
  filter(draw %in% sample(1:n_draws, min(12, n_draws))) %>%
  ggplot(aes(bshift_scale, medrepcor, group = bias)) + 
  geom_line(alpha = 0.1) + 
  xlab("Distance from control") + 
  ylab("Median replicate correlation") + 
  facet_wrap(~draw, labeller = "label_both")

2.3 Plot accross all draws grouped by shifts

df %>%
  filter(bias %in% sample(1:n_biases, min(12, n_biases))) %>%
  ggplot(aes(bshift_scale, medrepcor, group = draw)) + 
  geom_line(alpha = 0.1) + 
  xlab("Distance from control") + 
  ylab("Median replicate correlation") + 
  facet_wrap(~bias, labeller = "label_both")

3 Generate data for 2 replicates and scale the shift

d <- 10 # dimensions

b0 <- matrix(rnorm(2 * d), 2, d) 

bshift <- rnorm(d)

bshift <- bshift / norm(as.matrix(bshift))

norm(as.matrix(bshift))
[1] 1
bd <- map_df(seq(20),
             function(bshift_scale) {
               bi <- sweep(b0, 2, bshift * bshift_scale, "+")
               
               bind_rows(
                 as.data.frame(t(b0)) %>% mutate(type = "orig"),
                 as.data.frame(t(bi)) %>% mutate(type = "shifted")
               ) %>%
                 mutate(bshift_scale = bshift_scale)
             })

4 Plot data

ggplot(bd, 
       aes(V1, V2, color = type)) + 
  geom_point() + facet_wrap(~bshift_scale, labeller = "label_both")

bd %>% 
  filter(type == "shifted") %>%
  group_by(bshift_scale) %>% 
  summarise(corval = cor(V1, V2), .groups = "keep") %>%
  ggplot(aes(bshift_scale, corval)) + geom_line() +
  xlab("Distance from control") + 
  ylab("Median replicate correlation")

unit_vec <- function(vec){
   return(vec/(sqrt(sum(vec**2,na.rm=TRUE))))
}

d <- 1000
x <- rnorm(d)
y <- rnorm(d)

df <- 
map_df(1:1000, 
       function(i) {
         b <- unit_vec(rnorm(d)) * sqrt(d)
         data.frame(corval_xy = cor(x+b, y+b), corval_bmeanxy = cor(b, (x+y)/2))
       }
       )

ggplot(df, aes(corval_xy, corval_bmeanxy)) + geom_point()

LS0tCnRpdGxlOiAiUGVhcnNvbiBzaW1pbGFyaXR5IGFuZCBkaXN0YW5jZSBmcm9tIGNvbnRyb2wiCm91dHB1dDogCiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICB0b2NfZGVwdGg6IDMKICAgIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogICAgdGhlbWU6IGx1bWVuCi0tLQoKCmBgYHtyfQpzdXBwcmVzc1BhY2thZ2VTdGFydHVwTWVzc2FnZXMobGlicmFyeShtYWdyaXR0cikpCnN1cHByZXNzUGFja2FnZVN0YXJ0dXBNZXNzYWdlcyhsaWJyYXJ5KHRpZHl2ZXJzZSkpCmBgYAoKIyBHZW5lcmF0ZSBkYXRhCgpgYGB7cn0KbiA8LSA1ICMgb2JzZXJ2YXRpb25zCmQgPC0gMTAwICMgZGltZW5zaW9ucwplcHMgPC0gLjUgIyBzdGFuZGFyZCBkZXZpYXRpb24KYGBgCgpgYGB7cn0Kbl9kcmF3cyA8LSAxMCAjIG51bWJlciBvZiBkcmF3cyBpLmUuIHNhbXBsZXMgZnJvbSBmcm9tIGQtZGltIGdhdXNzaWFuCm5fYmlhc2VzIDwtIDEwICMgbnVtYmVyIG9mIHNoaWZ0cyBpLmUuIHZlY3RvcnMgdG8gc2hpZnQgZWFjaCBzYW1wbGUKbl9zY2FsaW5ncyA8LSAyMCAjIG51bWJlciBvZiBzY2FsaW5ncyBpLmUuIHNjYWxpbmcgZmFjdG9ycyBmb3IgZWFjaCBzaGlmdApzY2FsaW5nX21heCA8LSAzCnNjYWxpbmdzIDwtIHNjYWxpbmdfbWF4ICogc2VxKG5fc2NhbGluZ3MpIC8gbl9zY2FsaW5ncyAjIHNjYWxpbmcgZmFjdG9ycyBmb3IgZWFjaCBzaGlmdApgYGAKCkZ1bmN0aW9uIHRvIGNvbXB1dGUgdmVjdG9yIG9mIGNyb3NzLWNvcnJlbGF0aW9uIHZhbHVlcwoKYGBge3J9CmNvcnZlYyA8LSBmdW5jdGlvbihtKSB7Y20gPC0gY29yKG0pOyBjbVt1cHBlci50cmkoY20pXX0KYGBgCgojIyBHZW5lcmF0ZSBjb250cm9sIGRpc3RyaWJ1dGlvbgoKYGBge3J9CmEgPC0gbWF0cml4KHJub3JtKG4qZCoxMCksIG4qMTAsIGQpICogZXBzICMgc2FtcGxlIGZyb20gZC1kaW0gR2F1c3NpYW4KCmFtZWFuIDwtIGNvbE1lYW5zKGEpCgphc2QgPC0gYXBwbHkoYSwgMiwgc2QpCgpgYGAKCiMjIEdlbmVyYXRlIHRyZWF0bWVudCBkaXN0cmlidXRpb25zIGFuZCBjb21wdXRlIG1ldHJpY3MKCi0gR2VuZXJhdGUgYG5fZHJhd3NgIHggYG5fYmlhc2VzYCB4IGBuX3NjYWxpbmdzYCBzZXRzIG9mIHNhbXBsZXMgZnJvbSBhIGQtZGltIEdhdXNzaWFuLCB3aXRoIGBuYCBzYW1wbGVzIHBlciBzZXQuCi0gQ29tcHV0ZSBtZWRpYW4gcmVwbGljYXRlIGNvcnJlbGF0aW9uIGZvciBlYWNoIHNldAotIENvbXB1dGUgZGlzdGFuY2UgZnJvbSBjb250cm9sIGZvciBlYWNoIHNldCAtIHRoaXMgaXMganVzdCB0aGUgc2NhbGluZyB2YWx1ZQoKYGBge3J9CgpkZiA8LQogIG1hcF9kZigxOm5fZHJhd3MsCiAgICAgICAgIGZ1bmN0aW9uKGRyYXcpIHsKICAgICAgICAgICBiIDwtCiAgICAgICAgICAgICBzY2FsZShtYXRyaXgocm5vcm0obiAqIGQpLCBuLCBkKSkgIyBzYW1wbGUgZnJvbSBkLWRpbSBnYXVzc2lvbgoKICAgICAgICAgICBtYXBfZGYoMTpuX2JpYXNlcywKICAgICAgICAgICAgICAgICAgZnVuY3Rpb24oYmlhcykgewogICAgICAgICAgICAgICAgICAgIGJzaGlmdCA8LSBybm9ybShkKSAjIGNyZWF0ZSBzaGlmdCB2ZWN0b3IKCiAgICAgICAgICAgICAgICAgICAgYnNoaWZ0IDwtIGQgKiBic2hpZnQgLyBub3JtKGFzLm1hdHJpeChic2hpZnQpKQogICAgICAgICAgICAgICAgICAgIAogICAgICAgICAgICAgICAgICAgIG1hcF9kZihzY2FsaW5ncywKICAgICAgICAgICAgICAgICAgICAgICAgICAgZnVuY3Rpb24oYnNoaWZ0X3NjYWxlKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyBzY2FsZSB0aGUgc2hpZnQgdmVjdG9yIGFuZCB0aGVuIGFkZCB0byBzYW1wbGUKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBiaSA8LSBzY2FsZShiLCBjZW50ZXIgPSAtYnNoaWZ0ICogYnNoaWZ0X3NjYWxlLCBzY2FsZSA9IEZBTFNFKSAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIG5vcm1hbGl6ZSB0aGUgc2FtcGxlcyB3cnQgY29udHJvbAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIGJuIDwtIHNjYWxlKGJpLCBjZW50ZXIgPSBhbWVhbiwgc2NhbGUgPSBhc2QpCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZGF0YS5mcmFtZShkcmF3LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYmlhcywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGJzaGlmdF9zY2FsZSwgIyBkaXN0YW5jZSBmcm9tIGNvbnRyb2wKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1lZHJlcGNvciA9IG1lZGlhbihjb3J2ZWModChibikpKSAjIG1lZGlhbiByZXBsaWNhdGUgY29ycmVsYXRpb24KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICkKICAgICAgICAgICAgICAgICAgICAgICAgICAgfSkKICAgICAgICAgICAgICAgICAgfSkKICAgICAgICAgfSkKYGBgCgojIFBsb3QgZGF0YQoKIyMgUGxvdCBhY2Nyb3NzIGFsbCBzaGlmdHMgYW5kIGRyYXdzCgpgYGB7cn0KZ2dwbG90KGRmLCBhZXMoYnNoaWZ0X3NjYWxlLCBtZWRyZXBjb3IsIGdyb3VwID0gaW50ZXJhY3Rpb24oZHJhdywgYmlhcykpKSArIAogIGdlb21fbGluZShhbHBoYSA9IDAuMSkgKyAKICB4bGFiKCJEaXN0YW5jZSBmcm9tIGNvbnRyb2wiKSArIAogIHlsYWIoIk1lZGlhbiByZXBsaWNhdGUgY29ycmVsYXRpb24iKQpgYGAKIyMgUGxvdCBhY2Nyb3NzIGFsbCBzaGlmdHMgZ3JvdXBlZCBieSBkcmF3cwoKYGBge3J9CmRmICU+JQogIGZpbHRlcihkcmF3ICVpbiUgc2FtcGxlKDE6bl9kcmF3cywgbWluKDEyLCBuX2RyYXdzKSkpICU+JQogIGdncGxvdChhZXMoYnNoaWZ0X3NjYWxlLCBtZWRyZXBjb3IsIGdyb3VwID0gYmlhcykpICsgCiAgZ2VvbV9saW5lKGFscGhhID0gMC4xKSArIAogIHhsYWIoIkRpc3RhbmNlIGZyb20gY29udHJvbCIpICsgCiAgeWxhYigiTWVkaWFuIHJlcGxpY2F0ZSBjb3JyZWxhdGlvbiIpICsgCiAgZmFjZXRfd3JhcCh+ZHJhdywgbGFiZWxsZXIgPSAibGFiZWxfYm90aCIpCmBgYAojIyBQbG90IGFjY3Jvc3MgYWxsIGRyYXdzIGdyb3VwZWQgYnkgc2hpZnRzCgpgYGB7cn0KZGYgJT4lCiAgZmlsdGVyKGJpYXMgJWluJSBzYW1wbGUoMTpuX2JpYXNlcywgbWluKDEyLCBuX2JpYXNlcykpKSAlPiUKICBnZ3Bsb3QoYWVzKGJzaGlmdF9zY2FsZSwgbWVkcmVwY29yLCBncm91cCA9IGRyYXcpKSArIAogIGdlb21fbGluZShhbHBoYSA9IDAuMSkgKyAKICB4bGFiKCJEaXN0YW5jZSBmcm9tIGNvbnRyb2wiKSArIAogIHlsYWIoIk1lZGlhbiByZXBsaWNhdGUgY29ycmVsYXRpb24iKSArIAogIGZhY2V0X3dyYXAofmJpYXMsIGxhYmVsbGVyID0gImxhYmVsX2JvdGgiKQpgYGAKCiMgR2VuZXJhdGUgZGF0YSBmb3IgMiByZXBsaWNhdGVzIGFuZCBzY2FsZSB0aGUgc2hpZnQKCmBgYHtyfQpkIDwtIDEwICMgZGltZW5zaW9ucwoKYjAgPC0gbWF0cml4KHJub3JtKDIgKiBkKSwgMiwgZCkgCgpic2hpZnQgPC0gcm5vcm0oZCkKCmJzaGlmdCA8LSBic2hpZnQgLyBub3JtKGFzLm1hdHJpeChic2hpZnQpKQoKbm9ybShhcy5tYXRyaXgoYnNoaWZ0KSkKYGBgCgoKYGBge3J9CmJkIDwtIG1hcF9kZihzZXEoMjApLAogICAgICAgICAgICAgZnVuY3Rpb24oYnNoaWZ0X3NjYWxlKSB7CiAgICAgICAgICAgICAgIGJpIDwtIHN3ZWVwKGIwLCAyLCBic2hpZnQgKiBic2hpZnRfc2NhbGUsICIrIikKICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgIGJpbmRfcm93cygKICAgICAgICAgICAgICAgICBhcy5kYXRhLmZyYW1lKHQoYjApKSAlPiUgbXV0YXRlKHR5cGUgPSAib3JpZyIpLAogICAgICAgICAgICAgICAgIGFzLmRhdGEuZnJhbWUodChiaSkpICU+JSBtdXRhdGUodHlwZSA9ICJzaGlmdGVkIikKICAgICAgICAgICAgICAgKSAlPiUKICAgICAgICAgICAgICAgICBtdXRhdGUoYnNoaWZ0X3NjYWxlID0gYnNoaWZ0X3NjYWxlKQogICAgICAgICAgICAgfSkKCmBgYAoKIyBQbG90IGRhdGEKCmBgYHtyfQpnZ3Bsb3QoYmQsIAogICAgICAgYWVzKFYxLCBWMiwgY29sb3IgPSB0eXBlKSkgKyAKICBnZW9tX3BvaW50KCkgKyBmYWNldF93cmFwKH5ic2hpZnRfc2NhbGUsIGxhYmVsbGVyID0gImxhYmVsX2JvdGgiKQpgYGAKCmBgYHtyfQpiZCAlPiUgCiAgZmlsdGVyKHR5cGUgPT0gInNoaWZ0ZWQiKSAlPiUKICBncm91cF9ieShic2hpZnRfc2NhbGUpICU+JSAKICBzdW1tYXJpc2UoY29ydmFsID0gY29yKFYxLCBWMiksIC5ncm91cHMgPSAia2VlcCIpICU+JQogIGdncGxvdChhZXMoYnNoaWZ0X3NjYWxlLCBjb3J2YWwpKSArIGdlb21fbGluZSgpICsKICB4bGFiKCJEaXN0YW5jZSBmcm9tIGNvbnRyb2wiKSArIAogIHlsYWIoIk1lZGlhbiByZXBsaWNhdGUgY29ycmVsYXRpb24iKQoKYGBgCmBgYHtyfQp1bml0X3ZlYyA8LSBmdW5jdGlvbih2ZWMpewogICByZXR1cm4odmVjLyhzcXJ0KHN1bSh2ZWMqKjIsbmEucm09VFJVRSkpKSkKfQoKZCA8LSAxMDAwCnggPC0gcm5vcm0oZCkKeSA8LSBybm9ybShkKQoKZGYgPC0gCm1hcF9kZigxOjEwMDAsIAogICAgICAgZnVuY3Rpb24oaSkgewogICAgICAgICBiIDwtIHVuaXRfdmVjKHJub3JtKGQpKSAqIHNxcnQoZCkKICAgICAgICAgZGF0YS5mcmFtZShjb3J2YWxfeHkgPSBjb3IoeCtiLCB5K2IpLCBjb3J2YWxfYm1lYW54eSA9IGNvcihiLCAoeCt5KS8yKSkKICAgICAgIH0KICAgICAgICkKCmdncGxvdChkZiwgYWVzKGNvcnZhbF94eSwgY29ydmFsX2JtZWFueHkpKSArIGdlb21fcG9pbnQoKQpgYGAKCgo=