The message here is: don’t confuse (a) measuring similarities between feature vectors using Pearson correlation with (b) measuring correlation among features

suppressPackageStartupMessages(library(magrittr))
suppressPackageStartupMessages(library(tidyverse))

1 Generate data

n <- 100 # observatins
d <- 10 # dimensions
eps <- .5 # standard deviation

Function to compute vector of cross-correlation values

corvec <- function(m) {cm <- cor(m); cm[upper.tri(cm)]}

Generate data

x <- rnorm(d) # bias

m <- matrix(rnorm(n*d), n, d) * eps # sample from d-dim gaussion

mx <- sweep(m, 2, x, "+") # add bias to sample

mxn <- sweep(mx, 2, colMeans(mx), "-") # remove mean from biased data

2 Pearson similarity

Create data frame of pearson similarities between samples of the 3 noise, biased data, mean-subtracted data

df <- data.frame(
  noise = corvec(t(m)),
  signal = corvec(t(mx)),
  meansub = corvec(t(mxn))
  ) 

dim(df) 
[1] 4950    3
df %<>% 
  pivot_longer(everything())

Plot data

ggplot(df, aes(x=value, y=..scaled.., fill = name)) + geom_density(alpha = 0.5)


ggplot(df, aes(x=name, y=value)) + geom_violin()

3 Cross-correlation

Create data frame of cross-correlations of noise, biased data, mean-subtracted data

df <- data.frame(
  noise = corvec((m)),
  signal = corvec((mx)),
  meansub = corvec((mxn))
  ) 

dim(df) 
[1] 45  3
df %<>% 
  pivot_longer(everything()) 

Plot data

ggplot(df, aes(x=value, y=..scaled.., fill = name)) + geom_density(alpha = 0.5)


ggplot(df, aes(x=name, y=value)) + geom_violin()

LS0tCnRpdGxlOiAiQ3Jvc3MtY29ycmVsYXRpb25zIGFuZCBQZWFyc29uIHNpbWlsYXJpdGllcyIKb3V0cHV0OiAKICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIHRvY19kZXB0aDogMwogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgICB0aGVtZTogbHVtZW4KLS0tCgpUaGUgbWVzc2FnZSBoZXJlIGlzOiBkb24ndCBjb25mdXNlIAooYSkgbWVhc3VyaW5nIHNpbWlsYXJpdGllcyBiZXR3ZWVuIGZlYXR1cmUgdmVjdG9ycyB1c2luZyBQZWFyc29uIGNvcnJlbGF0aW9uIAp3aXRoIAooYikgbWVhc3VyaW5nIGNvcnJlbGF0aW9uIGFtb25nIGZlYXR1cmVzCgoKYGBge3J9CnN1cHByZXNzUGFja2FnZVN0YXJ0dXBNZXNzYWdlcyhsaWJyYXJ5KG1hZ3JpdHRyKSkKc3VwcHJlc3NQYWNrYWdlU3RhcnR1cE1lc3NhZ2VzKGxpYnJhcnkodGlkeXZlcnNlKSkKYGBgCgoKIyBHZW5lcmF0ZSBkYXRhCgpgYGB7cn0KbiA8LSAxMDAgIyBvYnNlcnZhdGlucwpkIDwtIDEwICMgZGltZW5zaW9ucwplcHMgPC0gLjUgIyBzdGFuZGFyZCBkZXZpYXRpb24KYGBgCgpGdW5jdGlvbiB0byBjb21wdXRlIHZlY3RvciBvZiBjcm9zcy1jb3JyZWxhdGlvbiB2YWx1ZXMKCmBgYHtyfQpjb3J2ZWMgPC0gZnVuY3Rpb24obSkge2NtIDwtIGNvcihtKTsgY21bdXBwZXIudHJpKGNtKV19CmBgYAoKR2VuZXJhdGUgZGF0YQoKYGBge3J9CnggPC0gcm5vcm0oZCkgIyBiaWFzCgptIDwtIG1hdHJpeChybm9ybShuKmQpLCBuLCBkKSAqIGVwcyAjIHNhbXBsZSBmcm9tIGQtZGltIGdhdXNzaW9uCgpteCA8LSBzd2VlcChtLCAyLCB4LCAiKyIpICMgYWRkIGJpYXMgdG8gc2FtcGxlCgpteG4gPC0gc3dlZXAobXgsIDIsIGNvbE1lYW5zKG14KSwgIi0iKSAjIHJlbW92ZSBtZWFuIGZyb20gYmlhc2VkIGRhdGEKCmBgYAoKIyBQZWFyc29uIHNpbWlsYXJpdHkKCkNyZWF0ZSBkYXRhIGZyYW1lIG9mIHBlYXJzb24gc2ltaWxhcml0aWVzIGJldHdlZW4gc2FtcGxlcyBvZiB0aGUgMyAgbm9pc2UsIGJpYXNlZCBkYXRhLCBtZWFuLXN1YnRyYWN0ZWQgZGF0YQoKYGBge3J9CmRmIDwtIGRhdGEuZnJhbWUoCiAgbm9pc2UgPSBjb3J2ZWModChtKSksCiAgc2lnbmFsID0gY29ydmVjKHQobXgpKSwKICBtZWFuc3ViID0gY29ydmVjKHQobXhuKSkKICApIAoKZGltKGRmKSAKCmRmICU8PiUgCiAgcGl2b3RfbG9uZ2VyKGV2ZXJ5dGhpbmcoKSkKYGBgCgpQbG90IGRhdGEKCmBgYHtyfQpnZ3Bsb3QoZGYsIGFlcyh4PXZhbHVlLCB5PS4uc2NhbGVkLi4sIGZpbGwgPSBuYW1lKSkgKyBnZW9tX2RlbnNpdHkoYWxwaGEgPSAwLjUpCgpnZ3Bsb3QoZGYsIGFlcyh4PW5hbWUsIHk9dmFsdWUpKSArIGdlb21fdmlvbGluKCkKYGBgCgojIENyb3NzLWNvcnJlbGF0aW9uCgpDcmVhdGUgZGF0YSBmcmFtZSBvZiBjcm9zcy1jb3JyZWxhdGlvbnMgb2Ygbm9pc2UsIGJpYXNlZCBkYXRhLCBtZWFuLXN1YnRyYWN0ZWQgZGF0YQoKYGBge3J9CmRmIDwtIGRhdGEuZnJhbWUoCiAgbm9pc2UgPSBjb3J2ZWMoKG0pKSwKICBzaWduYWwgPSBjb3J2ZWMoKG14KSksCiAgbWVhbnN1YiA9IGNvcnZlYygobXhuKSkKICApIAoKZGltKGRmKSAKCmRmICU8PiUgCiAgcGl2b3RfbG9uZ2VyKGV2ZXJ5dGhpbmcoKSkgCmBgYAoKUGxvdCBkYXRhCgpgYGB7cn0KZ2dwbG90KGRmLCBhZXMoeD12YWx1ZSwgeT0uLnNjYWxlZC4uLCBmaWxsID0gbmFtZSkpICsgZ2VvbV9kZW5zaXR5KGFscGhhID0gMC41KQoKZ2dwbG90KGRmLCBhZXMoeD1uYW1lLCB5PXZhbHVlKSkgKyBnZW9tX3Zpb2xpbigpCmBgYAo=