rbvBern <- function(n, p1, p2, rho12) {
t1 <- qnorm(1 - p1) #defining thresholds
t2 <- qnorm(1 - p2)
corr_diff <- function(r) {
p11 <- mvtnorm::pmvnorm( #Joint prob of P(X1=1,X2=1)
lower = c(t1, t2),
upper = c(Inf, Inf),
mean = c(0, 0),
sigma = matrix(c(1, r, r, 1), 2, 2)
)[1]
#Bernoulli correlation
achieved_corr <- (p11 - p1*p2) / sqrt(p1*(1-p1)*p2*(1-p2))
achieved_corr - rho12
}
#solving for correlation r
r_sol <- uniroot(corr_diff, interval = c(-0.99, 0.99))$root
#Generating BVN data
Sigma <- matrix(c(1, r_sol, r_sol, 1), 2, 2)
Z <- mvtnorm::rmvnorm(n, mean = c(0,0), sigma = Sigma)
#Threshold defining to get Bernoulli variables
X1 <- as.numeric(Z[,1] > t1)
X2 <- as.numeric(Z[,2] > t2)
data.frame(X1 = X1, X2 = X2)
}
#simulation to check if my fn works
set.seed(123)
n <- 10000
p1 <- 0.2
p2 <- 0.4
rho12 <- 0.6
sim <- rbvBern(n, p1, p2, rho12)
mean(sim$X1) #Target 0.2
## [1] 0.1979
mean(sim$X2) #Target 0.4
## [1] 0.4002
cor(sim$X1, sim$X2) #Target 0.6
## [1] 0.5942657