You can find the source code to this simulation here

Setup and generate simulation

library(tidyverse)
set.seed(42)
N = 10000
# X and D cause Y
# X -> Y <- D
D = rnorm(N)
X = rnorm(N)
Y = 1/2 * D + 1/2 * X + rnorm(N)
df = data.frame(
    D=D,
    X=X,
    Y=Y
)

Population correlation between \(D\) and \(X\)

cor.test(df$D, df$X)
## 
##  Pearson's product-moment correlation
## 
## data:  df$D and df$X
## t = -0.72811, df = 9998, p-value = 0.4666
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.02687784  0.01232022
## sample estimates:
##          cor 
## -0.007281603

Correlation between \(D\) and \(X\) after conditioning on \(Y\)

collider_df = df %>% filter(Y > 1)
cor.test(collider_df$D, collider_df$X)
## 
##  Pearson's product-moment correlation
## 
## data:  collider_df$D and collider_df$X
## t = -7.8246, df = 2087, p-value = 8.028e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2101837 -0.1268501
## sample estimates:
##        cor 
## -0.1688186

Population correlation between \(D\) and \(Y\)

cor.test(df$D, df$Y)
## 
##  Pearson's product-moment correlation
## 
## data:  df$D and df$Y
## t = 44.037, df = 9998, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3865096 0.4193436
## sample estimates:
##       cor 
## 0.4030563

Correlation between \(D\) and \(Y\) after conditioning on \(Y\)

cor.test(collider_df$D, collider_df$Y)
## 
##  Pearson's product-moment correlation
## 
## data:  collider_df$D and collider_df$Y
## t = 11.272, df = 2087, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1987125 0.2795724
## sample estimates:
##       cor 
## 0.2395579

Visualize

collider_df = df %>% filter(Y > 1)
cor_df = data.frame(
    name=c(
        'cor(D, X)',
        'cor(D, X)',
        'cor(D, Y)',
        'cor(D, Y)'
    ),
    category=c(
        'Population',
        'Given Y > 1',
        'Population',
        'Given Y > 1'
    ),
    val=c(
        cor(df$D, df$X),
        cor(collider_df$D, collider_df$X),
        cor(df$D, df$Y),
        cor(collider_df$D, collider_df$Y)
    ),
    ucl=c(
        cor.test(df$D, df$X)$conf.int[1],
        cor.test(collider_df$D, collider_df$X)$conf.int[1],
        cor.test(df$D, df$Y)$conf.int[1],
        cor.test(collider_df$D, collider_df$Y)$conf.int[1]
    ),
    lcl=c(
        cor.test(df$D, df$X)$conf.int[2],
        cor.test(collider_df$D, collider_df$X)$conf.int[2],
        cor.test(df$D, df$Y)$conf.int[2],
        cor.test(collider_df$D, collider_df$Y)$conf.int[2]
    )
)
cor_df %>%
    ggplot(aes(x=name, y=val, color=category)) +
    geom_hline(yintercept=0, linetype='dashed', alpha=0.4) +
    geom_point(position=position_dodge(width=0.3)) +
    geom_errorbar(
        aes(ymin=lcl, ymax=ucl),
        position=position_dodge(width=0.3),
        width=0.3
    ) +
    ylim(-0.5, 0.5) +
    theme_minimal() +
    theme(
        panel.grid.minor=element_blank()
    )