Collider example

You can find the source code to this simulation here

Setup and generate simulation

library(tidyverse)
set.seed(42)
N = 10000
# X and D cause Y
# X -> Y <- D
D = rnorm(N)
X = rnorm(N)
Y = 1/2 * D + 1/2 * X + rnorm(N)
df = data.frame(
    D=D,
    X=X,
    Y=Y
)

Population correlation between \(D\) and \(X\)

cor.test(df$D, df$X)

## 
##  Pearson's product-moment correlation
## 
## data:  df$D and df$X
## t = -0.72811, df = 9998, p-value = 0.4666
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.02687784  0.01232022
## sample estimates:
##          cor 
## -0.007281603

Correlation between \(D\) and \(X\) after conditioning on \(Y\)

collider_df = df %>% filter(Y > 1)
cor.test(collider_df$D, collider_df$X)

## 
##  Pearson's product-moment correlation
## 
## data:  collider_df$D and collider_df$X
## t = -7.8246, df = 2087, p-value = 8.028e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2101837 -0.1268501
## sample estimates:
##        cor 
## -0.1688186

Population correlation between \(D\) and \(Y\)

cor.test(df$D, df$Y)

## 
##  Pearson's product-moment correlation
## 
## data:  df$D and df$Y
## t = 44.037, df = 9998, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3865096 0.4193436
## sample estimates:
##       cor 
## 0.4030563

Correlation between \(D\) and \(Y\) after conditioning on \(Y\)

cor.test(collider_df$D, collider_df$Y)

## 
##  Pearson's product-moment correlation
## 
## data:  collider_df$D and collider_df$Y
## t = 11.272, df = 2087, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1987125 0.2795724
## sample estimates:
##       cor 
## 0.2395579

Visualize

collider_df = df %>% filter(Y > 1)
cor_df = data.frame(
    name=c(
        'cor(D, X)',
        'cor(D, X)',
        'cor(D, Y)',
        'cor(D, Y)'
    ),
    category=c(
        'Population',
        'Given Y > 1',
        'Population',
        'Given Y > 1'
    ),
    val=c(
        cor(df$D, df$X),
        cor(collider_df$D, collider_df$X),
        cor(df$D, df$Y),
        cor(collider_df$D, collider_df$Y)
    ),
    ucl=c(
        cor.test(df$D, df$X)$conf.int[1],
        cor.test(collider_df$D, collider_df$X)$conf.int[1],
        cor.test(df$D, df$Y)$conf.int[1],
        cor.test(collider_df$D, collider_df$Y)$conf.int[1]
    ),
    lcl=c(
        cor.test(df$D, df$X)$conf.int[2],
        cor.test(collider_df$D, collider_df$X)$conf.int[2],
        cor.test(df$D, df$Y)$conf.int[2],
        cor.test(collider_df$D, collider_df$Y)$conf.int[2]
    )
)

cor_df %>%
    ggplot(aes(x=name, y=val, color=category)) +
    geom_hline(yintercept=0, linetype='dashed', alpha=0.4) +
    geom_point(position=position_dodge(width=0.3)) +
    geom_errorbar(
        aes(ymin=lcl, ymax=ucl),
        position=position_dodge(width=0.3),
        width=0.3
    ) +
    ylim(-0.5, 0.5) +
    theme_minimal() +
    theme(
        panel.grid.minor=element_blank()
    )

Collider example

George Berry, geb97 at cornell.edu, @george_berry

3/15/2019

You can find the source code to this simulation here

Setup and generate simulation

Population correlation between \(D\) and \(X\)

Correlation between \(D\) and \(X\) after conditioning on \(Y\)

Population correlation between \(D\) and \(Y\)

Correlation between \(D\) and \(Y\) after conditioning on \(Y\)

Visualize