Population correlation between \(D\) and \(X\)
cor.test(df$D, df$X)
##
## Pearson's product-moment correlation
##
## data: df$D and df$X
## t = -0.72811, df = 9998, p-value = 0.4666
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.02687784 0.01232022
## sample estimates:
## cor
## -0.007281603
Correlation between \(D\) and \(X\) after conditioning on \(Y\)
collider_df = df %>% filter(Y > 1)
cor.test(collider_df$D, collider_df$X)
##
## Pearson's product-moment correlation
##
## data: collider_df$D and collider_df$X
## t = -7.8246, df = 2087, p-value = 8.028e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2101837 -0.1268501
## sample estimates:
## cor
## -0.1688186
Population correlation between \(D\) and \(Y\)
cor.test(df$D, df$Y)
##
## Pearson's product-moment correlation
##
## data: df$D and df$Y
## t = 44.037, df = 9998, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3865096 0.4193436
## sample estimates:
## cor
## 0.4030563
Correlation between \(D\) and \(Y\) after conditioning on \(Y\)
cor.test(collider_df$D, collider_df$Y)
##
## Pearson's product-moment correlation
##
## data: collider_df$D and collider_df$Y
## t = 11.272, df = 2087, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1987125 0.2795724
## sample estimates:
## cor
## 0.2395579
Visualize
collider_df = df %>% filter(Y > 1)
cor_df = data.frame(
name=c(
'cor(D, X)',
'cor(D, X)',
'cor(D, Y)',
'cor(D, Y)'
),
category=c(
'Population',
'Given Y > 1',
'Population',
'Given Y > 1'
),
val=c(
cor(df$D, df$X),
cor(collider_df$D, collider_df$X),
cor(df$D, df$Y),
cor(collider_df$D, collider_df$Y)
),
ucl=c(
cor.test(df$D, df$X)$conf.int[1],
cor.test(collider_df$D, collider_df$X)$conf.int[1],
cor.test(df$D, df$Y)$conf.int[1],
cor.test(collider_df$D, collider_df$Y)$conf.int[1]
),
lcl=c(
cor.test(df$D, df$X)$conf.int[2],
cor.test(collider_df$D, collider_df$X)$conf.int[2],
cor.test(df$D, df$Y)$conf.int[2],
cor.test(collider_df$D, collider_df$Y)$conf.int[2]
)
)
cor_df %>%
ggplot(aes(x=name, y=val, color=category)) +
geom_hline(yintercept=0, linetype='dashed', alpha=0.4) +
geom_point(position=position_dodge(width=0.3)) +
geom_errorbar(
aes(ymin=lcl, ymax=ucl),
position=position_dodge(width=0.3),
width=0.3
) +
ylim(-0.5, 0.5) +
theme_minimal() +
theme(
panel.grid.minor=element_blank()
)
