library(tidyverse)
library(rethinking)
library(dagitty)
library(ggdag)
Pipe
# G = gender
# T = time spent studying
# R = exam results
n <- 500
df <- tibble(
G = rbern( n , 0.5 ),
T = rnorm( n , ifelse(G==0, 2, -2), 2 ),
R = rnorm( n , T, 3 )
)
# naive plot
dens(df$R,
xlab="Exam results",
ylim = c(0, max(density(df$R)$y) * 1.5)
)
lines(density(df$R[df$G == 0]), col="blue")
lines(density(df$R[df$G == 1]), col="red")
legend("topright",legend = c("Male", "Female"), col = c("blue", "red"), lwd = 2)

# causal model
dagify(
R ~ T,
T ~ G,
outcome="R",
exposure="G",
latent="T"
) |>
tidy_dagitty() |>
node_status() |>
ggplot(aes_dag(color=status)) +
geom_dag(size=2) +
theme_dag_gray(legend.position="none",aspect.ratio=.66) +
geom_dag_node(size=40) +
geom_dag_label(aes(label = name), fill = "white", color="black", size=4) +
scale_y_reverse(lim=c(-.5,2.5)) + coord_flip() +
scale_color_manual(values=c("exposure" = "#F8766D", "outcome" = "#619CFF", "latent"="lightgray"))

# interpreted plot
plot(R~T, data=df, col=c(2,4)[G+1], lwd=2)
abline(lm(R~T, data=df), lwd=2)
abline(lm(R[G==0]~T[G==0], data=df), col=2, lwd=2)
abline(lm(R[G==1]~T[G==1], data=df), col=4, lwd=2)

Fork
# A = age at marriage
# H = happiness
# G = gender
n <- 500
df <- tibble(
G = rbern( n , 0.5 ),
A = rnorm( n , 2*G-1 ),
H = rnorm( n , 2*G-1 )
)
# naive plot
plot(H~A, data=df, col="#555", lwd=2)
abline(lm(H~A, data=df), lwd=3)

# causal model
dagify(
A ~ G,
H ~ G,
exposure="G",
outcome=c("H"),
latent="A"
) |>
tidy_dagitty() |>
node_status() |>
ggplot(aes_dag(color=status)) +
geom_dag(size=2) +
theme_dag_gray(legend.position="none",aspect.ratio=.66) +
geom_dag_node(size=40) +
geom_dag_label(aes(label = name), fill = "white", color="black", size=4) +
coord_cartesian(xlim = c(-1, 1), ylim = c(-0.5, 1.5)) +
scale_color_manual(values=c("exposure" = "#F8766D", "outcome" = "#619CFF", "latent"="lightgray"))

# interpreted plot
plot(H~A, data=df, col=c(2,4)[G+1], lwd=2)
abline(lm(H~A, data=df), lwd=3)
abline(lm(H[G==0]~A[G==0], data=df), col=2, lwd=3)
abline(lm(H[G==1]~A[G==1], data=df), col=4, lwd=3)

Collider
# F = family wealth
# H = hardwork
# J = prestigeous job
n <- 500
df <- tibble(
F = rnorm( n ),
H = rnorm( n ),
R = rnorm( n, 0, 0.3 ),
J = ifelse(F+H+R > 0.5, TRUE, FALSE)
)
# naive plot
plot(H~F, data=df, lwd=2, col=ifelse(J, "royalblue", "transparent"))
abline(lm(H[J==TRUE]~F[J==TRUE], data=df), lwd=3, col="royalblue")

# causal model
dagify(
J ~ F,
J ~ H,
exposure=c("F", "H"),
latent=c("J")
) |>
tidy_dagitty(layout = 'eigen') |>
node_status() |>
ggplot(aes_dag(color=status)) +
geom_dag(size=2) +
theme_dag_gray(legend.position="none",aspect.ratio=.66) +
geom_dag_node(size=40) +
geom_dag_label(aes(label = name), fill = "white", color="black", size=4) +
coord_cartesian(xlim = c(-1, 1), ylim = c(-1, 1)) +
scale_color_manual(values=c("exposure" = "#F8766D", "outcome" = "#619CFF", "latent"="lightgray"))
## Warning in layout_with_eigen(graph, type = type, ev = eigenvector): g is
## directed. undirected version is used for the layout.

# interpreted plot
plot(H~F, data=df, lwd=2, col=ifelse(J, "royalblue", "darkgray"))
abline(lm(H[J]~F[J], data=df), lwd=3, col="royalblue")
abline(lm(H[!J]~F[!J], data=df), lwd=3, col="darkgray")
abline(lm(H~F, data=df), lwd=3, col="black")

Bonus: Operationalizing variables
library(dagitty)
library(ggdag)
library(haven)
S01 <- read_dta("nlss2022/data/stata format/S01.dta")
ggplot(S01, aes(x=q01_03)) +
geom_histogram(binwidth=1, color="darkgray", fill="royalblue")

set.seed(10)
dagify(
AgeEstimate ~ Age,
Health ~ Age,
Health ~ AgeEstimate,
exposure=c("Age"),
outcome=c("Health"),
latent=c("AgeEstimate")
) |>
tidy_dagitty() |>
mutate(linetype = ifelse(name == "AgeEstimate" & to == "Health", "dashed", "solid")) |>
node_status() |>
ggplot(aes_dag(color=status, edge_linetype = linetype)) +
geom_dag(size=2.2) +
theme_dag_gray(legend.position="none",aspect.ratio=.66) +
geom_dag_node(size=40) +
geom_dag_label(aes(label = name), fill = "white", color="black", size=4) +
scale_x_continuous(expand = expansion(mult = 0.5)) +
scale_y_continuous(expand = expansion(mult = 0.5)) +
scale_color_manual(values=c("exposure" = "#F8766D", "outcome" = "#619CFF", "latent"="lightgray"))

set.seed(NULL)