library(cfid)

#' Plot a graph through bnlearn's Graphviz interface
#'
#' Extracts the edge list of `g`, builds an empty bnlearn graph over the
#' nodes that occur in that edge list, installs the edges as arcs, and
#' plots the result.
#'
#' @param g A graph object accepted by `get_edgelist()`.
#' @param main Plot title forwarded to `bnlearn::graphviz.plot()`. Defaults
#'   to `NULL` (no title), which is also the default of `graphviz.plot()`.
#' @return Whatever `bnlearn::graphviz.plot()` returns.
plot_graphviz <- function(g, main = NULL) {
  # NOTE(review): get_edgelist() is not defined in this excerpt; presumably it
  # returns from/to columns that bnlearn::arcs<- accepts -- confirm.
  el <- get_edgelist(g)
  # The node set is derived from the edges only, so a node that takes part in
  # no edge will not appear in the plot.
  nodes_ <- unique(unlist(el))
  net <- bnlearn::empty.graph(nodes = nodes_)
  bnlearn::arcs(net) <- el
  bnlearn::graphviz.plot(net, main = main)
}

CFID uses this IDC example:
# Example graph: X -> W -> Y and D -> Z -> Y, plus a bidirected edge X <-> Y
# (presumably denoting an unobserved common cause of X and Y -- confirm
# against the cfid documentation).
g1 <- dag("X -> W -> Y <- Z <- D X <-> Y")
# Counterfactual variables used in the query (notation as in the text below):
# v1: Y = 0 under the intervention X = 0, i.e. Y_{X=0} = 0
v1 <- cf("Y", 0, c(X = 0))
# v2: X = 1
v2 <- cf("X", 1)
# v3: Z = 0 under the intervention D = 0, i.e. Z_{D=0} = 0
v3 <- cf("Z", 0, c(D = 0))
# v4: D = 0
v4 <- cf("D", 0)
# c1 is the queried conjunction; c2 is the conjunction being conditioned on.
c1 <- conj(v1)
c2 <- conj(v2, v3, v4)
# Identifiable conditional conjunction
print(identifiable(g1, c1, c2))
#> $id
#> [1] TRUE
#>
#> $prob
#> \frac{\sum_{w,z,d} p_{x}(w)p_{w,z}(y,x')p_{d}(z)p(d)}{p(x')}
#>
#> $undefined
#> [1] FALSE
This demonstrates the ability of the algorithm to resolve a query that involves:
* Estimating the probability of \(Y_{X=0}=0\)
* Conditioning on \(X=1\), \(Z_{D=0}=0\), and \(D=0\)
* Utilizing the front door via \(W\).
Notice we don’t condition on \(Y=y\). If we do, there is no identifiability. Indeed, the algorithm seems to get stuck in a recursion.
# Add a Y-variable to the conditioning set: v5 is Y = 0 under the
# intervention D = 0 (same cf() pattern as v3 above).
v5 <- cf("Y", 0, c(D = 0))
c5 <- conj(v2, v3, v4, v5)
# Wrap the call so that an error is printed instead of aborting the script.
tryCatch(
identifiable(g1, c1, c5),
error = function(e) print(e)
)
#> <nodeStackOverflowError: node stack overflow>
# Same experiment, conditioning on the plain observation Y = 1 instead.
v6 <- cf("Y", 1)
c6 <- conj(v2, v3, v4, v6)
# Again, print the error rather than stopping.
tryCatch(
identifiable(g1, c1, c6),
error = function(e) print(e)
)
#> <nodeStackOverflowError: node stack overflow>
Let’s identify \(P(Y_{X=0}=0|X=1)\). This query is related to the effect of treatment on the treated (ETT).
# Example graph: Z -> X -> Y together with Z -> Y.
g <- cfid::dag("Z -> X -> Y Z -> Y")
# Observed treatment X = 1.
actual_x <- cf("X", 1)
# Counterfactual outcome Y = 0 under the intervention X = 0.
counterfactual <- cf("Y", 0, int = c("X" = 0))
joint <- conj(actual_x, counterfactual)
plot_graphviz(g, main = "DAG")
#> Loading required namespace: Rgraphviz
# pwg() and make_cg() are reached with `:::`, i.e. they are not exported by
# cfid and may change between package versions.
plot_graphviz(cfid:::pwg(g, joint), main = "Parallel World Graph")
plot_graphviz(cfid:::make_cg(g, joint), main = "Counterfactual Graph")
# Query: the counterfactual outcome, conditional on the observed treatment.
identifiable(g, conj(counterfactual), conj(actual_x))
#> $id
#> [1] TRUE
#>
#> $prob
#> \frac{\sum_{z} p(z)p_{x,z}(y)p_{z}(x')}{\sum_{z} p(z)p_{z}(x')}
#>
#> $undefined
#> [1] FALSE
Now let’s condition additionally on the outcome \(Y=1\): \(P(Y_{X=0}=0|X=1, Y=1)\). This is the probability of necessity.
# Observed treatment X = 1 and observed outcome Y = 1.
actual_x <- cf("X", 1)
actual_y <- cf("Y", 1)
# `counterfactual` and `g` are the objects created in the preceding example.
joint2 <- conj(actual_x, actual_y, counterfactual)
plot_graphviz(g, main = "DAG")
# pwg() and make_cg() are reached with `:::`, i.e. they are not exported by
# cfid and may change between package versions.
plot_graphviz(cfid:::pwg(g, joint2), main = "Parallel World Graph")
plot_graphviz(cfid:::make_cg(g, joint2), main = "Counterfactual Graph")
# Print the error instead of aborting, so the failure is visible in the output.
tryCatch(
  identifiable(g, conj(counterfactual), conj(actual_x, actual_y)),
  error = function(e) print(e)
)
#> <nodeStackOverflowError: node stack overflow>
Note the difference between the counterfactual graph in this case and the one in the previous case. This one preserves the exogenous variable for \(Y\), which makes sense. Yet the conditional identification algorithm seems to be unable to cope with conditioning \(Y_x\) on \(Y\).
I suspect the following is true:
Identification of counterfactuals that condition on variants of themselves is impossible without parametric assumptions.
It would be good to confirm this.