Let’s rebuild the survey model.
library(bnlearn)
## Warning: package 'bnlearn' was built under R version 3.5.2
##
## Attaching package: 'bnlearn'
## The following object is masked from 'package:stats':
##
## sigma
dag <- empty.graph(nodes = c("A","S","E","O","R","T"))
arc.set <- matrix(c("A", "E",
"S", "E",
"E", "O",
"E", "R",
"O", "T",
"R", "T"),
byrow = TRUE, ncol = 2,
dimnames = list(NULL, c("from", "to")))
arcs(dag) <- arc.set
A.lv <- c("young", "adult", "old")
S.lv <- c("M", "F")
E.lv <- c("high", "uni")
O.lv <- c("emp", "self")
R.lv <- c("small", "big")
T.lv <- c("car", "train", "other")
A.prob <- array(c(0.3,0.5,0.2), dim = 3, dimnames = list(A = A.lv))
S.prob <- array(c(0.6,0.4), dim = 2, dimnames = list(S = S.lv))
E.prob <- array(c(0.75,0.25,0.72,0.28,0.88,0.12,0.64,0.36,0.70,0.30,0.90,0.10), dim = c(2,3,2), dimnames = list(E = E.lv, A = A.lv, S = S.lv))
O.prob <- array(c(0.96,0.04,0.92,0.08), dim = c(2,2), dimnames = list(O = O.lv, E = E.lv))
R.prob <- array(c(0.25,0.75,0.2,0.8), dim = c(2,2), dimnames = list(R = R.lv, E = E.lv))
T.prob <- array(c(0.48,0.42,0.10,0.56,0.36,0.08,0.58,0.24,0.18,0.70,0.21,0.09), dim = c(3,2,2), dimnames = list(T = T.lv, O = O.lv, R = R.lv))
cpt <- list(A = A.prob, S = S.prob, E = E.prob, O = O.prob, R = R.prob, T = T.prob)
bn <- custom.fit(dag, cpt)
The function mutilated in bnlearn simulates an intervention by fixing the target node to a certain value, and removing incoming edges to that node (i.e. mutilating the graph).
Here we simulated the effect of setting “R” (city size) to “big”
big_city <- mutilated(bn, list(R = "big"))
graphviz.plot(big_city)
## Loading required namespace: Rgraphviz
We see that the intervention concentrates all the probability in the CPT for R is concentrated on “big”.
big_city$R
##
## Parameters of node R (multinomial distribution)
##
## Conditional probability table:
## small big
## 0 1
Suppose we want to know the causal effect of city size on car use. We can estimate this simply by simulating from the case where R == “big” and R == “small”, and getting a Monte Carlo estimate of the probability of car use.
small_city <- mutilated(bn, list(R = "small"))
small_city_car_use <- mean(rbn(small_city, 50000)['T'] == 'car')
big_city_car_use <- mean(rbn(big_city, 50000)['T'] == 'car')
big_city_car_use - small_city_car_use
## [1] 0.10226