#install.packages("bnlearn", dependencies=TRUE)
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager", dependencies=TRUE)
#BiocManager::install(c("graph", "Rgraphviz", "RBGL"), dependencies=TRUE)
#install.packages("gRain", dependencies=TRUE)
# "bnlearn" and "gRain" are the two main R packages used for the five examples.
library(bnlearn)
library(gRain)
library(arules)
library(dplyr)
# Select an example
#---------------------------Example 1: Sex as a Cause of Pregnancy---------------------------------------#
# The Bayes Network model structure
# P(Sex,Pregnancy)=P(Sex)P(Pregnancy|Sex)
Ex1.DAG <- model2network('[Sex][Pregnancy|Sex]')
graphviz.plot(Ex1.DAG)
# Complete conditional probability table for the nodes of Sex and Pregnancy
# specify different states of a node
sex <- c("Male","Female")
pregnancy<-c("Yes","No")
# assign probability values to different states
# the Sex node
# parent node
S <- array(dimnames = list(Sex = sex), dim = 2, c(100/200,100/200))
# the Pregnancy node
Pregnancy.value<-data.frame(Sex=c("Male", "Female"), Yes=c(0,0.1), No=c(1,0.9))
Pregnancy.value<-t(data.matrix(Pregnancy.value))
# child node <-- parent node
PR <- array(dimnames = list(Pregnancy=pregnancy, Sex = sex
), dim = c(2,2), c(0,100/100,
10/100,90/100))
# Check the created probability tables
S # the Sex node
PR # the Pregnancy node
# feed the probability tables to the causal structure
cpts <- list(Sex = S, Pregnancy = PR)
Ex1.fit = custom.fit(Ex1.DAG, cpts)
graphviz.chart(Ex1.fit,type = "barprob", scale = c(1.25, 2), bar.col = "green", strip.bg = "lightgray")
# Model compiling
junction<-compile(as.grain(Ex1.fit))
# Node intervention and manipulation Intervene on the nodes
# 1. fix the Sex node at the state "Male"
jSex.Male<-setEvidence(junction, nodes="Sex", states="Male")
querygrain(jSex.Male, nodes="Pregnancy")$Pregnancy
bn.fit.barchart(as.bn.fit(jSex.Male,including.evidence = TRUE)$Pregnancy, main="Pregnancy", xlab="Posterintervention Distribution")
# 2. fix the Sex node at the state "Female"
jSex.Female<-setEvidence(junction, nodes="Sex", states="Female")
querygrain(jSex.Female, nodes="Pregnancy")$Pregnancy
bn.fit.barchart(as.bn.fit(jSex.Female,including.evidence = TRUE)$Pregnancy, main="Pregnancy", xlab="Posterintervention Distribution")
#----------------------------------------------------------------------------------------------------------#
#---------------------------Example 2: The Color of Children's Shoes---------------------------------------#
# The Bayes Network model structure
# P(Sex,Color)=P(Sex)P(Color|Sex)
Ex2.DAG <- model2network('[Sex][Color|Sex]')
graphviz.plot(Ex2.DAG)
# Complete conditional probability table for the nodes of Sex and Color
# specify different states of a node
sex <- c("Boy","Girl")
color<-c("Black","Blue", "Red" )
# assign probability values to different states
# the Sex node
# parent node
S <- array(dimnames = list(Sex = sex), dim = 2, c(50/100,50/100))
# the Color node
# child node <-- parent node
C <- array(dimnames = list(Color=color, Sex = sex
), dim = c(3,2), c(25/50, 15/50, 10/50,
5/50, 10/50, 35/50))
# Check the created probability tables
S # the Sex node
C # the Color node
# feed the probability tables to the causal structure
cpts <- list(Sex = S, Color = C)
Ex2.fit = custom.fit(Ex2.DAG, cpts)
graphviz.chart(Ex2.fit, type = "barprob", scale = c(1.25, 2), bar.col = "green", strip.bg = "lightgray")
Ex2.fit$Sex
Ex2.fit$Color
# Model compiling
junction<-compile(as.grain(Ex2.fit))
# Node intervention and manipulation Intervene on the nodes
# 1. fix the Sex node at the state "Male"
jSex.Boy<-setEvidence(junction, nodes="Sex", states="Boy")
querygrain(jSex.Boy, nodes="Color")$Color
bn.fit.barchart(as.bn.fit(jSex.Boy,including.evidence = TRUE)$Color, main="Color", xlab="Posterintervention Distribution")
# 2. fix the Sex node at the state "Female"
jSex.Girl<-setEvidence(junction, nodes="Sex", states="Girl")
querygrain(jSex.Girl, nodes="Color")$Color
bn.fit.barchart(as.bn.fit(jSex.Girl,including.evidence = TRUE)$Color, main="Color", xlab="Posterintervention Distribution")
#-------------------------------------------------------------------------------------------------------------#
#---------------------------Example 3: Efficacy of Medication Treatment---------------------------------------#
# The Bayes Network model structure
# P(Treatment, Sex, Recovery)=P(Recovery|Sex,Treatment)P(Treatment)P(Sex)
Ex3.DAG <- model2network('[Sex][Treatment][Recovery|Sex:Treatment]')
graphviz.plot(Ex3.DAG)
# Complete conditional probability table for the nodes of Recovery, Treatment and Sex
# specify different states of a node
recovery<- c("Yes","No")
treatment<-c("Medication","Placebo")
sex<-c("Male","Female")
# assign probability values to different states
# the Sex node
# parent node
S <- array(dimnames = list(Sex = sex # Male=18+12+7+3=40; Female=2+8+9+21=40
), dim = 2, c(40/80,40/80))
# the Treatment node
# parent node
T <- array(dimnames = list(Treatment=treatment
# Medication =18+12+2+8=40
# Placebo =7+3+9+21=40
), dim = 2, c(40/80,40/80))
# the Recovery node
# child node <-- parent node, parent node
R <- array(dimnames = list(Recovery=recovery, Treatment=treatment, Sex = sex
# Yes Medication Male=18
# No Medication Male=12
# Yes Placebo Male=7
# No Placebo Male=3
# Yes Medication Female=2
# No Medication Female=8
# Yes Placebo Female=9
# No Placebo Female=21
), dim = c(2,2,2), c(18/30, 12/30,
7/10, 3/10,
2/10, 8/10,
9/30, 21/30))
# Check the created probability tables
S # the Sex node
T # the Treatment node
R # the Recovery node
# feed the probability tables to the causal structure
cpts <- list(Recovery = R, Treatment = T, Sex=S)
Ex3.fit = custom.fit(Ex3.DAG, cpts)
graphviz.chart(Ex3.fit,type = "barprob", scale = c(1.25, 2), bar.col = "green", strip.bg = "lightgray")
Ex3.fit$Recovery
# Model compiling
junction<-compile(as.grain(Ex3.fit))
# Node intervention and manipulation Intervene on the nodes
# 1. fix the Treatment node at the state "Medication"
jtreatment.Medication<-setEvidence(junction, nodes="Treatment", states="Medication")
querygrain(jtreatment.Medication, nodes="Recovery")$Recovery
bn.fit.barchart(as.bn.fit(jtreatment.Medication,including.evidence = TRUE)$Recovery, main="Recovery", xlab="Posterintervention Distribution")
# 2. fix the Treatment node at the state "Placebo"
jtreatment.Placebo<-setEvidence(junction, nodes="Treatment", states="Placebo")
querygrain(jtreatment.Placebo, nodes="Recovery")$Recovery
bn.fit.barchart(as.bn.fit(jtreatment.Placebo,including.evidence = TRUE)$Recovery, main="Recovery", xlab="Posterintervention Distribution")
#---------------------------------------------------------------------------------------------#
#---------------------------Example 4: Should I switch?---------------------------------------#
# The Bayes Network model structure
# P(Host, Jen, Car)=P(Jen)P(Car)P(Jen)P(Host|Jen,Car)
Ex4.DAG <- model2network('[Jen][Car][Host|Jen:Car]')
graphviz.plot(Ex4.DAG)
# Complete conditional probability table for the nodes of Sex and Color
# specify different states of a node
jen <- c("Door1","Door2", "Door3")
car<-c("Door1","Door2", "Door3")
host<-c("Door1","Door2", "Door3")
# assign probability values to different states
# the Jen node
# parent node
J <- array(dimnames = list(Jen = jen), dim = 3, c(1/3,1/3,1/3))
# the Car node
# parent node
C <- array(dimnames = list(Car=car), dim = 3, c(1/3,1/3,1/3))
# the Host node
# child node <-- parent node, parent node
H <- array(dimnames = list(Host=host, Jen=jen, Car=car
# Door1 Door1: Door1 0
# Door2 Door1: Door1 1/2
# Door3 Door1: Door1 1/2
# Door1 Door2: Door1 0
# Door2 Door2: Door1 0
# Door3 Door2: Door1 1
# Door1 Door3: Door1 0
# Door2 Door3: Door1 1
# Door3 Door3: Door1 0
# Door1 Door1: Door2 0
# Door2 Door1: Door2 0
# Door3 Door1: Door2 1
# Door1 Door2: Door2 1/2
# Door2 Door2: Door2 0
# Door3 Door2: Door2 1/2
# Door1 Door3: Door2 1
# Door2 Door3: Door2 0
# Door3 Door3: Door2 0
# Door1 Door1: Door3 0
# Door2 Door1: Door3 1
# Door3 Door1: Door3 0
# Door1 Door2: Door3 1
# Door2 Door2: Door3 0
# Door2 Door2: Door3 0
# Door1 Door3: Door3 1/2
# Door2 Door3: Door3 1/2
# Door3 Door3: Door3 0
), dim = c(3,3,3), c(0, 1/2, 1/2, 0, 0, 1, 0, 1, 0,
0, 0, 1, 1/2, 0, 1/2, 1, 0, 0,
0, 1, 0, 1, 0, 0, 1/2, 1/2, 0))
H
# Check the created probability tables
J # the Jen node
C # the Car node
H # the Host node
# feed the probability tables to the causal structure
cpts <- list(Jen = J, Car = C, Host=H)
Ex4.fit = custom.fit(Ex4.DAG, cpts)
graphviz.chart(Ex4.fit, type = "barprob", scale = c(1.25, 2), bar.col = "green", strip.bg = "lightgray")
Ex4.fit$Host
# Model compiling
junction<-compile(as.grain(Ex4.fit))
# Node intervention and manipulation Intervene on the nodes
# 1. fix the Jen and Car nodes at the states "Door1" and "Door1" respectively
jJenCar.Door1.Door1<-setEvidence(junction, nodes=c("Jen", "Car"), states=c("Door1", "Door1"))
querygrain(jJenCar.Door1.Door1, nodes="Host")$Host
bn.fit.barchart(as.bn.fit(jJenCar.Door1.Door1,including.evidence = TRUE)$Host, main="Host", xlab="Posterintervention Distribution")
# 2. fix the Jen and Car nodes at the states "Door1" and "Door2" respectively
jJenCar.Door1.Door2<-setEvidence(junction, nodes=c("Jen", "Car"), states=c("Door1", "Door2"))
querygrain(jJenCar.Door1.Door2, nodes="Host")$Host
bn.fit.barchart(as.bn.fit(jJenCar.Door1.Door2,including.evidence = TRUE)$Host, main="Host", xlab="Posterintervention Distribution")
#-----------------------------------------------------------------------------------------------------------#
#---------------------------Example 5: : Effect of Smoking on Forced Expiratory Volume---------------------------------------#
# Download the example data file from JSDSE website
FEV.data<-read.table("http://jse.amstat.org/datasets/fev.dat.txt", col.names = c("Age", "FEV", "Height", "Sex", "Smoke"))
# Discretize Age, FEV, and Height.
FEV.data.discrete<-data.frame(Age=arules::discretize(FEV.data$Age, method = "fixed", breaks = c(3, 5, 12, 15, 19), include.lowest=TRUE, right=FALSE, labels = c("3 to 5",
"5 to 12",
"12 to 15",
"15 to 19")),
FEV=arules::discretize(FEV.data$FEV, method = "fixed", breaks = c(0, 1, 2, 3, 4, 5, 6), include.lowest=TRUE, right=FALSE, labels = c("0 to 1",
"1 to 2",
"2 to 3",
"3 to 4",
"4 to 5",
"5 to 6")),
Height=arules::discretize(FEV.data$Height, method = "fixed", breaks = c(45, 50, 55, 60, 65, 70, 75), include.lowest=TRUE, right=FALSE, labels = c("45 to 50",
"50 to 55",
"55 to 60",
"60 to 65",
"65 to 70",
"70 to 75")),
Sex=as.factor(FEV.data$Sex),
Smoke=as.factor(FEV.data$Smoke))
# convert all data to be of factor format.
FEV.data.discrete<- FEV.data.discrete %>% mutate_if((~is.factor(.)==FALSE), as.factor)
# The Bayes Network model structure
# P(FEV,Smoke,Age,Sex) = P(FEV|Smoke,Age,Sex)P(Height|Smoke,Age,Sex)P(Smoke|Age, Sex)P(Age)P(Sex)
Ex5.DAG <- model2network("[Sex][Age][Smoke|Sex:Age][Height|Sex:Age:Smoke][FEV|Sex:Age:Smoke:Height]")
graphviz.plot(Ex5.DAG)
# feed the probability tables to the causal structure
Ex5.fit<-bn.fit(Ex5.DAG, FEV.data.discrete, method = 'bayes', iss=290)
#---------------------------------------------------------------------------------------------------#
# When building a BN model from an empirical data file, Netica follows a specific way to avoid the
# zero-probability problem. Suppose a data file contains three records for the variable X
# with possible values x1 and x2, as below
# X
# x1
# x1
# x1
#
# The frequency table for X based on this data file is
# x1 x2
# X 3 0
#
# To avoid the zero probability for X=x2, Netica adds one to both frequencies of x1 and x2,
# x1 x2
# X 4 1
#
# Then the proability table is calculated to be
# x1 x2
# X 0.8 0.2
#
# In the bn.fit() function, setting method = 'bayes' and iss(imaginary sample size) = 290 approximates
# the Netica's way of aoviding the zero-probability problem for this example.
#---------------------------------------------------------------------------------------------------#
graphviz.chart(Ex5.fit,type = "barprob", scale = c(1.25, 2), bar.col = "green", strip.bg = "lightgray")
# Model compiling
junction<-compile(as.grain(Ex5.fit))
# Node intervention and manipulation Intervene on the nodes
# 1. fix the Smoke node at the state "0"
jSmoke.0<-setEvidence(junction, nodes=c("Smoke"), states=c("0"))
querygrain(jSmoke.0, nodes="FEV")$FEV
# 2. fix the Smoke node at the state "1"
jSmoke.1<-setEvidence(junction, nodes=c("Smoke"), states=c("1"))
querygrain(jSmoke.1, nodes="FEV")$FEV
# 3. fix the Smoke, Age, and Sex nodes at the states "0", "15 to 19", and "1" respectively
text<-"Smoke, Age, Sex"
text2<-strsplit(text, ',')[[1]]
state<-"0, 5 to 12, 1"
state2<-strsplit(state, ',')[[1]]
jSmoke0.AgeSex15191<-setEvidence(junction, nodes=text2, states=state2)
querygrain(jSmoke0.AgeSex15191, nodes="FEV")$FEV
bn.fit.barchart(as.bn.fit(jSmoke0.AgeSex15191,including.evidence = TRUE)$FEV, main="FEV", xlab="Posterintervention Distribution")
# 4. fix the Smoke, Age, and Sex nodes at the states "1", "15 to 19", and "1" respectively
jSmoke1.AgeSex15191<-setEvidence(junction, nodes=c("Smoke", "Age", "Sex"), states=c("1", "5 to 12", "1"))
querygrain(jSmoke1.AgeSex15191, nodes="FEV")$FEV
bn.fit.barchart(as.bn.fit(jSmoke1.AgeSex15191,including.evidence = TRUE)$FEV, main="FEV", xlab="Posterintervention Distribution")