1.Problem Set 1

Your colleague either commutes by train or by the bus. 20 days of the month,
she takes the train and the remaining 10 days she takes the bus. If she takes the
train, she reaches work on time with a probability of 0.9. If she takes the bus, she
frequently gets stuck in traffic and reaches work on time with a probability of 0.5.
Given that she was on time today, what is the probability that she took the bus to
work today?

1.Solution Set 1

# let B be the event of taking the bus. Hence the probability p.B is
p_B<- 10/30

# let T be the event of taking the bus. Hence the probability p.T is:
p_T<- 20/30

# Also, when my colleague takes the bus, the probability of her reaches work on time is: 
p_B_ontime <- .5

# Also, when my colleague takes the train, the probability of her reaches work on time is: 
p_T_ontime<- .9


# Therefore,  given that she was on time today, the probability that she took the bus to
# work today is:

p_B_given_ontime <- p_B * p_B_ontime  / ( p_B * p_B_ontime + p_T * p_T_ontime)
p_B_given_ontime 
## [1] 0.2173913

2.Problem Set 2

In the Grade Network that we looked at in the notes, what happens to the probability of
Difficulty of Course when you present the evidence that the received recommendation letter
was good? In addition, now present the evidence that both SAT scores were good and
the letter of recommendation was good, What is the probability of the Difficulty of Course
now? You should use the gRain package in R to build your network and perform these
calculations.
You may need to install RBGL package from BioConductor in R to get gRain
instructions on RBGL.
Please submit your assignment as an R markdown document.

2.Solution Set 2:

## CPTspec with probabilities:
##  P( Difficulty )
##  P( Intelligence )
##  P( Grade | Difficulty Intelligence )
##  P( SAT | Intelligence )
##  P( Letter | Grade )
## Independence network: Compiled: FALSE Propagated: FALSE 
##   Nodes: chr [1:5] "Difficulty" "Intelligence" "Grade" "SAT" ...

# Presenting the evidence that the received recommendation letter was good
bayse.net2 <- setEvidence(bayse.net1, nodes="Letter", state="High")

# Observing the probability of Difficulty of the course went from .7 to .62685 
querygrain(bayse.net2, nodes="Difficulty", type="marginal")
## $Difficulty
## Difficulty
##       Yes        No 
## 0.6268561 0.3731439
# In addition, now present the evidence that both SAT scores were good and
# the letter of recommendation was good,

bayse.net3 <- setEvidence(bayse.net2, nodes="SAT", state="High")

# What is the probability of the Difficulty of Course now?
querygrain(bayse.net3, nodes="Difficulty", type="marginal")
## $Difficulty
## Difficulty
##       Yes        No 
## 0.6676522 0.3323478

Using gRain package in R in the chest clinic example…

# Specify conditional probability tables
 yn <- c("yes","no")
 a <- cptable(~asia, values=c(1,99),levels=yn)
 t.a <- cptable(~tub|asia, values=c(5,95,1,99),levels=yn)
 s <- cptable(~smoke, values=c(5,5), levels=yn)
 l.s <- cptable(~lung|smoke, values=c(1,9,1,99), levels=yn)
 b.s <- cptable(~bronc|smoke, values=c(6,4,3,7), levels=yn)
 e.lt <- cptable(~either|lung:tub,values=c(1,0,1,0,1,0,0,1),levels=yn)
 x.e <- cptable(~xray|either, values=c(98,2,5,95), levels=yn)
 d.be <- cptable(~dysp|bronc:either, values=c(9,1,7,3,8,2,1,9), levels=yn)

# Compile list of conditional probability tables and create the network:
 plist <- compileCPT(list(a, t.a, s, l.s, b.s, e.lt, x.e, d.be))
 plist
## CPTspec with probabilities:
##  P( asia )
##  P( tub | asia )
##  P( smoke )
##  P( lung | smoke )
##  P( bronc | smoke )
##  P( either | lung tub )
##  P( xray | either )
##  P( dysp | bronc either )
 net1 <- grain(plist)
 net1
## Independence network: Compiled: FALSE Propagated: FALSE 
##   Nodes: chr [1:8] "asia" "tub" "smoke" "lung" "bronc" "either" ...
#The network can be queried to give marginal probabilities:
 querygrain(net1, nodes=c("lung","bronc"), type="marginal")
## $lung
## lung
##   yes    no 
## 0.055 0.945 
## 
## $bronc
## bronc
##  yes   no 
## 0.45 0.55
# The network can be queried to give joint distribution:
  querygrain(net1,nodes=c("lung","bronc"), type="joint")
##      bronc
## lung     yes     no
##   yes 0.0315 0.0235
##   no  0.4185 0.5265
# Evidence is entered for asia and dysp
  net12 <- setEvidence(net1, evidence=list(asia="yes", dysp="yes"))

 # The network can be queried again:
querygrain( net12, nodes=c("lung","bronc"), type="marginal" )
## $lung
## lung
##        yes         no 
## 0.09952515 0.90047485 
## 
## $bronc
## bronc
##       yes        no 
## 0.8114021 0.1885979
querygrain( net12, nodes=c("lung","bronc"), type="joint" )
##      bronc
## lung         yes         no
##   yes 0.06298076 0.03654439
##   no  0.74842132 0.15205354
# Specifying the hard evidence that the person has recently been to Asia
setFinding( net1, nodes="asia", states="yes")
## Independence network: Compiled: TRUE Propagated: TRUE 
##   Nodes: chr [1:8] "asia" "tub" "smoke" "lung" "bronc" "either" ...
##   Evidence:
##   nodes is.hard.evidence hard.state
## 1  asia             TRUE        yes
##   pEvidence: 0.010000
# Now, let's test the virtual evidence... 
# in other words, let's assume that the person does not know or does not remember being in Asia 
# too sick to remember

# We can then introduce a new variable guess.asia with asia as its only parent.
g.a <- parray(c("guess.asia", "asia"), levels=list(yn, yn), values=c(.8,.2, .1,.9))
plist2<- compileCPT(list(a, t.a, s, l.s, b.s, e.lt, x.e, d.be, g.a))



# Also, if the person is too ill to confirm if he/she smokes   
# We can then introduce a new variable guess.smoke with smoke as its only parent.
g.s <- parray(c("guess.smoke", "smoke"), levels=list(yn, yn), values=c(.8,.2, .1,.9))
plist3<- compileCPT(list(a, t.a, s, l.s, b.s, e.lt, x.e, d.be, g.a, g.s))
plist3
## CPTspec with probabilities:
##  P( asia )
##  P( tub | asia )
##  P( smoke )
##  P( lung | smoke )
##  P( bronc | smoke )
##  P( either | lung tub )
##  P( xray | either )
##  P( dysp | bronc either )
##  P( guess.asia | asia )
##  P( guess.smoke | smoke )

From the results below, please note that the probability of the lung cancer hasn’t

changed using the virtual evidence of the person being in Asia or not. However, it has changed #### based the possibility that the person is smoker.

Similarly, the probability of having tuberculosis hasn’t changed because the person is smoker. #### However, it has changed based on the person being in Asia or not.

# being in Asia or not
plist2$tub
##      asia
## tub    yes   no
##   yes 0.05 0.01
##   no  0.95 0.99
plist2$lung
##      smoke
## lung  yes   no
##   yes 0.1 0.01
##   no  0.9 0.99
# Being smoker or not

plist3$tub
##      asia
## tub    yes   no
##   yes 0.05 0.01
##   no  0.95 0.99
plist3$lung
##      smoke
## lung  yes   no
##   yes 0.1 0.01
##   no  0.9 0.99

Plotting the network with the virtual evidence:

if the person does not remember if he/she was in Asia

# Plotting the network with the virtual evidence:
# if the person does not remember if he/she was  in Asia
 net1.g.a <- grain(plist2)
 plot(net1.g.a)

Plotting the network with the virtual evidence:

if the person does not remember if he/she was either in Asia or smokes

 # Plotting the network with the virtual evidence:
 # if the person does not remember if he/she was either in Asia or smokes
net1.g.s <- grain(plist3)
 plot(net1.g.s)