Part I.

Bayes Theorem is an extension of the Law of Conditional Probability. It is used to calculate a conditional P of a condition with evidence present. The prior probability (before the evidence) (usually P(A)), is multiplied by a fraction representing the “strength of the evidence” (P(B|A)/P(B). In other words, it is the probability of evidence being present, given the hypothesis is true (“likelihood”) divided by the marginal P of the evidence (Probability of the evidence being present regardless of the hypothesis being true or not).

Posterior = likelihood * prior / evidence

An popular example of Bayes Theorem:

You have three boxes, A, B, and C.

Box A has 2 red balls and 3 black, B has 3 red and 1 black, and C has 1 red and 4 black. Given you pick a red ball, what is the probability that box was box A?

\(P(A \mid red ball) = \frac{P(red ball \mid A) * P(A)}{P(red ball)}\)

\(P(redball \mid A) = 2/5\)

\(P(A) = 1/3\)

\(P(redball) = P(A) * P(red \mid A) + P(B) * P(red \mid B) + P(C) * P(red \mid C)\)

\(P(redball) = (1/3) * (2/5) + (1/3) * (3/4) + (1/3) * (1/5) = .45\)

\(P(redball \mid A) = \frac{(2/5) * (1/3)}{.45} = .296\)

The Formula for Bayes Theorem is …

\(P(A \mid B) = \frac{P(B \mid A) * P(A)}{P(B)}\)

Part II.

rm(list=ls())
cat("\f")
graphics.off()
gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 521376 27.9    1159452   62   660385 35.3
## Vcells 944206  7.3    8388608   64  1769489 13.6
#Probabilities that are given

A <- .35 #Prob. of Academic Event 
S <- .20 #Prob. of Sporting Event
N <- .45 #Prob. of No Event
#Define the conditionals, B is going to represent Full Garage, B1 is going to represent Not Full Garage

#P(B|A)
B_G_A <- .25 #Prob. of Full given Academic Event

#P(B|S)
B_G_S <- .70 #Prob. of Full given Sporting Event

#P(B|N)
B_G_N <- .05 #Prob. of Full given No Event



#Define the inverse of the conditionals... Not Full given the different events

#P(B'|A)
B1_G_A <- .75 #Prob not full, given Academic event

#P(B'|S)
B1_G_S <- .30 #P not full, given Sporting Event

#P(B'|N)
B1_G_N <- .55 #P not full, given No Event
#Define the Intersections


A_AND_B <- A * B_G_A #P of Academic Event AND Full
S_AND_B <- S * B_G_S #P of Spprting Event AND Full
N_AND_B <- N * B_G_N #P of No Event AND Full
A_AND_B1 <- A * (1-B_G_A) #P Academic Event AND Not Full
S_AND_B1 <- S * (1 - B_G_S) #P of Sporting Event AND Not Full
N_AND_B1 <- N * (1 - B_G_N) #P of No Event AND Not Full


B <- A_AND_B + S_AND_B + N_AND_B #Total P of Being Full
B1 <- 1-B #Total P of Not Being Full
B
## [1] 0.25

Use Bayes Formula

\(P(A \mid B) = \frac{(B \cap A)(P(A)}{\ P(B)}\)

\(P(S \mid B) = \frac{(B \cap S)(P(S)}{P(B)}\)

#Use Bayes Theorem
P_Full_Given_Sports <- (B_G_S * S)/(B)
print(paste("The Probability that there is a Sporting Event, Given the garage is full is", P_Full_Given_Sports))
## [1] "The Probability that there is a Sporting Event, Given the garage is full is 0.56"

Make a Tree Diagram

#Install the packages we need
library(BiocManager)
library("Rgraphviz")
## Loading required package: graph
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: grid
### NODES (labels)
# These are the labels of the nodes on the graph
# To signify "Not A" - we use A' or A prime 

node1     <-  "P"
node2     <-  "A"
node3     <-  "S"
node4     <-  "N"
node5     <-  "B_G_A"
node6     <-  "B1_G_A"
node7     <-  "B_G_S"
node8     <-  "B1_G_S"
node9     <-  "B_G_N"
node10    <-  "B1_G_N"
nodeNames <- c(node1, node2, node3, node4, node5, node6, node7, node8, node9, node10)

rEG   <- new("graphNEL", 
             nodes = nodeNames, 
             edgemode="directed"
             )
### LINES
# Draw the "lines" or "branches" of the probability Tree
#Tells R which nodes to draw lines to and from
rEG  <- addEdge (nodeNames[1], nodeNames[2], rEG, 1)
rEG  <- addEdge (nodeNames[1], nodeNames[3], rEG, 1)
rEG  <- addEdge (nodeNames[1], nodeNames[4], rEG, 1)
rEG  <- addEdge (nodeNames[2], nodeNames[5], rEG, 1)
rEG  <- addEdge (nodeNames[2], nodeNames[6], rEG, 1)
rEG  <- addEdge (nodeNames[3], nodeNames[7], rEG, 1)
rEG  <- addEdge (nodeNames[3], nodeNames[8], rEG, 1)
rEG  <- addEdge (nodeNames[4], nodeNames[9], rEG, 1)
rEG  <- addEdge (nodeNames[4], nodeNames[10], rEG,1)


eAttrs  <- list()

q    <-  edgeNames(rEG)
### PROBABILITY VALUES
# Add the probability values to the the branch lines

eAttrs$label <- c(toString(A),          toString(S),
                  toString(N),    toString(B_G_A),
                  toString(B1_G_A), toString(B_G_S),
                  toString(B1_G_S), toString(B_G_N),
                  toString(B1_G_N)
                  )

names(eAttrs$label) <- c( q[1], q[2], q[3], q[4], q[5], q[6], q[7], q[8], q[9] )
edgeAttrs <- eAttrs


### COLOR
# Set the color, etc, of the tree
attributes <- list(node  = list(label    = "foo", 
                              fillcolor = "lightgreen", 
                              fontsize  = "15"
                              ),
                   edge  = list(color   = "red"),
                   graph = list(rankdir = "LR")
                   )

### PLOT
# Plot the probability tree using Rgraphvis
plot (rEG, edgeAttrs = eAttrs, attrs=attributes)
nodes(rEG)
##  [1] "P"      "A"      "S"      "N"      "B_G_A"  "B1_G_A" "B_G_S"  "B1_G_S"
##  [9] "B_G_N"  "B1_G_N"
edges(rEG)
## $P
## [1] "A" "S" "N"
## 
## $A
## [1] "B_G_A"  "B1_G_A"
## 
## $S
## [1] "B_G_S"  "B1_G_S"
## 
## $N
## [1] "B_G_N"  "B1_G_N"
## 
## $B_G_A
## character(0)
## 
## $B1_G_A
## character(0)
## 
## $B_G_S
## character(0)
## 
## $B1_G_S
## character(0)
## 
## $B_G_N
## character(0)
## 
## $B1_G_N
## character(0)
#Place text on the graph- showing probabilities for different nodes
text(500,500, A_AND_B,  cex = .8)
text(500,400, A_AND_B1, cex = .8)
text(500,300, S_AND_B,  cex = .8)
text(500,200, S_AND_B1, cex = .8)
text(500,100, N_AND_B,  cex = .8)
text(500,0, N_AND_B1,   cex = .8)
text(300,430, "(B|A)",  cex = .8)
text(300,360, "(B1|A)", cex = .8)
text(300,300, "(B|S)",  cex = .8)
text(300,220, "(B1|S)", cex = .8)
text(300,140, "(B|N)",  cex = .8)
text(300,70, "(B1|N)", cex = .8)


text(80,100,  paste("P(A):",  A),  cex = .9, col = "darkgreen")
text(80,50,   paste("P(S):",  S),  cex = .9, col = "darkgreen")
text(80,20,   paste("P(N):",  N),  cex = .9, col = "darkgreen")
text(160,50,  paste("P(B):",  B),  cex = .9, col = "darkgreen")
text(160,20,  paste("P(B1):", B1), cex = .9, col = "darkgreen")
text(160,400, paste("(S|B):", P_Full_Given_Sports), cex = .9, col = "darkgreen")

HW Problem 9:

An opioid urinalysis test is 95% sensitive for a 30-day period, meaning that if a person
has actually used opioids within 30 days, they will test positive 95% of the time P( + |
User) =.95. The same test is 99% specific, meaning that if they did not use opioids within
30 days, they will test negative P( - | Not User) = .99. Assume that 3% of the population
are users. Then what is the probability that a person who tests positive is actually a user
P(User | +)?
Show your R code.
You may use a tree, table, or Bayes to answer this problem.

Positive_G_User <- .95
Negative_G_NotUser <- .99
User <- .03
Not_User <- .97
Positive_G_NotUser <- .01


Positive <- (User * Positive_G_User) + (Not_User * Positive_G_NotUser)
#Bayes Theorem
# P(User|+) = (P(+|User) * P(User)) / P(+)

User_G_Positive <- (Positive_G_User * User) / (Positive)
print(paste ("The probability of a person being a user, given they test positive is", User_G_Positive))
## [1] "The probability of a person being a user, given they test positive is 0.746073298429319"