# Remove every (non-hidden) object from the current environment so the
# calculations below start from a clean workspace.
# NOTE(review): this does not detach packages or reset options, and it wipes
# the caller's workspace if the script is sourced interactively; running in a
# fresh R session is the more reliable way to get a clean state.
rm(list = ls()) # Clear data
Prompt: Please explain Bayes' Theorem in your own words, and give an example. Less than 10 sentences. Also, write out the formula. Pick up on how to type equations in R Markdown using LaTeX terminology.
Bayes' theorem: It helps us update the probability of an event based on new information. To simplify, we can calculate the probability of an event A given that a related event B has occurred, as long as we know the prior probability of A, the overall probability of B, and the probability of B given A. The formula is written below:
$P(A \mid B)$
\[ P(A \mid B) = \frac{P(B \mid A) \cdot P(A)}{P(B)} \]
Example: We want to determine if the signed jersey being inspected is fake (event A) based on certain characteristics/details of the jersey (event B)
P(A): The prior probability of a jersey being fake. About 20% of the jerseys you inspect are fake, so P(A) = 0.20.
P(B|A): The probability that a jersey exhibits certain unusual details given that it is fake. There is a 70% chance that a fake jersey contains some of these characteristics, so P(B|A) = 0.70.
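P(B): The overall probability of us observing/identifying these characteristics on any inspected jersey is 90%, so P(B) = 0.90. Plugging in: P(A|B) = (0.70 × 0.20) / 0.90 ≈ 0.156, so a jersey showing these details has about a 15.6% chance of being fake.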
Loading Packages
#install.packages("BiocManager")
library(BiocManager)
#BiocManager::install("Rgraphviz")
library(Rgraphviz)
## Loading required package: graph
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: grid
Prompt: Jose visits campus every Thursday evening. However, some days the parking garage is full, often due to college events. There are academic events on 35% of evenings, sporting events on 20% of evenings, and no events on 45% of evenings. When there is an academic event, the garage fills up about 25% of the time, and it fills up 70% of evenings with sporting events. On evenings when there are no events, it only fills up about 5% of the time. If Jose comes to campus and finds the garage full, what is the probability that there is a sporting event? Use a tree diagram to solve this problem.
Academic events occurrence: 35%
Sporting events occurrence: 20%
No events occurrence: 45%
\[ P(S \mid F) = \frac{P(F \mid S) \cdot P(S)}{P(F)} \]
P(F∣S) is the probability of the garage being full when there is a sporting event (70%)
P(S) is the prior probability of a sporting event (20%)
P(F) is the probability of the garage being full on any given evening (we can find this)
Objective: P(S|F) We want to find the probability that there is a sporting event (S) given that the parking garage is full (F).
P(S∩F) = 20%*70% = 14%
P(F) = (0.35*0.25) + (0.20*0.70) + (0.45*0.05)
= 0.0875 + 0.14 + 0.0225 = 0.25
P(S|F) = 0.14 / 0.25 = 0.56 or 56%
# ---- Inputs ---------------------------------------------------------------
# Priors: probability that a given evening has each kind of event.
a <- 0.35  # academic event
s <- 0.20  # sporting event
n <- 0.45  # no event

# Likelihoods: probability the garage is full given each kind of evening.
a_full <- 0.25  # full | academic event
s_full <- 0.70  # full | sporting event
n_full <- 0.05  # full | no event

# ---- Derived values (everything from here on is computed) ------------------
# Complements: probability the garage is NOT full given each kind of evening.
a_nfull <- 1 - a_full
s_nfull <- 1 - s_full
n_nfull <- 1 - n_full

# Joint probabilities P(event AND full) and P(event AND not full), one pair
# per kind of evening. These are the values shown on the leaves of the tree.
aANDa_full  <- a * a_full
aANDa_nfull <- a * a_nfull
sANDs_full  <- s * s_full
sANDs_nfull <- s * s_nfull
nANDn_full  <- n * n_full
nANDn_nfull <- n * n_nfull

# Law of total probability: P(F) is the sum of the three "full" joints.
gar_full <- aANDa_full + sANDs_full + nANDn_full

# Bayes' theorem: P(S | F) = P(S AND F) / P(F).
sGivenf <- sANDs_full / gar_full

# Report the posterior, rounded to four decimal places.
print(paste0("Answer ", round(sGivenf, digits = 4)))
## [1] "Answer 0.56"
#############################################
## 3. BUILD AND PLOT THE PROBABILITY TREE
#############################################

### NODES (labels)
# Root node "P", one node per kind of evening, and a Full / NFull (not full)
# leaf pair under each kind of evening.
nodeNames <- c("P",
               "Academic", "Sport", "None",
               "Acad_Full", "Acad_NFull",
               "Sport_Full", "Sport_NFull",
               "None_Full", "None_NFull")

# Directed graph that holds the nodes; branches are added below.
rEG <- new("graphNEL", nodes = nodeNames, edgemode = "directed")

### LINES
# One row per branch: (index of parent, index of child) into nodeNames.
# Branches are added one at a time, in row order, each with weight 1.
branches <- rbind(c(1, 2), c(1, 3), c(1, 4),
                  c(2, 5), c(2, 6),
                  c(3, 7), c(3, 8),
                  c(4, 9), c(4, 10))
for (i in seq_len(nrow(branches))) {
  rEG <- addEdge(nodeNames[branches[i, 1]], nodeNames[branches[i, 2]], rEG, 1)
}

### PROBABILITY VALUES
# Label each branch with its probability. The labels are matched to edges by
# position, so this vector must follow the order returned by edgeNames(rEG):
# the three event branches first, then the full / not-full pair of each event.
q <- edgeNames(rEG)
branch_probs <- c(a, s, n,
                  a_full, a_nfull,
                  s_full, s_nfull,
                  n_full, n_nfull)
eAttrs <- list(label = vapply(branch_probs, toString, character(1)))
names(eAttrs$label) <- q[1:9]
edgeAttrs <- eAttrs

### COLOR
# Global drawing attributes: node appearance, edge color, and a left-to-right
# layout (rankdir = "LR").
# NOTE(review): the node label "foo" looks like a leftover placeholder --
# confirm whether it has any effect on the rendered node labels.
attributes <- list(
  node  = list(label = "foo",
               fillcolor = "darkgreen",
               fontsize = "15",
               fontcolor = "white"),
  edge  = list(color = "darkgreen"),
  graph = list(rankdir = "LR")
)

### PLOT
# Draw the probability tree with Rgraphviz, then list the node names.
plot(rEG, edgeAttrs = eAttrs, attrs = attributes)
nodes(rEG)
## [1] "P" "Academic" "Sport" "None" "Acad_Full"
## [6] "Acad_NFull" "Sport_Full" "Sport_NFull" "None_Full" "None_NFull"
# Annotate the tree that was just plotted. All positions are literal x/y
# coordinates in the plot's coordinate system.
# NOTE(review): the coordinates appear to be hand-tuned for this particular
# layout -- re-check them if the graph or the device size changes.

# Joint probability beside each leaf, from the top leaf to the bottom leaf.
text(570, 400, aANDa_full, cex = 0.8)
text(570, 320, aANDa_nfull, cex = 0.8)
text(570, 250, sANDs_full, cex = 0.8)
# Bayes step for the sporting branch, P(S AND F) / P(F) = P(S | F), built from
# the computed values so it cannot drift out of sync with the inputs.
text(550, 210,
     paste(round(sANDs_full, digits = 4), "/",
           round(gar_full, digits = 4), "=",
           round(sGivenf, digits = 4)),
     cex = 0.6, col = "darkgreen")
text(570, 170, sANDs_nfull, cex = 0.8)
text(570, 100, nANDn_full, cex = 0.8)
text(570, 20, nANDn_nfull, cex = 0.8)

# Summary table in the lower-left corner: the priors in the first column ...
text(50, 80, paste("P(A):", a), cex = .9, col = "darkgreen")
text(46, 60, paste("P(S):", s), cex = .9, col = "darkgreen")
text(50, 40, paste("P(N):", n), cex = .9, col = "darkgreen")
# ... and the quantities used in Bayes' theorem in the second column.
text(141, 80, paste("P(F):", gar_full), cex = .9, col = "darkgreen")
# s_full is the conditional P(F | S), so the label carries the "|".
text(141, 60, paste("P(F|S):", s_full), cex = .9, col = "darkgreen")
text(150, 40, paste("P(S∩F):", sANDs_full), cex = .9, col = "darkgreen")
text(149, 20, paste("P(S|F):", sGivenf), cex = .9, col = "darkgreen")
We can see that we arrive at the same answer here through the probability tree.
# List, for every node, the nodes its outgoing branches point to, as a check
# that the tree was wired up as intended (leaf nodes have no outgoing edges).
edges(rEG)
## $P
## [1] "Academic" "Sport" "None"
##
## $Academic
## [1] "Acad_Full" "Acad_NFull"
##
## $Sport
## [1] "Sport_Full" "Sport_NFull"
##
## $None
## [1] "None_Full" "None_NFull"
##
## $Acad_Full
## character(0)
##
## $Acad_NFull
## character(0)
##
## $Sport_Full
## character(0)
##
## $Sport_NFull
## character(0)
##
## $None_Full
## character(0)
##
## $None_NFull
## character(0)