# Start from an empty workspace: remove every object that ls() lists in the
# current environment.
# NOTE(review): this does not detach loaded packages or reset options, and it
# silently wipes the caller's objects if the script is sourced -- confirm it is
# really wanted here.
rm(list = ls()) # Clear data

Part 1

Prompt: Please explain Bayes' Theorem in your own words, and give an example. Less than 10 sentences. Also, write out the formula. Pick up on how to type equations in R Markdown using LaTeX terminology.

Bayes' theorem: It helps us understand and update the probability of an event based on new information. To simplify, we can calculate the probability of an event A given that a related event B has occurred, as long as we know the probability of A on its own, the probability of B on its own, and the probability of B given A. The formula is written below:

$P(A \mid B)$

\[ P(A \mid B) = \frac{P(B \mid A) \cdot P(A)}{P(B)} \]

Example: We want to determine if the signed jersey being inspected is fake (event A) based on certain characteristics/details of the jersey (event B)

Part 2

Loading Packages

#install.packages("BiocManager")

library(BiocManager)

#BiocManager::install("Rgraphviz")
library(Rgraphviz)
## Loading required package: graph
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: grid

Prompt: Jose visits campus every Thursday evening. However, some days the parking garage is full, often due to college events. There are academic events on 35% of evenings, sporting events on 20% of evenings, and no events on 45% of evenings. When there is an academic event, the garage fills up about 25% of the time, and it fills up 70% of evenings with sporting events. On evenings when there are no events, it only fills up about 5% of the time. If Jose comes to campus and finds the garage full, what is the probability that there is a sporting event? Use a tree diagram to solve this problem.

Academic events occurrence: 35%

Sporting events occurrence: 20%

No events occurrence: 45%

\[ P(S \mid F) = \frac{P(F \mid S) \cdot P(S)}{P(F)} \]

Objective: P(S|F) We want to find the probability that there is a sporting event (S) given that the parking garage is full (F).

P(S∩F) = 20%*70% = 14%

P(F) = (0.35*0.25) + (0.20*0.70) + (0.45*0.05)

= 0.0875 + 0.14 + 0.0225 = 0.25

P(S|F) = 0.14 / 0.25 = 0.56 or 56%

Answer: We find that the probability is 0.56, or 56%. The probability tree that confirms this result is included below:

# ---- Inputs taken from the problem statement --------------------------------

# Prior probability of each kind of evening
a <- 0.35  # academic event
s <- 0.20  # sporting event
n <- 0.45  # no event

# Probability the garage is full, conditional on the kind of evening
a_full <- 0.25  # full given an academic event
s_full <- 0.70  # full given a sporting event
n_full <- 0.05  # full given no event


# ---- Derived values: nothing below this line is entered by hand -------------

# Complement of each conditional: garage NOT full, given the kind of evening
a_nfull <- 1 - a_full
s_nfull <- 1 - s_full
n_nfull <- 1 - n_full

# Joint probability of every (kind of evening, garage state) pair,
# i.e. prior times conditional
aANDa_full  <- a * a_full
aANDa_nfull <- a * a_nfull
sANDs_full  <- s * s_full
sANDs_nfull <- s * s_nfull
nANDn_full  <- n * n_full
nANDn_nfull <- n * n_nfull

# Total probability that the garage is full: sum of the three "full" joints
gar_full <- aANDa_full + sANDs_full + nANDn_full

# Bayes' theorem: probability of a sporting event given that the garage is full
sGivenf <- sANDs_full / gar_full

# Report the posterior rounded to four decimal places
print(paste0("Answer ", round(sGivenf, digits = 4)))
## [1] "Answer 0.56"
# -----------------------------------------------------------------------------
# 3. Build and draw the probability tree
# -----------------------------------------------------------------------------

### NODES (labels)
# Text shown inside each node of the tree:
#   "P"                       root
#   "Academic"/"Sport"/"None" kind of evening
#   "*_Full" / "*_NFull"      garage full / not full for that kind of evening
node1  <- "P"
node2  <- "Academic"
node3  <- "Sport"
node4  <- "None"
node5  <- "Acad_Full"
node6  <- "Acad_NFull"
node7  <- "Sport_Full"
node8  <- "Sport_NFull"
node9  <- "None_Full"
node10 <- "None_NFull"
nodeNames <- c(
  node1, node2, node3, node4, node5,
  node6, node7, node8, node9, node10
)

# Directed graph that holds the ten nodes; edges are added next
rEG <- new("graphNEL", nodes = nodeNames, edgemode = "directed")

### BRANCHES
# Add one directed edge (weight 1) per branch of the tree. The order in which
# the edges are added matters: the probability labels further down are paired
# with the edges by position.

# Root -> kind of evening
rEG <- addEdge(node1, node2, rEG, 1)
rEG <- addEdge(node1, node3, rEG, 1)
rEG <- addEdge(node1, node4, rEG, 1)

# Academic -> full / not full
rEG <- addEdge(node2, node5, rEG, 1)
rEG <- addEdge(node2, node6, rEG, 1)

# Sport -> full / not full
rEG <- addEdge(node3, node7, rEG, 1)
rEG <- addEdge(node3, node8, rEG, 1)

# None -> full / not full
rEG <- addEdge(node4, node9, rEG, 1)
rEG <- addEdge(node4, node10, rEG, 1)

### PROBABILITY VALUES
# Label every branch with its probability. `q` holds the edge names of the
# graph; the i-th probability below is attached to the i-th edge name.
q <- edgeNames(rEG)

eAttrs <- list(
  label = vapply(
    list(a, s, n, a_full, a_nfull, s_full, s_nfull, n_full, n_nfull),
    toString,
    character(1)
  )
)
names(eAttrs$label) <- q[1:9]
edgeAttrs <- eAttrs

### COLOR
# Graph-wide drawing defaults: white text on dark green nodes, dark green
# edges, and rankdir = "LR" so the tree is laid out left to right.
attributes <- list(
  node = list(
    label     = "foo",
    fillcolor = "darkgreen",
    fontsize  = "15",
    fontcolor = "white"
  ),
  edge  = list(color = "darkgreen"),
  graph = list(rankdir = "LR")
)

### PLOT
# Draw the probability tree with Rgraphviz, then list the node names
plot(rEG, edgeAttrs = eAttrs, attrs = attributes)
nodes(rEG)
##  [1] "P"           "Academic"    "Sport"       "None"        "Acad_Full"  
##  [6] "Acad_NFull"  "Sport_Full"  "Sport_NFull" "None_Full"   "None_NFull"
# Annotate the tree that was just plotted. All x/y positions are hand-picked
# plot coordinates.
# NOTE(review): they presumably need re-tuning if the layout or the size of the
# graphics device changes.

# Joint probability written next to each leaf
text(570, 400, aANDa_full,  cex = 0.8)
text(570, 320, aANDa_nfull, cex = 0.8)

text(570, 250, sANDs_full,  cex = 0.8)
# Bayes step, built from the computed values so it cannot drift out of sync
# with the inputs (renders as "0.14 / 0.25 = 0.56" for the current numbers)
text(550, 210,
     paste(round(sANDs_full, digits = 4), "/",
           round(gar_full,   digits = 4), "=",
           round(sGivenf,    digits = 4)),
     cex = 0.6, col = "darkgreen")
text(570, 170, sANDs_nfull, cex = 0.8)

text(570, 100, nANDn_full,  cex = 0.8)
text(570, 20,  nANDn_nfull, cex = 0.8)

# Summary table: priors of the three kinds of evening
text(50, 80, paste("P(A):", a), cex = 0.9, col = "darkgreen")
text(46, 60, paste("P(S):", s), cex = 0.9, col = "darkgreen")
text(50, 40, paste("P(N):", n), cex = 0.9, col = "darkgreen")

# Summary table: the quantities that go into Bayes' theorem.
# s_full is the conditional probability of a full garage given a sporting
# event, so its label is P(F|S).
text(141, 80, paste("P(F):",   gar_full),   cex = 0.9, col = "darkgreen")
text(141, 60, paste("P(F|S):", s_full),     cex = 0.9, col = "darkgreen")
text(150, 40, paste("P(S∩F):", sANDs_full), cex = 0.9, col = "darkgreen")
text(149, 20, paste("P(S|F):", sGivenf),    cex = 0.9, col = "darkgreen")

#text(160,50, paste("P(B):"   ,round(b,       digits = 2)), cex = .9)
#text(160,20, paste("P(B'):"  ,round(notB,    digits = 2)), cex = .9)

#text(80,420, paste("P(A|B):" ,round(aGivenb, digits = 2)), cex = .9, col = "blue")

We can see that the probability tree gives the same answer as the hand calculation above: P(S|F) = 0.56.

# List, for every node of the tree, the nodes its outgoing edges point to
edges(rEG)
## $P
## [1] "Academic" "Sport"    "None"    
## 
## $Academic
## [1] "Acad_Full"  "Acad_NFull"
## 
## $Sport
## [1] "Sport_Full"  "Sport_NFull"
## 
## $None
## [1] "None_Full"  "None_NFull"
## 
## $Acad_Full
## character(0)
## 
## $Acad_NFull
## character(0)
## 
## $Sport_Full
## character(0)
## 
## $Sport_NFull
## character(0)
## 
## $None_Full
## character(0)
## 
## $None_NFull
## character(0)