Bayes’ Theorem can yield surprising results. Take a look at Guided Practice 3.43 and attempt to solve this using Bayes’ formula. Interpret the results. Then use the attached code and solve via a tree diagram in R. You will need to change the initial parameters to the appropriate values. Attach your final graph to your submission. Note: for newer versions of R, you will need to install BiocManager.

  1. install.packages("BiocManager")

  2. library(BiocManager)

  3. BiocManager::install("Rgraphviz")

Guided Practice 3.43: Jose visits campus every Thursday evening. However, some days the parking garage is full, often due to college events. There are academic events on 35% of evenings, sporting events on 20% of evenings, and no events on 45% of evenings. When there is an academic event, the garage fills up about 25% of the time, and it fills up 70% of evenings with sporting events. On evenings when there are no events, it only fills up about 5% of the time. If Jose comes to campus and finds the garage full, what is the probability that there is a sporting event? Use a tree diagram to solve this problem.

  • If Jose comes to campus and finds the garage full, the probability that there is a sporting event is 56%.

1 Install Package

See installation help.

I am also setting warning=FALSE and message=FALSE in the code chunk below for cleaner output, and commenting out install.package() commands to speed up the code.

  • sessionInfo() is printed before and after installation of libraries to show you what Rgraphviz adds.
rm(list=ls())       # empty environment
gc()                # Clear unused memory
##          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 528772 28.3    1176135 62.9         NA   669282 35.8
## Vcells 973829  7.5    8388608 64.0      32768  1840447 14.1
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.35   R6_2.5.1        jsonlite_1.8.8  evaluate_0.23  
##  [5] cachem_1.0.8    rlang_1.1.2     cli_3.6.2       rstudioapi_0.14
##  [9] jquerylib_0.1.4 bslib_0.5.0     rmarkdown_2.21  tools_4.2.1    
## [13] xfun_0.39       yaml_2.3.7      fastmap_1.1.1   compiler_4.2.1 
## [17] htmltools_0.5.5 knitr_1.45      sass_0.4.6
# install.packages("BiocManager")
library(BiocManager)
# BiocManager::install("Rgraphviz")

require("Rgraphviz")
BiocManager::valid()     ## R version 3.5 or later
## 
## * sessionInfo()
## 
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] Rgraphviz_2.40.0    graph_1.74.0        BiocGenerics_0.42.0
## [4] BiocManager_1.30.22
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.35   R6_2.5.1        jsonlite_1.8.8  stats4_4.2.1   
##  [5] evaluate_0.23   cachem_1.0.8    rlang_1.1.2     cli_3.6.2      
##  [9] rstudioapi_0.14 jquerylib_0.1.4 bslib_0.5.0     rmarkdown_2.21 
## [13] tools_4.2.1     xfun_0.39       yaml_2.3.7      fastmap_1.1.1  
## [17] compiler_4.2.1  htmltools_0.5.5 knitr_1.45      sass_0.4.6     
## 
## Bioconductor version '3.15'
## 
##   * 424 packages out-of-date
##   * 2 packages too new
## 
## create a valid installation with
## 
##   BiocManager::install(c(
##     "acepack", "AER", "animint2", "askpass", "BayesFactor", "bayestestR",
##     "bdsmatrix", "BH", "BiocManager", "blob", "bookdown", "brew", "brio",
##     "broom", "broom.helpers", "bslib", "cachem", "callr", "car", "checkmate",
##     "chron", "Ckmeans.1d.dp", "classInt", "cli", "clock", "coda", "coin",
##     "collapse", "colourpicker", "commonmark", "covr", "cowplot", "cpp11",
##     "credentials", "crosstable", "crosstalk", "crul", "Cubist", "curl", "cvms",
##     "cyclocomp", "data.table", "DataExplorer", "datamods", "datawizard", "DBI",
##     "dbplyr", "deldir", "DEoptimR", "desc", "DiagrammeR", "dials",
##     "DiceDesign", "digest", "directlabels", "distributional", "downlit",
##     "dplyr", "DT", "dtplyr", "e1071", "earth", "ellipse", "emmeans",
##     "esquisse", "evaluate", "expm", "expss", "fable", "fabletools",
##     "FactoMineR", "fansi", "fastICA", "fastmap", "fastmatrix", "feasts",
##     "flextable", "float", "FNN", "fontawesome", "forcats", "forecast",
##     "Formula", "fracdiff", "fs", "fstcore", "ftExtra", "future",
##     "future.apply", "gam", "gamlss", "gamlss.data", "gamlss.dist", "gargle",
##     "gdtools", "geometries", "gert", "gganimate", "ggbeeswarm", "ggbiplot",
##     "ggcorrplot", "ggdag", "ggdendro", "ggeffects", "ggExtra", "ggforce",
##     "ggfortify", "ggiraph", "ggplot2", "ggprism", "ggpubr", "ggraph",
##     "ggrepel", "ggsci", "ggthemes", "gh", "glmnet", "globals", "glue", "gmp",
##     "googledrive", "googlesheets4", "googleVis", "GPArotation", "gplots",
##     "graphlayouts", "gstat", "gt", "gtable", "gtExtras", "gtools", "gtsummary",
##     "hardhat", "haven", "hipread", "HistData", "Hmisc", "hms", "hrbrthemes",
##     "htmlTable", "htmltools", "htmlwidgets", "httpuv", "httr", "huxtable",
##     "igraph", "infer", "influenceR", "insight", "interp", "intervals", "ipred",
##     "ipumsr", "janitor", "jsonlite", "kableExtra", "kernlab", "klaR", "knitr",
##     "ks", "labeling", "labelled", "laeken", "later", "lava", "lavaan",
##     "leaflet", "lemon", "libcoin", "lifecycle", "lintr", "listenv", "lme4",
##     "logr", "lubridate", "lwgeom", "maditr", "magick", "markdown",
##     "MatrixModels", "matrixStats", "maxLik", "mclust", "mcmc", "MCMCpack",
##     "mda", "mice", "minqa", "miscTools", "mlbench", "modeldata", "modelenv",
##     "modelr", "modelsummary", "mosaic", "multcomp", "multcompView",
##     "multicool", "mvtnorm", "naniar", "norm", "odbc", "officer",
##     "opdisDownsampling", "openssl", "openxlsx", "packrat", "paletteer",
##     "parallelly", "parameters", "parsnip", "party", "pastecs", "patchwork",
##     "pbapply", "pcaPP", "performance", "pillar", "pkgbuild", "pkgload", "plm",
##     "plot3D", "plotly", "plotmo", "plotrix", "pls", "plyr", "polspline",
##     "pracma", "prettyunits", "pROC", "processx", "prodlim", "profvis",
##     "progress", "progressr", "promises", "ps", "pscl", "psych", "psychTools",
##     "purrr", "qqconf", "quantmod", "quantreg", "quarto", "questionr", "qvcalc",
##     "R.oo", "R.rsp", "R.utils", "ragg", "ranger", "raster", "rbibutils",
##     "Rcpp", "RcppArmadillo", "RcppEigen", "RcppTOML", "Rcsdp", "RCurl",
##     "Rdpack", "rdrobust", "reactable", "reactR", "readr", "readsdmx", "readxl",
##     "rearrr", "recipes", "rematch", "remotes", "repr", "reprex",
##     "ResourceSelection", "reticulate", "rio", "RJSONIO", "rlang", "rmarkdown",
##     "rms", "RobStatTM", "robustbase", "roxygen2", "rpart.plot", "RPEGLMEN",
##     "rprojroot", "rrcov", "rsample", "rsconnect", "rsq", "RSQLite", "rstatix",
##     "rstudioapi", "rvest", "s2", "sandwich", "sass", "scales", "scatterplot3d",
##     "sf", "sfheaders", "shape", "shiny", "shinybusy", "shinyWidgets",
##     "skedastic", "slider", "smotefamily", "SMOTEWB", "snakecase",
##     "sourcetools", "sp", "spacetime", "spatialreg", "spatstat.data",
##     "spatstat.explore", "spatstat.geom", "spatstat.random", "spdep", "stars",
##     "stinepack", "stringi", "stringmagic", "stringr", "styler", "subselect",
##     "survey", "svglite", "sys", "systemfonts", "tables", "TeachingDemos",
##     "terra", "testthat", "texreg", "textshaping", "TH.data", "themis",
##     "this.path", "tibble", "tictoc", "tidygraph", "tidymodels", "tidyr",
##     "tidyselect", "tidyverse", "timechange", "timeDate", "tinytest", "tinytex",
##     "TraMineRextras", "triebeard", "tsibble", "tsibbledata", "TTR", "tune",
##     "tweenr", "twosamples", "tzdb", "UBL", "units", "usethis", "usmap",
##     "usmapdata", "utf8", "uuid", "V8", "vars", "vcd", "vctrs", "VGAM", "vipor",
##     "viridis", "viridisLite", "vroom", "vtable", "waldo", "warp", "webshot",
##     "withr", "wk", "wooldridge", "workflows", "workflowsets", "writexl",
##     "xfun", "xgboost", "XML", "xml2", "xts", "yaml", "yardstick", "zip", "zoo"
##   ), update = TRUE, ask = FALSE, force = TRUE)
## 
## more details: BiocManager::valid()$too_new, BiocManager::valid()$out_of_date
  • If you still need to install the Rgraphviz package, please open the .Rmd file and see code here (not printed in output).
rm(list=ls())       # empty environment
sessionInfo()

# source("https://bioconductor.org/biocLite.R")     # will not work on my version of R, thus I will have to install an older version 
# biocLite("Rgraphviz") 

install.packages("BiocManager")
library(BiocManager)
BiocManager::install("Rgraphviz")

if (!require("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
BiocManager::install(version = "3.15")

BiocManager::valid()     ## R version 3.5 or later
library(BiocManager)

# The Rgraphviz graphing package must be installed for this assignment, and thus the require command is just ensuring this (load and attach the add-on package Rgraphviz here)

require("Rgraphviz")

2 ADAPTED CODE (3 Branches)

2.1 1. INPUT

Note the notation is slightly different form the textbook example though.

# Marginal Probability of a1(academic events) / a2(sports events) / a3(no events) 

a1            <- 0.35
a2            <- 0.2
a3            <- 0.45



# Conditional Probability P(b | a)    

b1_no_park_given_a1     <- 0.25   # b1_no_park is garage full 
b1_no_park_given_a2     <- 0.70   
# thus, b2_with_park_XX is garage empty / not full 

b1_no_park_given_a3     <- .05
# On evenings when there are no events, it only fills up about 5% of the time.

2.2 2. OUTPUT

# Everything below here will be calculated

## complements 
b2_with_park_given_a1   <- 1 - b1_no_park_given_a1
b2_with_park_given_a2   <- 1 - b1_no_park_given_a2
b2_with_park_given_a3   <- 1 - b1_no_park_given_a3 

## Incorrect 
# b2_with_park_given_a3   <- 1 - b2_with_park_given_a1 - b2_with_park_given_a2 
# b1_no_park_given_a3     <- 1 - b2_with_park_given_a3   


# Calculate the rest of the values based upon the 3 variables above

probability_b1_no_park_given_a1   <-  a1  *   b1_no_park_given_a1
probability_b2_with_park_given_a1 <-  a1  *   b2_with_park_given_a1

probability_b1_no_park_given_a2   <-  a2  *   b1_no_park_given_a2
probability_b2_with_park_given_a2 <-  a2  *   b2_with_park_given_a2

probability_b1_no_park_given_a3   <-  a3  *   b1_no_park_given_a3
probability_b2_with_park_given_a3 <-  a3  *   b2_with_park_given_a3

2.3 3. GRAPH SETUP

node1 <- “P” node2 <- “A1” node3 <- “A2” node4 <- “A3” node5 <- “A1&B1” node6 <- “A1&B1’” node7 <- “A2&B2” node8 <- “A2&B2’” node9 <- “A3&B3” node10 <- “A3&B3’”

# These are the labels of the nodes on the graph

node1 <-  "Campus has"
node2 <-  "Academic Event"
node3 <-  "Sporting Event"
node4 <-  "No Event"
node5 <-  "AE & Garage Full"
node6 <-  "AE & Garage Not Full"
node7 <-  "SE & Garage Full"
node8 <-  "SE & Garage Not Full"
node9 <-  "NE & Garage Full"
node10 <- "NE & Garage Not Full"

nodeNames<-c(node1, node2, node3, node4, node5, node6, node7, node8, node9, node10)

 
?new # A call to a generating function for a class (see setClass) will pass its ... arguments to a corresponding call to new().
rEG <- new("graphNEL", 
           nodes = nodeNames, 
           edgemode="directed"
           )

# Erase any existing plots
# dev.off()

 

# Draw the "lines" or "branches" of the probability Tree

rEG <- addEdge(nodeNames[1], nodeNames[2], rEG, 1)
rEG <- addEdge(nodeNames[1], nodeNames[3], rEG, 1)
rEG <- addEdge(nodeNames[1], nodeNames[4], rEG, 1)
rEG <- addEdge(nodeNames[2], nodeNames[5], rEG, 1)
rEG <- addEdge(nodeNames[2], nodeNames[6], rEG, 1)
rEG <- addEdge(nodeNames[3], nodeNames[7], rEG, 1)
rEG <- addEdge(nodeNames[3], nodeNames[8], rEG, 1)
rEG <- addEdge(nodeNames[4], nodeNames[9], rEG, 1)
rEG <- addEdge(nodeNames[4], nodeNames[10], rEG, 10)

 

eAttrs <- list()

 

q<-edgeNames(rEG)

 

# Add the probability values to the the branch lines


eAttrs$label <- c(toString(a1),toString(a2), toString(a3),
 toString(b1_no_park_given_a1), toString(b2_with_park_given_a1),
 toString(b1_no_park_given_a2), toString(b2_with_park_given_a2),
 toString(b1_no_park_given_a3), toString(b2_with_park_given_a3)
 )

names(eAttrs$label) <- c(q[1],q[2], q[3], q[4], q[5], q[6], q[7],q[8],q[9])
edgeAttrs<-eAttrs

 

# Set the color, etc, of the tree

attributes<-list(node=list(label="foo", fillcolor="lightgreen", fontsize="15"),
edge=list(color="red"),graph=list(rankdir="LR"))

 

#Plot the probability tree using Rgraphvis
plot(rEG, edgeAttrs=eAttrs, attrs=attributes)

nodes(rEG)
##  [1] "Campus has"           "Academic Event"       "Sporting Event"      
##  [4] "No Event"             "AE & Garage Full"     "AE & Garage Not Full"
##  [7] "SE & Garage Full"     "SE & Garage Not Full" "NE & Garage Full"    
## [10] "NE & Garage Not Full"
edges(rEG)
## $`Campus has`
## [1] "Academic Event" "Sporting Event" "No Event"      
## 
## $`Academic Event`
## [1] "AE & Garage Full"     "AE & Garage Not Full"
## 
## $`Sporting Event`
## [1] "SE & Garage Full"     "SE & Garage Not Full"
## 
## $`No Event`
## [1] "NE & Garage Full"     "NE & Garage Not Full"
## 
## $`AE & Garage Full`
## character(0)
## 
## $`AE & Garage Not Full`
## character(0)
## 
## $`SE & Garage Full`
## character(0)
## 
## $`SE & Garage Not Full`
## character(0)
## 
## $`NE & Garage Full`
## character(0)
## 
## $`NE & Garage Not Full`
## character(0)
# Add the probability values to the leaves of tree




 text(450,380,probability_b1_no_park_given_a1, cex=.8)
# text(450,250,probability_b2_with_park_given_a1, cex=.8)
 text(450,250,probability_b1_no_park_given_a2, cex=.8)
# text(450,150,probability_b2_with_park_given_a2, cex=.8)
 text(450,120,probability_b1_no_park_given_a3,cex=.8)

# text(450,30, probability_b2_with_park_given_a3,cex=.8)


answer <- probability_b1_no_park_given_a2   / (probability_b1_no_park_given_a1+probability_b1_no_park_given_a2+probability_b1_no_park_given_a3)

answer 
## [1] 0.56

3 Alternative Approach:

The R code for Bayesian probability can be written in a few lines too (below). See “G. Jay Kerns, 2010, Introduction to Probability and Statistics Using R’’ for more details.

rm(list = ls())   # Clear environment 
cat("\f")         # Clear the console
prior       <-  c( 0.35, 0.2, 0.45 ) # marginal probabilities given in the question stored in a vector
likelihood  <-  c( 0.25, 0.7, 0.05 ) # conditional probabilities vector stored in a vector
posterior   <-  prior   *   likelihood
posterior
## [1] 0.0875 0.1400 0.0225
bayesian    <-  posterior / sum(posterior)
bayesian[2]                           # for event A_2
## [1] 0.56

3.1 ORIGINAL CODE (2 Branches)

3.1.1 1. INPUT

# Change the three variables below to match your actual values
# These are the values that you can change for your own probability tree
# From these three values, other probabilities (e.g. prob(b)) will be calculated 

# Probability of a
a             <-  .20

# Probability (b | a)
bGivena       <-  .70

# Probability (b | ¨a)
bGivenNota    <-  .30

3.1.2 2. OUTPUT

# Calculate the rest of the values based upon the 3 variables above
notA          <-    1 - a                   # Negation of a, and so on...
notbGivena    <-    1 - bGivena
notbGivenNota <-    1 - bGivenNota

# Joint Probabilities of a and B, a and notb, nota and b, nota and notb
aANDb         <-  a   *   bGivena
aANDnotb      <-  a   *   notbGivena
notaANDb      <- notA *   bGivenNota
notaANDnotb   <- notA *   notbGivenNota

# Probability of B
b             <-  aANDb   +   notaANDb
notB          <-  1 - b

# Bayes theorum - probabiliyt of A | B
# Prob (a | b) = Prob (a AND b) / prob (b)
aGivenb       <-  aANDb / b

3.1.3 3. GRAPH SETUP

# These are the labels of the nodes on the graph
# To signify "Not A" - we use A' or A prime 

# Erase any existing plots if you want to redraw  
# dev.off()

node1   <-  "P"
node2   <-  "A"
node3   <-  "A'"
node4   <-  "A&B"
node5   <-  "A&B'"
node6   <-  "A'&B"
node7   <-  "A'&B'"

nodeNames   <-  c(node1,node2,node3,node4, node5,node6, node7)
rEG     <-  new("graphNEL", nodes=nodeNames, edgemode="directed")
rEG
## A graphNEL graph with directed edges
## Number of Nodes = 7 
## Number of Edges = 0
# Draw the "lines" or "branches" of the probability Tree
rEG <- addEdge(nodeNames[1], nodeNames[2], rEG, 1)
rEG <- addEdge(nodeNames[1], nodeNames[3], rEG, 1)
rEG <- addEdge(nodeNames[2], nodeNames[4], rEG, 1)
rEG <- addEdge(nodeNames[2], nodeNames[5], rEG, 1)
rEG <- addEdge(nodeNames[3], nodeNames[6], rEG, 1)
rEG <- addEdge(nodeNames[3], nodeNames[7], rEG, 10)

eAttrs <- list()

q<-edgeNames(rEG)

# Add the probability values to the the branch lines

eAttrs$label <- c(toString(a),toString(notA),
                  toString(bGivena), toString(notbGivena),
                  toString(bGivenNota), toString(notbGivenNota))
names(eAttrs$label) <- c(q[1],q[2], q[3], q[4], q[5], q[6])
edgeAttrs<-eAttrs

# Set the color, etc, of the tree
attributes  <-  list(node=list(label="foo", fillcolor="lightgreen", fontsize="15"),
                 edge=list(color="red"),graph=list(rankdir="LR"))



# Plot the probability tree using Rgraphvis
plot(rEG, edgeAttrs=eAttrs, attrs=attributes)
nodes(rEG)
## [1] "P"     "A"     "A'"    "A&B"   "A&B'"  "A'&B"  "A'&B'"
edges(rEG)
## $P
## [1] "A"  "A'"
## 
## $A
## [1] "A&B"  "A&B'"
## 
## $`A'`
## [1] "A'&B"  "A'&B'"
## 
## $`A&B`
## character(0)
## 
## $`A&B'`
## character(0)
## 
## $`A'&B`
## character(0)
## 
## $`A'&B'`
## character(0)
#Add the probability values to the leaves of A&B, A&B', A'&B, A'&B'
text(500,420,aANDb, cex=.8)
text(500,280,aANDnotb,cex=.8)
text(500,160,notaANDb,cex=.8)
text(500,30,notaANDnotb,cex=.8)
text(340,440,"(B | A)",cex=.8)
text(340,230,"(B | A')",cex=.8)

#Write a table in the lower left of the probablites of A and B
text(80,50,paste("P(A):",a),cex=.9, col="darkgreen")
text(80,20,paste("P(A'):",notA),cex=.9, col="darkgreen")

text(160,50,paste("P(B):",round(b,digits=2)),cex=.9)
text(160,20,paste("P(B'):",round(notB, 2)),cex=.9)

text(80,420,paste("P(A|B): ",round(aGivenb,digits=2)),cex=.9,col="blue")

# Erase any existing plots if you want to redraw  
# dev.off()