Bayes’ Theorem can yield surprising results. Take a look at
Guided Practice 3.43 and attempt to solve this using Bayes’ formula.
Interpret the results. Then use the attached code and solve via a tree
diagram in R. You will need to change the initial parameters to the
appropriate values. Attach your final graph to your submission. Note:
for newer versions of R, you will need to install
BiocManager
.
install.packages("BiocManager")
library(BiocManager)
BiocManager::install("Rgraphviz")
Guided Practice 3.43: Jose visits campus every Thursday evening. However, some days the parking garage is full, often due to college events. There are academic events on 35% of evenings, sporting events on 20% of evenings, and no events on 45% of evenings. When there is an academic event, the garage fills up about 25% of the time, and it fills up 70% of evenings with sporting events. On evenings when there are no events, it only fills up about 5% of the time. If Jose comes to campus and finds the garage full, what is the probability that there is a sporting event? Use a tree diagram to solve this problem.
See installation help.
I am also setting warning=FALSE
and
message=FALSE
in the code chunk below for cleaner output,
and commenting out install.package()
commands to speed up
the code.
sessionInfo()
is printed before and after installation
of libraries to show you what Rgraphviz
adds.rm(list=ls()) # empty environment
gc() # Clear unused memory
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 528772 28.3 1176135 62.9 NA 669282 35.8
## Vcells 973829 7.5 8388608 64.0 32768 1840447 14.1
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.35 R6_2.5.1 jsonlite_1.8.8 evaluate_0.23
## [5] cachem_1.0.8 rlang_1.1.2 cli_3.6.2 rstudioapi_0.14
## [9] jquerylib_0.1.4 bslib_0.5.0 rmarkdown_2.21 tools_4.2.1
## [13] xfun_0.39 yaml_2.3.7 fastmap_1.1.1 compiler_4.2.1
## [17] htmltools_0.5.5 knitr_1.45 sass_0.4.6
# install.packages("BiocManager")
library(BiocManager)
# BiocManager::install("Rgraphviz")
require("Rgraphviz")
BiocManager::valid() ## R version 3.5 or later
##
## * sessionInfo()
##
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] Rgraphviz_2.40.0 graph_1.74.0 BiocGenerics_0.42.0
## [4] BiocManager_1.30.22
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.35 R6_2.5.1 jsonlite_1.8.8 stats4_4.2.1
## [5] evaluate_0.23 cachem_1.0.8 rlang_1.1.2 cli_3.6.2
## [9] rstudioapi_0.14 jquerylib_0.1.4 bslib_0.5.0 rmarkdown_2.21
## [13] tools_4.2.1 xfun_0.39 yaml_2.3.7 fastmap_1.1.1
## [17] compiler_4.2.1 htmltools_0.5.5 knitr_1.45 sass_0.4.6
##
## Bioconductor version '3.15'
##
## * 424 packages out-of-date
## * 2 packages too new
##
## create a valid installation with
##
## BiocManager::install(c(
## "acepack", "AER", "animint2", "askpass", "BayesFactor", "bayestestR",
## "bdsmatrix", "BH", "BiocManager", "blob", "bookdown", "brew", "brio",
## "broom", "broom.helpers", "bslib", "cachem", "callr", "car", "checkmate",
## "chron", "Ckmeans.1d.dp", "classInt", "cli", "clock", "coda", "coin",
## "collapse", "colourpicker", "commonmark", "covr", "cowplot", "cpp11",
## "credentials", "crosstable", "crosstalk", "crul", "Cubist", "curl", "cvms",
## "cyclocomp", "data.table", "DataExplorer", "datamods", "datawizard", "DBI",
## "dbplyr", "deldir", "DEoptimR", "desc", "DiagrammeR", "dials",
## "DiceDesign", "digest", "directlabels", "distributional", "downlit",
## "dplyr", "DT", "dtplyr", "e1071", "earth", "ellipse", "emmeans",
## "esquisse", "evaluate", "expm", "expss", "fable", "fabletools",
## "FactoMineR", "fansi", "fastICA", "fastmap", "fastmatrix", "feasts",
## "flextable", "float", "FNN", "fontawesome", "forcats", "forecast",
## "Formula", "fracdiff", "fs", "fstcore", "ftExtra", "future",
## "future.apply", "gam", "gamlss", "gamlss.data", "gamlss.dist", "gargle",
## "gdtools", "geometries", "gert", "gganimate", "ggbeeswarm", "ggbiplot",
## "ggcorrplot", "ggdag", "ggdendro", "ggeffects", "ggExtra", "ggforce",
## "ggfortify", "ggiraph", "ggplot2", "ggprism", "ggpubr", "ggraph",
## "ggrepel", "ggsci", "ggthemes", "gh", "glmnet", "globals", "glue", "gmp",
## "googledrive", "googlesheets4", "googleVis", "GPArotation", "gplots",
## "graphlayouts", "gstat", "gt", "gtable", "gtExtras", "gtools", "gtsummary",
## "hardhat", "haven", "hipread", "HistData", "Hmisc", "hms", "hrbrthemes",
## "htmlTable", "htmltools", "htmlwidgets", "httpuv", "httr", "huxtable",
## "igraph", "infer", "influenceR", "insight", "interp", "intervals", "ipred",
## "ipumsr", "janitor", "jsonlite", "kableExtra", "kernlab", "klaR", "knitr",
## "ks", "labeling", "labelled", "laeken", "later", "lava", "lavaan",
## "leaflet", "lemon", "libcoin", "lifecycle", "lintr", "listenv", "lme4",
## "logr", "lubridate", "lwgeom", "maditr", "magick", "markdown",
## "MatrixModels", "matrixStats", "maxLik", "mclust", "mcmc", "MCMCpack",
## "mda", "mice", "minqa", "miscTools", "mlbench", "modeldata", "modelenv",
## "modelr", "modelsummary", "mosaic", "multcomp", "multcompView",
## "multicool", "mvtnorm", "naniar", "norm", "odbc", "officer",
## "opdisDownsampling", "openssl", "openxlsx", "packrat", "paletteer",
## "parallelly", "parameters", "parsnip", "party", "pastecs", "patchwork",
## "pbapply", "pcaPP", "performance", "pillar", "pkgbuild", "pkgload", "plm",
## "plot3D", "plotly", "plotmo", "plotrix", "pls", "plyr", "polspline",
## "pracma", "prettyunits", "pROC", "processx", "prodlim", "profvis",
## "progress", "progressr", "promises", "ps", "pscl", "psych", "psychTools",
## "purrr", "qqconf", "quantmod", "quantreg", "quarto", "questionr", "qvcalc",
## "R.oo", "R.rsp", "R.utils", "ragg", "ranger", "raster", "rbibutils",
## "Rcpp", "RcppArmadillo", "RcppEigen", "RcppTOML", "Rcsdp", "RCurl",
## "Rdpack", "rdrobust", "reactable", "reactR", "readr", "readsdmx", "readxl",
## "rearrr", "recipes", "rematch", "remotes", "repr", "reprex",
## "ResourceSelection", "reticulate", "rio", "RJSONIO", "rlang", "rmarkdown",
## "rms", "RobStatTM", "robustbase", "roxygen2", "rpart.plot", "RPEGLMEN",
## "rprojroot", "rrcov", "rsample", "rsconnect", "rsq", "RSQLite", "rstatix",
## "rstudioapi", "rvest", "s2", "sandwich", "sass", "scales", "scatterplot3d",
## "sf", "sfheaders", "shape", "shiny", "shinybusy", "shinyWidgets",
## "skedastic", "slider", "smotefamily", "SMOTEWB", "snakecase",
## "sourcetools", "sp", "spacetime", "spatialreg", "spatstat.data",
## "spatstat.explore", "spatstat.geom", "spatstat.random", "spdep", "stars",
## "stinepack", "stringi", "stringmagic", "stringr", "styler", "subselect",
## "survey", "svglite", "sys", "systemfonts", "tables", "TeachingDemos",
## "terra", "testthat", "texreg", "textshaping", "TH.data", "themis",
## "this.path", "tibble", "tictoc", "tidygraph", "tidymodels", "tidyr",
## "tidyselect", "tidyverse", "timechange", "timeDate", "tinytest", "tinytex",
## "TraMineRextras", "triebeard", "tsibble", "tsibbledata", "TTR", "tune",
## "tweenr", "twosamples", "tzdb", "UBL", "units", "usethis", "usmap",
## "usmapdata", "utf8", "uuid", "V8", "vars", "vcd", "vctrs", "VGAM", "vipor",
## "viridis", "viridisLite", "vroom", "vtable", "waldo", "warp", "webshot",
## "withr", "wk", "wooldridge", "workflows", "workflowsets", "writexl",
## "xfun", "xgboost", "XML", "xml2", "xts", "yaml", "yardstick", "zip", "zoo"
## ), update = TRUE, ask = FALSE, force = TRUE)
##
## more details: BiocManager::valid()$too_new, BiocManager::valid()$out_of_date
Rgraphviz
package,
please open the .Rmd
file and see code here (not printed in
output).rm(list=ls()) # empty environment
sessionInfo()
# source("https://bioconductor.org/biocLite.R") # will not work on my version of R, thus I will have to install an older version
# biocLite("Rgraphviz")
install.packages("BiocManager")
library(BiocManager)
BiocManager::install("Rgraphviz")
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(version = "3.15")
BiocManager::valid() ## R version 3.5 or later
library(BiocManager)
# The Rgraphviz graphing package must be installed for this assignment, and thus the require command is just ensuring this (load and attach the add-on package Rgraphviz here)
require("Rgraphviz")
Note the notation is slightly different form the textbook example though.
# Marginal Probability of a1(academic events) / a2(sports events) / a3(no events)
a1 <- 0.35
a2 <- 0.2
a3 <- 0.45
# Conditional Probability P(b | a)
b1_no_park_given_a1 <- 0.25 # b1_no_park is garage full
b1_no_park_given_a2 <- 0.70
# thus, b2_with_park_XX is garage empty / not full
b1_no_park_given_a3 <- .05
# On evenings when there are no events, it only fills up about 5% of the time.
# Everything below here will be calculated
## complements
b2_with_park_given_a1 <- 1 - b1_no_park_given_a1
b2_with_park_given_a2 <- 1 - b1_no_park_given_a2
b2_with_park_given_a3 <- 1 - b1_no_park_given_a3
## Incorrect
# b2_with_park_given_a3 <- 1 - b2_with_park_given_a1 - b2_with_park_given_a2
# b1_no_park_given_a3 <- 1 - b2_with_park_given_a3
# Calculate the rest of the values based upon the 3 variables above
probability_b1_no_park_given_a1 <- a1 * b1_no_park_given_a1
probability_b2_with_park_given_a1 <- a1 * b2_with_park_given_a1
probability_b1_no_park_given_a2 <- a2 * b1_no_park_given_a2
probability_b2_with_park_given_a2 <- a2 * b2_with_park_given_a2
probability_b1_no_park_given_a3 <- a3 * b1_no_park_given_a3
probability_b2_with_park_given_a3 <- a3 * b2_with_park_given_a3
node1 <- “P” node2 <- “A1” node3 <- “A2” node4 <- “A3” node5 <- “A1&B1” node6 <- “A1&B1’” node7 <- “A2&B2” node8 <- “A2&B2’” node9 <- “A3&B3” node10 <- “A3&B3’”
# These are the labels of the nodes on the graph
node1 <- "Campus has"
node2 <- "Academic Event"
node3 <- "Sporting Event"
node4 <- "No Event"
node5 <- "AE & Garage Full"
node6 <- "AE & Garage Not Full"
node7 <- "SE & Garage Full"
node8 <- "SE & Garage Not Full"
node9 <- "NE & Garage Full"
node10 <- "NE & Garage Not Full"
nodeNames<-c(node1, node2, node3, node4, node5, node6, node7, node8, node9, node10)
?new # A call to a generating function for a class (see setClass) will pass its ... arguments to a corresponding call to new().
rEG <- new("graphNEL",
nodes = nodeNames,
edgemode="directed"
)
# Erase any existing plots
# dev.off()
# Draw the "lines" or "branches" of the probability Tree
rEG <- addEdge(nodeNames[1], nodeNames[2], rEG, 1)
rEG <- addEdge(nodeNames[1], nodeNames[3], rEG, 1)
rEG <- addEdge(nodeNames[1], nodeNames[4], rEG, 1)
rEG <- addEdge(nodeNames[2], nodeNames[5], rEG, 1)
rEG <- addEdge(nodeNames[2], nodeNames[6], rEG, 1)
rEG <- addEdge(nodeNames[3], nodeNames[7], rEG, 1)
rEG <- addEdge(nodeNames[3], nodeNames[8], rEG, 1)
rEG <- addEdge(nodeNames[4], nodeNames[9], rEG, 1)
rEG <- addEdge(nodeNames[4], nodeNames[10], rEG, 10)
eAttrs <- list()
q<-edgeNames(rEG)
# Add the probability values to the the branch lines
eAttrs$label <- c(toString(a1),toString(a2), toString(a3),
toString(b1_no_park_given_a1), toString(b2_with_park_given_a1),
toString(b1_no_park_given_a2), toString(b2_with_park_given_a2),
toString(b1_no_park_given_a3), toString(b2_with_park_given_a3)
)
names(eAttrs$label) <- c(q[1],q[2], q[3], q[4], q[5], q[6], q[7],q[8],q[9])
edgeAttrs<-eAttrs
# Set the color, etc, of the tree
attributes<-list(node=list(label="foo", fillcolor="lightgreen", fontsize="15"),
edge=list(color="red"),graph=list(rankdir="LR"))
#Plot the probability tree using Rgraphvis
plot(rEG, edgeAttrs=eAttrs, attrs=attributes)
nodes(rEG)
## [1] "Campus has" "Academic Event" "Sporting Event"
## [4] "No Event" "AE & Garage Full" "AE & Garage Not Full"
## [7] "SE & Garage Full" "SE & Garage Not Full" "NE & Garage Full"
## [10] "NE & Garage Not Full"
edges(rEG)
## $`Campus has`
## [1] "Academic Event" "Sporting Event" "No Event"
##
## $`Academic Event`
## [1] "AE & Garage Full" "AE & Garage Not Full"
##
## $`Sporting Event`
## [1] "SE & Garage Full" "SE & Garage Not Full"
##
## $`No Event`
## [1] "NE & Garage Full" "NE & Garage Not Full"
##
## $`AE & Garage Full`
## character(0)
##
## $`AE & Garage Not Full`
## character(0)
##
## $`SE & Garage Full`
## character(0)
##
## $`SE & Garage Not Full`
## character(0)
##
## $`NE & Garage Full`
## character(0)
##
## $`NE & Garage Not Full`
## character(0)
# Add the probability values to the leaves of tree
text(450,380,probability_b1_no_park_given_a1, cex=.8)
# text(450,250,probability_b2_with_park_given_a1, cex=.8)
text(450,250,probability_b1_no_park_given_a2, cex=.8)
# text(450,150,probability_b2_with_park_given_a2, cex=.8)
text(450,120,probability_b1_no_park_given_a3,cex=.8)
# text(450,30, probability_b2_with_park_given_a3,cex=.8)
answer <- probability_b1_no_park_given_a2 / (probability_b1_no_park_given_a1+probability_b1_no_park_given_a2+probability_b1_no_park_given_a3)
answer
## [1] 0.56
The R code for Bayesian probability can be written in a few lines too (below). See “G. Jay Kerns, 2010, Introduction to Probability and Statistics Using R’’ for more details.
rm(list = ls()) # Clear environment
cat("\f") # Clear the console
prior <- c( 0.35, 0.2, 0.45 ) # marginal probabilities given in the question stored in a vector
likelihood <- c( 0.25, 0.7, 0.05 ) # conditional probabilities vector stored in a vector
posterior <- prior * likelihood
posterior
## [1] 0.0875 0.1400 0.0225
bayesian <- posterior / sum(posterior)
bayesian[2] # for event A_2
## [1] 0.56
# Change the three variables below to match your actual values
# These are the values that you can change for your own probability tree
# From these three values, other probabilities (e.g. prob(b)) will be calculated
# Probability of a
a <- .20
# Probability (b | a)
bGivena <- .70
# Probability (b | ¨a)
bGivenNota <- .30
# Calculate the rest of the values based upon the 3 variables above
notA <- 1 - a # Negation of a, and so on...
notbGivena <- 1 - bGivena
notbGivenNota <- 1 - bGivenNota
# Joint Probabilities of a and B, a and notb, nota and b, nota and notb
aANDb <- a * bGivena
aANDnotb <- a * notbGivena
notaANDb <- notA * bGivenNota
notaANDnotb <- notA * notbGivenNota
# Probability of B
b <- aANDb + notaANDb
notB <- 1 - b
# Bayes theorum - probabiliyt of A | B
# Prob (a | b) = Prob (a AND b) / prob (b)
aGivenb <- aANDb / b
# These are the labels of the nodes on the graph
# To signify "Not A" - we use A' or A prime
# Erase any existing plots if you want to redraw
# dev.off()
node1 <- "P"
node2 <- "A"
node3 <- "A'"
node4 <- "A&B"
node5 <- "A&B'"
node6 <- "A'&B"
node7 <- "A'&B'"
nodeNames <- c(node1,node2,node3,node4, node5,node6, node7)
rEG <- new("graphNEL", nodes=nodeNames, edgemode="directed")
rEG
## A graphNEL graph with directed edges
## Number of Nodes = 7
## Number of Edges = 0
# Draw the "lines" or "branches" of the probability Tree
rEG <- addEdge(nodeNames[1], nodeNames[2], rEG, 1)
rEG <- addEdge(nodeNames[1], nodeNames[3], rEG, 1)
rEG <- addEdge(nodeNames[2], nodeNames[4], rEG, 1)
rEG <- addEdge(nodeNames[2], nodeNames[5], rEG, 1)
rEG <- addEdge(nodeNames[3], nodeNames[6], rEG, 1)
rEG <- addEdge(nodeNames[3], nodeNames[7], rEG, 10)
eAttrs <- list()
q<-edgeNames(rEG)
# Add the probability values to the the branch lines
eAttrs$label <- c(toString(a),toString(notA),
toString(bGivena), toString(notbGivena),
toString(bGivenNota), toString(notbGivenNota))
names(eAttrs$label) <- c(q[1],q[2], q[3], q[4], q[5], q[6])
edgeAttrs<-eAttrs
# Set the color, etc, of the tree
attributes <- list(node=list(label="foo", fillcolor="lightgreen", fontsize="15"),
edge=list(color="red"),graph=list(rankdir="LR"))
# Plot the probability tree using Rgraphvis
plot(rEG, edgeAttrs=eAttrs, attrs=attributes)
nodes(rEG)
## [1] "P" "A" "A'" "A&B" "A&B'" "A'&B" "A'&B'"
edges(rEG)
## $P
## [1] "A" "A'"
##
## $A
## [1] "A&B" "A&B'"
##
## $`A'`
## [1] "A'&B" "A'&B'"
##
## $`A&B`
## character(0)
##
## $`A&B'`
## character(0)
##
## $`A'&B`
## character(0)
##
## $`A'&B'`
## character(0)
#Add the probability values to the leaves of A&B, A&B', A'&B, A'&B'
text(500,420,aANDb, cex=.8)
text(500,280,aANDnotb,cex=.8)
text(500,160,notaANDb,cex=.8)
text(500,30,notaANDnotb,cex=.8)
text(340,440,"(B | A)",cex=.8)
text(340,230,"(B | A')",cex=.8)
#Write a table in the lower left of the probablites of A and B
text(80,50,paste("P(A):",a),cex=.9, col="darkgreen")
text(80,20,paste("P(A'):",notA),cex=.9, col="darkgreen")
text(160,50,paste("P(B):",round(b,digits=2)),cex=.9)
text(160,20,paste("P(B'):",round(notB, 2)),cex=.9)
text(80,420,paste("P(A|B): ",round(aGivenb,digits=2)),cex=.9,col="blue")
# Erase any existing plots if you want to redraw
# dev.off()