#loading libraries
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.1.1
## Warning: package 'readr' was built under R version 4.1.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(tidyr)
library(DiagrammeR)
## Warning: package 'DiagrammeR' was built under R version 4.1.1

1.Suppose a medical test has a 96% chance of detecting a disease if the person has it and a 92% chance of correctly indicating that the disease is absent if the person really does not have the disease. Assume 12% of a particular population has the disease.

1a. (4 pts) Create a probability tree diagram to show probabilities of outcomes. Indicate on your diagram with labels the following: sensitivity, specificity, false positive, false negative.

bayes_probability_tree <- function(prior, true_positive, true_negative) {
  
  if (!all(c(prior, true_positive, true_negative) > 0) && !all(c(prior, true_positive, true_negative) < 1)) {
    stop("probabilities must be greater than 0 and less than 1.",
         call. = FALSE)
  }
  c_prior <- 1 - prior
  c_tp <- 1 - true_positive
  c_tn <- 1 - true_negative
  
  round4 <- purrr::partial(round, digits = 4)
  
  b1 <- round4(prior * true_positive)
  b2 <- round4(prior * c_tp)
  b3 <- round4(c_prior * c_tn)
  b4 <- round4(c_prior * true_negative)
  
  bp <-  round4(b1/(b1 + b3))
  
  labs <- c("X", prior, c_prior, true_positive, c_tp, true_negative, c_tn, b1, b2, b4, b3)
  
  tree <-
    create_graph() %>%
    add_n_nodes(
      n = 11,
      type = "path",
      label = labs,
      node_aes = node_aes(
        shape = "circle",
        height = 1,
        width = 1,
        x = c(0, 3, 3, 6, 6, 6, 6, 8, 8, 8, 8),
        y = c(0, 2, -2, 3, 1, -3, -1, 3, 1, -3, -1))) %>% 
    add_edge(
      from = 1,
      to = 2,
      edge_aes = edge_aes(
        label = "Prior"
      )
    ) %>% 
    add_edge(
      from = 1, 
      to = 3,
      edge_aes = edge_aes(
        label = "Complimentary Prior"
      )
    ) %>% 
    add_edge(
      from = 2,
      to = 4,
      edge_aes = edge_aes(
        label = "True Positive (Sensitivity)"
      )
    ) %>% 
    add_edge(
      from = 2,
      to = 5,
      edge_aes = edge_aes(
        label = "False Negative"
      )
    ) %>% 
    add_edge(
      from = 3,
      to = 7,
      edge_aes = edge_aes(
        label = "False Positive"
      )
    ) %>% 
    add_edge(
      from = 3,
      to = 6,
      edge_aes = edge_aes(
        label = "True Negative (Specificity)"
      )
    ) %>% 
    add_edge(
      from = 4,
      to = 8,
      edge_aes = edge_aes(
        label = "="
      )
    ) %>% 
    add_edge(
      from = 5,
      to = 9,
      edge_aes = edge_aes(
        label = "="
      )
    ) %>% 
    add_edge(
      from = 7,
      to = 11,
      edge_aes = edge_aes(
        label = "="
      )
    ) %>% 
    add_edge(
      from = 6,
      to = 10,
      edge_aes = edge_aes(
        label = "="
      )
    ) 
  message(glue::glue("The probability of having {prior} after testing positive is {bp}"))
  print(render_graph(tree))
  invisible(tree)
}
#1a setting probabilities for each level
bayes_probability_tree(prior = 0.12, true_positive = 0.96, true_negative = 0.92) # this returns the probability tree where incidence of disease = .12, sensitivity = 0.96, specificity = 0.92
## The probability of having 0.12 after testing positive is 0.6207

1b. What is the probability that a randomly chosen person will test positive? (Show calculations)

= (0.12*0.96) + (0.88*0.08)
= 0.1152 + 0.0704
= 0.1856
=18.56% chance a random person will test positive

#1b calculations
a <- (0.12*0.96) + (0.88*0.08)
b <- 0.1152 + 0.07040
a * 100
## [1] 18.56
b * 100
## [1] 18.56

1c. Suppose that a randomly chosen person does test positive. What is the probability that this person really has the disease?

Bayes rule
= P(D|+)
= P(D and +) / P(+)
= (0.12*0.96) / 0.1856
= 62.06897%

#1c calculations
c <- (0.12*0.96) / 0.1856
c * 100
## [1] 62.06897

2. The prevalence of mild myopia in adults over age 40 is 23% in the U.S. Let random variable Y denote the number with myopia out of a random sample of 6.

2a. Complete the table

N = 6, p = 0.23
Y (No.of Myopic) successes No. of Non-myopic Probability
(Binomial expansion)
Probability (as a decimal)
0 6 6C0(0.23)0(0.77)6 dbinom(0, 6, .23) = 0.2084224
1 5 6C1(0.23)1(0.77)5 dbinom(1, 6, .23) = 0.3735362
2 4 6C2(0.23)2(0.77)4 dbinom(2, 6, .23) = 0.2789394
3 3 6C3(0.23)3(0.77)3 dbinom(3, 6, .23) = 0.1110927
4 2 6C4(0.23)4(0.77)2 dbinom(4, 6, .23) = 0.02488766
5 1 6C5(0.23)5(0.77)1 dbinom(5, 6, .23) = 0.00297359
6 0 6C6(0.23)6(0.77)0 dbinom(6, 6, .23) = 0.0001480359
#2a calculations
d <- dbinom(0, 6, .23)
d
## [1] 0.2084224
e <- dbinom(1, 6, .23)
e
## [1] 0.3735362
f <- dbinom(2, 6, .23)
f
## [1] 0.2789394
g <- dbinom(3, 6, .23)
g
## [1] 0.1110927
h <- dbinom(4, 6, .23)
h
## [1] 0.02488766
i <- dbinom(5, 6, .23)
i
## [1] 0.00297359
j <- dbinom(6, 6, .23)
j
## [1] 0.0001480359
#sum all probabilities to check if it equals 1
d+e+f+g+h+i+j
## [1] 1

2b. Pr{Y ≥ 3} = 0.02800929

#calculations 2b
pbinom(3, 6, .23, lower.tail = FALSE)
## [1] 0.02800929

2c. Pr{Y ≤ 2} = 0.860898

#calculations 2c
pbinom(2, 6, .23)
## [1] 0.860898