1. Demonstrate PDF, CDF and Quantile plots for the standard normal distribution

library(ggplot2)

PDF and CDF plots for the Standard Normal Distribution

Generate the x values

x <- seq(-5, 5, length.out = 10)
x
##  [1] -5.0000000 -3.8888889 -2.7777778 -1.6666667 -0.5555556  0.5555556
##  [7]  1.6666667  2.7777778  3.8888889  5.0000000

Generate PDF and CDF values

pdf_values <- dnorm(x)
pdf_values
##  [1] 1.486720e-06 2.074403e-04 8.421534e-03 9.947714e-02 3.418923e-01
##  [6] 3.418923e-01 9.947714e-02 8.421534e-03 2.074403e-04 1.486720e-06
cdf_values <- pnorm(x)
cdf_values
##  [1] 2.866516e-07 5.035210e-05 2.736602e-03 4.779035e-02 2.892574e-01
##  [6] 7.107426e-01 9.522096e-01 9.972634e-01 9.999496e-01 9.999997e-01

Generate PDF and CDF plots

pdf_plot <- ggplot(data.frame(x, pdf_values), aes(x = x, y = pdf_values)) + 
            geom_line(color = "red") +
            labs(title = "PDF of Standard Normal Distribution",
                 x = "x values",
                 y = "PDF values")
pdf_plot

cdf_plot <- ggplot(data.frame(x, cdf_values), aes(x = x, y = cdf_values)) +
            geom_line(color = "blue") +
            labs(title = "CDF of Standard Normal Distribution", 
                  x = "x values",
                  y = "CDF values")
cdf_plot

Quantile plot for Standard Normal Distribution

Generate probabilities for Quantile values

p <- seq(0.1, 0.9, length.out = 10)

Generate Quantile values

quantile_values <- qnorm(p)
quantile_values
##  [1] -1.2815516 -0.8819982 -0.5894558 -0.3406948 -0.1116372  0.1116372
##  [7]  0.3406948  0.5894558  0.8819982  1.2815516

Generate Quantile plot for Standard Normal Distribution

quantile_plot <- ggplot(data.frame(p, quantile_values), aes(x = p, y = quantile_values)) +
                  geom_line(color = "green") +
                  labs(title = "Quantile plot for Standard Normal Distribution",
                       x = " Probabilities",
                       y = "Quantile values")
quantile_plot

Generate PDF, CDF, and Quantile plots for Standard Normal Distribution

install.packages("gridExtra", repos = "https://cloud.r-project.org/")
## Installing package into 'C:/Users/alxn0/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'gridExtra' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\alxn0\AppData\Local\Temp\RtmpKGn94V\downloaded_packages
library(gridExtra)
grid.arrange(pdf_plot, cdf_plot, quantile_plot, nrow = 1)

2. Create a function in R to calculate and draw CDF from the stated PDF

pdf_function <- function(x) {
                ifelse(x >= 0 & x < 3, 0.2,
                ifelse(x >= 3 & x < 5, 0.05,
                ifelse(x >= 5 & x < 6, 0.15,
                ifelse(x >= 7 & x < 10, 0.05, 0))))
                }
cdf <- function(x, pdf_function) {
      cdf_values <- numeric(length(x))
      for (i in seq_along(x)) {
      cdf_values[i] <- integrate(pdf_function, lower = 0, upper = x[i]) $value
  }
return(cdf_values)
}
cdf_values
##  [1] 2.866516e-07 5.035210e-05 2.736602e-03 4.779035e-02 2.892574e-01
##  [6] 7.107426e-01 9.522096e-01 9.972634e-01 9.999496e-01 9.999997e-01

Generate x values

x <- seq(0, 10, length.out = 100)

Compute PDF and CDF values

pdf_values <- sapply(x, pdf_function)
cdf_values <- cdf(x, pdf_function)

Generate data frame

df <- data.frame(x = x, PDF = pdf_values, CDF = cdf_values)

Generate PDF and CDF plots

ggplot(df, aes(x)) +
  geom_line(aes(y = PDF, color = "PDF")) +
  geom_line(aes(y = CDF, color = "CDF")) +
  labs(title = "PDF and CDF Plots",
       x= "x value",
       y = "Function") +
  scale_color_manual(values = c("PDF" = "blue", "CDF" = "purple"))

3. Create a function to calculate posterior probability using Bayes theorem.

bayes_posterior <- function(prior, likelihood, marginal_evidence) {
  posterior <- (likelihood * prior) / marginal_evidence
  return(posterior)
}

4. Bayes Theorem Question

A certain virus infects one in every 400 people. A test used to detect the virus in a person is positive 85% of the time if the person has the virus and 5% of the time if the person does not have the virus. (This 5% result is called a false positive). Let A be the event “the person has the virus” and B be the event “the person tests positive”.

a) Find the probability that a person has the virus given that they have tested positive, i.e. find P(A|B). Round your answer to the nearest hundredth of a percent.

b) Find the probability that a person does not have the virus given that they test negative, i.e. find P(A’|B’). Round your answer to the nearest hundredth of a percent.

# Function to compute posterior probability using Bayes' Theorem
bayes_posterior <- function(prior, likelihood, marginal_evidence) {
  posterior <- (likelihood * prior) / marginal_evidence
  return(posterior)
}

# Function to calculate both probabilities
virus_test_probabilities <- function(prevalence, sensitivity, false_positive_rate) {
  
  # Given probabilities
  prior <- prevalence  # P(A)
  prior_neg <- 1 - prior  # P(A')

  likelihood <- sensitivity  # P(B | A)
  false_neg_rate <- 1 - sensitivity  # P(B' | A)

  false_pos_rate <- false_positive_rate  # P(B | A')
  specificity <- 1 - false_pos_rate  # P(B' | A')

  # Compute P(B)
  marginal_evidence_B <- (likelihood * prior) + (false_pos_rate * prior_neg)

  # Compute P(A | B) - Probability of having the virus given a positive test
  P_A_given_B <- bayes_posterior(prior, likelihood, marginal_evidence_B)

  # Compute P(B')
  marginal_evidence_B_neg <- (false_neg_rate * prior) + (specificity * prior_neg)

  # Compute P(A' | B') - Probability of NOT having the virus given a negative test
  P_A_neg_given_B_neg <- bayes_posterior(prior_neg, specificity, marginal_evidence_B_neg)

  # Return both probabilities
  return(list(P_A_given_B = round(P_A_given_B * 100, 2),
              P_A_neg_given_B_neg = round(P_A_neg_given_B_neg * 100, 2)))
}

# Example Usage:
result <- virus_test_probabilities(prevalence = 1/400, sensitivity = 0.85, false_positive_rate = 0.05)

# Print results
print(paste("P(A | B) =", result$P_A_given_B, "%"))
## [1] "P(A | B) = 4.09 %"
print(paste("P(A' | B') =", result$P_A_neg_given_B_neg, "%"))
## [1] "P(A' | B') = 99.96 %"