library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(stats)
library(glue)
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
Problem 1. Probability Density 1: X~Gamma. Using R, generate a random variable X that has 10,000 random Gamma pdf values. A Gamma pdf is completely described by n (a size parameter) and lambda (λ, a shape parameter). Choose any n greater than 3 and an expected value (λ) between 2 and 10 (you choose).
# MASS::select() masks dplyr::select() (see the conflict message above), so keep dplyr's version
select <- dplyr::select
set.seed(1335)
n <- 4        # shape (the assignment's size parameter)
lambda <- 8   # rate parameter
ss <- 10000   # number of simulated observations
XGamma <- rgamma(ss, n, lambda)   # 10,000 Gamma(shape = n, rate = lambda) draws
hist(XGamma)
Probability Density 2: Y~Sum of Exponentials. Then generate 10,000 observations from the sum of n exponential pdfs with rate/shape parameter (λ). The n and λ must be the same as in the previous case.
Ysum <- rexp(ss, lambda) + rexp(ss, lambda) + rexp(ss, lambda) + rexp(ss, lambda)
hist(Ysum)
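The four explicit rexp() terms above match the choice n = 4. A minimal sketch of a form that generalizes to any n, assuming the same ss, n, and lambda defined earlier (Ysum_general is an illustrative name, not part of the original), would be:
# Draw ss * n exponential values, arrange them n per row, and sum each row
Ysum_general <- rowSums(matrix(rexp(ss * n, lambda), ncol = n))
hist(Ysum_general)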
Probability Density 3: Z~ Exponential. Then generate 10,000 observations from a single exponential pdf with rate/shape parameter (λ). NOTE: The Gamma distribution is quite common in data science. For example, it is used to model failures for multiple processes when each of those processes has the same failure rate. The exponential is used for constant failure rates, service times, etc.
Zexpo <- rexp(ss, lambda)   # single exponential draws at the same rate
hist(Zexpo)
5 points.
#Question 1a. Calculate the empirical expected value (means) and variances of all three pdfs.
The expected means below use the closed-form values (n/λ for X and Y, 1/λ for Z), while the variances come from the simulated samples; a sketch of the fully empirical means and variances follows the Z results.
X-Gamma expected mean and sample variance
(E_mean_x<- n/lambda)
## [1] 0.5
{prettyNum(var(XGamma))}
## [1] "0.06252452"
Y (sum of exponentials) expected mean and sample variance
(E_mean_ysum<- n/lambda)
## [1] 0.5
{prettyNum(var(Ysum))}
## [1] "0.06408574"
Z (single exponential) expected mean and sample variance
(E_mean_z<-1/lambda)
## [1] 0.125
{prettyNum(var(Zexpo))}
## [1] "0.01551766"
#Question 1b. Using calculus, calculate the expected value and variance of the Gamma pdf (X). Using the moment generating function for exponentials, calculate the expected value of the single exponential (Z) and the sum of exponentials (Y). (The integrals below handle X; an MGF sketch for Z and Y follows them.)
# E[X] = integral of x * f(x) over (0, Inf), computed numerically
integrand <- function(x) x * dgamma(x, n, lambda)
expected_value_X <- integrate(integrand, lower = 0, upper = Inf)$value
expected_value_X
## [1] 0.5
# E[X^2], then Var(X) = E[X^2] - (E[X])^2
integrand <- function(x) x^2 * dgamma(x, n, lambda)
variance_X <- integrate(integrand, lower = 0, upper = Inf)$value - expected_value_X^2
variance_X
## [1] 0.0625
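The integrals above cover the calculus part of 1b for X; the MGF part for Z and Y is not shown. One possible sketch (mgf_z and mgf_y are illustrative names) differentiates the exponential MGF M_Z(t) = λ/(λ − t) and the sum's MGF M_Y(t) = (λ/(λ − t))^n symbolically with D() and evaluates the derivative at t = 0:
# E[Z] = M_Z'(0), where M_Z(t) = lambda / (lambda - t)
mgf_z <- expression(lambda / (lambda - t))
eval(D(mgf_z, "t"), list(lambda = lambda, t = 0))          # 1/lambda = 0.125
# E[Y] = M_Y'(0), where M_Y(t) = (lambda / (lambda - t))^n for n iid exponentials
mgf_y <- expression((lambda / (lambda - t))^n)
eval(D(mgf_y, "t"), list(lambda = lambda, n = n, t = 0))   # n/lambda = 0.5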
#Question 1c-e. Probability. For pdf Z (the exponential), empirically calculate probabilities a through c. Then evaluate through calculus whether the memoryless property holds. a. P(Z > λ | Z > λ/2)
# Survival probabilities P(Z > 1/λ) at rates λ, 2λ, and 3λ (i.e. e^-1, e^-2, e^-3)
(Emp_prob_a <- 1-pexp(E_mean_z, lambda))
## [1] 0.3678794
(Emp_prob_b <- 1-pexp(E_mean_z, 2*lambda))
## [1] 0.1353353
(Emp_prob_c <- 1-pexp(E_mean_z, 3*lambda))
## [1] 0.04978707
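The question also asks about the memoryless property. Beyond the calculus argument, a quick empirical check on the simulated Zexpo (a sketch; s0 and t0 are illustrative names, here both set to the mean 1/λ) compares P(Z > s0 + t0 | Z > s0) with P(Z > t0):
# If Z is memoryless, these two proportions should be nearly equal
s0 <- 1 / lambda
t0 <- 1 / lambda
mean(Zexpo > s0 + t0) / mean(Zexpo > s0)   # empirical P(Z > s0 + t0 | Z > s0)
mean(Zexpo > t0)                           # empirical P(Z > t0)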
#Question Loosely investigate whether P(YZ) = P(Y) P(Z) by building a table with quartiles and evaluating the marginal and joint probabilities.
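One possible sketch of that table (y_q, z_q, and the other names are illustrative): bin Y and Z at their quartiles, tabulate the joint counts, and compare the joint proportions with the product of the marginals.
# Quartile bins for Y and Z, joint counts, and joint vs. product-of-marginals
y_q <- cut(Ysum,  quantile(Ysum),  include.lowest = TRUE, labels = 1:4)
z_q <- cut(Zexpo, quantile(Zexpo), include.lowest = TRUE, labels = 1:4)
joint_tab  <- table(y_q, z_q)
joint_prop <- joint_tab / sum(joint_tab)                        # P(Y in i, Z in j)
indep_prop <- outer(rowSums(joint_prop), colSums(joint_prop))   # P(Y in i) * P(Z in j)
round(joint_prop - indep_prop, 4)   # differences near 0 suggest independence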
#Question Check to see if independence holds by using Fisher’s Exact Test and the Chi Square Test. What is the difference between the two? Which is most appropriate?
# Note: both tests expect a contingency table of counts, not the raw continuous vector,
# so these calls are left commented out; a working sketch follows.
#fisher.test(XGamma , simulate.p.value = TRUE)
#chisq.test(XGamma, simulate.p.value = T)
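A runnable version of those two tests (a sketch; quartile_tab is an illustrative name) applies them to a quartile contingency table of Y and Z rather than to the raw continuous vector:
# Both tests expect a contingency table of counts
quartile_tab <- table(cut(Ysum,  quantile(Ysum),  include.lowest = TRUE),
                      cut(Zexpo, quantile(Zexpo), include.lowest = TRUE))
fisher.test(quartile_tab, simulate.p.value = TRUE)   # exact test (simulated p for a 4x4 table)
chisq.test(quartile_tab)                             # chi-square test of independence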