Special Distribution

Nirmal Ghimire

3/12/2021

Sampling from an exponential using the inverse sampling method

Random Draws form Uniform Distribution

u<-runif(100000,0,1)
str(u)
 num [1:100000] 0.7516 0.2853 0.0495 0.3173 0.5501 ...

Plot the Inverse of CDF of the Exponential

library(ggplot2)
##pdf("Runiform_Inverse_Exponential.pdf")#Stores graph as pdf
Inverse_Exponential_CDF <- function(x,lambda)-log(x)/lambda
y <- Inverse_Exponential_CDF(u,3)
density_y <- density(y)
plot(density_y, type="l", xlim=c(0,2),
     main="PDF of Inverse Exponential Funcation",
     lwd=3, col="navyblue", xlab="")

##hide<-dev.off()## Should be active if we used the .pdf above

OR Plot of the Inverse of CDF of the Exponential Using Q_EXP

y_qexp <- qexp(u,rate=3)
density_y_qexp <- density(y_qexp)
plot(density_y_qexp, type="l", xlim=c(0,2),
main="PDF of Inverse Exponential Function",
lwd=3, col="navyblue", xlab="")

OR Compare to Random Draws Straight from the Exponential Distribution

y_rexp <- rexp(10000, rate = 3)
density_y_rexp <- density(y_rexp)
plot(density_y_rexp, type="l", xlim=c(0,2),
     main="Random Variable Drawn from Exponential Distribution",
     lwd=3, col="darkred",xlab="")

We still get similar plot, and even smoother than previous.

Poisson Simulation

poisson <- numeric(1000000)

lambda <- 2 #Lambda is the total number of events (k) divided by the number of units (n) in the data (Lambda = k/n)
c <- (0.767 - 0.336/lambda)
beta <- pi/sqrt(3.0*lambda)
alpha <- beta*lambda
k <- (log(c)-lambda-log(beta))

set.seed(20)
u <- runif(100000,0,1)
x <- (alpha - log((1.0-u)/u)/beta)
n <- floor(x+0.5)

set.seed(42)
v <- runif(100000,0,1)
y <- alpha-beta*x
lhs <- y + log(v/(1.0+exp(y)^2))
rhs <- k + n*log(lambda)-log(factorial(n))

j <- 1
for(i in 1:100000){
  if(n[i] >= 0){
    if(lhs[i] <= rhs[i]){
      poisson[j] <- n[i]
      j <- j+1
    }
  }
}
poisson <- poisson[1:j]

Plotting the Simulated Poisson Random Variable

library(graphics)
hist <- hist(poisson,
             main = "Simulated Poisson Distribution",
             xlim = c(0,10),
             breaks = 0:(max(poisson)+1),
             freq = FALSE,
             xlab = "", ylab = "",
             col = "lightblue",
             xaxt = "n")
axis(1,at = hist$mids,labels = 0:max(poisson))

Compare to random draws from the poisson distribution

y_rpois <- rpois(100000,3)
hist <- hist(y_rpois,
             main="Random Variable Drawn from Poisson Distribution",
             xlim=c(0,10),breaks=0:(max(y_rpois)+1),
             freq=FALSE,
             xlab="",ylab="",
             col="lightcoral",
             xaxt="n")
axis(1,at=hist$mids, labels=0:max(y_rpois))

Choosing a Random Sample

library(rvest)
## Sample 25 of 50 states code, with and without replacement
url <- read_html("https://developers.google.com/public-data/docs/canonical/states_csv")
url %>%
  html_nodes("table") %>%
  .[[1]]%>% #takes the first table
  html_table()
# A tibble: 52 x 4
   state latitude longitude name                
   <chr>    <dbl>     <dbl> <chr>               
 1 AK        63.6    -154.  Alaska              
 2 AL        32.3     -86.9 Alabama             
 3 AR        35.2     -91.8 Arkansas            
 4 AZ        34.0    -111.  Arizona             
 5 CA        36.8    -119.  California          
 6 CO        39.6    -106.  Colorado            
 7 CT        41.6     -73.1 Connecticut         
 8 DC        38.9     -77.0 District of Columbia
 9 DE        38.9     -75.5 Delaware            
10 FL        27.7     -81.5 Florida             
# ... with 42 more rows

Choosing a Random Sample: Setting Data Straight

states <- html_table(url)
states <- as.data.frame(states)
head(states)
  state latitude  longitude       name
1    AK 63.58875 -154.49306     Alaska
2    AL 32.31823  -86.90230    Alabama
3    AR 35.20105  -91.83183   Arkansas
4    AZ 34.04893 -111.09373    Arizona
5    CA 36.77826 -119.41793 California
6    CO 39.55005 -105.78207   Colorado
str(states)
'data.frame':   52 obs. of  4 variables:
 $ state    : chr  "AK" "AL" "AR" "AZ" ...
 $ latitude : num  63.6 32.3 35.2 34 36.8 ...
 $ longitude: num  -154.5 -86.9 -91.8 -111.1 -119.4 ...
 $ name     : chr  "Alaska" "Alabama" "Arkansas" "Arizona" ...

Random Sampling of 25 States without Replacement

states_without_replacement <- list(sample(states$name, 25, replace = FALSE))

# Print Output
print(states_without_replacement)
[[1]]
 [1] "Missouri"      "Oregon"        "Alabama"       "Arizona"      
 [5] "Tennessee"     "Illinois"      "New Jersey"    "New York"     
 [9] "Indiana"       "Nevada"        "New Hampshire" "Utah"         
[13] "Oklahoma"      "Virginia"      "Texas"         "Rhode Island" 
[17] "Nebraska"      "Georgia"       "Massachusetts" "Florida"      
[21] "Washington"    "Mississippi"   "Iowa"          "Minnesota"    
[25] "Wisconsin"    

Random Sampling of 25 States with Replacement

states_with_replacement <- sample(states$name, 25, replace = TRUE)
# Print Output
print(states_with_replacement)
 [1] "New Hampshire" "Oklahoma"      "Tennessee"     "South Dakota" 
 [5] "Georgia"       "Alabama"       "Massachusetts" "Kentucky"     
 [9] "Nevada"        "South Dakota"  "Alaska"        "Kansas"       
[13] "Maine"         "Georgia"       "Nebraska"      "Idaho"        
[17] "New Jersey"    "West Virginia" "Wyoming"       "Nebraska"     
[21] "Washington"    "Missouri"      "Wyoming"       "New Mexico"   
[25] "New York"     

Calculating Probability from Normal Distribution

#Characterizing Distribution
mean_of_x <- 2
sd_of_x <- 0.5
# Setting Inputs
x1 <- 1.2
x2 <- 1.34
x3 <- 1.46
x4 <- 2.08
# Probability less than x1?
pnorm(x1,mean_of_x, sd_of_x)
[1] 0.05479929
# Probability between x2 and x3?
pnorm(x3, mean_of_x,sd_of_x) - pnorm(x2,mean_of_x, sd_of_x)
[1] 0.04665358
# Probability greater than x4?
pnorm(x4, mean_of_x, sd_of_x, lower.tail = FALSE)
[1] 0.4364405
# Probability greater than x4?
pnorm(x4, mean_of_x, sd_of_x)#lower.tail is default in R
[1] 0.5635595

Random Question

Julia is applying to a business school and has to take GMAT. To get into her dream school she has to score in the top 15% of all test takers for her year. Last year in 2020 the GMAT had a mean of 690 and a standard deviation of 18.9. Assuming a normal distribution what simple R code would we write to estimate the score Julia would need in to get into her dream school?

qnorm(0.85, 690, 18.9)
[1] 709.5886

Question 2:

A. A manufacturer receives a shipment of 1000 parts from a vendor. The shipment will be unacceptable if more than fifty of the parts are defective. The manufacturer is going to randomly select K parts from the shipment for inspection, and the shipment will be accepted if no defective parts are found in the sample.

How large does K have to be to ensure that the probability that the manufacturer accepts an unacceptable shipment is less than 0.1?

#Based on the Given Information, we have:
total_parts <- 1000
nonacceptable_defective_amount_m <- 51
total_minimum_nondefective_n <- 949
# Accepting and unaceptable shipment is less than 0.1
number_of_defective_sample_allowed_k <- 0
number_of_the_defective_items_drawn_x <- 0

#Establishing the Probability Conditions
probability <- dhyper(number_of_the_defective_items_drawn_x,nonacceptable_defective_amount_m, total_minimum_nondefective_n, number_of_defective_sample_allowed_k)

# Creating while Loop and Measuring the Probability
while(probability > 0.1){
  number_of_defective_sample_allowed_k <- number_of_defective_sample_allowed_k + 1
  probability <- dhyper(number_of_the_defective_items_drawn_x,nonacceptable_defective_amount_m, total_minimum_nondefective_n, number_of_defective_sample_allowed_k)
}
number_of_defective_sample_allowed_k
[1] 44

Question 3

Now suppose that the manufacturer decides to accept the shipment if there is at most 10 defective parts in the sample. How large does K have to be to ensure that the probability that the manufacturer accepts an unacceptable shipment is less than 0.1? As above, a shipment is unacceptable if there are more than 50 defective parts.

#Based on the Given Information, we have:
total_parts <- 1000
nonacceptable_defective_amount_m <- 51
total_minimum_nondefective_n <- 949
# Accepting and unaceptable shipment is less than 0.1
number_of_defective_sample_allowed_k <- 1
number_of_the_defective_items_drawn_x <- 1

#Establishing the Probability Conditions
probability <- phyper(number_of_the_defective_items_drawn_x,nonacceptable_defective_amount_m, total_minimum_nondefective_n, number_of_defective_sample_allowed_k)

# Creating while Loop and Measuring the Probability
while(probability > 0.1){
  number_of_defective_sample_allowed_k <- number_of_defective_sample_allowed_k + 1
  probability <- phyper(number_of_the_defective_items_drawn_x,nonacceptable_defective_amount_m, total_minimum_nondefective_n, number_of_defective_sample_allowed_k)
}
number_of_defective_sample_allowed_k
[1] 73

THANKS