Set up

This code gives an outline of how to simulate a single urn and count how many red balls there are in it at the end.

First we set up some book-keeping variables

# set the random number seed
 library(ggplot2)
library(magrittr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
set.seed(1675)
# define your variables
# How many balls do we start with
InitialNumberOfBalls <- 2
# How many balls will we stop at
TargetNumberOfBalls <- 10
NumberOfSimulations <- 1000   # how many times to repeat the Urn simulation
FinalNumberOfReds <- NULL  # We will store the results in this vector

Set up a vector to be the Urn. For example:

Urn=c("white","black")
print(Urn)
## [1] "white" "black"

Start it off with one red ball and one blue ball

Urn[1] <- "white" 
Urn[2] <- "black"
print(Urn)
## [1] "white" "black"

We construct code to perform the drawing from the Urn, and testing if we have fewer white balls than black balls given n number of draws.

doTheUrn<-function(NumberOfDraws=1){
  Urn=c("white","black")
Urn[1] <- "white" 
Urn[2] <- "black"
NumberOfBalls <- length(Urn)
TargetToStop<-2+NumberOfDraws
while (NumberOfBalls < TargetToStop){
  #  Pick a ball - note that the ball isn't actually removed from the urn here
  Ball <- sample(Urn,1)
  # Add a copy to the Urn
  Urn <- c(Urn,Ball)
  # And count the number of balls
  NumberOfBalls <- length(Urn)
}
 return(Urn)
}

fewerWhiteInUrn<-function(NumberOfDraws=1){
     Urn<-doTheUrn(NumberOfDraws=NumberOfDraws)
    white<-length(which(Urn=='white'))
    black<-length(which(Urn=='black'))
    result<-ifelse(white<black,1,0)
    return(result)
}

What is the probability of fewer white balls after 2m+1 draws?

The problem specified for 2m+1 (odd) number of draws, what is the probability of fewer whites in the ball after an odd number of draws.
For several odd numbers the probability that fewer white balls is \(\sim \frac{1}{2}\)

# m=0 in the equation
d1<-replicate(NumberOfSimulations,fewerWhiteInUrn(1))%>%mean
message("probability of fewer white balls after 1 draw")
## probability of fewer white balls after 1 draw
print(d1)
## [1] 0.489
# m = 1
d3<-replicate(NumberOfSimulations,fewerWhiteInUrn(3))%>%mean
message("probability of fewer white balls after 3 draw")
## probability of fewer white balls after 3 draw
print(d3)
## [1] 0.514
# m =3
d7<-replicate(NumberOfSimulations,fewerWhiteInUrn(7))%>%mean
message("probability of fewer white balls after 7 draw")
## probability of fewer white balls after 7 draw
print(d7)
## [1] 0.502
d101<-replicate(NumberOfSimulations,fewerWhiteInUrn(101))%>%mean
message("probability of fewer white balls after 101 draw")
## probability of fewer white balls after 101 draw
print(d101)
## [1] 0.489
# need to plot a figure.

Question B What is the probability of drawing a white ball on the nth trial?

One interpretation of this problem statement, is that we draw n balls, and compute the last ball as being white.

  • Here we simulate drawing balls (1-100) and compute the probability that the \(n^{th}\) ball is white (final draw).

By simulating 1-100 trials, we look at the last ball (\(n^{th}\) trial), and observe that the probability of drawing a white ball is approaximately \(\frac{1}{2}\).

Hence we conclude that the last ball drawn, irrespective of the sequences of events of replacing the colors of balls, has equal probability of being black/white, then this means there can not be a tendency of balls to ‘take over’ the urn.

  • If you work on the conditional probability of drawing the white ball on 2 draw conditioned on the first draw being white or the first draw being black, the probability of \(P(W_2)=1/2\). Where the \(P(W_1)=P(W_1^{c})=1/2\). This suggests the probability of drawing the 2nd ball as white is conditionally independent of the first draw.

  • if you work on the total probability of \(P(W_3)\) worked out the total probability of drawing white on the 3rd draw \(P(W_3) = P(W_3 | W_2,W_1)P(W_2|W_1)P(W_1) + P(W_3 | W_2^{c},W_1)P(W_2^{c}|W_1)P(W_1) + P(W_3 | W_2,W_1^{c})P(W_2|W_1^{c})P(W_1^{c}) + P(W_3 | W_2^{c},W_1^{c})P(W_2^{c}|W_1^{c})P(W_1^{c})\)

  • This equates \(= (3/4)(2/3)(1/2) + (1/2)(1/3)(1/2)+ (1/2)(1/3)(1/2) + (1/4)(2/3)(1/2) = 1/2\).

  • This also suggests the nth trial is conditionally independent of the previous draws, which we showed by simulation previously.

 ## n can be even or odd.
## we just look at the last draw
nDrawWhite<-function(NumberofDraws=1,NumberOfSimulations){
  d1<-sapply(1:NumberOfSimulations,function(x) doTheUrn(NumberOfDraws=NumberofDraws))
 d1<-as.data.frame(t(d1))
 # grab the last (nth) trial 
 d1<- table(apply(d1,1,function(x) x[length(x)]))%>%prop.table
  return(d1)
}

# nDrawWhite(NumberofDraws = 1,NumberOfSimulations)
#  nDrawWhite(NumberofDraws =2,NumberOfSimulations)
#   nDrawWhite(NumberofDraws = 3,NumberOfSimulations)
# nDrawWhite(NumberofDraws = 4,NumberOfSimulations)
# nDrawWhite(NumberofDraws =5 ,NumberOfSimulations)

draw1_100<-sapply(1:100, function(x) nDrawWhite(NumberofDraws =x,NumberOfSimulations))
draws<-as.data.frame(t(draw1_100))
 draws$n<-rownames(draws)
  draws$n<-factor(draws$n,levels=rownames(draws))
  
  ggplot(draws,aes(x=n,y=white))+geom_point()+geom_hline(yintercept=0.50,colour='red',linetype='dashed')+xlab("number of draws from Urn")+ylab("probability final draw is white")+theme(axis.text.x=element_text(angle=90,hjust=1))

 message("the estimated probability of drawing white as the final ball")
## the estimated probability of drawing white as the final ball
  print(mean(draws$white))
## [1] 0.50051

Is there a tendency of counts?

For several draws we compute the proportion of counts of white and black balls after n draws. we know that if the nth ball has equal probability of being white for sequence of 1-100 draws, then there is not a tendency.

  • this code computes the overall proportion of white and black balls as a total in a sequence of draws. The previous code looked at the \(n^{th}\) draw.
  • We observe that the proportion of black/white balls are approximately 1/2 given several n.
nDrawWhiteTotal<-function(NumberofDraws=1,NumberOfSimulations){
  d1<-sapply(1:NumberOfSimulations,function(x) doTheUrn(NumberOfDraws=NumberofDraws))
 d1<-as.data.frame(t(d1))
 d1<- t(as.data.frame.matrix(apply(d1,1,function(x) prop.table(table(x)))))
  return(d1)
}

 # for simulations, we estimate the proportions of total white and total black balls.
 colMeans(nDrawWhiteTotal(NumberofDraws = 5,NumberOfSimulations))
##     black     white 
## 0.5028571 0.4971429
total1_100<-sapply(1:100, function(x) colMeans(nDrawWhiteTotal(NumberofDraws =x,NumberOfSimulations)))
draws<-as.data.frame(t(total1_100))
 draws$n<-rownames(draws)
  draws$n<-factor(draws$n,levels=rownames(draws))
  draws<-reshape2::melt(draws)
## Using n as id variables
  ggplot(draws,aes(x=n,y=value,color=variable))+geom_point()+geom_hline(yintercept=0.50,colour='red',linetype='dashed')+xlab("number of draws from Urn")+ylab("total proportion of white/black balls")+theme(axis.text.x=element_text(angle=90,hjust=1))

 message("the estimated probability of drawing white as the final ball")
## the estimated probability of drawing white as the final ball
 #on average, of the simulated rolls, each draw has an estimated probability of having equal proportions of white and black.
  #print( draws%>%dplyr::group_by(variable)%>%summarise(color=mean(value)))

Probability of drawing white on nth trial

  • Let \(A_i\) be the event that the ith ball drawn is white. and \(B_j\) be the event of drawing a black ball on the jth trial.

  • Let X be a random variable for the number of white balls drawn. We show that each Ai is exchangeable.

  • Let us draw 4 balls, what is the probability of having 3 white balls?

  • we have the events \((A1,A2,A3,B4),(A1,A2,B3,A4),(A1,B2,A3,A4),(B1,A2,A3,A4)\), where these events are the ways of have 3 white balls in 3 draws.

  • P(A1,A2,A3,B4)= \(\frac{1}{2}\frac{2}{3}\frac{3}{4}\frac{1}{5} = 1/20\)

  • P(A1,A2,B3,A4)= \(\frac{1}{2}\frac{2}{3}\frac{1}{4}\frac{3}{5} = 1/20\)

  • P(A1,B2,A3,A4)= \(\frac{1}{2}\frac{1}{3}\frac{1}{2}\frac{3}{5} = 1/20\)

  • P(B1,A2,A3,A4)= \(\frac{1}{2}\frac{1}{3}\frac{1}{2}\frac{3}{5} = 1/20\)

  • The probability of drawing 3 white balls in 4 draws is \(4/20 = 1/5\) suggesting that this has a uniform distribution of \(1/n+1\), where n is the number of draws.

  • The order of the sequence has the uniform probability, it is the total number of white balls that is informative, not the order in which they are drawn.

  • The probability distribution follows a uniform distribution and we compare the expected to the theoretical distribution visually.

Let us simulate draws 1-100 draws, and compute the probability of 1 white ball for all draws. We expected probability is (1/n+1) for n as the number of draws.

For 1 draw P(X=1) = 1/2.

probabilityWhite<-function(NumberOfDraws=NULL,
                           NumberOfSimulations,
                           X=NULL){
  d1<-sapply(1:NumberOfSimulations,function(x) doTheUrn(NumberOfDraws=NumberOfDraws))
 d1<-as.data.frame(t(d1))
 d1<- t(as.data.frame.matrix(apply(d1,1,function(x) (table(x)))))
 d1<-as.data.frame(d1)
  # count the successes where white =X
  results<-data.frame(success= nrow(d1[which(d1$white==X),]),
                      fail= nrow(d1[which(d1$white!=X),]),
                      N=nrow(d1))
  results$estimated.p<-results$success/results$N
  results$expected<-1/(NumberOfDraws+1)
  return(results)
}

 # for 1 white and 1 black balls, let us 
 # P(X=4)
 message("Probability of 1 white ball given 1 draw; i.e. P(X=1)")
## Probability of 1 white ball given 1 draw; i.e. P(X=1)
 probabilityWhite(NumberOfDraws=1,NumberOfSimulations,X=1)
##   success fail    N estimated.p expected
## 1     497  503 1000       0.497      0.5
 px1<-sapply(1:100, function(x)  probabilityWhite(NumberOfDraws=x,NumberOfSimulations,X=1))
  
 draws<-data.frame(esimtated=unlist(t(px1)[,4]),expected=unlist(t(px1)[,5])) 
 draws$n<-rownames(draws)
   draws$n<-factor(draws$n,levels=rownames(draws))
  draws<-reshape2::melt(draws)
## Using n as id variables
  library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
 a<-ggplot(draws,aes(x=n,y=value,color=variable))+geom_point()+xlab("number of draws from Urn")+ylab("Probability of drawing 1 white ball")+theme(axis.text.x=element_text(angle=90,hjust=1))
 message("the estimated probability of drawing white as the final ball")
## the estimated probability of drawing white as the final ball
b<-ggplot(data.frame(x=seq(1,100),y=1/(seq(1,100)+1)),aes(x=x,y=y))+geom_line()+ggtitle("uniform function")
print(a)

print(b)