Ronald WESONGA (PhD)
Last modified: 25 Sep 2022
Assume we want to generate a random variable \(X\) with cumulative distribution function (CDF) \(F_X\). The inverse transform sampling algorithm is simple:

1. Generate \(U \sim \text{Unif}(0,1)\).
2. Set \(X = F_X^{-1}(U)\), where \(F_X^{-1}(u) = \inf\{x : F_X(x) \ge u\}\).

Then \(X\) will follow the distribution governed by the CDF \(F_X\), which was our desired result.

Notice that, for a discrete random variable, the second step requires a search.
Assume our random variable \(X\) can take on any one of \(K\) values with probabilities \(\{p_1, \ldots, p_K\}\). We implement the algorithm below, assuming these probabilities are stored in a vector called p.vec.
# note: this inefficient implementation is for pedagogical purposes
# in general, consider using the rmultinom() function
discrete.inv.transform.sample <- function(p.vec) {
  U <- runif(1)
  if (U <= p.vec[1]) {
    return(1)
  }
  for (state in 2:length(p.vec)) {
    if (sum(p.vec[1:(state-1)]) < U && U <= sum(p.vec[1:state])) {
      return(state)
    }
  }
}

Assume \(Y\) is an exponential random variable with rate parameter \(\lambda = 2\). Recall that the probability density function is \(p(y) = 2e^{-2y}\), for \(y > 0\). First, we compute the CDF: \[F_Y(x) = P(Y\leq x) = \int_0^x 2e^{-2y}\, dy = 1 - e^{-2x}\]
Solving for the inverse CDF, we get that \[F_Y^{-1}(y) = -\frac{\ln(1-y)}{2}\]
Using our algorithm above, we first generate \(U \sim \text{Unif}(0,1)\), then set \(X = F_Y^{-1}(U) = -\frac{\ln(1-U)}{2}\). We do this in the R code below and compare the histogram of our samples with the true density of \(Y\).
# inverse transform sampling
num.samples <- 1000
U <- runif(num.samples)
X <- -log(1-U)/2
# plot
hist(X, freq=F, xlab='X', main='Generating Exponential R.V.')
curve(dexp(x, rate=2), 0, 3, lwd=2, xlab = "", ylab = "", add = T)

Next, consider a discrete random variable \(X\) with the probability mass function below.

| x_i | P(X=x_i) |
|---|---|
| 1 | 0.1 |
| 2 | 0.4 |
| 3 | 0.2 |
| 4 | 0.3 |
num.samples <- 1000
p.vec <- c(0.1, 0.4, 0.2, 0.3)
names(p.vec) <- 1:4
samples <- numeric(num.samples)
for (i in seq_len(num.samples)) {
  samples[i] <- discrete.inv.transform.sample(p.vec)
}
barplot(p.vec, main='True Probability Mass Function')
barplot(table(samples)/num.samples, main='Empirical PMF') # sample frequencies should approximate p.vec

To see why the method works, note that for \(U \sim \text{Unif}(0,1)\),
\[P(F_X^{-1}(U) \le x) = P(U \le F_X(x)) = F_U(F_X(x)) = F_X(x),\]
and therefore \(F_X^{-1}(U)\) has the same distribution as \(X\).
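For larger \(K\), the linear search can be vectorized. A minimal sketch (not in the original notes, and assuming p.vec sums to 1) using cumsum() and findInterval():

# vectorized discrete inverse transform (sketch)
# findInterval(u, Fk) counts how many cumulative probabilities lie at or below u
discrete.inv.transform.sample.vec <- function(n, p.vec) {
  Fk <- cumsum(p.vec)
  findInterval(runif(n), Fk) + 1L
}
samples <- discrete.inv.transform.sample.vec(1000, c(0.1, 0.4, 0.2, 0.3))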
To generate a random observation \(X\), generate \(U \sim \text{Unif}(0,1)\) and set \(X = F_X^{-1}(U)\).
NB: The method is easy to apply, provided \(F_X^{-1}\) is easy to compute. For example, the density \(f(x) = 3x^2\) on \(0 < x < 1\) has CDF \(F(x) = x^3\), so \(F^{-1}(u) = u^{1/3}\):
n <- 1000
u <- runif(n)
x <- u^(1/3)
hist(x, prob = TRUE, main = bquote(f(x)==3*x^2)) #density histogram of sample
y <- seq(0, 1, .01)
lines(y, 3*y^2) #density curve f(x)

For the Exponential(\(\lambda\)) distribution, \(F^{-1}(u) = -\ln(1-u)/\lambda\); since \(1-U\) is also Unif(0,1), we may use \(X = -\ln(U)/\lambda\) directly.

n <- 1000000
u <- runif(n)
lambda=2
x <- -log(u)/lambda
hist(x, prob = TRUE, breaks = 20, main = bquote(f(x)==lambda*exp(-lambda*x))) #density histogram of sample
y <- seq(0, 10, 0.2)
lines(y, lambda*exp(-lambda*y)) #density curve f(x)

# write your own function to generate r.v.'s from the Exponential distribution
myexp <- function(n, lambda) {
  u <- runif(n)
  x <- -log(u)/lambda
  return(x) #return the sample explicitly
}
x <- myexp(100, 3)
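As a quick sanity check (this comparison is not part of the original notes), the output of myexp() can be compared with the theoretical Exponential(3) quantiles:

# Q-Q plot of a myexp() sample against Exponential(3) quantiles
x <- myexp(10000, 3)
qqplot(qexp(ppoints(10000), rate = 3), x,
       xlab = "Exponential(3) quantiles", ylab = "sample")
abline(0, 1) #points should fall near this line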
# compare the timing of three different programming approaches
n <- 400000
timings <- numeric(3) #preallocate (avoid the name T, which masks TRUE)
# 1. using a loop without preallocation of x
ptm <- proc.time() #start timer
x <- 0
for (i in 1:n) {
  u <- runif(1)
  x[i] <- u^(1/3)
}
tmp <- proc.time() - ptm #stop timer
timings[1] <- tmp[1]
# 2. using a loop with preallocation of x
ptm <- proc.time() #start timer
x <- rep(0, n)
for (i in 1:n) {
  u <- runif(1)
  x[i] <- u^(1/3)
}
tmp <- proc.time() - ptm #stop timer
timings[2] <- tmp[1]
# 3. using vectorized programming (avoiding the loop entirely)
ptm <- proc.time() #start timer
u <- runif(n); y <- u^(1/3)
tmp <- proc.time() - ptm #stop timer
timings[3] <- tmp[1]

## [1] 0.409
## [1] 0.241961
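The same start/stop pattern can be written more compactly with R's built-in system.time(); a minimal sketch, timing the vectorized version:

# system.time() wraps the proc.time() bookkeeping above
system.time({
  u <- runif(n)
  x <- u^(1/3)
})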
The Logarithmic(\(\theta\)) distribution has pmf \(f(k) = a\,\theta^k / k\) for \(k = 1, 2, \ldots\), where \(a = -1/\log(1-\theta)\). The generator below applies the discrete inverse transform, storing the CDF in a vector and extending it on the rare occasions a larger value is needed.

rlogarithmic <- function(n, theta) {
  #returns a random Logarithmic(theta) sample of size n
  u <- runif(n)
  #set the initial length of the cdf vector
  N <- ceiling(-16 / log10(theta))
  k <- 1:N
  a <- -1/log(1-theta)
  #compute f(k) on the log scale for numerical stability
  fk <- exp(log(a) + k * log(theta) - log(k))
  Fk <- cumsum(fk)
  x <- integer(n)
  for (i in 1:n) {
    x[i] <- as.integer(sum(u[i] > Fk)) #F^{-1}(u) - 1
    while (x[i] == N) {
      #if x == N we need to extend the cdf
      #(very unlikely because N is large)
      logf <- log(a) + (N+1)*log(theta) - log(N+1)
      fk <- c(fk, exp(logf))
      Fk <- c(Fk, Fk[N] + fk[N+1])
      N <- N + 1
      x[i] <- as.integer(sum(u[i] > Fk))
    }
  }
  x + 1
}

n <- 1000
theta <- 0.5
x <- rlogarithmic(n, theta)
#compute density of logarithmic(theta) for comparison
k <- sort(unique(x))
p <- -1 / log(1 - theta) * theta^k / k
se <- sqrt(p*(1-p)/n) #standard error
round(rbind(table(x)/n, p, se), 3)

## 1 2 3 4 5 6 8 9
## 0.734 0.172 0.059 0.022 0.008 0.003 0.001 0.001
## p 0.721 0.180 0.060 0.023 0.009 0.004 0.001 0.000
## se 0.014 0.012 0.008 0.005 0.003 0.002 0.001 0.001
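Optionally (this check is not in the original notes), the fit can be inspected visually by plotting the empirical frequencies against the theoretical pmf:

# empirical frequencies (spikes) vs theoretical logarithmic pmf (points)
p.hat <- tabulate(x)/n
k <- 1:max(x)
plot(k, p.hat, type = "h", xlab = "k", ylab = "probability")
points(k, -1/log(1 - theta) * theta^k / k, pch = 19)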
A vectorized version replaces the per-observation loop with a single matrix comparison:

rlogarithmic <- function(n, theta) {
  u <- runif(n)
  #set the initial length of the cdf vector
  N <- ceiling(-16 / log10(theta))
  k <- 1:N
  a <- -1/log(1-theta)
  fk <- a*theta^k/k
  Fk <- cumsum(fk)
  #N x n matrix whose j-th column repeats u[j]; Fk is recycled down columns
  Mu <- rep(1, N) %*% t(u)
  y <- as.integer(colSums(Mu > Fk))
  I <- which(y == N); n1 <- length(I)
  if (n1 > 0) {
    for (i in 1:n1) {
      while (y[I[i]] == N) {
        #if y == N we need to extend the cdf
        #(very unlikely because N is large)
        f <- a*theta^(N+1)/(N+1) #f(N+1); note the denominator is N+1, not N
        fk <- c(fk, f)
        Fk <- c(Fk, Fk[N] + fk[N+1])
        N <- N + 1
        y[I[i]] <- as.integer(sum(u[I[i]] > Fk))
      }
    }
  }
  y + 1
}
We now time the two implementations inline, with \(n = 10^6\) and \(\theta = 0.9\).

n <- 1000000
theta <- 0.9
ptm=proc.time()
set.seed(10)
u <- runif(n)
#set the initial length of cdf vector
N <- ceiling(-16 / log10(theta))
k <- 1:N
a <- -1/log(1-theta)
fk <- a*theta^k/k
Fk <- cumsum(fk)
x <- integer(n)
for (i in 1:n) {
  x[i] <- as.integer(sum(u[i] > Fk)) #F^{-1}(u) - 1
  while (x[i] == N) {
    #if x == N we need to extend the cdf
    #(very unlikely because N is large)
    logf <- log(a) + (N+1)*log(theta) - log(N+1)
    fk <- c(fk, exp(logf))
    Fk <- c(Fk, Fk[N] + fk[N+1])
    N <- N + 1
    x[i] <- as.integer(sum(u[i] > Fk))
  }
}
x <- x + 1
proc.time() - ptm
ptm=proc.time()
set.seed(10)
u <- runif(n)
#set the initial length of cdf vector
N <- ceiling(-16 / log10(theta))
k <- 1:N
a <- -1/log(1-theta)
fk <- a*theta^k/k
Fk <- cumsum(fk)
Mu <- rep(1, N) %*% t(u)
y <- as.integer(colSums(Mu > Fk))
I <- which(y == N); n1 <- length(I)
if (n1 > 0) {
  for (i in 1:n1) {
    while (y[I[i]] == N) {
      #if y == N we need to extend the cdf
      #(very unlikely because N is large)
      f <- a*theta^(N+1)/(N+1)
      fk <- c(fk, f)
      Fk <- c(Fk, Fk[N] + fk[N+1])
      N <- N + 1
      y[I[i]] <- as.integer(sum(u[I[i]] > Fk))
    }
  }
}
y <- y + 1
proc.time() - ptm
The acceptance–rejection method. To sample from a density \(f\), find a random variable \(Y\) with density \(g\) and a constant \(c\) that satisfy \(\frac{f(t)}{g(t)} \le c\) for all \(t\) such that \(f(t) > 0\).
For each random variate required:
1. Generate \(y\) from the distribution with density \(g\).
2. Generate \(u\) from Unif(0,1).
3. If \(u < \frac{f(y)}{c\,g(y)}\), accept \(y\); otherwise reject it and return to step 1.
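To see why the method works and how efficient it is, note that a candidate is accepted with probability
\[P\!\left(U < \frac{f(Y)}{c\,g(Y)}\right) = \int \frac{f(t)}{c\,g(t)}\, g(t)\, dt = \frac{1}{c},\]
so on average \(c\) candidates are generated per accepted draw, and the accepted values have density \(f\). In the example below, \(f\) is the Beta(2,2) density \(f(x) = 6x(1-x)\) on \((0,1)\), \(g\) is the Unif(0,1) density, and \(c = 6\), so a candidate \(x\) is accepted when \(u < x(1-x)\).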
ptm <- proc.time()
n <- 100000
k <- 0 #counter for accepted
j <- 0 #iterations
y <- numeric(n)
while (k < n) {
  u <- runif(1)
  j <- j + 1
  x <- runif(1) #random variate from g
  if (x * (1-x) > u) {
    #we accept x
    k <- k + 1
    y[k] <- x
  }
}
proc.time() - ptm

## user system elapsed
## 1.988 0.154 2.150
A mostly vectorized version generates \(cn\) candidates at once and falls back to a loop only if fewer than \(n\) are accepted:

ptm <- proc.time()
n <- 100000; c <- 6; cn <- c*n
x <- runif(cn); u <- runif(cn)
y <- numeric(n)
ytmp <- x[x*(1-x) > u] #accepted candidates
n1 <- length(ytmp)
if (n1 >= n) {
  y <- ytmp[1:n]
} else {
  y[1:n1] <- ytmp
  k <- n1 #counter for accepted (incremented before each assignment)
  j <- 0 #iterations
  while (k < n) {
    u <- runif(1)
    j <- j + 1
    x <- runif(1) #random variate from g
    if (x * (1-x) > u) {
      #we accept x
      k <- k + 1
      y[k] <- x
    }
  }
}
proc.time() - ptm

## user system elapsed
## 0.053 0.005 0.058
#compare empirical and theoretical percentiles
p <- seq(.1, .9, .1)
Qhat <- quantile(y, p) #quantiles of sample
Q <- qbeta(p, 2, 2) #theoretical quantiles
se <- sqrt(p * (1-p) / (n * dbeta(Q, 2, 2)^2)) #see Ch. 2
round(rbind(Qhat, Q, se), 3)

## 10% 20% 30% 40% 50% 60% 70% 80% 90%
## Qhat 0.197 0.288 0.365 0.434 0.500 0.567 0.638 0.715 0.804
## Q 0.196 0.287 0.363 0.433 0.500 0.567 0.637 0.713 0.804
## se 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001
Beta variates can also be obtained by transformation: if \(U \sim \text{Gamma}(a, 1)\) and \(V \sim \text{Gamma}(b, 1)\) are independent, then \(U/(U+V) \sim \text{Beta}(a, b)\).

n <- 1000
a <- 3
b <- 2
u <- rgamma(n, shape=a, rate=1)
v <- rgamma(n, shape=b, rate=1)
x <- u / (u + v)
q <- qbeta(ppoints(n), a, b)
qqplot(q, x, cex=0.25, xlab="Beta(3, 2)", ylab="Sample")
abline(0, 1)

A Logarithmic(\(\theta\)) sample can also be generated in closed form from two uniform samples, with no stored CDF:

n <- 1000
theta <- 0.5
u <- runif(n) #generate logarithmic sample
v <- runif(n)
x <- floor(1 + log(v) / log(1 - (1 - theta)^u))
k <- 1:max(x) #calc. logarithmic probs.
p <- -1 / log(1 - theta) * theta^k / k
se <- sqrt(p*(1-p)/n)
p.hat <- tabulate(x)/n
print(round(rbind(p.hat, p, se), 3))

## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## p.hat 0.737 0.175 0.054 0.022 0.004 0.005 0.002 0.000 0.001
## p 0.721 0.180 0.060 0.023 0.009 0.004 0.002 0.001 0.000
## se 0.014 0.012 0.008 0.005 0.003 0.002 0.001 0.001 0.001
A chi-squared \(\chi^2(\nu)\) variate is the sum of \(\nu\) squared independent standard normals, i.e. a convolution:

n <- 1000
nu <- 2
X <- matrix(rnorm(n*nu), n, nu)^2 #matrix of squared normals
#sum the squared normals across each row: method 1
y <- rowSums(X)
#method 2
y <- apply(X, MARGIN=1, FUN=sum) #a vector length n
mean(y)

## [1] 1.932833
## [1] 7.435713
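As an optional check (not in the original notes), the sample can be compared with R's built-in chi-squared quantiles:

# Q-Q plot of the summed squared normals against chi-squared(2) quantiles
qqplot(qchisq(ppoints(n), df = nu), y,
       xlab = "Chi-squared(2) quantiles", ylab = "sample")
abline(0, 1)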
n <- 1000
x1 <- rgamma(n, 2, 2)
x2 <- rgamma(n, 2, 4)
s <- x1 + x2 #the convolution
u <- runif(n)
k <- as.integer(u > 0.5) #vector of 0's and 1's
x <- k * x1 + (1-k) * x2 #the mixture
par(mfcol=c(1,2)) #two graphs per page
hist(s, prob=TRUE)
hist(x, prob=TRUE) #density histograms are plotted
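The two constructions are genuinely different objects: the convolution \(S = X_1 + X_2\) has density
\[f_S(s) = \int f_{X_1}(t)\, f_{X_2}(s - t)\, dt,\]
while the 50–50 mixture has density
\[f_X(x) = \tfrac{1}{2} f_{X_1}(x) + \tfrac{1}{2} f_{X_2}(x),\]
which is why the two histograms have different shapes.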
n <- 5000
k <- sample(1:5, size=n, replace=TRUE, prob=(1:5)/15)
rate <- 1/k
x <- rgamma(n, shape=3, rate=rate)
#plot the density of the mixture
#with the densities of the components
plot(density(x), xlim=c(0,40), ylim=c(0,.3),
lwd=3, xlab="x", main="")
for (i in 1:5)
  lines(density(rgamma(n, 3, 1/i)))

f <- function(x, lambda, theta) {
  #density of the mixture at the point x
  sum(dgamma(x, 3, lambda) * theta)
}
p <- c(.1,.2,.2,.3,.2)
lambda <- c(1,1.5,2,2.5,3)
x <- seq(0, 8, length=200)
dim(x) <- length(x) #need for apply
#compute density of the mixture f(x) along x
y <- apply(x, 1, f, lambda=lambda, theta=p)
#plot the density of the mixture
plot(x, y, type="l", ylim=c(0,.85), lwd=3, ylab="Density")
for (j in 1:5) {
  #add the j-th gamma density to the plot
  y <- apply(x, 1, dgamma, shape=3, rate=lambda[j])
  lines(x, y)
}

The negative binomial distribution arises as a Poisson–Gamma mixture: if \(\Lambda \sim \text{Gamma}(r, \beta)\) and \(X \mid \Lambda \sim \text{Poisson}(\Lambda)\), then marginally \(X\) is negative binomial with size \(r\) and success probability \(\beta/(1+\beta)\).

#generate a Poisson-Gamma mixture
n <- 1000
r <- 4
beta <- 3
lambda <- rgamma(n, r, beta) #lambda is random
#now supply the sample of lambda's as the Poisson mean
x <- rpois(n, lambda) #the mixture
#compare with negative binomial
mix <- tabulate(x+1) / n
negbin <- round(dnbinom(0:max(x), r, beta/(1+beta)), 3)
se <- sqrt(negbin * (1 - negbin) / n)
round(rbind(mix, negbin, se), 3)

## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## mix 0.322 0.312 0.183 0.103 0.052 0.017 0.006 0.003 0.002
## negbin 0.316 0.316 0.198 0.099 0.043 0.017 0.006 0.002 0.001
## se 0.015 0.015 0.013 0.009 0.006 0.004 0.002 0.001 0.001
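All three multivariate normal generators below rely on the same transformation: if \(Z\) is a \(1 \times d\) vector of independent standard normals and \(R\) is any matrix with \(R^\top R = \Sigma\), then
\[X = \mu + Z R \sim N_d(\mu, \Sigma),\]
since \(\operatorname{Cov}(ZR) = R^\top I_d R = \Sigma\). The methods differ only in how the square-root factor \(R\) is computed: eigen decomposition, singular value decomposition, or Choleski factorization.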
# mean and covariance parameters
mu <- c(0, 0)
Sigma <- matrix(c(1, .9, .9, 1), nrow = 2, ncol = 2)
rmvn.eigen <-
function(n, mu, Sigma) {
# generate n random vectors from MVN(mu, Sigma)
# dimension is inferred from mu and Sigma
d <- length(mu)
ev <- eigen(Sigma, symmetric = TRUE)
lambda <- ev$values
V <- ev$vectors
R <- V %*% diag(sqrt(lambda)) %*% t(V)
Z <- matrix(rnorm(n*d), nrow = n, ncol = d)
X <- Z %*% R + matrix(mu, n, d, byrow = TRUE)
X
}
# generate the sample
X <- rmvn.eigen(1000, mu, Sigma)
plot(X, xlab = "x", ylab = "y", pch = 20)
print(colMeans(X)) #sample means (these print calls are assumed; they match the output below)
print(cor(X)) #sample correlation

## [1] 0.03464225 0.03213818
## [,1] [,2]
## [1,] 1.0000 0.9018
## [2,] 0.9018 1.0000
rmvn.svd <-
function(n, mu, Sigma) {
# generate n random vectors from MVN(mu, Sigma)
# dimension is inferred from mu and Sigma
d <- length(mu)
S <- svd(Sigma)
R <- S$u %*% diag(sqrt(S$d)) %*% t(S$v) #sq. root Sigma
Z <- matrix(rnorm(n*d), nrow=n, ncol=d)
X <- Z %*% R + matrix(mu, n, d, byrow=TRUE)
X
}

rmvn.Choleski <-
function(n, mu, Sigma) {
# generate n random vectors from MVN(mu, Sigma)
# dimension is inferred from mu and Sigma
d <- length(mu)
Q <- chol(Sigma) # Choleski factorization of Sigma
Z <- matrix(rnorm(n*d), nrow=n, ncol=d)
X <- Z %*% Q + matrix(mu, n, d, byrow=TRUE)
X
}
#generating the samples according to the mean and covariance
#structure as the four-dimensional iris virginica data
y <- subset(x=iris, Species=="virginica")[, 1:4]
mu <- colMeans(y)
Sigma <- cov(y)
mu

## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 6.588 2.974 5.552 2.026

Sigma

## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 0.40434286 0.09376327 0.30328980 0.04909388
## Sepal.Width 0.09376327 0.10400408 0.07137959 0.04762857
## Petal.Length 0.30328980 0.07137959 0.30458776 0.04882449
## Petal.Width 0.04909388 0.04762857 0.04882449 0.07543265
library(MASS)
library(mvtnorm)
n <- 100 #sample size
d <- 30 #dimension
N <- 2000 #iterations
mu <- numeric(d)
set.seed(100)
system.time(for (i in 1:N)
  rmvn.eigen(n, mu, cov(matrix(rnorm(n*d), n, d))))

## user system elapsed
## 1.593 0.017 1.617

The remaining timings were produced by analogous system.time() calls (omitted here) for the other generators, presumably rmvn.svd, rmvn.Choleski, MASS::mvrnorm, and mvtnorm::rmvnorm:

## user system elapsed
## 1.719 0.018 1.744
## user system elapsed
## 1.090 0.016 1.117
## user system elapsed
## 1.496 0.016 1.514
## user system elapsed
## 1.887 0.023 1.932
## user system elapsed
## 0.510 0.003 0.514
library(MASS) #for mvrnorm
#inefficient version loc.mix.0 with loops
loc.mix.0 <- function(n, p, mu1, mu2, Sigma) {
  #generate sample from BVN location mixture
  X <- matrix(0, n, 2)
  for (i in 1:n) {
    k <- rbinom(1, size = 1, prob = p)
    if (k)
      X[i,] <- mvrnorm(1, mu = mu1, Sigma) else
      X[i,] <- mvrnorm(1, mu = mu2, Sigma)
  }
  return(X)
}
#more efficient version
loc.mix <- function(n, p, mu1, mu2, Sigma) {
  #generate sample from BVN location mixture
  n1 <- rbinom(1, size = n, prob = p)
  n2 <- n - n1
  x1 <- mvrnorm(n1, mu = mu1, Sigma)
  x2 <- mvrnorm(n2, mu = mu2, Sigma)
  X <- rbind(x1, x2) #combine the samples
  return(X[sample(1:n), ]) #mix them
}
x <- loc.mix(1000, .5, rep(0, 4), 2:5, Sigma = diag(4))
r <- range(x) * 1.2
par(mfrow = c(2, 2))
for (i in 1:4)
  hist(x[ , i], xlim = r, ylim = c(0, .3), freq = FALSE,
       main = "", breaks = seq(-5, 10, .5))

runif.sphere <- function(n, d) {
  # return a random sample uniformly distributed
  # on the unit sphere in R^d
  M <- matrix(rnorm(n*d), nrow = n, ncol = d)
  L <- apply(M, MARGIN = 1,
             FUN = function(x) { sqrt(sum(x*x)) }) #row norms
  D <- diag(1 / L)
  U <- D %*% M #scale each row to unit length
  U
}
#generate a sample in d=2 and plot
X <- runif.sphere(200, 2)
par(pty = "s")
plot(X, xlab = bquote(x[1]), ylab = bquote(x[2]))

To count arrivals of a homogeneous Poisson(\(\lambda\)) process in \([0, t_0]\), generate \(N \sim \text{Poisson}(\lambda \cdot \text{upper})\) arrivals on \([0, \text{upper}]\) and place them uniformly on the interval:

lambda <- 2
t0 <- 3
upper <- 100
pp <- numeric(10000)
for (i in 1:10000) {
  N <- rpois(1, lambda * upper)
  Un <- runif(N, 0, upper) #unordered arrival times
  Sn <- sort(Un) #arrival times
  n <- min(which(Sn > t0)) #arrivals+1 in [0, t0]
  pp[i] <- n - 1 #arrivals in [0, t0]
}
#alternatively, the loop can be replaced by the replicate function
pp <- replicate(10000, expr = {
  N <- rpois(1, lambda * upper)
  Un <- runif(N, 0, upper) #unordered arrival times
  Sn <- sort(Un) #arrival times
  n <- min(which(Sn > t0)) #arrivals+1 in [0, t0]
  n - 1 }) #arrivals in [0, t0]
c(mean(pp), var(pp))

## [1] 5.989900 5.930391

Both values are close to \(\lambda t_0 = 6\), the common mean and variance of the Poisson(\(\lambda t_0\)) count.
A nonhomogeneous Poisson process with intensity \(\lambda(t) \le \lambda\) can be obtained by thinning a homogeneous rate-\(\lambda\) process: an arrival at time \(s\) is kept with probability \(\lambda(s)/\lambda\). Here \(\lambda(t) = 3\cos^2(t)\) and the dominating rate is \(\lambda = 3\):

lambda <- 3
upper <- 100
N <- rpois(1, lambda * upper)
Tn <- rexp(N, lambda)
Sn <- cumsum(Tn)
Un <- runif(N)
keep <- (Un <= cos(Sn)^2) #indicator, as logical vector
Sn[keep]

## [1] 0.001097207 0.790234714 2.988116220 5.270267990 5.989815518
## [6] 6.089009458 6.455637919 6.605802346 9.391179436 10.242951711
## [11] 10.495410809 12.325213930 12.614410749 15.526806567 15.759715421
## [16] 16.677197908 17.523612852 18.665033420 19.373242993 19.614895832
## [21] 21.375084270 21.450162534 21.510639341 21.804333071 21.863031174
## [26] 22.009819126 22.280539776 22.286979109 22.750249688 24.412027582
## [31] 24.983073530 25.412349526 25.420579035 25.498061756 27.441607537
## [36] 27.865654097 28.114364558 28.181945353 28.450639956 28.961695552
## [41] 30.463253583 30.607886949 30.806660768 30.898600211 32.355933237
## [46] 33.761265381 36.855718691 37.423747602 37.472743631 37.686058677
## [51] 38.592530008 38.906927921 39.608020608 39.925756695 39.977400775
## [56] 40.038045009 40.413594766 41.008060356 41.444759043 43.367921678
## [61] 44.240647378 44.298076693 44.309494234 44.388075648 45.047162507
## [66] 46.986129871 47.011497207 47.471522081 47.609871694 47.782076295
## [71] 48.914056938 50.746883776 50.810697266 51.006964557 51.078330431
## [76] 51.193885565 51.449352836 52.893897727 53.256766326 53.631421060
## [81] 53.790066627 53.916777008 53.974030113 54.415255263 55.913519440
## [86] 56.540217247 56.714779538 58.328552163 59.138638852 59.531450892
## [91] 59.605316577 59.714068769 59.876747497 60.345854063 60.605089614
## [96] 62.363190124 62.683130410 62.684676671 62.989091876 66.159647190
## [101] 68.206635812 68.833442112 68.958425028 69.085421430 69.423941766
## [106] 69.659171300 70.068821289 71.915636982 72.348321335 72.550750102
## [111] 72.976777800 74.391609237 74.949803743 74.959154730 75.422882328
## [116] 75.601859751 77.814640931 78.524164498 78.700919604 79.000359354
## [121] 81.144564110 82.644366811 82.870674394 84.724164904 85.097678177
## [126] 85.111671505 85.195912874 87.735149356 87.763498170 88.360490164
## [131] 90.485826505 90.579362688 90.710549050 90.999782038 91.080940991
## [136] 91.306603103 91.518876128 92.365629717 93.868046811 94.078289261
## [141] 94.204521814 94.445765368 94.677991967 94.775215965
A renewal process generalizes the Poisson process by allowing arbitrary i.i.d. interarrival times; here they are Geometric(p = 0.2):

t0 <- 5
Tn <- rgeom(100, prob = .2) #interarrival times
Sn <- cumsum(Tn) #arrival times
n <- min(which(Sn > t0)) #arrivals+1 in [0, t0]
Nt0 <- replicate(1000, expr = {
  Sn <- cumsum(rgeom(100, prob = .2))
  min(which(Sn > t0)) - 1
})
table(Nt0)/1000

## Nt0
## 0 1 2 3 4 5 6 7 8
## 0.253 0.308 0.225 0.132 0.049 0.020 0.008 0.004 0.001
t0 <- seq(0.1, 30, .1)
mt <- numeric(length(t0))
for (i in 1:length(t0)) {
  mt[i] <- mean(replicate(1000, {
    Sn <- cumsum(rgeom(100, prob = .2))
    min(which(Sn > t0[i])) - 1
  }))
}
plot(t0, mt, type = "l", xlab = "t", ylab = "mean")
abline(0, .25) #slope 1/E(interarrival time) = 1/4, the long-run arrival rate

A symmetric random walk takes i.i.d. increments of +1 or -1 with equal probability:

n <- 400
incr <- sample(c(-1, 1), size = n, replace = TRUE)
S <- as.integer(c(0, cumsum(incr)))
plot(0:n, S, type = "l", main = "", xlab = "i")

Finally, we simulate the time of the last return to 0 of a symmetric random walk in \((0, n]\), using the distribution of the first return time \(T\): \(P(T = 2n) = \binom{2n-2}{n-1}\big/\big(n\,2^{2n-1}\big)\).

set.seed(12345)
#compute the probabilities directly
n <- 1:10000
p2n <- exp(lgamma(2*n-1) - log(n) - (2*n-1)*log(2) - 2*lgamma(n))
#or compute using dbinom
P2n <- (.5/n) * dbinom(n-1, size = 2*n-2, prob = 0.5)
pP2n <- cumsum(P2n)
#given n compute the time of the last return to 0 in (0,n]
n <- 200
sumT <- 0
while (sumT <= n) {
  u <- runif(1)
  s <- sum(u > pP2n)
  if (s == length(pP2n))
    warning("T is truncated")
  Tj <- 2 * (1 + s) #a draw of the first return time T
  #print(c(Tj, sumT))
  sumT <- sumT + Tj
}
sumT - Tj #time of the last return to 0 in (0, n]

## [1] 132