Question : Generate two exponential random variables that respectively follows exp(1) and exp(2). Generate random numbers for each case; 1) independent random variable X, Y, 2) corr(X,Y) =0.7

1. Generating independent exponential random variables with inverse transformation method

set.seed(2013122032)
x<-c()
y<-c()
for(i in 1:100){
u1<-runif(1)
u2<-runif(1)
x[i]<--log(1-u1)
y[i]<--(1/3)*log(1-u2)
}
indep.set<-rbind(x,y)
indep.set[,1:5]
##        [,1]       [,2]      [,3]      [,4]      [,5]
## x 0.7572952 0.35661441 0.0977687 2.1055625 0.1451131
## y 0.2458642 0.04823983 1.0869986 0.1608287 0.2561813

2. Generating dependent exponential random variables by two methods

1) (Attempted) Using general copula and H function
set.seed(2013122032)
unif<-matrix(runif(200),,2)
unif<-as.data.frame(unif)
colnames(unif)<-c("u1", "u2")
F.inverse<--log(1-unif$u1)
G.inverse<--(1/3)*log(1-unif$u2)
H<-pexp(F.inverse, 1)*pexp(G.inverse,3)
H[1:10]
##  [1] 0.08270042 0.51313078 0.05028662 0.11589813 0.07195936 0.75179174
##  [7] 0.68751827 0.18375061 0.05058912 0.32974008
  • However, the only way to generate dependent random variables is by using Gaussian Copula. There is no other method to take correlation into account.
2) Generating dependent exponential variables by using Gaussian Copula and standard normal random variables.
library(Matrix)
x<-c()
y<-c()
rho<-0.7
mu1<-0
mu2<-0
sigma1<-1
sigma2<-1
X1<-c()
X2<-c()
X<-matrix(0,2,1)
set.seed(2013122032)
for(i in 1:100){
  z1<-rnorm(1)
  z2<-rnorm(1)
  mu<-matrix(c(mu1, mu2),2,)
  Z<-matrix(c(z1,z2),2,1)
  C<-matrix(c(sigma1^2, rho*sigma1*sigma2, rho*sigma1*sigma2*sigma2^2),2,2)
  A<-t(chol(C))
  X<-A%*%Z + mu
  X1[i]<-X[1]
  X2[i]<-X[2]
  expo1<-pnorm(X1[i])
  expo2<-pnorm(X2[i])
  x[i]<--log(1-expo1)
  y[i]<--(1/3)*log(1-expo2)
}
depend.set<-rbind(x,y)
depend.set[,1:5]
##        [,1]       [,2]       [,3]      [,4]       [,5]
## x 0.7572952 0.09776871 0.14511311 0.6215402 0.17778300
## y 0.1563894 0.20737970 0.02439581 0.1997487 0.05578053
  • Gaussian Copula method uses standard normal random variables as intermediate random variables to generate set of variables from desired distribution. Here we generated Z1, Z2 to be transformed and usedcdf to draw values between 0 and 1.

Q2. Comparing with histogram

par(mfrow=c(2,2))
hist(indep.set[1,], main="X1 from independent set",xlab="X1")
hist(indep.set[2,], main="X2 from independent set",xlab="X2")
hist(depend.set[1,], main="X1 from dependent set", xlab="X1")
hist(depend.set[2,], main="X2 from dependent set", xlab="X2")

Q3. Comparing with scatter plot

par(mfrow=c(2,2))
plot(indep.set[1,], main="X1 from independent set", ylab="X1")
plot(indep.set[2,], main="X2 from independent set", ylab="X2")
plot(depend.set[1,], main="X1 from dependent set", ylab="X1")
plot(depend.set[2,], main="X2 from dependent set", ylab="X2")

Q4. Verify if correlation matches the theoretical value.

cor(indep.set[1,],indep.set[2,])
## [1] -0.08317728
cor(depend.set[1,],depend.set[2,])
## [1] 0.5631688
cor(X1, X2)
## [1] 0.649058

Appendix

  1. Tried increasing rho(correlation between initial standard normal variables)
x<-c()
y<-c()
rho<-0.8
mu1<-0
mu2<-0
sigma1<-1
sigma2<-1

X<-matrix(0,2,1)
set.seed(2013122032)
for(i in 1:100){
  z1<-rnorm(1)
  z2<-rnorm(1)
  mu<-matrix(c(mu1, mu2),2,1)
  Z<-matrix(c(z1,z2),2)
  C<-matrix(c(sigma1^2, rho*sigma1*sigma2, rho*sigma1*sigma2*sigma2^2),2,2)
  A<-t(chol(C))
  X<-A%*%Z + mu
  X1<-X[1]
  X2<-X[2]
  expo1<-pnorm(X1)
  expo2<-pnorm(X2)
  x[i]<--log(1-expo1)
  y[i]<--(1/3)*log(1-expo2)
}
depend.set<-rbind(x,y)
depend.set[,1:5]
##        [,1]       [,2]       [,3]      [,4]       [,5]
## x 0.7572952 0.09776871 0.14511311 0.6215402 0.17778300
## y 0.1704889 0.14892284 0.02447113 0.1997560 0.05166304
cor(depend.set[1,], depend.set[2,])
## [1] 0.7154539
  1. I tried increasing sample size to see if such effort could increase correlation between X1 and X2
x<-c()
y<-c()
rho<-0.7
mu1<-0
mu2<-0
sigma1<-1
sigma2<-1

X<-matrix(0,2,1)
set.seed(2013122032)
for(i in 1:9999){
  z1<-rnorm(1)
  z2<-rnorm(1)
  mu<-matrix(c(mu1, mu2),2,1)
  Z<-matrix(c(z1,z2),2,1)
  C<-matrix(c(sigma1^2, rho*sigma1*sigma2, rho*sigma1*sigma2*sigma2^2),2,2)
  A<-t(chol(C))
  X<-A%*%Z + mu
  X1<-X[1]
  X2<-X[2]
  expo1<-pnorm(X1)
  expo2<-pnorm(X2)
  x[i]<--log(1-expo1)
  y[i]<--(1/3)*log(1-expo2)
}
depend.set<-rbind(x,y)
depend.set[,1:5]
##        [,1]       [,2]       [,3]      [,4]       [,5]
## x 0.7572952 0.09776871 0.14511311 0.6215402 0.17778300
## y 0.1563894 0.20737970 0.02439581 0.1997487 0.05578053
cor(depend.set[1,], depend.set[2,])
## [1] 0.6579665