Comparing Exponential Distribution to the Central Limit Theorem

By: Mary T.
Overview:
The purpose of this project is to illustrate via 1,000 simulations the properties of the distribution of the mean of 40 exponentials. The project will:
1. Show the sample mean and compare it to the theoretical mean of the distribution.
2. Show how variable the sample is (via variance) and compare it to the theoretical variance of the distribution.
3. Show that the distribution of the sample means is approximately normal.

The exponential distribution

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3

# Draw 1,000 observations from an exponential distribution with rate 0.2.
# The draws are kept unrounded: rounding to whole numbers would place every
# value on an integer and leave every other 0.5-wide histogram bin empty,
# which misrepresents the continuous density.
expdistrib <- data.frame(expdistrib = rexp(1000, rate = 0.2))

# Plot the exponential distribution as a density-scaled histogram
ggplot(expdistrib, aes(x = expdistrib)) +
  ggtitle("Exponential Distribution") +
  geom_histogram(
    aes(y = ..density..),
    fill = "navyblue", alpha = 0.20, binwidth = 0.5, color = "white"
  ) +
  xlab("Exponential distribution") +
  ylab("Density")

## Illustrate 1,000 simulations of the distribution of 40 exponentials & lambda of 0.2
s <- 1000      # number of simulated samples
n <- 40        # observations drawn per sample
lambda <- 0.2  # rate parameter of the exponential distribution

# One sample mean per simulation. replicate() builds the whole result at once
# instead of growing a vector with c() on every iteration, and the rate is
# taken from `lambda` so the simulation follows the parameter set above.
mns <- replicate(s, mean(rexp(n, rate = lambda)))

# ggplot() needs a data frame; the single column is named `mns`
mns <- data.frame(mns = mns)

# Density-scaled histogram of the sample means with a kernel density overlay
ggplot(mns, aes(x = mns)) +
  ggtitle("Means of Exponential Distribution Samples") +
  geom_histogram(
    aes(y = ..density..),
    fill = "red", alpha = 0.20, binwidth = 0.3, color = "white"
  ) +
  geom_density(colour = "black", size = 0.5, lty = 2) +
  xlab("Means of Exponential Samples") +
  ylab("Density")

COMPARE SIMULATED SAMPLE MEAN TO THEORETICAL MEAN OF THE DISTRIBUTION

# Simulate the sample means again as a numeric vector for base graphics.
# `s` (number of simulations) and `n` (sample size) are the settings defined
# earlier in the document; the rate is set before it is used.
lambda <- 0.2
mns <- replicate(s, mean(rexp(n, rate = lambda)))

# Histogram of counts (freq = TRUE), so the y-axis is labelled "Frequency"
hist(mns, freq = TRUE, breaks = 20,
     main = "Comparing Simulated Vs.Theoretical Means",
     xlab = "Means of Exponential Samples",
     ylab = "Frequency",
     col = "khaki3")
# Theoretical mean of the exponential distribution, 1 / lambda (red)
abline(v = 1 / lambda, col = "red", lwd = 2)
# Mean of the simulated sample means (black)
abline(v = mean(mns), col = "black", lwd = 2)
legend("topright", c("Simulated Mean", "Theoretical Mean"),
       col = c("black", "red"), lty = c(1, 1), lwd = 2)

# Theoretical mean of the exponential distribution: 1 / lambda
1/lambda
## [1] 5
# Sample mean: average of the simulated sample means
mean(mns)
## [1] 5.007617
# SAMPLE VARIANCE VS. THEORETICAL VARIANCE
# Theoretical variance of the mean of n exponentials: (1 / lambda)^2 / n
((1/lambda)^2)/n
## [1] 0.625
# Sample variance of the simulated sample means
var(mns)
## [1] 0.6465143

NORMAL DISTRIBUTION VS. SIMULATION RESULTS

# Density-scaled histogram of the simulated sample means
hist(mns, freq = FALSE, breaks = 50,
     main = "Normal Distribution and Simulation Results",
     xlab = "Means of Exponential Samples",
     ylab = "Density",
     col = "plum2")
# Kernel density estimate of the simulated sample means (black, dashed)
lines(density(mns), col = "black", lty = 2, lwd = 2)
# Theoretical mean, 1 / lambda, drawn in the "Theoretical" colour
abline(v = 1 / lambda, col = "red", lwd = 2)
# Normal density with the theoretical mean (1 / lambda) and the theoretical
# standard deviation of the mean of n exponentials ((1 / lambda) / sqrt(n))
xfit <- seq(min(mns), max(mns), length.out = 1000)
yfit <- dnorm(xfit, mean = 1 / lambda, sd = (1 / lambda) / sqrt(n))
lines(xfit, yfit, col = "red", lty = 2, lwd = 2)
# Legend: line type and width match the dashed curves drawn above
legend("topright", c("Simulation", "Theoretical"),
       col = c("black", "red"), lty = c(2, 2), lwd = 2)

Conclusion:
The simulations illustrate the Central Limit Theorem: the sampling distribution of the sample mean is approximately normal (bell-shaped), regardless of the distribution the samples are drawn from, provided the sample size is large enough. The mean of the simulated sample means is close to the theoretical mean of the exponential distribution (1/lambda = 5), and their variance is close to the theoretical variance of the sample mean ((1/lambda)^2/n = 0.625).