library(ggplot2)
# lambda is 0.2
lambda = 0.2
# we will be using 40 exponentials
n = 40
# we will be running 1000 simulations
nsims = 1:1000
# set a seed to reproduce the data
set.seed(100)
# gather the means
means <- data.frame(x = sapply(nsims, function(x) {mean(rexp(n, lambda))}))
# lets take a looks at the top means
head(means)
## x
## 1 4.137412
## 2 6.051703
## 3 4.415869
## 4 4.404714
## 5 3.210413
## 6 5.475307
ggplot(data = means, aes(x = x)) +
geom_histogram(binwidth=0.1, aes(y=..density..)) +
labs(x="Means") +
labs(y="Density")
mu = 1 / lambda
print(mu)
## [1] 5
simmn <- mean(means$x)
print(simmn)
## [1] 4.999702
expsd <- (1/lambda)/sqrt(n)
print(expsd)
## [1] 0.7905694
expvar <- expsd^2
print(expvar)
## [1] 0.625
sd <- sd(means$x)
print(sd)
## [1] 0.8020251
var <- var(means$x)
print(var)
## [1] 0.6432442
ggplot(data = means, aes(x = x)) +
geom_histogram(binwidth=0.1, aes(y=..density..), fill = I('#8A8A8A'),) +
stat_function(fun = dnorm, arg = list(mean = mu , sd = sd), colour = "red", size=2) +
geom_vline(xintercept = mu, size=1, colour="red") +
geom_density(colour="blue", size=2) +
geom_vline(xintercept = simmn, size=1, colour="blue") +
labs(x="Means") +
labs(y="Density")
###increase the number of simulations from 1000 to 100,000 to see if the sample distribution improves
nsims = 1:100000
set.seed(100)
means <- data.frame(x = sapply(nsims, function(x) {mean(rexp(n, lambda))}))
# recalculate
simmean <- mean(means$x)
simsd <- sd(means$x)
simmu = 1 / lambda
ggplot(data = means, aes(x = x)) +
geom_histogram(binwidth=0.1, aes(y=..density..), fill = I('#8A8A8A'),) +
stat_function(fun = dnorm, arg = list(mean = simmu , sd = simsd), colour = "red", size=2) +
geom_vline(xintercept = simmu, size=1, colour="red") +
geom_density(colour="blue", size=2) +
geom_vline(xintercept = simmean, size=1, colour="blue") +
labs(x="Means") +
labs(y="Density")