Simulated datasets

library(ggplot2)

# Simulation parameters: number of simulated samples, draws per sample,
# and the rate of the exponential distribution.
sim <- 1000
n <- 40
lambda <- 0.2

# Fix the RNG seed so the simulated data are reproducible.
set.seed(2)

# One simulated sample per row: `sim` rows, each holding `n` exponential draws.
simdata <- matrix(rexp(sim * n, rate = lambda), nrow = sim)

# Mean of each simulated sample (row-wise).
s_mean <- apply(simdata, MARGIN = 1, FUN = mean)

# Base-graphics histogram of the simulated sample means.
hist(
  x = s_mean,
  xlab = "Mean of simulated data",
  col = "green"
)

Q1) Where is the distribution centred? How does it compare to the theoretical centre of the distribution?

# Theoretical centre of the distribution: 1 / lambda
t_mean <- 1 / lambda
t_mean
## [1] 5
# Actual (empirical) centre: average of the simulated sample means, 3 decimals
round(mean(s_mean), digits = 3)
## [1] 5.016

Q2) Compare variances

# Theoretical variance and SD of the mean of n draws:
# variance = (1 / lambda)^2 / n, SD = (1 / lambda) / sqrt(n)
t_var <- (1 / lambda)^2 / n
t_sd <- (1 / lambda) / sqrt(n)

t_var
## [1] 0.625
t_sd
## [1] 0.7905694
# Actual (empirical) variance and SD of the simulated sample means, 3 decimals
round(var(s_mean), digits = 3)
## [1] 0.578
round(sd(s_mean), digits = 3)
## [1] 0.76

Q3) Are the distributions approximately normal?

# Wrap the sample means in a data frame so ggplot2 can consume them.
plot <- data.frame(s_mean)

# Density-scaled histogram of the sample means.
# NOTE(review): recent ggplot2 releases deprecate `..density..` (in favour of
# after_stat(density)) and `size` for lines (in favour of `linewidth`) --
# confirm the installed version before modernising.
x <- ggplot(plot, aes(x = s_mean)) +
  geom_histogram(aes(y = ..density..), colour = "black", fill = "pink")

# Overlay a kernel density estimate and display the plot.
x + geom_density(colour = "green", size = 1)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

Q4) Confidence interval

# Theoretical 95% interval for the mean of n exponential draws.
# t_sd is already the standard error of the mean, (1 / lambda) / sqrt(n),
# so it is used directly; dividing it by n again makes the interval far
# too narrow.
t_ci <- t_mean + c(-1, 1) * 1.96 * t_sd
t_ci
## [1] 3.450484 6.549516
# Actual (empirical) 95% interval.
# sd(s_mean) is the standard deviation of the simulated sample means, i.e. the
# empirical standard error, so no further division by sqrt(n) is applied.
# The echoed output below was recomputed for the corrected formula.
a_ci <- mean(s_mean) + c(-1, 1) * 1.96 * sd(s_mean)
round(a_ci, 3)
## [1] 3.527 6.506