In these reports (part 1 and part 2) I carry out some statistical analyses centred on parametric models, using simulations. The main focus is the “Central Limit Theorem”.

1-Show the sample mean and compare it to the theoretical mean of the distribution. 3-Show that the distribution of the sample means is approximately normal.

# Simulation settings: `iter` independent samples, each made of `n` draws
# from an exponential distribution with rate `lambda`.
set.seed(2024)  # fix the RNG so the figures and numbers are reproducible
iter <- 1000
lambda <- 0.2
n <- 40

# One simulated sample per column: `dat` is an n x iter matrix.
dat <- matrix(rexp(n * iter, lambda), nrow = n, ncol = iter)

# Average each column, so `ave` holds `iter` sample means of size `n`.
# (Averaging over rows would give only `n` means of `iter` draws each.)
ave <- colMeans(dat)

# Plot on the density scale so the normal curve shares the histogram's y axis.
hist(ave, freq = FALSE)
curve(dnorm(x, mean = mean(ave), sd = sd(ave)),
      col = "darkblue", lwd = 2, add = TRUE)

# Red: theoretical mean of the exponential (1 / lambda).
abline(v = 1 / lambda, lwd = 5, col = "red")
# Blue: observed mean of the simulated sample means.
abline(v = mean(ave), lwd = 4, col = "blue")

# Legend entries are lines, matching the two vertical markers above.
legend("topright",
  legend = c("Theoretical Mean", "Sample Mean"),
  col = c("red", "blue"),
  lty = 1,
  lwd = c(5, 4),
  bty = "n",
  cex = 1.2,
  text.col = "black",
  horiz = FALSE,
  inset = c(0.1, 0.1))

2-Show how variable the sample is (via variance) and compare it to the theoretical variance of the distribution.

# Variance of an exponential distribution with rate `lambda` is 1 / lambda^2.
theoretical_variance <- 1 / lambda^2
# Pool every simulated draw in `dat` (the whole matrix, whatever its row
# count) and take the sample variance of the pooled values.
sample_var <- var(as.vector(dat))
# Show both values side by side; c() coerces the numbers to strings.
matrix(
  c("sample_var=", sample_var, "theoretical_variance", theoretical_variance),
  ncol = 2, byrow = TRUE
)
##      [,1]                   [,2]              
## [1,] "sample_var="          "25.7288107907283"
## [2,] "theoretical_variance" "25"
 # Ratio of theoretical to observed variance, printed with its label.
 c(
   "theoretical_variance/sample_var=",
   theoretical_variance / sample_var
 )
## [1] "theoretical_variance/sample_var=" "0.971673358840551"

3-Distribution of a large collection of random exponentials

library(ggplot2)
# Flatten every simulated draw in `dat` into a single column `X`
# (uses the whole matrix rather than a hard-coded row range).
dataset <- data.frame(X = as.vector(dat))
# Density-scaled histogram of the raw draws with a kernel density overlay.
# after_stat(density) is the current replacement for the deprecated
# `..density..` notation.
ggplot(dataset, aes(x = X)) +
  geom_histogram(aes(y = after_stat(density)), binwidth = 2) +
  geom_density()