synopsis

The exponential distribution in R has been investigated and compared with the Central Limit Theorem. The exponential distribution can be simulated in R with rexp(n, lambda), where lambda is the rate parameter. The mean of the exponential distribution is 1/lambda and the standard deviation is also 1/lambda. lambda = 0.2 is used for all of the simulations. The distribution of averages of 40 exponentials is investigated, based on a thousand simulations.

Based on the findings, the sample mean is compared to the theoretical mean of the distribution, and the variance and normality of the distribution are examined.

library(ggplot2)
# Draw one large sample from the exponential distribution and wrap it in a
# data frame with a single column named `variable`.
# `rexp()` takes the number of draws as its first argument, so `n` is a scalar
# count rather than a 1:10000 vector whose length would be used implicitly.
n <- 10000
lambda <- 0.2  # rate parameter of the exponential distribution
y <- rexp(n, rate = lambda)
AA <- data.frame(variable = y)

# Simulate 1000 standard-uniform draws and plot their histogram on a plain
# black-and-white theme with the grid lines removed.
x1 <- runif(1000)
AA1 <- data.frame(variable = x1)
ggplot(data = AA1, mapping = aes(x = variable)) +
  geom_histogram(alpha = 0.4, fill = "red") +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of 1000 simulated means, each the average of 40 standard-uniform
# draws.
# `vapply()` builds the result in one pass; growing `mns` with `c()` inside a
# loop copies the whole vector on every iteration.
mns <- vapply(seq_len(1000), function(i) mean(runif(40)), numeric(1))
AA1 <- data.frame(variable = mns)
ggplot(AA1, aes(x = variable)) +
  geom_histogram(fill = "yellow", alpha = 0.4) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of 1000 simulated means, each the average of 40 exponential draws
# with rate 0.2.
# `vapply()` builds the result in one pass; growing `mns` with `c()` inside a
# loop copies the whole vector on every iteration.
mns <- vapply(seq_len(1000), function(i) mean(rexp(40, rate = 0.2)), numeric(1))
AA1 <- data.frame(variable = mns)
ggplot(AA1, aes(x = variable)) +
  geom_histogram(fill = "orange", alpha = 0.2) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The cumulative mean convergence (Exponential Distribution)

# Running mean of n exponential draws (rate 0.2) plotted against the number
# of observations.
n <- 10000
# Draw n values so the cumulative sum has exactly one entry per divisor in
# seq_len(n). A shorter sample would be silently recycled by the division
# (no warning is raised when the lengths divide evenly).
means <- cumsum(rexp(n, rate = 0.2)) / seq_len(n)
g <- ggplot(data.frame(x = seq_len(n), y = means), aes(x = x, y = y))
# Reference line at the theoretical mean 1 / rate, the value the running
# mean is expected to approach.
g <- g + geom_hline(yintercept = 1 / 0.2) + geom_line(size = 2)
g <- g +
  labs(x = "Number of obs", y = "Cumulative mean") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
g

Calculating the theoretical mean and the sample mean.

# Theoretical moments of an exponential distribution with rate `lambda`.
# The names avoid shadowing base::mean() and stats::sd(), and the standard
# deviation and variance are kept as separate, correctly labelled values.
theoretical_mean <- 1 / lambda
theoretical_sd <- 1 / lambda      # equal to the mean for this distribution
theoretical_var <- 1 / lambda^2   # square of the standard deviation
theoretical_mean

## [1] 5

Comparison of the theoretical and simulated standard deviation of the sample mean.

# Compare the theoretical standard deviation of the mean of n exponentials
# with the standard deviation observed across 1000 simulated means.
lambda <- 0.2
n <- 40
# One simulated sample mean per iteration, collected without growing a vector.
sim <- vapply(
  seq_len(1000),
  function(i) mean(rexp(n, rate = lambda)),
  numeric(1)
)
# Standard deviation of the sample mean: (1 / lambda) / sqrt(n). The sample
# size comes from `n` so it cannot drift from the simulation above.
expected.sd <- 1 / (lambda * sqrt(n))
simulation.sd <- sd(sim)
Values <- data.frame(
  Expected.StandardDeviation = expected.sd,
  Simulation.Standard.deviation = simulation.sd
)
Values

##   Expected.StandardDeviation Simulation.Standard.deviation
## 1                  0.7905694                     0.8024843

Statistical Inference Project-Part 1

Reza Rahimi

April 8, 2017

synopsis