library(ggplot2)
set.seed(2000)
n = 40
lambda = .2
nosim = 1000
binwidth <- 0.5
mean_list <- numeric()
## Finding sample mean
for(i in 1:nosim){
exp <- rexp(n, lambda)
mean_list <- c(mean_list, mean(exp))
}
theoretical_mean_averages = 1/lambda
theoretical_sd_averages = 1/(lambda*sqrt(n))
g <- qplot(mean_list) + geom_histogram(bins=10,binwidth = binwidth) + geom_vline(xintercept = theoretical_mean_averages, show.legend = T)
g <- g + xlab("Mean of averages of 40 random exponentials") + ylab("Frequency")
g <- g + stat_function(
fun = function(x, mean, sd, n, bw){
dnorm(x = x, mean = mean, sd = sd) * n * bw
},
args = c(mean = theoretical_mean_averages, sd = theoretical_sd_averages, n = nosim, bw = binwidth))
g
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
The standard deviation of the exponential distribution is 1/lambda = 5 The standard deviation of the distribution of the mean of 40 random exponentials should be the standard error and that is the S/sqrt(40) = 5/sqrt(40)
theoretical_variance_averages = (1/(lambda*sqrt(40)))^2
theoretical_variance_averages
## [1] 0.625
var(mean_list)
## [1] 0.6168082
library(ggplot2)
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
exp2 <- rexp(nosim, lambda)
q <- qplot(exp2) + geom_histogram(binwidth = 0.5, bins=30)
q <- q + xlab("Random exponentials") + ylab("Frequency")
plot_grid(q, g)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.