To investigate the exponential distribution in R and compare it with the Central Limit Theorem.
The purpose of these 3 simulation sets is to show the distribution variance as the number of simulations increases.
# Number of simulation sets to produce: the first one for 10 simulations, the second one for
# 100 simulations, and the third one for 1,000 simulations
nSimulationSets <- 3
# List containing simulated sample data
simulationSets <- list()
# Number of exponentials per simulation
nExponentials <- 40
# Initial number of simulations to perform
nSimulations <- 10
# Simulation rate
simulationRate <- 0.2
for (i in 1:nSimulationSets) {
# Set the seed for the current simulation. This action guarantees reproducibility of the
# simulated data
set.seed(i)
# Create a matrix of simulations with row dimensions given by the currrent value of nSimulations
simulationData <- matrix(nrow = nSimulations, ncol = nExponentials)
dimnames(simulationData) <- list(rownames(simulationData, do.NULL = FALSE, prefix = "sim"),
colnames(simulationData, do.NULL = FALSE, prefix = "exp"))
# Perform the number of simulations given by the currrent value of nSimulations
for (j in 1:nSimulations)
simulationData[j,] <- rexp(nExponentials, rate = simulationRate)
# Add the matrix to simulationSets
simulationSets[[i]] <- simulationData
# Increase the number of simulations to perform on the next iteration by a factor of 10
nSimulations <- nSimulations * 10
} # END for
# Explore the distribution of means per simulation set
for (i in 1:nSimulationSets) {
simulationSetMeans <- apply(simulationSets[[i]], 1, mean)
hist( simulationSetMeans, main = paste0("Distribution of Sample Means for ", nrow(simulationSets[[i]]), " Simulations"), xlab = "Mean")
print(paste0(i, ".1. Theoretical Mean: ", (1/simulationRate)))
print(paste0(i, ".2. Sample Mean: ", round(mean(simulationSetMeans), 3)))
print(paste0(i, ".3. Theoretical Variance: ", ((1/simulationRate)/sqrt(nExponentials))^2))
print(paste0(i, ".4. Sample Variance: ", round(var(simulationSetMeans), 3)))
} # END for
## [1] "1.1. Theoretical Mean: 5"
## [1] "1.2. Sample Mean: 4.866"
## [1] "1.3. Theoretical Variance: 0.625"
## [1] "1.4. Sample Variance: 0.491"
## [1] "2.1. Theoretical Mean: 5"
## [1] "2.2. Sample Mean: 5.003"
## [1] "2.3. Theoretical Variance: 0.625"
## [1] "2.4. Sample Variance: 0.646"
## [1] "3.1. Theoretical Mean: 5"
## [1] "3.2. Sample Mean: 4.987"
## [1] "3.3. Theoretical Variance: 0.625"
## [1] "3.4. Sample Variance: 0.632"
sessionInfo()
## R version 3.2.2 (2015-08-14)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] magrittr_1.5 tools_3.2.2 htmltools_0.3 yaml_2.1.13
## [5] stringi_0.5-5 rmarkdown_0.9.2 knitr_1.12.3 stringr_1.0.0
## [9] digest_0.6.8 evaluate_0.8