Overview
The project consists of two parts: 1. A simulation exercise, and 2. Basic inferential data analysis.
In Part 1, simulations are run, and the results, including Sample Mean versus Theoretical Mean, Sample Variance versus Theoretical Variance, and Distribution, are shown.
Simulations
# Rate parameter (lambda) of the exponential distribution
lambda <- 0.2
# Simulation data: 1000 simulations were run (rows), each containing
# 40 exponentials (columns). All values are drawn in a single call and
# laid out row by row, so row i holds the i-th batch of 40 consecutive draws.
SimData <- matrix(
  rexp(1000 * 40, lambda),
  nrow = 1000,
  ncol = 40,
  byrow = TRUE
)
Sample Mean versus Theoretical Mean
# Mean of each simulation (one value per row of SimData)
mean_sample <- rowMeans(SimData)
# Theoretical mean of the exponential distribution is 1/lambda
mean_theo <- 1 / lambda
# Density-scaled histogram of the per-simulation means
hist(
  mean_sample,
  probability = TRUE,
  breaks = 20,
  col = "grey",
  main = "Distribution of Sample Mean vs. Theoretical Mean",
  xlab = "Sample Mean of 1000 exponential simulations"
)
# Mark the theoretical mean on the same histogram
abline(v = mean_theo, col = "blue", lwd = 5)

# Tabulate the average of the simulated sample means next to the
# theoretical mean, then print the comparison
mean_table <- data.frame(
  Mean = c(mean(mean_sample), mean_theo),
  row.names = c("Sample Mean", "Theoretical Mean")
)
print(mean_table)
## Mean
## Sample Mean 5.036945
## Theoretical Mean 5.000000
## The sample mean of the simulated dataset is very close to the theoretical mean value as shown in the table above
Sample Variance versus Theoretical Variance
# Variance of each simulation (one value per row of SimData)
var_sample <- apply(SimData, 1, var)
# Theoretical variance is (1/lambda)^2, i.e. the square of the theoretical
# standard deviation 1/lambda
var_theo <- (1 / lambda)^2
# Density-scaled histogram of the per-simulation variances
hist(
  var_sample,
  probability = TRUE,
  breaks = 15,
  col = "grey",
  main = "Distribution of Sample Variance vs. Theoretical Variance",
  xlab = "Sample Variance of 1000 exponential simulations"
)
# Mark the theoretical variance on the same histogram
abline(v = var_theo, col = "blue", lwd = 5)

# Tabulate the average of the simulated sample variances next to the
# theoretical variance, then print the comparison
var_table <- data.frame(
  Variance = c(mean(var_sample), var_theo),
  row.names = c("Sample Variance", "Theoretical Variance")
)
print(var_table)
## Variance
## Sample Variance 24.98718
## Theoretical Variance 25.00000
## The sample variance of the simulated dataset is very close to the theoretical variance value as shown in the table above
Distribution
# Density-scaled histogram of the standardized sample means
# (scale() centers the values on their mean and divides by their
# standard deviation)
hist(
  scale(mean_sample),
  probability = TRUE,
  breaks = 15,
  xlim = c(-4, 4),
  ylim = c(0, 0.5),
  col = "grey",
  main = "Scaled Distribution of Sample Mean vs. Theoretical Mean",
  xlab = "Scaled Sample Mean of 1000 exponential simulations"
)
# Overlay the standard normal density on the same axes for comparison
curve(dnorm(x, mean = 0, sd = 1), from = -4, to = 4, col = "red", add = TRUE)

# The sample mean is approximately normally distributed. As seen from the figure above, the shape of the histogram follows the bell curve.