# Reset the session before running the analysis.
# NOTE(review): rm(list = ls()) wipes the caller's global environment when this
# file is source()d; consider dropping it and running in a fresh R session.
rm(list = ls()) # Clear environment
gc() # Clear unused memory
cat("\f") # Clear the console

#1
## The Central Limit Theorem (CLT) states that the distribution of the sample means approaches a normal distribution as the sample size increases, regardless of the underlying population distribution (provided that distribution has a finite variance).

#part2

# Fix the random number generator state so every run draws the same values.
set.seed(123)

# Simulated population: 1000 draws from a normal distribution
# (mean 10, standard deviation 2).
data <- rnorm(n = 1000, mean = 10, sd = 2)

# Draw `n_replicates` random samples of size `sample_size` from `data`
# (without replacement) and return the mean of each sample.
#
# Arguments:
#   data          vector treated as the population to sample from
#   sample_size   number of observations in each sample
#   n_replicates  number of samples to draw
#
# Returns a numeric vector of length `n_replicates`.
generate_sample_means <- function(data, sample_size, n_replicates) {
  # Index through sample.int() instead of calling sample(data, ...): sample()
  # interprets a single number x >= 1 as the range 1:x, which would silently
  # sample the wrong values for a length-one population. For longer vectors
  # both forms consume the random stream identically.
  # vapply() keeps the result numeric even when n_replicates is 0, where
  # replicate() would return an empty list.
  vapply(
    seq_len(n_replicates),
    function(i) mean(data[sample.int(length(data), sample_size)]),
    numeric(1)
  )
}

# Simulate 1000 sample means for each sample size; the result is a matrix with
# one row per replicate and one column per sample size (2, 5, 10, 20, 50, 100).
sample_sizes <- c(2, 5, 10, 20, 50, 100)
sample_means <- vapply(
  sample_sizes,
  function(size) {
    generate_sample_means(data = data, sample_size = size, n_replicates = 1000)
  },
  FUN.VALUE = numeric(1000)
)

# plot the distribution of the sample means for each sample size
library(ggplot2)
library(reshape2)

# Long format: one row per simulated mean. melt() names the matrix columns
# X1, X2, ... in column order, so the factor code of `variable` indexes
# `sample_sizes` and lets each facet be labelled with its sample size.
size_labels <- paste0("n = ", sample_sizes)
means_long <- melt(data.frame(sample_means))
means_long$sample_size <- factor(
  size_labels[as.integer(means_long$variable)],
  levels = size_labels
)

# Reference density N(mean = 10, sd = 2 / sqrt(n)), evaluated separately for
# every sample size. stat_function() applies one fixed set of `args` to all
# facets, so handing it the whole vector of standard deviations recycles that
# vector along the x grid instead of giving each panel its own curve.
x_grid <- seq(min(means_long$value), max(means_long$value), length.out = 200)
normal_curves <- expand.grid(
  value = x_grid,
  size_index = seq_along(sample_sizes)
)
normal_curves$density <- dnorm(
  normal_curves$value,
  mean = 10,
  sd = 2 / sqrt(sample_sizes[normal_curves$size_index])
)
normal_curves$sample_size <- factor(
  size_labels[normal_curves$size_index],
  levels = size_labels
)

ggplot(means_long, aes(value)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.5, color = "black", fill = "white"
  ) +
  geom_line(
    data = normal_curves,
    aes(y = density),
    color = "red", linewidth = 0.5
  ) +
  facet_wrap(~sample_size, nrow = 1) +
  labs(
    x = "Sample Mean", y = "Density",
    title = "Central Limit Theorem for Normal Distribution"
  )
## No id variables; using all as measure variables

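#The resulting plot shows that, as the sample size increases, the distribution of the sample means stays bell-shaped and concentrates around the population mean of 10 (its standard deviation shrinks like 2/sqrt(n)), consistent with the Central Limit Theorem. Because the population here is itself normal, the sample means are normally distributed at every sample size; the CLT guarantees the same limiting behaviour for other distributions with finite variance.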

#part 3

# generate Poisson data with lambda = 5
set.seed(123)
data <- rpois(1000, lambda = 5)

# generate sample medians for sample sizes 2, 10, 20, 50, 100
# (matrix with one row per replicate and one column per sample size)
median_sizes <- c(2, 10, 20, 50, 100)
sample_medians <- vapply(
  median_sizes,
  function(n) replicate(1000, median(sample(data, n))),
  FUN.VALUE = numeric(1000)
)

library(ggplot2)
library(reshape2)

# Long format: melt() names the matrix columns X1, X2, ... in column order, so
# the factor code of `variable` indexes `median_sizes`.
median_labels <- paste0("n = ", median_sizes)
medians_long <- melt(data.frame(sample_medians))
medians_long$sample_size <- factor(
  median_labels[as.integer(medians_long$variable)],
  levels = median_labels
)

# Reference curve per sample size: normal density centred on the population
# median with sd = sd(data) * sqrt(pi / 2) / sqrt(n), the large-sample standard
# error of a median under normal theory. The previous overlay divided by
# sqrt(pi / 2) and ignored n, so every facet showed the same curve.
# NOTE(review): this is only a rough guide for discrete Poisson data, whose
# sample median is not asymptotically normal in the usual sense.
median_grid <- seq(
  min(medians_long$value), max(medians_long$value),
  length.out = 200
)
median_curves <- expand.grid(
  value = median_grid,
  size_index = seq_along(median_sizes)
)
median_curves$density <- dnorm(
  median_curves$value,
  mean = median(data),
  sd = sd(data) * sqrt(pi / 2) / sqrt(median_sizes[median_curves$size_index])
)
median_curves$sample_size <- factor(
  median_labels[median_curves$size_index],
  levels = median_labels
)

ggplot(medians_long, aes(value)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 0.5, color = "black", fill = "white"
  ) +
  geom_line(
    data = median_curves,
    aes(y = density),
    color = "red", linewidth = 1
  ) +
  facet_wrap(~sample_size, nrow = 1) +
  labs(
    x = "Sample Median", y = "Density",
    title = "Central Limit Theorem for Poisson Distribution"
  )
## No id variables; using all as measure variables