library(knitr)

set.seed(123) #For reproducibility
n=100000 #number of realizations 
x1 = rnorm(n, mean = 2, sd = 1)
x2 = rnorm(n, mean = 3, sd= 2)
x3 = rnorm(n, mean = 4, sd = 3)

z= x1+x2+x3

hist(z, breaks = 50, probability = TRUE, col = "lightblue",
     main = "Relative Frequency of the PMF of Z", xlab = "z", ylim = c(0,0.12))


z_vals = seq(min(z), max(z), length.out = 1000) #PDF of Z
pdf_z = dnorm(z_vals, mean = mean(z), sd = sd(z))

lines(z_vals, pdf_z, col = "red", lwd =2)

n_sim = 100000 #amount of simulations
n = 9
rate = 0.1
xbar = array() #array to store xbar vectors
set.seed(123) #reproducibility
for (i in 1:n_sim){
  sample = rexp(n,rate)
  xbar[i] <- mean(sample)
}
#
hist(xbar, breaks = 50, col = "lightblue", probability = TRUE,
     main = "Relative Frequency Histogram of Simulated Sample Means",
     xlab = "Sample Mean", ylab = "Relative Frequency", xlim = (c(0,35)))

xbar100 = replicate(n_sim, mean(rexp(100,rate))) #simulates 100 means of exponential

hist(xbar100, breaks = 50, col = "lightblue", probability = TRUE,
     main = "Relative Frequency Histogram of Simulated Sample Means",
     xlab = "Sample Mean", ylab = "Relative Frequency", xlim = (c(5,15)))

mean(xbar)

## [1] 10.00084

sd(xbar)

## [1] 3.330647

mean(xbar100)

## [1] 9.996591

sd(xbar100)

## [1] 0.9976306

x_vals = seq(0, 100, length.out = 1000)
pdf_vals = dexp(x_vals, rate = 0.1)
lines(x_vals, pdf_vals, col = "red", lwd = 2)

# Load necessary library
library(ggplot2)

mu = 50
sigma = 10
sigma2 = sigma^2
n_sim = 10000
n_vals = 2:50

mse_mle <- numeric(length(n_vals))
mse_unbiased <- numeric(length(n_vals))

# Loop over different sample sizes
for (i in 2:n_vals) {
  n = n_vals[i]

  # Generate 10,000 sample variances for both estimators
  sample_vars <- replicate(n_sim, {
    x <- rnorm(n, mean = mu, sd = sigma)
    var_mle <- mean((x - mean(x))^2)   # Estimator 1 (MLE)
    var_unbiased <- sum((x - mean(x))^2) / (n - 1)  # Estimator 2 (Unbiased)

    c(var_mle, var_unbiased)
  })

  # Compute MSE for MLE (Estimator 1)
  mse_mle[i] <- mean((sample_vars[1, ] - sigma2)^2)

  # Compute MSE for Unbiased Estimator (Estimator 2)
  mse_unbiased[i] <- mean((sample_vars[2, ] - sigma2)^2)
}

## Warning in 2:n_vals: numerical expression has 49 elements: only the first used

# Plot the MSE comparison using base R
plot(n_vals, mse_mle, type = "l", col = "blue", lwd = 2,
     ylim = range(c(mse_mle, mse_unbiased)),
     xlab = "Sample Size (n)", ylab = "MSE",
     main = "Empirical MSE Comparison of Two Variance Estimators")
points(n_vals, mse_unbiased, type = "b", col = "red", pch = 17, lty = 2)
legend("topright", legend = c("MLE (Estimator 1)", "Unbiased (Estimator 2)"),
       col = c("red", "blue"), lwd = 2)