# Package setup ----
# Install the reporting packages only when they are missing. An explicit CRAN
# mirror is passed because install.packages() stops with "trying to use CRAN
# without setting a mirror" when the session has no default repository (for
# example while knitting or running under Rscript).
required_pkgs <- c("rmarkdown", "knitr")
for (pkg in required_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "https://cloud.r-project.org")
  }
}

# One library() call per line: two calls on the same line are a syntax error.
library(rmarkdown)
library(knitr)
# Sampling distribution of the mean for a small finite population ----

# Variance of a completely enumerated set of equally likely values (divisor N).
# stats::var() divides by N - 1, which is only appropriate when `x` is a sample
# drawn from a larger population. Below, `x` is either the whole population or
# the full list of every possible sample mean, so the divisor must be N.
population_variance <- function(x) {
  mean((x - mean(x))^2)
}

# 1. Calculate the mean and variance of the population.
population <- c(0, 1, 4, 5)
mean_pop <- mean(population)
var_pop <- population_variance(population)
mean_pop
## [1] 2.5
var_pop
## [1] 4.25

# 2. Sampling without replacement:
# Each column of `samples_wo` is one of the choose(4, 2) = 6 possible samples.
samples_wo <- combn(population, 2)
sample_means_wo <- colMeans(samples_wo)
expected_value_wo <- mean(sample_means_wo)
variance_wo <- population_variance(sample_means_wo)
expected_value_wo
## [1] 2.5
# Agrees with the finite-population formula (var_pop / n) * (N - n) / (N - 1)
# = (4.25 / 2) * (2 / 3).
variance_wo
## [1] 1.416667

# 3. Sampling with replacement:
# Each row of `samples_w` is one of the 4^2 = 16 equally likely ordered samples.
samples_w <- expand.grid(population, population)
sample_means_w <- rowMeans(samples_w)
expected_value_w <- mean(sample_means_w)
variance_w <- population_variance(sample_means_w)
expected_value_w
## [1] 2.5
# Agrees with var_pop / n = 4.25 / 2.
variance_w
## [1] 2.125
# Two estimators of the upper bound of a Uniform(min_val, max_val) sample ----
# T1 = 2 * sample mean, T2 = sample maximum.

### Simulate 100 Observations:
set.seed(123)
n <- 100
min_val <- 0
max_val <- 75
sample <- runif(n, min_val, max_val)

# Estimators
T1 <- 2 * mean(sample)
T2 <- max(sample)

# Sequence of estimators: entry k uses only the first k observations.
T1_seq <- 2 * cumsum(sample) / seq_len(n)
T2_seq <- cummax(sample)

# Plot
plot(
  seq_len(n), T1_seq,
  type = "l", col = "blue",
  ylim = range(c(T1_seq, T2_seq, max_val)),
  ylab = "Estimator Values", xlab = "Sample Size"
)
lines(seq_len(n), T2_seq, col = "red")
abline(h = max_val, col = "green", lty = 2)
# The reference line is drawn dashed (lty = 2), so the legend entry for it
# must be dashed as well.
legend(
  "bottomright",
  legend = c("T1", "T2", "True Value"),
  col = c("blue", "red", "green"),
  lty = c(1, 1, 2)
)

### Simulation of 10,000 Samples:
simulations <- 10000
# Each replicate draws its own local sample, so the observed `sample` above is
# left untouched and stays consistent with T1, T2 and the plotted sequences.
# Row 1 of `estimates` holds T1 and row 2 holds T2, one column per replicate.
estimates <- vapply(
  seq_len(simulations),
  function(i) {
    draws <- runif(n, min_val, max_val)
    c(2 * mean(draws), max(draws))
  },
  numeric(2)
)
T1_vals <- estimates[1, ]
T2_vals <- estimates[2, ]

# Boxplot
boxplot(
  T1_vals, T2_vals,
  names = c("T1", "T2"),
  main = "Boxplot of Estimators",
  col = c("blue", "red")
)
# One-sample t-test: simulated size and power ----

# Draw `n_sims` independent N(0, 1) samples of size `n_obs` and return the
# two-sided one-sample t-test p-value of each sample against H0: mean = mu0.
# The samples are local to this function, so the observed `sample` below is
# never overwritten by the simulation.
simulate_p_values <- function(n_sims, n_obs, mu0) {
  vapply(
    seq_len(n_sims),
    function(i) t.test(rnorm(n_obs), mu = mu0)$p.value,
    numeric(1)
  )
}

### Simulate 15 Observations from a Standard Normal Distribution:
set.seed(123)
n <- 15
sample <- rnorm(n)
alpha <- 0.05

# Testing H0: mu = 0
t_test_mu_0 <- t.test(sample, mu = 0)
t_test_mu_0
##
##  One Sample t-test
##
## data:  sample
## t = 0.69816, df = 14, p-value = 0.4965
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.3157490  0.6205177
## sample estimates:
## mean of x
## 0.1523843

# p-value
p_value_mu_0 <- t_test_mu_0$p.value
p_value_mu_0
## [1] 0.4965107

# Repeat 10,000 times for H0: mu = 0
simulations <- 10000
p_values_mu_0 <- simulate_p_values(simulations, n, mu0 = 0)

# Simulated alpha level: rejection rate when H0 is true.
alpha_simulated <- mean(p_values_mu_0 < alpha)

# Histogram
hist(p_values_mu_0, main = "Histogram of p-values (H0: mu = 0)", col = "blue")
alpha_simulated
## [1] 0.0473

### Testing H0: mu = 1
# This test uses the same 15 observations as the mu = 0 test above.
# NOTE: the output previously recorded here (t = -2.3729, p-value = 0.03251,
# mean of x 0.2660541) was computed on the last sample left over from the
# simulation loop, not on these observations. Re-run to regenerate the output.
t_test_mu_1 <- t.test(sample, mu = 1)
t_test_mu_1

# p-value
p_value_mu_1 <- t_test_mu_1$p.value
p_value_mu_1

# Repeat 10,000 times for H0: mu = 1 (the data are still drawn with mean 0)
p_values_mu_1 <- simulate_p_values(simulations, n, mu0 = 1)

# Simulated power of the test: rejection rate when H0 is false.
power_simulated <- mean(p_values_mu_1 < alpha)

# Histogram
hist(p_values_mu_1, main = "Histogram of p-values (H0: mu = 1)", col = "red")
power_simulated
## [1] 0.9493
# Coverage of the t-interval for the mean of a log-normal sample ----

# Two-sided (1 - alpha) t confidence interval for the mean of `x`,
# returned as c(lower, upper).
t_interval <- function(x, alpha) {
  n_obs <- length(x)
  std_err <- sd(x) / sqrt(n_obs)
  mean(x) + c(-1, 1) * qt(1 - alpha / 2, df = n_obs - 1) * std_err
}

### Simulate 15 Observations from a Log-Normal Distribution:
set.seed(123)
n <- 15
mu <- 1
sigma <- 1.5
sample <- rlnorm(n, meanlog = mu, sdlog = sigma)

# 95% Confidence Interval
alpha <- 0.05
sample_mean <- mean(sample)
sample_sd <- sd(sample)
se <- sample_sd / sqrt(n)
ci <- sample_mean + c(-1, 1) * qt(1 - alpha / 2, df = n - 1) * se

# Display results
sample_mean
## [1] 7.49678
ci
## [1]  1.531404 13.462157

### Repeat 10,000 Times:
simulations <- 10000

# Mean of a log-normal distribution. It is constant, so it is computed once
# instead of once per replicate.
true_value <- exp(mu + (sigma^2) / 2)

# Row 1 of `interval_bounds` holds the lower limits and row 2 the upper limits,
# one column per replicate. The replicate samples are local, so `sample`,
# `sample_mean` and `ci` above keep the single-sample results that were shown.
interval_bounds <- vapply(
  seq_len(simulations),
  function(i) t_interval(rlnorm(n, meanlog = mu, sdlog = sigma), alpha),
  numeric(2)
)
lower <- interval_bounds[1, ]
upper <- interval_bounds[2, ]

# 0/1 indicators per replicate:
#   contains_true_value: the interval covers the true mean
#   below_true_value:    the true mean lies below the lower limit
#   above_true_value:    the true mean lies above the upper limit
contains_true_value <- as.numeric(true_value >= lower & true_value <= upper)
below_true_value <- as.numeric(true_value < lower)
above_true_value <- as.numeric(true_value > upper)

# Results
mean(contains_true_value)
## [1] 0.7471
mean(below_true_value)
## [1] 2e-04
mean(above_true_value)
## [1] 0.2527