Homework3.knit

# 1.Draw a picture of the density function

x = seq(0, 8, length.out = 100)
density = rep(1/8, length(x))

plot(x, density, type = "l", col = "blue", lwd = 2, ylim = c(0, 0.2),
     xlab = "Minutes Late (X)", ylab = "Density f(x)", main = "Uniform Density Function on (0, 8)")
abline(h = 1/8, col = "blue", lwd = 2)
abline(v = c(0, 8), lty = 2, col = "red")
text(4, 1/8 + 0.02, "f(x) = 1/8", col = "blue")

# Given parameters
mean = 100
std = 20

# 2a. Compute P(80 < X < 120
z_lower_a = (80 - mean) / std
z_upper_a = (120 - mean) / std
prob_a = pnorm(z_upper_a) - pnorm(z_lower_a)
cat("P(80 < X < 120):", prob_a, "\n")

## P(80 < X < 120): 0.6826895

# 2b.Compute P(60 < X < 140). 
z_lower_b = (60 - mean) / std
z_upper_b = (140 - mean) / std
prob_b = pnorm(z_upper_b) - pnorm(z_lower_b)
cat("P(60 < X < 140):", prob_b, "\n")

## P(60 < X < 140): 0.9544997

# 2c.Compute P(40 < X < 160)

z_lower_c = (40 - mean) / std
z_upper_c = (160 - mean) / std
prob_c = pnorm(z_upper_c) - pnorm(z_lower_c)
cat("P(40 < X < 160):", prob_c, "\n")

## P(40 < X < 160): 0.9973002

# 2d. Verification with standard normal distribution (mean = 0, sigma = 1)

# P(-1 < Z < 1)
prob_standard_a = pnorm(1) - pnorm(-1)
cat("Standard Normal P(-1 < Z < 1):", prob_standard_a, "\n")

## Standard Normal P(-1 < Z < 1): 0.6826895

# P(-2 < Z < 2)
prob_standard_b = pnorm(2) - pnorm(-2)
cat("Standard Normal P(-2 < Z < 2):", prob_standard_b, "\n")

## Standard Normal P(-2 < Z < 2): 0.9544997

# P(-3 < Z < 3)
prob_standard_c = pnorm(3) - pnorm(-3)
cat("Standard Normal P(-3 < Z < 3):", prob_standard_c, "\n")

## Standard Normal P(-3 < Z < 3): 0.9973002

# Set seed for reproducibility
set.seed(123)

# Number of simeanlations
simeanlations = 100000

# Rate parameter
lambda = 1/8

# Function to simeanlate one system
simeanlate_system = function() {
  # Generate failure times for 3 components
  component_failures = rexp(3, rate = lambda)
  
  # System fails when last component fails (max of 3 failures)
  system_failure = max(component_failures)
  
  # Return TRUE if system fails before 15 years
  return(system_failure < 15)
}

# Run simeanlation many times
system_failures = replicate(simeanlations, simeanlate_system())

# Calculate probability
probability_failure = mean(system_failures)

# Print results
cat("Probability of system failure before 15 years:", round(probability_failure, 4), "\n")

## Probability of system failure before 15 years: 0.6078

# Calculate 95% confidence interval
margin_error = qnorm(0.975) * sqrt(probability_failure * (1-probability_failure) / simeanlations)
ci_lower = probability_failure - margin_error
ci_upper = probability_failure + margin_error

cat("95% Confidence Interval: (", round(ci_lower, 4), ",", round(ci_upper, 4), ")\n")

## 95% Confidence Interval: ( 0.6048 , 0.6109 )

# We can also verify this theoretically
# P(system fails before 15) = P(all components fail before 15)
# = [P(one component fails before 15)]^3
# = [1 - exp(-15/8)]^3
theoretical_prob = (1 - exp(-15/8))^3
cat("Theoretical probability:", round(theoretical_prob, 4))

## Theoretical probability: 0.6069

set.seed(123)

# Parameters
num_simeanlations = 5000
N = 100

# 4a. Generate 5000 simeanlations with each containing 100 outcomes from U(1, 8)
simeanlations = replicate(num_simeanlations, mean(runif(N, min = 1, max = 8)))

# Display mean values for the 5,000 experiments
sample_means_100 = simeanlations
cat("Mean of sample means (N=100):", mean(sample_means_100), "\n")

## Mean of sample means (N=100): 4.499529

# 4b. Calculate mean and variance for the 5,000 sample means
mean_sample_means_100 = mean(sample_means_100)
variance_sample_means_100 = var(sample_means_100)

cat("Mean of sample means (N=100):", mean_sample_means_100, "\n")

## Mean of sample means (N=100): 4.499529

cat("Variance of sample means (N=100):", variance_sample_means_100, "\n")

## Variance of sample means (N=100): 0.03997657

# 4c. Function to simeanlate sample means for a given N and num_simeanlations
simeanlate_means = function(N, num_simeanlations = 2000) {
  replicate(num_simeanlations, mean(runif(N, min = 1, max = 8)))
}

# Different values of N
N_values = c(100, 1000, 10000, 100000, 1000000)
results = data.frame(N = N_values, Mean = NA, Variance = NA)

for (i in 1:length(N_values)) {
  N = N_values[i]
  sample_means = simeanlate_means(N, num_simeanlations = 2000)
  results$Mean[i] = mean(sample_means)
  results$Variance[i] = var(sample_means)
}

# Display results
print(results)

##       N     Mean     Variance
## 1 1e+02 4.493608 4.106401e-02
## 2 1e+03 4.500047 4.139404e-03
## 3 1e+04 4.499456 4.079720e-04
## 4 1e+05 4.500056 3.980236e-05
## 5 1e+06 4.499961 4.037235e-06

# 4d. Log transform variance and N values
log_variance = log(results$Variance)
log_N = log(results$N)

# Plot log(Variance) vs. log(N)
plot(log_N, log_variance, main = "log(Variance) vs. log(N)", xlab = "log(N)", ylab = "log(Variance)", type = "b", pch = 19)

# Fit linear model to get slope
slope = coef(lm(log_variance ~ log_N))[2]
cat("Slope of the line:", slope, "\n")

## Slope of the line: -1.003178

# 4e. Generate sample means for N = 10,000
sample_means_10000 = simeanlate_means(10000, num_simeanlations = 2000)

# QQ-Plot
qqnorm(sample_means_10000)
qqline(sample_means_10000, col = "blue")

# Load required library
library(MASS)  # for mvrnorm function

# Set seed for reproducibility
set.seed(123)

# Define parameters
mean = c(-1, 2)  # mean vector
rho = -0.75    # correlation
variance_x = 1      # variance of X
variance_y = 2      # variance of Y

# Calculate covariance matrix
cov_xy = rho * sqrt(variance_x * variance_y)  # covariance formeanla: ρ * σx * σy
sigma = matrix(c(variance_x, cov_xy, 
                 cov_xy, variance_y), 
               nrow=2)

# Part a: Simeanlate 1,000 points and plot
n_small = 1000
sim_small = mvrnorm(n_small, mean, sigma)

# Create plot
plot(sim_small[,1], sim_small[,2], 
     main="Simulated Bivariate Normal Distribution",
     xlab="X", ylab="Y",
     col="blue", pch=20, alpha=0.5)

## Warning in plot.window(...): "alpha" is not a graphical parameter

## Warning in plot.xy(xy, type, ...): "alpha" is not a graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "alpha" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "alpha" is not a
## graphical parameter

## Warning in box(...): "alpha" is not a graphical parameter

## Warning in title(...): "alpha" is not a graphical parameter

# Add center point
points(mean[1], mean[2], col="red", pch=19, cex=2)

# Part b: Simulate 1,000,000 points for estimates
n_large = 1000000
sim_large = mvrnorm(n_large, mean, sigma)

# i. & ii. Estimate E(X) and E(Y)
ex = mean(sim_large[,1])
ey = mean(sim_large[,2])

# iii. Estimate Var(X) and Var(Y)
variance_x_est = var(sim_large[,1])
variance_y_est = var(sim_large[,2])

# iv. Estimate E(X+Y) and Var(X+Y)
sum_xy = sim_large[,1] + sim_large[,2]
e_sum_xy = mean(sum_xy)
var_sum_xy = var(sum_xy)

# Theoretical calculations
e_sum_theory = mean[1] + mean[2]
var_sum_theory = variance_x + variance_y + 2*cov_xy

# Print results
cat("\nEstimated values:\n")

## 
## Estimated values:

cat("E(X):", round(ex, 4), "\n")

## E(X): -0.9988

cat("E(Y):", round(ey, 4), "\n")

## E(Y): 1.9997

cat("Var(X):", round(variance_x_est, 4), "\n")

## Var(X): 0.9991

cat("Var(Y):", round(variance_y_est, 4), "\n")

## Var(Y): 2.0002

cat("E(X+Y):", round(e_sum_xy, 4), "\n")

## E(X+Y): 1.0009

cat("Var(X+Y):", round(var_sum_xy, 4), "\n")

## Var(X+Y): 0.8789

cat("\nTheoretical values:\n")

## 
## Theoretical values:

cat("E(X):", mean[1], "\n")

## E(X): -1

cat("E(Y):", mean[2], "\n")

## E(Y): 2

cat("Var(X):", variance_x, "\n")

## Var(X): 1

cat("Var(Y):", variance_y, "\n")

## Var(Y): 2

cat("E(X+Y):", e_sum_theory, "\n")

## E(X+Y): 1

cat("Var(X+Y):", var_sum_theory, "\n")

## Var(X+Y): 0.8786797