#A1

# Set parameters
N <- 1000
p <- 0.5
iterations <- 5000

# Initialize vector to store results
odds_ratios <- numeric(iterations)

# Simulate data and calculate odds ratios
for (i in 1:iterations) {
  # Generate sample of Bernoulli random variables
  sample_data <- rbinom(N, 1, p)
  
  # Calculate sample average
  sample_average <- mean(sample_data)
  
  # Calculate odds ratio
  odds_ratio <- sample_average / (1 - sample_average)
  
  # Store result
  odds_ratios[i] <- odds_ratio
}

# Plot histogram of simulated odds ratios
hist(odds_ratios, freq = FALSE, breaks = 30, main = "Histogram of Simulated Odds Ratios",
     xlab = "Odds Ratio", ylab = "Density")

# Calculate mean and variance of odds ratios
mean_odds_ratio <- mean(odds_ratios)
var_odds_ratio <- var(odds_ratios)

# Calculate standard error using Delta method
se_delta_method <- sqrt((1/(1-p)^3)*(p/N))

# Plot normal distribution with mean and standard error
curve(dnorm(x, mean = mean_odds_ratio, sd = se_delta_method), 
      col = "blue", lwd = 2, add = TRUE)

#A2

# Set parameters
N_values <- c(10, 30, 50, 100, 500)
p <- 0.5
iterations <- 5000

# Function to calculate standard error using Delta method
calculate_se_delta_method <- function(p, N) {
  return(sqrt((1/(1-p)^3)*(p/N)))
}

# Function to simulate data and plot histogram for given N
simulate_and_plot <- function(N) {
  # Initialize vector to store results
  odds_ratios <- numeric(iterations)
  
  # Simulate data and calculate odds ratios
  for (i in 1:iterations) {
    # Generate sample of Bernoulli random variables
    sample_data <- rbinom(N, 1, p)
    
    # Calculate sample average
    sample_average <- mean(sample_data)
    
    # Calculate odds ratio
    odds_ratio <- sample_average / (1 - sample_average)
    
    # Store result
    odds_ratios[i] <- odds_ratio
  }
  
  # Plot histogram of simulated odds ratios
  hist(odds_ratios, freq = FALSE, breaks = 30, 
       main = paste("Histogram of Simulated Odds Ratios (N =", N, ")"),
       xlab = "Odds Ratio", ylab = "Density")
  
  # Calculate mean and variance of odds ratios
  mean_odds_ratio <- mean(odds_ratios)
  var_odds_ratio <- var(odds_ratios)
  
  # Calculate standard error using Delta method
  se_delta_method <- calculate_se_delta_method(p, N)
  
  # Plot normal distribution with mean and standard error
  curve(dnorm(x, mean = mean_odds_ratio, sd = se_delta_method), 
        col = "blue", lwd = 2, add = TRUE)
  
  # Add legend
  legend("topright", legend = "Normal Curve", col = "blue", lwd = 2)
}

# Loop over different values of N
for (N in N_values) {
  simulate_and_plot(N)
}

#2(A)

# Define the sample sizes and true parameters
sample_sizes <- c(10, 30, 100, 1000)
true_ps <- c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99)
# Initialize an empty dataframe for results
results_df <- data.frame(N = integer(),
                         P = numeric(),
                         N_P_set = character(),
                         Expectation = numeric(),
                         Variance = numeric(),
                         stringsAsFactors = FALSE) 

# Function to calculate delta method estimates
calculate_delta <- function(N, p_hat) {
  g_p_hat <- p_hat / (1 - p_hat) # Expected value approximation
  var_g_p_hat <- (1 / ((1 - p_hat)^2))^2 * (p_hat * (1 - p_hat) / N) # Variance approximation
  return(c(g_p_hat, var_g_p_hat))
}

# Loop over each N and p, simulate Bernoulli trials, and apply the delta method
for (N in sample_sizes) {
  for (p in true_ps) {
    for (i in 1:100) { # Repeat each configuration 100 times
      samples <- rbinom(N, 1, p) # Simulate N Bernoulli trials
      p_hat <- mean(samples) # Sample mean as estimator for p
      estimates <- calculate_delta(N, p_hat)
      # Create the combined N_P_set identifier
      N_P_set_value <- sprintf("N.%dp.%s.%d", N, gsub("0\\.", "", format(p, nsmall = 2)), i)
      # Append to results dataframe
      results_df <- rbind(results_df, data.frame(N = N,
                                                  P = p,
                                                  N_P_set = N_P_set_value,
                                                  Expectation = estimates[1],
                                                  Variance = estimates[2]))
    }
  }
}

# Save results to CSV file with specified headers
write.csv(results_df, "delta_method_results.csv", row.names = FALSE)

# Print confirmation message
print("Delta method results saved in the working directory")

#2(B)

# Define the sample sizes and true parameters
sample_sizes <- c(10, 30, 100, 1000)
true_ps <- c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99)
B <- 100 # Number of bootstrap realizations

# Initialize an empty dataframe for results
bootstrap_results_df <- data.frame(N = integer(),
                                   P = numeric(),
                                   N_P_set = character(),
                                   Bootstrap_Mean = numeric(),
                                   Bootstrap_Variance = numeric(),
                                   stringsAsFactors = FALSE)

# Function to calculate g(𝑋̄) for a given sample
calculate_g_x_bar <- function(samples) {
  p_hat <- mean(samples) # Sample mean as estimator for p
  g_p_hat <- p_hat / (1 - p_hat) # Expected value approximation
  return(g_p_hat)
}

# Function to perform bootstrap resampling
perform_bootstrap <- function(samples, B) {
  bootstrap_estimates <- replicate(B, {
    bootstrap_sample <- sample(samples, replace = TRUE) # Bootstrap resampling
    calculate_g_x_bar(bootstrap_sample) # Calculate g(𝑋̄) for bootstrap sample
  })
  return(bootstrap_estimates)
}

# Loop over each N and p, simulate Bernoulli trials, and apply bootstrap
for (N in sample_sizes) {
  for (p in true_ps) {
    for (i in 1:100) { # Repeat each configuration 100 times
      samples <- rbinom(N, 1, p) # Simulate N Bernoulli trials
      
      bootstrap_estimates <- perform_bootstrap(samples, B)
      
      bootstrap_mean <- mean(bootstrap_estimates) # Bootstrap mean
      bootstrap_variance <- var(bootstrap_estimates) * B / (B - 1) # Bootstrap variance
      
      # Create the combined N_P_set identifier
      N_P_set_value <- sprintf("N.%dp.%s.%d", N, gsub("0\\.", "", format(p, nsmall = 2)), i)
      
      # Append to results dataframe
      bootstrap_results_df <- rbind(bootstrap_results_df, data.frame(N = N,
                                                                     P = p,
                                                                     N_P_set = N_P_set_value,
                                                                     Bootstrap_Mean = bootstrap_mean,
                                                                     Bootstrap_Variance = bootstrap_variance))
    }
  }
}

# Save results to CSV file with specified headers
write.csv(bootstrap_results_df, "bootstrap_results.csv", row.names = FALSE)

#2(B)

# Define function to calculate g(X-bar)
calculate_g <- function(sample_average) {
  return(sample_average / (1 - sample_average))
}

# Define function to perform bootstrap
perform_bootstrap <- function(sample_data, B) {
  g_values <- numeric(B)
  for (b in 1:B) {
    bootstrap_sample <- sample(sample_data, replace = TRUE)
    sample_average <- mean(bootstrap_sample)
    g_values[b] <- calculate_g(sample_average)
  }
  return(g_values)
}

# Set parameters
B <- 100  # Number of bootstrap realizations
iterations <- 100  # Number of iterations
N <- 100  # Sample size
p <- 0.5  # True parameter value

# Initialize matrices to store results
expected_values <- matrix(NA, nrow = iterations, ncol = 1)
variances <- matrix(NA, nrow = iterations, ncol = 1)

# Perform iterations
for (iter in 1:iterations) {
  # Generate sample of Bernoulli random variables
  sample_data <- rbinom(N, 1, p)
  
  # Calculate sample average
  sample_average <- mean(sample_data)
  
  # Calculate g(X-bar)
  g <- calculate_g(sample_average)
  
  # Perform bootstrap
  bootstrap_g <- perform_bootstrap(sample_data, B)
  
  # Estimate expected value and variance of g(X-bar)
  expected_values[iter, 1] <- mean(bootstrap_g)
  variances[iter, 1] <- var(bootstrap_g) * B / (B - 1)
}

# Calculate mean and standard error of expected value and variance
mean_expected_value <- mean(expected_values)
mean_variance <- mean(variances)
standard_error_expected_value <- sd(expected_values) / sqrt(iterations)
standard_error_variance <- sd(variances) / sqrt(iterations)

# Print results
cat("Estimated Expected Value of g(X-bar):", mean_expected_value, "\n")
cat("Estimated Variance of g(X-bar):", mean_variance, "\n")
cat("Standard Error of Expected Value:", standard_error_expected_value, "\n")
cat("Standard Error of Variance:", standard_error_variance, "\n")

#3(D)

# Load required library
library(MASS)

# Function to calculate likelihood function for p
likelihood <- function(p, x) {
  prod(p^x / (1-p)^(1-x))
}

# Function to calculate posterior distribution for p
posterior_p <- function(prior_alpha, prior_beta, x) {
  sum_x <- sum(x)
  n <- length(x)
  posterior_alpha <- prior_alpha + sum_x
  posterior_beta <- prior_beta - n + sum_x
  return(list(alpha = posterior_alpha, beta = posterior_beta))
}

# Function to transform posterior distribution for p to obtain posterior distribution for z
posterior_z <- function(posterior_alpha, posterior_beta) {
  z_mean <- posterior_alpha / (posterior_alpha + posterior_beta)
  z_var <- (posterior_alpha * posterior_beta) / ((posterior_alpha + posterior_beta)^2 * (posterior_alpha + posterior_beta + 1))
  return(list(mean = z_mean, variance = z_var))
}

# Parameters
prior_alpha <- 0.5
prior_beta <- 0.5
N_values <- c(10, 30, 100, 1000)  # Different values of N
true_params <- c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99)  # True parameters for simulation
num_simulations <- 100  # Number of simulations per configuration

# Storage for results
results_list <- list()

# Perform simulations for each combination of N and true parameters
for (N in N_values) {
  for (p in true_params) {
    results <- matrix(NA, nrow = num_simulations, ncol = 2)
    
    for (i in 1:num_simulations) {
      # Generate N Bernoulli samples with parameter p
      x <- rbinom(N, 1, p)
      
      # Calculate posterior distribution for p
      posterior_params <- posterior_p(prior_alpha, prior_beta, x)
      
      # Calculate posterior distribution for z
      z_posterior <- posterior_z(posterior_params$alpha, posterior_params$beta)
      
      # Store mean and variance of z samples
      results[i, ] <- c(z_posterior$mean, z_posterior$variance)
    }
    
    # Store results for this configuration
    config_name <- paste("N", N, "p", gsub("\\.", "", as.character(p)), sep = "_")
    results_list[[config_name]] <- results
  }
}

# Print results for each configuration
#for (config_name in names(results_list)) {
  #cat("Results for configuration:", config_name, "\n")
 # print(results_list[[config_name]])
 # cat("\n")
# Flatten the results list into a data frame
results_df <- do.call(rbind, lapply(names(results_list), function(config_name) {
  config_results <- results_list[[config_name]]
  config_results <- cbind(config_name = rep(config_name, nrow(config_results)), config_results)
  return(as.data.frame(config_results))
}))

# Write the results to a CSV file
write.csv(results_df, "simulation_results.csv", row.names = FALSE)


# Flatten the results list into a data frame
results_df <- do.call(rbind, lapply(names(results_list), function(config_name) {
  config_results <- results_list[[config_name]]
  config_results <- cbind(config_name = rep(config_name, nrow(config_results)), config_results)
  return(as.data.frame(config_results))
}))

# Write the results to a CSV file
write.csv(results_df, "simulation_results_Posterior.csv", row.names = FALSE)
---
title: "Statistics 5114: Odds Ratios: Delta Method,Bootstrap and Bayes"
output: html_notebook
---


#A1
```{r}
# Set parameters
N <- 1000
p <- 0.5
iterations <- 5000

# Initialize vector to store results
odds_ratios <- numeric(iterations)

# Simulate data and calculate odds ratios
for (i in 1:iterations) {
  # Generate sample of Bernoulli random variables
  sample_data <- rbinom(N, 1, p)
  
  # Calculate sample average
  sample_average <- mean(sample_data)
  
  # Calculate odds ratio
  odds_ratio <- sample_average / (1 - sample_average)
  
  # Store result
  odds_ratios[i] <- odds_ratio
}

# Plot histogram of simulated odds ratios
hist(odds_ratios, freq = FALSE, breaks = 30, main = "Histogram of Simulated Odds Ratios",
     xlab = "Odds Ratio", ylab = "Density")

# Calculate mean and variance of odds ratios
mean_odds_ratio <- mean(odds_ratios)
var_odds_ratio <- var(odds_ratios)

# Calculate standard error using Delta method
se_delta_method <- sqrt((1/(1-p)^3)*(p/N))

# Plot normal distribution with mean and standard error
curve(dnorm(x, mean = mean_odds_ratio, sd = se_delta_method), 
      col = "blue", lwd = 2, add = TRUE)

```
#A2

```{r}
# Set parameters
N_values <- c(10, 30, 50, 100, 500)
p <- 0.5
iterations <- 5000

# Function to calculate standard error using Delta method
calculate_se_delta_method <- function(p, N) {
  return(sqrt((1/(1-p)^3)*(p/N)))
}

# Function to simulate data and plot histogram for given N
simulate_and_plot <- function(N) {
  # Initialize vector to store results
  odds_ratios <- numeric(iterations)
  
  # Simulate data and calculate odds ratios
  for (i in 1:iterations) {
    # Generate sample of Bernoulli random variables
    sample_data <- rbinom(N, 1, p)
    
    # Calculate sample average
    sample_average <- mean(sample_data)
    
    # Calculate odds ratio
    odds_ratio <- sample_average / (1 - sample_average)
    
    # Store result
    odds_ratios[i] <- odds_ratio
  }
  
  # Plot histogram of simulated odds ratios
  hist(odds_ratios, freq = FALSE, breaks = 30, 
       main = paste("Histogram of Simulated Odds Ratios (N =", N, ")"),
       xlab = "Odds Ratio", ylab = "Density")
  
  # Calculate mean and variance of odds ratios
  mean_odds_ratio <- mean(odds_ratios)
  var_odds_ratio <- var(odds_ratios)
  
  # Calculate standard error using Delta method
  se_delta_method <- calculate_se_delta_method(p, N)
  
  # Plot normal distribution with mean and standard error
  curve(dnorm(x, mean = mean_odds_ratio, sd = se_delta_method), 
        col = "blue", lwd = 2, add = TRUE)
  
  # Add legend
  legend("topright", legend = "Normal Curve", col = "blue", lwd = 2)
}

# Loop over different values of N
for (N in N_values) {
  simulate_and_plot(N)
}
```


#2(A)


```{r}
# Define the sample sizes and true parameters
sample_sizes <- c(10, 30, 100, 1000)
true_ps <- c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99)
# Initialize an empty dataframe for results
results_df <- data.frame(N = integer(),
                         P = numeric(),
                         N_P_set = character(),
                         Expectation = numeric(),
                         Variance = numeric(),
                         stringsAsFactors = FALSE) 

# Function to calculate delta method estimates
calculate_delta <- function(N, p_hat) {
  g_p_hat <- p_hat / (1 - p_hat) # Expected value approximation
  var_g_p_hat <- (1 / ((1 - p_hat)^2))^2 * (p_hat * (1 - p_hat) / N) # Variance approximation
  return(c(g_p_hat, var_g_p_hat))
}

# Loop over each N and p, simulate Bernoulli trials, and apply the delta method
for (N in sample_sizes) {
  for (p in true_ps) {
    for (i in 1:100) { # Repeat each configuration 100 times
      samples <- rbinom(N, 1, p) # Simulate N Bernoulli trials
      p_hat <- mean(samples) # Sample mean as estimator for p
      estimates <- calculate_delta(N, p_hat)
      # Create the combined N_P_set identifier
      N_P_set_value <- sprintf("N.%dp.%s.%d", N, gsub("0\\.", "", format(p, nsmall = 2)), i)
      # Append to results dataframe
      results_df <- rbind(results_df, data.frame(N = N,
                                                  P = p,
                                                  N_P_set = N_P_set_value,
                                                  Expectation = estimates[1],
                                                  Variance = estimates[2]))
    }
  }
}

# Save results to CSV file with specified headers
write.csv(results_df, "delta_method_results.csv", row.names = FALSE)

# Print confirmation message
print("Delta method results saved in the working directory")
```




#2(B)
```{r}
# Define the sample sizes and true parameters
sample_sizes <- c(10, 30, 100, 1000)
true_ps <- c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99)
B <- 100 # Number of bootstrap realizations

# Initialize an empty dataframe for results
bootstrap_results_df <- data.frame(N = integer(),
                                   P = numeric(),
                                   N_P_set = character(),
                                   Bootstrap_Mean = numeric(),
                                   Bootstrap_Variance = numeric(),
                                   stringsAsFactors = FALSE)

# Function to calculate g(𝑋̄) for a given sample
calculate_g_x_bar <- function(samples) {
  p_hat <- mean(samples) # Sample mean as estimator for p
  g_p_hat <- p_hat / (1 - p_hat) # Expected value approximation
  return(g_p_hat)
}

# Function to perform bootstrap resampling
perform_bootstrap <- function(samples, B) {
  bootstrap_estimates <- replicate(B, {
    bootstrap_sample <- sample(samples, replace = TRUE) # Bootstrap resampling
    calculate_g_x_bar(bootstrap_sample) # Calculate g(𝑋̄) for bootstrap sample
  })
  return(bootstrap_estimates)
}

# Loop over each N and p, simulate Bernoulli trials, and apply bootstrap
for (N in sample_sizes) {
  for (p in true_ps) {
    for (i in 1:100) { # Repeat each configuration 100 times
      samples <- rbinom(N, 1, p) # Simulate N Bernoulli trials
      
      bootstrap_estimates <- perform_bootstrap(samples, B)
      
      bootstrap_mean <- mean(bootstrap_estimates) # Bootstrap mean
      bootstrap_variance <- var(bootstrap_estimates) * B / (B - 1) # Bootstrap variance
      
      # Create the combined N_P_set identifier
      N_P_set_value <- sprintf("N.%dp.%s.%d", N, gsub("0\\.", "", format(p, nsmall = 2)), i)
      
      # Append to results dataframe
      bootstrap_results_df <- rbind(bootstrap_results_df, data.frame(N = N,
                                                                     P = p,
                                                                     N_P_set = N_P_set_value,
                                                                     Bootstrap_Mean = bootstrap_mean,
                                                                     Bootstrap_Variance = bootstrap_variance))
    }
  }
}

# Save results to CSV file with specified headers
write.csv(bootstrap_results_df, "bootstrap_results.csv", row.names = FALSE)

```








#2(B)
```{r}
# Define function to calculate g(X-bar)
calculate_g <- function(sample_average) {
  return(sample_average / (1 - sample_average))
}

# Define function to perform bootstrap
perform_bootstrap <- function(sample_data, B) {
  g_values <- numeric(B)
  for (b in 1:B) {
    bootstrap_sample <- sample(sample_data, replace = TRUE)
    sample_average <- mean(bootstrap_sample)
    g_values[b] <- calculate_g(sample_average)
  }
  return(g_values)
}

# Set parameters
B <- 100  # Number of bootstrap realizations
iterations <- 100  # Number of iterations
N <- 100  # Sample size
p <- 0.5  # True parameter value

# Initialize matrices to store results
expected_values <- matrix(NA, nrow = iterations, ncol = 1)
variances <- matrix(NA, nrow = iterations, ncol = 1)

# Perform iterations
for (iter in 1:iterations) {
  # Generate sample of Bernoulli random variables
  sample_data <- rbinom(N, 1, p)
  
  # Calculate sample average
  sample_average <- mean(sample_data)
  
  # Calculate g(X-bar)
  g <- calculate_g(sample_average)
  
  # Perform bootstrap
  bootstrap_g <- perform_bootstrap(sample_data, B)
  
  # Estimate expected value and variance of g(X-bar)
  expected_values[iter, 1] <- mean(bootstrap_g)
  variances[iter, 1] <- var(bootstrap_g) * B / (B - 1)
}

# Calculate mean and standard error of expected value and variance
mean_expected_value <- mean(expected_values)
mean_variance <- mean(variances)
standard_error_expected_value <- sd(expected_values) / sqrt(iterations)
standard_error_variance <- sd(variances) / sqrt(iterations)

# Print results
cat("Estimated Expected Value of g(X-bar):", mean_expected_value, "\n")
cat("Estimated Variance of g(X-bar):", mean_variance, "\n")
cat("Standard Error of Expected Value:", standard_error_expected_value, "\n")
cat("Standard Error of Variance:", standard_error_variance, "\n")
```


#3(D)
```{r}
# Load required library
library(MASS)

# Function to calculate likelihood function for p
likelihood <- function(p, x) {
  prod(p^x / (1-p)^(1-x))
}

# Function to calculate posterior distribution for p
posterior_p <- function(prior_alpha, prior_beta, x) {
  sum_x <- sum(x)
  n <- length(x)
  posterior_alpha <- prior_alpha + sum_x
  posterior_beta <- prior_beta - n + sum_x
  return(list(alpha = posterior_alpha, beta = posterior_beta))
}

# Function to transform posterior distribution for p to obtain posterior distribution for z
posterior_z <- function(posterior_alpha, posterior_beta) {
  z_mean <- posterior_alpha / (posterior_alpha + posterior_beta)
  z_var <- (posterior_alpha * posterior_beta) / ((posterior_alpha + posterior_beta)^2 * (posterior_alpha + posterior_beta + 1))
  return(list(mean = z_mean, variance = z_var))
}

# Parameters
prior_alpha <- 0.5
prior_beta <- 0.5
N_values <- c(10, 30, 100, 1000)  # Different values of N
true_params <- c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99)  # True parameters for simulation
num_simulations <- 100  # Number of simulations per configuration

# Storage for results
results_list <- list()

# Perform simulations for each combination of N and true parameters
for (N in N_values) {
  for (p in true_params) {
    results <- matrix(NA, nrow = num_simulations, ncol = 2)
    
    for (i in 1:num_simulations) {
      # Generate N Bernoulli samples with parameter p
      x <- rbinom(N, 1, p)
      
      # Calculate posterior distribution for p
      posterior_params <- posterior_p(prior_alpha, prior_beta, x)
      
      # Calculate posterior distribution for z
      z_posterior <- posterior_z(posterior_params$alpha, posterior_params$beta)
      
      # Store mean and variance of z samples
      results[i, ] <- c(z_posterior$mean, z_posterior$variance)
    }
    
    # Store results for this configuration
    config_name <- paste("N", N, "p", gsub("\\.", "", as.character(p)), sep = "_")
    results_list[[config_name]] <- results
  }
}

# Print results for each configuration
#for (config_name in names(results_list)) {
  #cat("Results for configuration:", config_name, "\n")
 # print(results_list[[config_name]])
 # cat("\n")
# Flatten the results list into a data frame
results_df <- do.call(rbind, lapply(names(results_list), function(config_name) {
  config_results <- results_list[[config_name]]
  config_results <- cbind(config_name = rep(config_name, nrow(config_results)), config_results)
  return(as.data.frame(config_results))
}))

# Write the results to a CSV file
write.csv(results_df, "simulation_results.csv", row.names = FALSE)


# Flatten the results list into a data frame
results_df <- do.call(rbind, lapply(names(results_list), function(config_name) {
  config_results <- results_list[[config_name]]
  config_results <- cbind(config_name = rep(config_name, nrow(config_results)), config_results)
  return(as.data.frame(config_results))
}))

# Write the results to a CSV file
write.csv(results_df, "simulation_results_Posterior.csv", row.names = FALSE)

```

