A Nice Little Copula

library(evd)
## Warning: package 'evd' was built under R version 4.4.2
library(mvnfast)  # For fast multivariate normal distribution sampling
## Warning: package 'mvnfast' was built under R version 4.4.2
library(moments)  # For kurtosis calculation

estimate_covariance <- function(df_rankings) {
  # Compute Spearman's rank correlation matrix
  cor_matrix <- cor(df_rankings, method = "spearman")
  
  # Convert correlation to covariance (assuming unit variance)
  cov_matrix <- cor_matrix
  
  return(cov_matrix)
}

rank_to_uniform <- function(rankings) {
  ranks <- rank(rankings, ties.method = "average")  # Compute ranks (handling ties)
  n <- length(ranks)
  return((ranks - 0.5) / n)  # Normalized to (0,1)
}

# Function to compute Monte Carlo integration for Gumbel Copula
gumbel_copula_monte_carlo <- function(rankings1, rankings2, rankings3, num_samples=10000) {
  # Create a data frame of rankings
  df_rankings <- data.frame(rank1 = rankings1, rank2 = rankings2, rank3 = rankings3)
  
  # Convert rankings to uniform variables using empirical CDF
  U1 <- rank_to_uniform(rankings1)
  U2 <- rank_to_uniform(rankings2)
  U3 <- rank_to_uniform(rankings3)
  
  # Estimate covariance matrix from ranking data
  Sigma <- estimate_covariance(df_rankings)
  
  # Define mean vector
  mu <- rep(0, ncol(Sigma))
  
  # Generate samples from multivariate normal distribution
  normal_samples <- rmvn(n = num_samples, mu = mu, sigma = Sigma)
  
  # Transform normal samples to Gumbel samples
  gumbel_samples <- -log(-log(pnorm(normal_samples)))
  
  # Transform Gumbel samples to uniform using CDF
  u_samples <- pgumbel(gumbel_samples)
  
  # Compute empirical joint CDF
  joint_prob <- mean(
    (u_samples[,1] <= U1) & 
      (u_samples[,2] <= U2) & 
      (u_samples[,3] <= U3)
  )
  
  conditional_marginals <- list(
    "P(U1 | U2, U3)" = mean(pgumbel(U1)),
    "P(U2 | U1, U3)" = mean(pgumbel(U2)),
    "P(U3 | U1, U2)" = mean(pgumbel(U3))
  )
  
  return(list(joint_prob = joint_prob, conditional_marginals = conditional_marginals))
}

# Example usage
set.seed(42)
n_items <- 100

# Generate rankings with some ties
rankings1 <- sample(1:(n_items/2), n_items, replace = TRUE)  # Contains ties
rankings2 <- rankings1 + rnorm(n_items, mean = 0, sd = 10)  # Correlated with rankings1
rankings3 <- sample(1:n_items, n_items, replace = FALSE)  # No ties

# Compute joint probability and conditional marginals
result <- gumbel_copula_monte_carlo(rankings1, rankings2, rankings3)

# Print results
cat(sprintf("Joint Probability (Gumbel-Copula - Monte Carlo): %.6f\n", result$joint_prob))
## Joint Probability (Gumbel-Copula - Monte Carlo): 0.190200
print(result$conditional_marginals)
## $`P(U1 | U2, U3)`
## [1] 0.5400381
## 
## $`P(U2 | U1, U3)`
## [1] 0.5400323
## 
## $`P(U3 | U1, U2)`
## [1] 0.5400323