A Nice Little Copula
library(evd)
## Warning: package 'evd' was built under R version 4.4.2
library(mvnfast) # For fast multivariate normal distribution sampling
## Warning: package 'mvnfast' was built under R version 4.4.2
library(moments) # For kurtosis calculation
estimate_covariance <- function(df_rankings) {
# Compute Spearman's rank correlation matrix
cor_matrix <- cor(df_rankings, method = "spearman")
# Convert correlation to covariance (assuming unit variance)
cov_matrix <- cor_matrix
return(cov_matrix)
}
rank_to_uniform <- function(rankings) {
ranks <- rank(rankings, ties.method = "average") # Compute ranks (handling ties)
n <- length(ranks)
return((ranks - 0.5) / n) # Normalized to (0,1)
}
# Function to compute Monte Carlo integration for Gumbel Copula
gumbel_copula_monte_carlo <- function(rankings1, rankings2, rankings3, num_samples=10000) {
# Create a data frame of rankings
df_rankings <- data.frame(rank1 = rankings1, rank2 = rankings2, rank3 = rankings3)
# Convert rankings to uniform variables using empirical CDF
U1 <- rank_to_uniform(rankings1)
U2 <- rank_to_uniform(rankings2)
U3 <- rank_to_uniform(rankings3)
# Estimate covariance matrix from ranking data
Sigma <- estimate_covariance(df_rankings)
# Define mean vector
mu <- rep(0, ncol(Sigma))
# Generate samples from multivariate normal distribution
normal_samples <- rmvn(n = num_samples, mu = mu, sigma = Sigma)
# Transform normal samples to Gumbel samples
gumbel_samples <- -log(-log(pnorm(normal_samples)))
# Transform Gumbel samples to uniform using CDF
u_samples <- pgumbel(gumbel_samples)
# Compute empirical joint CDF
joint_prob <- mean(
(u_samples[,1] <= U1) &
(u_samples[,2] <= U2) &
(u_samples[,3] <= U3)
)
conditional_marginals <- list(
"P(U1 | U2, U3)" = mean(pgumbel(U1)),
"P(U2 | U1, U3)" = mean(pgumbel(U2)),
"P(U3 | U1, U2)" = mean(pgumbel(U3))
)
return(list(joint_prob = joint_prob, conditional_marginals = conditional_marginals))
}
# Example usage
set.seed(42)
n_items <- 100
# Generate rankings with some ties
rankings1 <- sample(1:(n_items/2), n_items, replace = TRUE) # Contains ties
rankings2 <- rankings1 + rnorm(n_items, mean = 0, sd = 10) # Correlated with rankings1
rankings3 <- sample(1:n_items, n_items, replace = FALSE) # No ties
# Compute joint probability and conditional marginals
result <- gumbel_copula_monte_carlo(rankings1, rankings2, rankings3)
# Print results
cat(sprintf("Joint Probability (Gumbel-Copula - Monte Carlo): %.6f\n", result$joint_prob))
## Joint Probability (Gumbel-Copula - Monte Carlo): 0.190200
print(result$conditional_marginals)
## $`P(U1 | U2, U3)`
## [1] 0.5400381
##
## $`P(U2 | U1, U3)`
## [1] 0.5400323
##
## $`P(U3 | U1, U2)`
## [1] 0.5400323