library(mvtnorm); library(sn); library(fitHeavyTail); library(moments); library(MASS); library(magrittr) ; library(dplyr); library(plotly); library(webshot)
## Loading required package: stats4
##
## Attaching package: 'sn'
## The following object is masked from 'package:stats':
##
## sd
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Creating a sequence of x and y values covering the observed data range: (i) A calculate_range function is defined, which given a data vector, generates grid points around it by expanding the original data by a specified factor (given by expand_val). (ii) grid contains the comprehensive set of input points that span the specified ranges of x and y.
calculate_range <- function(data, expansion_factor = 0.9) {
range_val <- range(data)
expand_val <- (range_val[2] - range_val[1]) * expansion_factor
seq(range_val[1] - expand_val, range_val[2] + expand_val, length.out = 600)
}
x_range <- calculate_range(bivariate_data$r_observed) ; y_range <- calculate_range(bivariate_data$m_observed)
grid <- expand.grid(x = x_range, y = y_range)
We use the above grid points for generating contour plots for all four different model fits.
For a bivariate normal distribution, the MLE of the parameters (mean vector and covariance matrix) is just the sample mean vector and the sample covariance matrix
#MLE Estimates:
mean_vec <- colMeans(bivariate_data); cov_matrix <- cov(bivariate_data)
#AIC Criterion:
log_likelihood <- sum(dmvnorm(bivariate_data, mean = mean_vec, sigma = cov_matrix, log = TRUE))
k = 5 #2 means, 2 variances, one covariance
AIC <- 2*k - 2*log_likelihood; print(AIC)
## [1] 24866.43
Given the grid points above, we calculate the density for each grid point with the MLE estimates for bivariate normal using dmvnorm. grid$density1 takes each row from the grid and calculates the pdf of that row (bivariate point) given the MLE estimates and stores it in new a column of the grid matrix.
grid$density1 <- apply(grid, 1, function(point) {
mass_radius_vector <- c(point[2], point[1]) #the first column in grid is radius and second is mass.
dmvnorm(x = mass_radius_vector, mean = mean_vec, sigma = cov_matrix)
})
plot(bivariate_data$r_observed, bivariate_data$m_observed,
xlab = "Radius", ylab = "Mass",
main = "Bivariate Data with Normal Density Contours")
contour(x_range, y_range, matrix(grid$density1, nrow = length(y_range)), add = TRUE, col = "orange")
Note: I could not find any standard package to fit multivariate skew normal, so i am using optim function, optimizing over the cholesky decomposition of Omega (ensuring it remains a p.d), and setting an appropriate lower bound for the parameters. (i) Define initial parameters (omega, location and alpha) - initialize omega_init as the covariance of observed data, perform cholesky decomposition to obtain lower triangular matrix and extract its element in the vector s. -the location parameter is set to be the mean of the data-set -the alpha parameter (slant density) is initalized to be (-1,-1) -> considering left skewness
Define the negative log likelihood function that takes params and data as input, given the first three elements of param (s), Omega is reconstructed, location parameter is the 4 and 5th input of params and alpha is the last two input of params.
The negative log likelihood is optimized using optim function using the L-BFGS-B method. Setting appropriate lower bounds for each of the parameter.
With this, AIC is calculated.
#Initialization:
omega_init <- cov(bivariate_data)
L_init <- t(chol(omega_init)) #lower triangular
s <- c(L_init[lower.tri(L_init, diag = TRUE)])
mean <- c(mean(bivariate_data$m_observed),mean(bivariate_data$r_observed) )
alpha = c(-1,-1)
initial_params <- c(s,mean,alpha)
neg_log_likelihood <- function(params, data) {
# Reconstruct L from params
L <- matrix(0, nrow = 2, ncol = 2)
L[lower.tri(L, diag = TRUE)] <- params[1:3]
Omega <- L %*% t(L)
# Extract location (xi) and shape (alpha) parameters from params
xi <- params[4:5]
alpha <- params[6:7]
# Calculate the log-likelihood of the data
log_likelihood <- sum(dmsn(x = data, xi = xi, Omega = Omega, alpha = alpha, log = TRUE))
# Return negative log-likelihood
-log_likelihood
}
# Perform optimization
result <- optim(
par = initial_params,
fn = neg_log_likelihood,
data = bivariate_data,
method = "L-BFGS-B",
lower = c(0.01, -Inf, 0.01, rep(-Inf, 4)),
)
print(result)
## $par
## [1] 0.156160971 0.972270211 0.623198660 1.392607717 12.910934829
## [6] -0.043586978 0.004426554
##
## $value
## [1] 12428.22
##
## $counts
## function gradient
## 81 81
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
#AIC:
k <- length(result$par) # Number of parameters
NLL <- result$value # Negative Log-Likelihood
AIC <- 2*k - 2*(-NLL) ; print(AIC)
## [1] 24870.45
# Reconstruct Omega, extract location parameter and slant parameter from optimization result
L <- matrix(0, nrow = 2, ncol = 2)
L[lower.tri(L, diag = TRUE)] <- result$par[1:3]
Omega <- L %*% t(L)
xi <- result$par[4:5]
alpha <- result$par[6:7]
grid$density2 <- apply(grid, 1, function(point) {
mass_radius_vector <- c(point[2],point[1])
dmsn(x = mass_radius_vector, xi = xi, Omega = Omega, alpha = alpha, log = FALSE)
})
plot(bivariate_data$r_observed, bivariate_data$m_observed,
xlab = "Radius", ylab = "Mass",
main = "Bivariate Data with Skew-Normal Density Contours")
contour(x_range, y_range, matrix(grid$density2, nrow = length(y_range)), add = TRUE, col = 'orange')
# List of initial nu values
nu_values <- c(3, 5, 10)
# Initialize an empty list to store fit results
fit_results <- list()
# Loop over nu_values to fit the model with different fixed nu values
for (i in seq_along(nu_values)) {
# Specify initial values for mu and cov, but not nu, since it will be fixed
initial_values <- list(
mu = colMeans(bivariate_data),
cov = cov(bivariate_data)
)
# Fit the model with fixed nu and store the result
fit_results[[i]] <- fit_mvt(
X = bivariate_data,
initial = initial_values,
nu = nu_values[i] # Specify fixed nu directly here
)
}
# Naming the fit_results for clarity
names(fit_results) <- paste("fit_result", nu_values, sep = "_")
print(fit_results)
## $fit_result_3
## $fit_result_3$mu
## m_observed r_observed
## 1.387674 12.846741
##
## $fit_result_3$scatter
## m_observed r_observed
## m_observed 0.01678062 0.1060646
## r_observed 0.10606458 0.9202649
##
## $fit_result_3$nu
## [1] 3
##
## $fit_result_3$mean
## m_observed r_observed
## 1.387674 12.846741
##
## $fit_result_3$cov
## m_observed r_observed
## m_observed 0.05034185 0.3181937
## r_observed 0.31819374 2.7607948
##
## $fit_result_3$converged
## [1] TRUE
##
## $fit_result_3$num_iterations
## [1] 6
##
## $fit_result_3$cpu_time
## elapsed
## 0.012
##
##
## $fit_result_5
## $fit_result_5$mu
## m_observed r_observed
## 1.387588 12.853106
##
## $fit_result_5$scatter
## m_observed r_observed
## m_observed 0.01875882 0.1182929
## r_observed 0.11829287 1.0280650
##
## $fit_result_5$nu
## [1] 5
##
## $fit_result_5$mean
## m_observed r_observed
## 1.387588 12.853106
##
## $fit_result_5$cov
## m_observed r_observed
## m_observed 0.0312647 0.1971548
## r_observed 0.1971548 1.7134416
##
## $fit_result_5$converged
## [1] TRUE
##
## $fit_result_5$num_iterations
## [1] 5
##
## $fit_result_5$cpu_time
## elapsed
## 0.005
##
##
## $fit_result_10
## $fit_result_10$mu
## m_observed r_observed
## 1.387522 12.861579
##
## $fit_result_10$scatter
## m_observed r_observed
## m_observed 0.02092315 0.1314623
## r_observed 0.13146227 1.1456351
##
## $fit_result_10$nu
## [1] 10
##
## $fit_result_10$mean
## m_observed r_observed
## 1.387522 12.861579
##
## $fit_result_10$cov
## m_observed r_observed
## m_observed 0.02615394 0.1643278
## r_observed 0.16432784 1.4320438
##
## $fit_result_10$converged
## [1] TRUE
##
## $fit_result_10$num_iterations
## [1] 3
##
## $fit_result_10$cpu_time
## elapsed
## 0.009
Next, we find the AIC for each of the three initializations
#AIC Criterion
aic_values <- numeric(length(fit_results))
for (i in seq_along(fit_results)) {
fit <- fit_results[[i]]# Extract the current fit result
# Calculate log-likelihood
log_likelihood_value <- sum(dmvt(x = as.matrix(bivariate_data),
delta = fit$mu,
sigma = fit$cov,
df = fit$nu,
log = TRUE))
# Number of parameters: 2 for mu, 3 for cov, 1 for nu
k <- 6
# Calculate AIC
aic_values[i] <- 2*k - 2*log_likelihood_value
# Print the AIC values
cat("AIC for nu =", fit$nu, ":", aic_values[i], "\n")
}
## AIC for nu = 3 : 41642.92
## AIC for nu = 5 : 29889.52
## AIC for nu = 10 : 25968.42
Note: The AIC for nu = 3 gives the highest value.
Similar to above, we calculate the density for each grid point using the MLE estimates and dmvt function and plot the contour of these densities over the observed data. I am plotting the contour for the MLE estimates of df = 3 initilizaiton.
grid$density3 <- apply(grid, 1, function(point) {
mass_radius_vector <- c(point[2], point[1]) # Switching the order to mass, radius
# Now, pass this vector to 'dmvt'
dmvt(x = mass_radius_vector, delta = fit_results[[1]]$mu, sigma = fit_results[[1]]$cov, df = fit_results[[1]]$nu, log = FALSE)
})
plot(bivariate_data$r_observed, bivariate_data$m_observed,
xlab = "Radius", ylab = "Mass",
main = "Bivariate Data with T Density Contours (df=3)")
contour(x_range, y_range, matrix(grid$density3, nrow = length(y_range)), add = TRUE, col = "orange")
We make use of the standard fit_mvst function from the fitHeavyTail Library to fit the multivariate skew t distribution. Again, we fit this for three initializations of nu (df) set to 3,5 and 10.
nu_values <- c(3,5,10) # Initialize nu_values to consider
fit_results1 <- list() # Initialize an empty list to store fit results
initial_values <- list(
mu = colMeans(bivariate_data), # Initial location vector
scatter = cov(bivariate_data)# Initial scatter matrix
#gamma = c(skewness(bivariate_data$m_observed), skewness(bivariate_data$r_observed))# Initial skewness vector
)
for (i in seq_along(nu_values)) {
fit_results1[[i]] <- fit_mvst(
X = bivariate_data,
nu = nu_values[i],
initial = initial_values
)
}
names(fit_results1) <- paste("fit_result_nu", nu_values, sep = "_") # Naming the fit_results for clarity
print(fit_results1)
## $fit_result_nu_3
## $fit_result_nu_3$mu
## m_observed r_observed
## 1.387488 12.745558
##
## $fit_result_nu_3$gamma
## m_observed r_observed
## 7.799244e-05 6.890570e-02
##
## $fit_result_nu_3$scatter
## m_observed r_observed
## m_observed 0.0168082 0.1063395
## r_observed 0.1063395 0.9164758
##
## $fit_result_nu_3$nu
## [1] 3
##
## $fit_result_nu_3$mean
## m_observed r_observed
## 1.387722 12.952275
##
## $fit_result_nu_3$cov
## [1] NA
##
## $fit_result_nu_3$converged
## [1] TRUE
##
## $fit_result_nu_3$num_iterations
## [1] 46
##
## $fit_result_nu_3$cpu_time
## [1] 7.438
##
##
## $fit_result_nu_5
## $fit_result_nu_5$mu
## m_observed r_observed
## 1.386654 12.679991
##
## $fit_result_nu_5$gamma
## m_observed r_observed
## 0.0006479625 0.1320593774
##
## $fit_result_nu_5$scatter
## m_observed r_observed
## m_observed 0.01879654 0.1186803
## r_observed 0.11868030 1.0202967
##
## $fit_result_nu_5$nu
## [1] 5
##
## $fit_result_nu_5$mean
## m_observed r_observed
## 1.387734 12.900090
##
## $fit_result_nu_5$cov
## m_observed r_observed
## m_observed 0.03132989 0.1982759
## r_observed 0.19827589 1.7973815
##
## $fit_result_nu_5$converged
## [1] TRUE
##
## $fit_result_nu_5$num_iterations
## [1] 59
##
## $fit_result_nu_5$cpu_time
## [1] 8.883
##
##
## $fit_result_nu_10
## $fit_result_nu_10$mu
## m_observed r_observed
## 1.383892 12.502102
##
## $fit_result_nu_10$gamma
## m_observed r_observed
## 0.003035832 0.306978297
##
## $fit_result_nu_10$scatter
## m_observed r_observed
## m_observed 0.02097497 0.131934
## r_observed 0.13193399 1.125673
##
## $fit_result_nu_10$nu
## [1] 10
##
## $fit_result_nu_10$mean
## m_observed r_observed
## 1.387687 12.885824
##
## $fit_result_nu_10$cov
## m_observed r_observed
## m_observed 0.02622352 0.1654029
## r_observed 0.16540287 1.4561722
##
## $fit_result_nu_10$converged
## [1] TRUE
##
## $fit_result_nu_10$num_iterations
## [1] 110
##
## $fit_result_nu_10$cpu_time
## [1] 18.612
#AIC Criterion
aic_values <- numeric(length(fit_results1))
for (i in seq_along(fit_results1)) {
fit <- fit_results1[[i]]# Extract the current fit result
# Calculate log-likelihood
log_likelihood_value <- sum(dmst(x = as.matrix(bivariate_data),
xi = fit$mu,
Omega = fit$scatter,
alpha = fit$gamma,
nu = fit$nu,
log = TRUE))
# Number of parameters: 2 for location, 3 for cov, 2 for skewness, 1 for df
k <- 8
# Calculate AIC
aic_values[i] <- 2*k - 2*log_likelihood_value
# Print the AIC values
cat("AIC for nu =", fit$nu, ":", aic_values[i], "\n")
}
## AIC for nu = 3 : 28875.23
## AIC for nu = 5 : 27689
## AIC for nu = 10 : 30795.65
Initialization of nu = 10 and 5 has the highest AIC result, so considering its MLE estimates for contour plotting. Using the same logic as before, finding the density for each of the grid points using the dmst function.
grid$density4 <- apply(grid, 1, function(point) {
mass_radius_vector <- c(point[2], point[1]) # Switching the order to mass, radius
dmst(x = mass_radius_vector, xi = fit_results1[[1]]$mu, Omega = fit_results1[[1]]$scatter, alpha = fit_results1[[1]]$gamma, nu = fit_results1[[1]]$nu, log = FALSE)
})
plot(bivariate_data$r_observed, bivariate_data$m_observed,
xlab = "Radius", ylab = "Mass",
main = "Bivariate Data with Skew t-Distribution Density Contours (df=3)")
contour(x_range, y_range, matrix(grid$density4, nrow = length(y_range)), add = TRUE, col = "orange")
## Contour for df = 5
grid$density5 <- apply(grid, 1, function(point) {
mass_radius_vector <- c(point[2], point[1]) # Switching the order to mass, radius
dmst(x = mass_radius_vector, xi = fit_results1[[2]]$mu, Omega = fit_results1[[2]]$cov, alpha = fit_results1[[2]]$gamma, nu = fit_results1[[2]]$nu, log = FALSE)
})
plot(bivariate_data$r_observed, bivariate_data$m_observed,
xlab = "Radius", ylab = "Mass",
main = "Bivariate Data with Skew t-Distribution Density Contours (df=5)")
contour(x_range, y_range, matrix(grid$density5, nrow = length(y_range)), add = TRUE, col = "orange")
grid$density6 <- apply(grid, 1, function(point) {
mass_radius_vector <- c(point[2], point[1]) # Switching the order to mass, radius
dmst(x = mass_radius_vector, xi = fit_results1[[3]]$mu, Omega = fit_results1[[3]]$cov, alpha = fit_results1[[3]]$gamma, nu = fit_results1[[3]]$nu, log = FALSE)
})
plot(bivariate_data$r_observed, bivariate_data$m_observed,
xlab = "Radius", ylab = "Mass",
main = "Bivariate Data with Skew t-Distribution Density Contours (df=10)")
contour(x_range, y_range, matrix(grid$density6, nrow = length(y_range)), add = TRUE, col = "orange")
mean_vec <- colMeans(bivariate_data); cov_matrix <- cov(bivariate_data)
sample_data <- mvrnorm(n = 1000, mu = mean_vec, Sigma = cov_matrix)
plot(sample_data[,2], sample_data[,1])
# Parameters
L <- matrix(0, nrow = 2, ncol = 2)
L[lower.tri(L, diag = TRUE)] <- result$par[1:3]
Omega <- L %*% t(L)
xi <- result$par[4:5]
alpha <- result$par[6:7]
# Generate samples
samples <- rmst(n = 1000, xi = xi, Omega = Omega, alpha = alpha)
plot(samples[,2], samples[,1])
delta = fit_results[[1]]$mu; sigma = fit_results[[1]]$cov; df = fit_results[[1]]$nu
samples_t = rmvt(n = 1000, sigma = sigma, df = df, delta = delta)
plot(samples_t[,2],samples_t[,1], xlim = c(9,18), ylim =c(1,2))
xi = fit_results1[[2]]$mu; Omega = fit_results1[[2]]$cov; alpha = fit_results1[[2]]$gamma; nu = fit_results1[[2]]$nu
samples_skewt = rmst(n = 1000, xi = xi, Omega = Omega, alpha = alpha, nu = nu)
plot(samples_skewt[,2], samples_skewt[,1],xlim = c(9,18), ylim =c(1,2))
##Surface plots of the fitted densities
density_matrix <- matrix(grid$density1, nrow = length(y_range), byrow = FALSE)
# Create the surface plot
plot_ly(x = ~y_range, y = ~x_range, z = ~density_matrix, type = "surface") %>%
layout(title = "Fitted Density Surface Plot for Bivariate Normal",
scene = list(xaxis = list(title = "X-Axis"),
yaxis = list(title = "Y-Axis"),
zaxis = list(title = "Density")))
density_matrix <- matrix(grid$density2, nrow = length(y_range), byrow = FALSE)
# Create the surface plot
plot_ly(x = ~y_range, y = ~x_range, z = ~density_matrix, type = "surface") %>%
layout(title = "Fitted Density Surface Plot for Bivariate Skew Normal",
scene = list(xaxis = list(title = "X-Axis"),
yaxis = list(title = "Y-Axis"),
zaxis = list(title = "Density")))
density_matrix <- matrix(grid$density3, nrow = length(y_range), byrow = FALSE)
# Create the surface plot
plot_ly(x = ~y_range, y = ~x_range, z = ~density_matrix, type = "surface") %>%
layout(title = "Fitted Density Surface Plot for Bivariate Skew Normal (df = 3)",
scene = list(xaxis = list(title = "X-Axis"),
yaxis = list(title = "Y-Axis"),
zaxis = list(title = "Density")))
density_matrix <- matrix(grid$density5, nrow = length(y_range), byrow = FALSE)
# Create the surface plot
plot_ly(x = ~y_range, y = ~x_range, z = ~density_matrix, type = "surface") %>%
layout(title = "Fitted Density Surface Plot for Bivariate Skew T (df =5)",
scene = list(xaxis = list(title = "X-Axis"),
yaxis = list(title = "Y-Axis"),
zaxis = list(title = "Density")))