Distribuciones de probabilidad multivariadas

Distribución de probabilidad t de Student multivariada \(\boldsymbol{X}{\sim}{\boldsymbol{t}}_{\boldsymbol{\nu}}(\boldsymbol{\mu},\boldsymbol{\Sigma})\)

Es una distribución de probabilidad conjunta para un vector \(p\)-variado, \(\boldsymbol{X}=\left({X}_{1},{X}_{2},\ldots,{X}_{p}\right)^{t}\), tal que se tiene que:

Función de densidad de probabilidad \(f_{X}\)

\[ f_{\boldsymbol{X}}(\boldsymbol{x})=\frac{{\Gamma}\left(\frac{\nu+p}{2}\right)}{{\Gamma}\left(\frac{\nu}{2}\right)\sqrt{({\nu\pi})^{{p}}\left|\boldsymbol{\Sigma}\right|}}\left[1+\frac{1}{\nu}\left(\boldsymbol{x}-\boldsymbol{\mu}\right)^t\boldsymbol{\Sigma}^{-1}\left(\boldsymbol{x}-\boldsymbol{\mu}\right)\right]^{-\frac{\nu+p}{2}}\text{ con }\boldsymbol{\Sigma}\text{ definida positiva }\text{ y }\boldsymbol{x}{\in}\mathbb{R}^{p} \]

Función de probabilidades acumuladas \(F_{X}\)

\[ P(\boldsymbol{X}{\leq}\boldsymbol{x})={\int}_{-{\infty}}^{{x}_{1}}{\int}_{-{\infty}}^{{x}_{2}}{\cdots}{\int}_{-{\infty}}^{{x}_{p}}\frac{{\Gamma}\left(\frac{\nu+p}{2}\right)}{{\Gamma}\left(\frac{\nu}{2}\right)\sqrt{({\nu\pi})^{{p}}\left|\boldsymbol{\Sigma}\right|}}\left[1+\frac{1}{\nu}\left(\boldsymbol{u}-\boldsymbol{\mu}\right)^t\boldsymbol{\Sigma}^{-1}\left(\boldsymbol{u}-\boldsymbol{\mu}\right)\right]^{-\frac{\nu+p}{2}}{du}_{p}{\cdots}{du}_{2}{du}_{1}\text{ con }\boldsymbol{\Sigma}\text{ definida positiva }\text{ y }\boldsymbol{x}{\in}\mathbb{R}^{p} \]

Esperanza \(\mu_{x}\)

\[ \begin{align} \boldsymbol{\mu}\text{ si }\nu>1 \end{align} \]

Varianza \(\sigma_{x}^{2}\)

\[ \begin{align} \frac{\nu}{\nu-2}\boldsymbol{\Sigma}\text{ si }\nu>2 \end{align} \]

Ejemplos

\[ {X}{\sim}N(\boldsymbol{\mu},\boldsymbol{\Sigma}) \]

Simular una variable aleatoria normal bivariada

\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} {10}\\ {5} \end{bmatrix} \end{align} \]

\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} {10} & {2}\\ {2} & {9} \end{bmatrix} \end{align} \]

library(MASS)
library(mvtnorm)

# Simulation settings: number of draws, location vector and scale matrix.
sample_size <- 100
sample_meanvector <- c(10, 5)
sample_covariance_matrix <- matrix(c(10, 2, 2, 9), ncol = 2)

# Draw the bivariate sample. `delta` shifts the draws to the stated location
# vector; without it rmvt() keeps its default zero shift and
# `sample_meanvector` would go unused. `df` is left at rmvt()'s default.
sample_distribution <- rmvt(n = sample_size,
                            sigma = sample_covariance_matrix,
                            delta = sample_meanvector)

# Show the first rows of the simulated matrix (one column per component).
head(sample_distribution)

##              [,1]       [,2]
## [1,]   0.65176680  -4.131940
## [2,]  -0.09794373  -2.973278
## [3,]  -0.56102019   1.395545
## [4,]  -3.17918178   1.308432
## [5,] -53.54947537 -22.830093
## [6,]  -0.88525287 -12.302813

library(ggplot2)
# Evaluate the bivariate normal density (mean `m`, covariance `sigma`) on a
# 200 x 200 grid spanning the observed range of each sample column, then
# draw it as a raster filled by density value.
m <- sample_meanvector
sigma <- sample_covariance_matrix
data.grid <- expand.grid(
  s.1 = seq(from = min(sample_distribution[, 1]),
            to = max(sample_distribution[, 1]),
            length.out = 200),
  s.2 = seq(from = min(sample_distribution[, 2]),
            to = max(sample_distribution[, 2]),
            length.out = 200)
)
q.samp <- cbind(
  data.grid,
  prob = mvtnorm::dmvnorm(data.grid, mean = m, sigma = sigma)
)
ggplot(q.samp, aes(x = s.1, y = s.2)) +
  geom_raster(aes(fill = prob)) +
  # Clip the axes to the sample range and keep a 1:1 aspect ratio.
  coord_fixed(
    ratio = 1,
    xlim = range(sample_distribution[, 1]),
    ylim = range(sample_distribution[, 2])
  )

library(ggplot2)
# Same density grid as the raster plot, rebuilt here so the chunk is
# self-contained: bivariate normal density with mean `m` and covariance
# `sigma`, evaluated on 200 x 200 points over the sample range.
m <- sample_meanvector
sigma <- sample_covariance_matrix
data.grid <- expand.grid(
  s.1 = seq(from = min(sample_distribution[, 1]),
            to = max(sample_distribution[, 1]),
            length.out = 200),
  s.2 = seq(from = min(sample_distribution[, 2]),
            to = max(sample_distribution[, 2]),
            length.out = 200)
)
q.samp <- cbind(
  data.grid,
  prob = mvtnorm::dmvnorm(data.grid, mean = m, sigma = sigma)
)
# Contour lines of the density; `z = prob` supplies the height.
ggplot(q.samp, aes(x = s.1, y = s.2, z = prob)) +
  geom_contour() +
  coord_fixed(
    ratio = 1,
    xlim = range(sample_distribution[, 1]),
    ylim = range(sample_distribution[, 2])
  )

Simular una variable aleatoria normal multivariada, con \(p=5\)

\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} {0}\\ {0}\\ {0}\\ {0}\\ {0} \end{bmatrix} \end{align} \]

\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} {5} & {4} & {3} & {2} & {1}\\ {4} & {5} & {4} & {3} & {2}\\ {3} & {4} & {5} & {4} & {3}\\ {2} & {3} & {4} & {5} & {4}\\ {1} & {2} & {3} & {4} & {5} \end{bmatrix} \end{align} \]

library(MASS)
library(mvtnorm)

# Simulation settings for the 5-dimensional example.
sample_size <- 1000
sample_meanvector <- rep(0, 5)

# The scale matrix is banded with constant diagonals (5 on the main diagonal,
# then 4, 3, 2, 1), i.e. a symmetric Toeplitz matrix; building it from its
# first column avoids hand-typing all 25 entries.
sample_covariance_matrix <- toeplitz(c(5, 4, 3, 2, 1))

# Pass the location vector explicitly so it is actually used by the call
# (it is all zeros here, so the draws are not shifted). `df` is left at
# rmvt()'s default.
sample_distribution <- rmvt(n = sample_size,
                            sigma = sample_covariance_matrix,
                            delta = sample_meanvector)

# Show the first rows of the simulated matrix (one column per component).
head(sample_distribution)

##             [,1]        [,2]        [,3]        [,4]       [,5]
## [1,]   3.1088583  -2.8520505  -3.7667870  -6.4602392  -6.199657
## [2,]  -2.8416410  -3.3669288  -5.8318921  -6.4024676  -1.431370
## [3,]   0.3400872   0.6150435   0.1716662  -0.5150331   0.481132
## [4,] -36.1503163 -27.3108577 -39.4769277 -35.3203805 -40.074674
## [5,]  14.0693028  10.9462488  11.8960175   5.1427311   6.611403
## [6,]   0.7121452   0.7471845   1.1156762   1.4007639   1.584840

Simular una variable aleatoria normal bivariada

\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} {0}\\ {0} \end{bmatrix} \end{align} \]

\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} {2} & {-1}\\ {-1} & {2} \end{bmatrix} \end{align} \]

library(mnormt)
set.seed(0)

# Evaluation grid: x1 in [-4, 4] and x2 in [-5, 5], both in steps of 0.1.
x1 <- seq(from = -4, to = 4, by = 0.1)
x2 <- seq(from = -5, to = 5, by = 0.1)

# Location vector and 2 x 2 scale matrix of the bivariate density.
mean <- c(0, 0)
cov <- matrix(c(2, -1, -1, 2), nrow = 2, ncol = 2)

# Density at paired coordinates; `df` is not supplied, so dmt() uses its
# default degrees of freedom.
f <- function(x1, x2) {
  dmt(cbind(x1, x2), mean = mean, S = cov)
}

# Matrix of density values over the full x1 by x2 grid.
y <- outer(x1, x2, FUN = f)

a. Gráfico de contorno

# Contour lines of the density matrix `y` over the (x1, x2) grid.
contour(x = x1, y = x2, z = y)

b. Gráfico de perspectiva

# Perspective (3-D surface) view of the density matrix `y`.
persp(
  x = x1, y = x2, z = y,
  theta = -20, phi = 20,
  col = "blue",
  expand = 0.8,
  ticktype = "detailed"
)

# Evaluation point, location vector and identity scale matrix.
x <- c(1.23, 4.56)
mu <- 1:2
Sigma <- diag(2)

# Log-density of the multivariate t at `x` for df = 0 and df = Inf.
# Note: dmvt() has log = TRUE as its default!
x0 <- dmvt(x, delta = mu, sigma = Sigma, df = 0)
x8 <- dmvt(x, delta = mu, sigma = Sigma, df = Inf)

# Log-density of the multivariate normal with the same location and scale.
xn <- dmvnorm(x, mean = mu, sigma = Sigma, log = TRUE)

# All three values must coincide exactly.
stopifnot(
  identical(x0, x8),
  identical(x0, xn)
)

# Sample of 100 draws from t_3(0, diag(2)).
x <- rmvt(n = 100, sigma = diag(2), df = 3)

# Two-dimensional kernel density estimate on a 50 x 50 grid.
x.kde <- kde2d(x = x[, 1], y = x[, 2], n = 50)

# Heat map of the estimate with its contour lines drawn on top.
image(x.kde)
contour(x.kde, add = TRUE)

Simular una variable aleatoria multivariada t - student

num.rows <- 60
num.cols <- 60

# Build a matrix whose (i, j) entry is dt(i, df) * dt(j, df): the product of
# two univariate Student t densities evaluated at the integer points
# i = 1..n.row and j = 1..n.col.
#
# Arguments:
#   n.row  number of rows (density evaluated at 1, ..., n.row)
#   n.col  number of columns (density evaluated at 1, ..., n.col)
#   df     degrees of freedom of both t densities (default 2)
#
# Returns a numeric n.row x n.col matrix.
#
# The dimensions come from the arguments (not from the globals `num.rows` /
# `num.cols`), and the column densities are computed from the column
# sequence, so non-square requests work as well.
simulate <- function(n.row, n.col, df = 2) {
  row.dens <- dt(seq_len(n.row), df = df)
  col.dens <- dt(seq_len(n.col), df = df)

  # Outer product replaces the row-by-row fill loop.
  outer(row.dens, col.dens)
}
res <- simulate(num.rows, num.cols)

library(plotly)

# Interactive 3-D surface of the matrix `res`.
fig.n <- add_surface(plot_ly(z = ~res))
fig.n

num.rows <- 100
num.cols <- 100

# Build a matrix whose (i, j) entry is dt(i, df) * dt(j, df): the product of
# two univariate Student t densities evaluated at the integer points
# i = 1..n.row and j = 1..n.col.
#
# Arguments:
#   n.row  number of rows (density evaluated at 1, ..., n.row)
#   n.col  number of columns (density evaluated at 1, ..., n.col)
#   df     degrees of freedom of both t densities (default 12)
#
# Returns a numeric n.row x n.col matrix.
#
# The dimensions come from the arguments (not from the globals `num.rows` /
# `num.cols`), and the column densities are computed from the column
# sequence, so non-square requests work as well.
simulate <- function(n.row, n.col, df = 12) {
  row.dens <- dt(seq_len(n.row), df = df)
  col.dens <- dt(seq_len(n.col), df = df)

  # Outer product replaces the row-by-row fill loop.
  outer(row.dens, col.dens)
}
res <- simulate(num.rows, num.cols)

# 3-D surface of `res` with contour lines enabled on all three axes.
# Every axis gets the same settings; only the `project` entry differs, being
# keyed by the axis's own name (project = list(z = TRUE) for z, and so on),
# so the three specifications are generated rather than written out.
fig.nc <- plot_ly(
  z = ~res,
  contours = lapply(
    c(z = "z", y = "y", x = "x"),
    function(axis) {
      list(
        show = TRUE,
        usecolormap = TRUE,
        highlightcolor = "#ff0000",
        project = setNames(list(TRUE), axis)
      )
    }
  )
)
fig.nc <- add_surface(fig.nc)
fig.nc

# Colour ramp for the surface, listed from darkest to lightest.
color.vec2 <- c("#240B36", "#A4031F", "#DB9065", "#F2A359", "#F2DC5D")
# Alternative palette, kept for reference:
# color.vec2 <- rev(c("#F7AEF8", "#B388EB", "#8093F1", "#72DDF7", "#F4F4ED"))

# 3-D surface of `res` drawn with the custom colour ramp.
fig.n2 <- add_surface(plot_ly(z = ~res, colors = color.vec2))
fig.n2

Ejercicios

Simular una variable aleatoria continua que tenga distribución t - student con parámetros:

\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 4\\ 19\\ 10 \end{bmatrix} \end{align} \]

\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 81 & -2 & -3\\ -2 & 49 & 4\\ -3 & 4 & 81 \end{bmatrix} \end{align} \]

Simular una variable aleatoria continua que tenga distribución t - student con parámetros:

\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 35\\ 35\\ 30 \end{bmatrix} \end{align} \]

\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 4 & -3 & 0\\ -3 & 1 & 6\\ 0 & 6 & 1 \end{bmatrix} \end{align} \]

Simular una variable aleatoria continua que tenga distribución t - student con parámetros:

\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 52\\ 58\\ 54 \end{bmatrix} \end{align} \]

\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 9 & -3 & 4\\ -3 & 25 & 5\\ 4 & 5 & 64 \end{bmatrix} \end{align} \]

Simular una variable aleatoria continua que tenga distribución t - student con parámetros:

\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 62\\ 75\\ 67 \end{bmatrix} \end{align} \]

\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 16 & -7 & 4\\ -7 & 16 & -3\\ 4 & -3 & 9 \end{bmatrix} \end{align} \]

Simular una variable aleatoria continua que tenga distribución t - student con parámetros:

\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 88\\ 90\\ 97 \end{bmatrix} \end{align} \]

\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 4 & 2 & 0\\ 2 & 36 & 1\\ 0 & 1 & 25 \end{bmatrix} \end{align} \]