It is a joint probability distribution for a \(p\)-variate vector, \(\boldsymbol{X}=\left({X}_{1},{X}_{2},\ldots,{X}_{p}\right)^{t}\), with density and cumulative distribution function given by:
\[ f_{\boldsymbol{X}}(\boldsymbol{x})=\frac{1}{\sqrt{(2{\pi})^{{p}}\left|\boldsymbol{\Sigma}\right|}}\exp{\left[-\frac{1}{2}\left(\boldsymbol{x}-\boldsymbol{\mu}\right)^t\boldsymbol{\Sigma}^{-1}\left(\boldsymbol{x}-\boldsymbol{\mu}\right)\right]}\text{, with }\boldsymbol{\Sigma}\text{ positive definite and }\boldsymbol{x}{\in}\mathbb{R}^{p} \]
\[ P(\boldsymbol{X}{\leq}\boldsymbol{x})={\int}_{-{\infty}}^{{x}_{1}}{\int}_{-{\infty}}^{{x}_{2}}{\cdots}{\int}_{-{\infty}}^{{x}_{p}}{\frac{1}{\sqrt{(2{\pi})^{{p}}\left|\boldsymbol{\Sigma}\right|}}\exp{\left[-\frac{1}{2}\left(\boldsymbol{u}-\boldsymbol{\mu}\right)^t\boldsymbol{\Sigma}^{-1}\left(\boldsymbol{u}-\boldsymbol{\mu}\right)\right]}\,{du}_{p}{\cdots}{du}_{2}\,{du}_{1}}\text{, with }\boldsymbol{\Sigma}\text{ positive definite and }\boldsymbol{x}{\in}\mathbb{R}^{p} \]
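As a minimal sketch of how these two quantities can be evaluated numerically (using the mvtnorm package that also appears later in this document, and assuming for illustration the bivariate parameters \(\boldsymbol{\mu}=(10,5)^t\) and the \(\boldsymbol{\Sigma}\) defined below; the evaluation point is an arbitrary choice):
library(mvtnorm)
mu <- c(10, 5)                             # mean vector
Sigma <- matrix(c(10, 2, 2, 9), ncol = 2)  # positive-definite covariance matrix
x <- c(11, 6)                              # arbitrary evaluation point
dmvnorm(x, mean = mu, sigma = Sigma)       # density f(x)
pmvnorm(upper = x, mean = mu, sigma = Sigma) # cumulative probability P(X <= x), by numerical integration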
\[ \begin{align} E\left(\boldsymbol{X}\right)&=\begin{bmatrix} E\left({X}_{1}\right)\\ E\left({X}_{2}\right)\\ \vdots\\ E\left({X}_{p}\right)\\ \end{bmatrix}\\ &=\begin{bmatrix} {\mu}_{1}\\ {\mu}_{2}\\ \vdots\\ {\mu}_{p}\\ \end{bmatrix}\\ &=\boldsymbol{\mu} \end{align} \]
\[ \begin{align} V\left(\boldsymbol{X}\right)&=E\left\{\begin{bmatrix} {X}_{1}-E\left({X}_{1}\right)\\ {X}_{2}-E\left({X}_{2}\right)\\ \vdots\\ {X}_{p}-E\left({X}_{p}\right) \end{bmatrix}\begin{bmatrix} {X}_{1}-E\left({X}_{1}\right) & {X}_{2}-E\left({X}_{2}\right) & \cdots & {X}_{p}-E\left({X}_{p}\right) \end{bmatrix}\right\}\\ &=E\left\{\begin{bmatrix} {X}_{1}-{\mu}_{1}\\ {X}_{2}-{\mu}_{2}\\ \vdots\\ {X}_{p}-{\mu}_{p} \end{bmatrix}\begin{bmatrix} {X}_{1}-{\mu}_{1} & {X}_{2}-{\mu}_{2} & \cdots & {X}_{p}-{\mu}_{p} \end{bmatrix}\right\}\\ &=E\left\{\begin{bmatrix} \left({X}_{1}-{\mu}_{1}\right)\left({X}_{1}-{\mu}_{1}\right) & \left({X}_{1}-{\mu}_{1}\right)\left({X}_{2}-{\mu}_{2}\right) & \cdots & \left({X}_{1}-{\mu}_{1}\right)\left({X}_{p}-{\mu}_{p}\right)\\ \left({X}_{2}-{\mu}_{2}\right)\left({X}_{1}-{\mu}_{1}\right) & \left({X}_{2}-{\mu}_{2}\right)\left({X}_{2}-{\mu}_{2}\right) & \cdots & \left({X}_{2}-{\mu}_{2}\right)\left({X}_{p}-{\mu}_{p}\right)\\ \vdots & \vdots & \ddots & \vdots\\ \left({X}_{p}-{\mu}_{p}\right)\left({X}_{1}-{\mu}_{1}\right) & \left({X}_{p}-{\mu}_{p}\right)\left({X}_{2}-{\mu}_{2}\right) & \cdots & \left({X}_{p}-{\mu}_{p}\right)\left({X}_{p}-{\mu}_{p}\right)\\ \end{bmatrix}\right\}\\ &=\begin{bmatrix} {\Sigma}_{11} & {\Sigma}_{12} & \cdots & {\Sigma}_{1p}\\ {\Sigma}_{21} & {\Sigma}_{22} & \cdots & {\Sigma}_{2p}\\ \vdots & \vdots & \ddots & \vdots\\ {\Sigma}_{p1} & {\Sigma}_{p2} & \cdots & {\Sigma}_{pp}\\ \end{bmatrix}\\&=\boldsymbol{\Sigma} \end{align} \]
\[ \boldsymbol{X}{\sim}N_{p}(\boldsymbol{\mu},\boldsymbol{\Sigma}) \]
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} {10}\\ {5} \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} {10} & {2}\\ {2} & {9} \end{bmatrix} \end{align} \]
library(MASS)
sample_size <- 100
sample_meanvector <- c(10, 5)
sample_covariance_matrix <- matrix(c(10, 2, 2, 9),
ncol = 2)
sample_distribution <- mvrnorm(n = sample_size,
mu = sample_meanvector,
Sigma = sample_covariance_matrix)
head(sample_distribution)
## [,1] [,2]
## [1,] 6.319688 5.8218644
## [2,] 11.378163 -0.6815746
## [3,] 13.881235 6.6938157
## [4,] 8.800203 4.5545226
## [5,] 6.307523 -1.6912667
## [6,] 13.217638 7.2254946
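Consistent with \(E(\boldsymbol{X})=\boldsymbol{\mu}\) and \(V(\boldsymbol{X})=\boldsymbol{\Sigma}\) above, a quick check (added here as a sketch, not part of the original output) is that the empirical moments of the simulated sample are close to the parameters passed to mvrnorm:
colMeans(sample_distribution) # should be close to (10, 5)
cov(sample_distribution)      # should be close to the covariance matrix used above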
library(ggplot2)
m <- sample_meanvector
sigma <- sample_covariance_matrix
data.grid <- expand.grid(s.1 = seq(min(sample_distribution[,1]), max(sample_distribution[,1]), length.out=200), s.2 = seq(min(sample_distribution[,2]), max(sample_distribution[,2]), length.out=200))
q.samp <- cbind(data.grid, prob = mvtnorm::dmvnorm(data.grid, mean = m, sigma = sigma))
ggplot(q.samp, aes(x=s.1, y=s.2)) +
geom_raster(aes(fill = prob)) +
coord_fixed(xlim = c(min(sample_distribution[,1]), max(sample_distribution[,1])), ylim = c(min(sample_distribution[,2]), max(sample_distribution[,2])), ratio = 1)
library(ggplot2)
m <- sample_meanvector
sigma <- sample_covariance_matrix
data.grid <- expand.grid(s.1 = seq(min(sample_distribution[,1]), max(sample_distribution[,1]), length.out=200), s.2 = seq(min(sample_distribution[,2]), max(sample_distribution[,2]), length.out=200))
q.samp <- cbind(data.grid, prob = mvtnorm::dmvnorm(data.grid, mean = m, sigma = sigma))
ggplot(q.samp, aes(x=s.1, y=s.2, z=prob)) +
geom_contour() +
coord_fixed(xlim = c(min(sample_distribution[,1]), max(sample_distribution[,1])), ylim = c(min(sample_distribution[,2]), max(sample_distribution[,2])), ratio = 1)
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} {10}\\ {5}\\ {7}\\ {9}\\ {20} \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} {5} & {4} & {3} & {2} & {1}\\ {4} & {5} & {4} & {3} & {2}\\ {3} & {4} & {5} & {4} & {3}\\ {2} & {3} & {4} & {5} & {4}\\ {1} & {2} & {3} & {4} & {5} \end{bmatrix} \end{align} \]
library(MASS)
sample_size <- 1000
sample_meanvector <- c(10, 5, 7, 9, 20)
sample_covariance_matrix <- matrix(c(5, 4, 3, 2, 1, 4, 5, 4, 3, 2,
3, 4, 5, 4, 3, 2, 3, 4, 5, 4, 1,
2, 3, 4, 5), ncol = 5)
sample_distribution <- mvrnorm(n = sample_size,
mu = sample_meanvector,
Sigma = sample_covariance_matrix)
head(sample_distribution)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 9.858567 5.409073 5.633253 9.422846 21.08710
## [2,] 10.543810 3.071987 4.965511 5.443857 17.24773
## [3,] 8.424290 1.182142 2.398722 5.668373 17.35142
## [4,] 12.290150 8.726828 10.608007 11.997714 21.33830
## [5,] 9.331325 1.854857 4.557036 6.740062 19.28182
## [6,] 14.522071 10.408433 12.280664 15.008497 25.05795
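A further diagnostic (a sketch added here, not in the original code) is that the squared Mahalanobis distances of a \(p\)-variate normal sample should approximately follow a \(\chi^2\) distribution with \(p=5\) degrees of freedom; the base function mahalanobis() makes this easy to check with a Q-Q plot:
d2 <- mahalanobis(sample_distribution,
                  center = colMeans(sample_distribution),
                  cov = cov(sample_distribution))   # squared Mahalanobis distances
qqplot(qchisq(ppoints(length(d2)), df = 5), d2,
       xlab = "Chi-squared(5) quantiles",
       ylab = "Squared Mahalanobis distance")
abline(0, 1) # points near this line support multivariate normality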
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} {0}\\ {0} \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} {2} & {-1}\\ {-1} & {2} \end{bmatrix} \end{align} \]
library(mnormt)
set.seed(0)
x1 <- seq(-4, 4, 0.1)
x2 <- seq(-5, 5, 0.1)
mean <- c(0, 0)
cov <- matrix(c(2, -1, -1, 2), nrow=2)
f <- function(x1, x2) dmnorm(cbind(x1, x2), mean, cov)
y <- outer(x1, x2, f)
a. Contour plot
contour(x1, x2, y)
b. Perspective plot
persp(x1, x2, y, theta=-20, phi=20, col = 'blue',
expand=0.8, ticktype='detailed')
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} {0}\\ {0} \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} {1} & {\frac{1}{2}}\\ {\frac{1}{2}} & {1} \end{bmatrix} \end{align} \]
library(MASS)
# Simulate bivariate normal data
mu <- c(0,0) # Mean
Sigma <- matrix(c(1, .5, .5, 1), 2) # Covariance matrix
# > Sigma
#      [,1] [,2]
# [1,]  1.0  0.5
# [2,]  0.5  1.0
# Generate sample from N(mu, Sigma)
bivn <- mvrnorm(5000, mu = mu, Sigma = Sigma ) # from Mass package
head(bivn)
## [,1] [,2]
## [1,] 0.8974715 1.29002952
## [2,] -0.5116497 -0.05340301
## [3,] 1.7614626 0.54181724
## [4,] 1.6630053 0.54090694
## [5,] -0.1375751 0.85575517
## [6,] -0.4170124 -2.25025936
# Calculate kernel density estimate
bivn.kde <- kde2d(bivn[,1], bivn[,2], n = 50) # from MASS package
image(bivn.kde) # from base graphics package
contour(bivn.kde, add = TRUE)
library(ellipse)
##
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
##
## pairs
rho <- cor(bivn)
y_on_x <- lm(bivn[,2] ~ bivn[,1]) # Regression Y ~ X
x_on_y <- lm(bivn[,1] ~ bivn[,2]) # Regression X ~ Y
plot_legend <- c("99% CI green", "95% CI red","90% CI blue",
"Y on X black", "X on Y brown")
plot(bivn, xlab = "X", ylab = "Y",
col = "dark blue",
main = "Bivariate Normal with Confidence Intervals")
lines(ellipse(rho), col="red") # ellipse() from ellipse package
lines(ellipse(rho, level = .99), col="green")
lines(ellipse(rho, level = .90), col="blue")
abline(y_on_x)
abline(x_on_y, col="brown")
legend(3,1,legend=plot_legend,cex = .5, bty = "n")
persp(bivn.kde, phi = 45, theta = 30, shade = .1, border = NA) # from base graphics package
# RGL interactive plot
library(rgl)
col2 <- heat.colors(length(bivn.kde$z))[rank(bivn.kde$z)]
persp3d(x=bivn.kde, col = col2)
5 Simulate a multivariate normal random variable with \(p=60\)
num.rows <- 60
num.cols <- 60
simulate <- function(n.row, n.col) {
  # initialize the matrix of density values
  prob.n <- matrix(0, nrow=n.row, ncol=n.col)
  # normal density evaluated along the columns
  x.seq <- seq(1, n.col)
  xx <- dnorm(x.seq, mean=n.col/2, sd=12)
  for (i in 1:n.row) {
    # normal density for row i, multiplied by the column densities
    y <- dnorm(i, mean=n.row/2, sd=12)
    prob.n[i,] <- y * xx
  }
  prob.n
}
res <- simulate(num.rows, num.cols)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
fig.n <- plot_ly(z = ~res)
fig.n <- fig.n %>% add_surface()
fig.n
fig.nc <- plot_ly(z = ~res,
contours = list(
z = list(
show=TRUE,
usecolormap=TRUE,
highlightcolor="#ff0000",
project=list(z=TRUE)
),
y = list(
show=TRUE,
usecolormap=TRUE,
highlightcolor="#ff0000",
project=list(y=TRUE)
),
x = list(
show=TRUE,
usecolormap=TRUE,
highlightcolor="#ff0000",
project=list(x=TRUE)
)
)
)
fig.nc <- fig.nc %>% add_surface()
fig.nc
color.vec2 <- rev(c("#F2DC5D", "#F2A359", "#DB9065", "#A4031F", "#240B36"))
# color.vec2 <- rev(c("#F7AEF8", "#B388EB", "#8093F1", "#72DDF7", "#F4F4ED"))
fig.n2 <- plot_ly(z = ~res, colors=color.vec2)
fig.n2 <- fig.n2 %>% add_surface()
fig.n2
If the columns of \(\boldsymbol{X}\) are independent standard normal vectors and \(\boldsymbol{L}\) is the lower-triangular Cholesky factor of a correlation matrix \(\boldsymbol{R}\) (so that \(\boldsymbol{L}\boldsymbol{L}^{t}=\boldsymbol{R}\)), then \(\boldsymbol{W}=\boldsymbol{L}\boldsymbol{X}\) has covariance \(\boldsymbol{L}\boldsymbol{L}^{t}=\boldsymbol{R}\):
\[\boldsymbol{W}=\mathrm{Cholesky}(\boldsymbol{R})\,\boldsymbol{X}\]
R = matrix(cbind(1,.80,.2, .80,1,.7, .2,.7,1),nrow=3)
U = t(chol(R))
nvars = dim(U)[1]
numobs = 100000
set.seed(1)
random.normal = matrix(rnorm(nvars*numobs,0,1), nrow=nvars, ncol=numobs);
X = U %*% random.normal
newX = t(X)
raw = as.data.frame(newX)
orig.raw = as.data.frame(t(random.normal))
names(raw) = c("response","predictor1","predictor2")
cor(raw)
## response predictor1 predictor2
## response 1.0000000 0.7997999 0.1998661
## predictor1 0.7997999 1.0000000 0.7000217
## predictor2 0.1998661 0.7000217 1.0000000
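For comparison (an alternative to the manual Cholesky factorization, not part of the original code; raw2 is a hypothetical name), MASS::mvrnorm can draw from \(N(\boldsymbol{0},\boldsymbol{R})\) directly, and with empirical = TRUE the sample correlation matrix reproduces R exactly:
library(MASS)
set.seed(1)
raw2 <- as.data.frame(mvrnorm(numobs, mu = rep(0, nvars),
                              Sigma = R, empirical = TRUE)) # R used as the covariance (unit variances)
names(raw2) <- c("response", "predictor1", "predictor2")
cor(raw2) # reproduces R up to rounding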
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
raw %>%
head(n = 100) %>%
ggpairs(title="correlogram with ggpairs()")
library(GGally)
orig.raw %>%
head(n = 100) %>%
ggpairs(title="correlogram with ggpairs()")
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 15\\ 10\\ 5 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 16 & 1 & -5\\ 1 & 36 & 1\\ -5 & 1 & 36 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 37\\ 40\\ 31 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 100 & 1 & -1\\ 1 & 1 & 4\\ -1 & 4 & 4 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 58\\ 55\\ 49 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 4 & -1 & 1\\ -1 & 25 & -3\\ 1 & -3 & 25 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 65\\ 73\\ 74 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 25 & 4 & 2\\ 4 & 4 & 2\\ 2 & 2 & 49 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 97\\ 81\\ 89 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 16 & 9 & -2\\ 9 & 9 & 8\\ -2 & 8 & 4 \end{bmatrix} \end{align} \]
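Each pair \((\boldsymbol{\mu},\boldsymbol{\Sigma})\) listed above can be simulated with the same recipe used earlier in this document. The sketch below (with hypothetical object names, written out for the first pair only) draws a sample with MASS::mvrnorm and checks its empirical moments; note that mvrnorm stops with an error if Sigma has negative eigenvalues beyond a small tolerance, so each matrix can be checked for positive definiteness first.
library(MASS)
mu_ex <- c(15, 10, 5)                        # first mean vector above
Sigma_ex <- matrix(c(16, 1, -5,
                     1, 36, 1,
                     -5, 1, 36), ncol = 3)   # first covariance matrix above
all(eigen(Sigma_ex, symmetric = TRUE, only.values = TRUE)$values > 0) # positive definite?
sim_ex <- mvrnorm(n = 1000, mu = mu_ex, Sigma = Sigma_ex)
colMeans(sim_ex) # should be close to mu_ex
cov(sim_ex)      # should be close to Sigma_ex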