Es una distribucion de probabilidad conjunta para un vector \(p\)-variado, \(\boldsymbol{X}=\left({X}_{1},{X}_{2},\ldots,{X}_{p}\right)^{t}\), tal que se tiene que:
\[ P(\boldsymbol{X}=\boldsymbol{x})=\frac{1}{\sqrt{(2{\pi})^{{p}}\left|\boldsymbol{\Sigma}\right|}}\exp{\left[-\frac{1}{2}\left(\boldsymbol{x}-\boldsymbol{\mu}\right)^t\boldsymbol{\Sigma}^{-1}\left(\boldsymbol{x}-\boldsymbol{\mu}\right)\right]}\text{ con }\boldsymbol{\Sigma}\text{ definida positiva }\text{ y }\boldsymbol{x}{\in}\mathbb{R}^{p} \]
\[ P(\boldsymbol{X}{\leq}\boldsymbol{x})={\int}_{-{\infty}}^{{x}_{1}}{\int}_{-{\infty}}^{{x}_{2}}{\cdots}{\int}_{-{\infty}}^{{x}_{p}}{\frac{1}{\sqrt{(2{\pi})^{{p}}\left|\boldsymbol{\Sigma}\right|}}\exp{\left[-\frac{1}{2}\left(\boldsymbol{x}-\boldsymbol{\mu}\right)^t\boldsymbol{\Sigma}^{-1}\left(\boldsymbol{x}-\boldsymbol{\mu}\right)\right]}{{{dx}_{p}{\cdots}{dx}_{2}}{{dx}_{1}}}\text{ con }\boldsymbol{\Sigma}\text{ definida positiva }\text{ y }\boldsymbol{x}{\in}\mathbb{R}^{p}} \]
\[ \begin{align} E\left(\boldsymbol{X}\right)&=\begin{bmatrix} E\left({X}_{1}\right)\\ E\left({X}_{2}\right)\\ \vdots\\ E\left({X}_{p}\right)\\ \end{bmatrix}\\ &=\begin{bmatrix} {\mu}_{1}\\ {\mu}_{2}\\ \vdots\\ {\mu}_{p}\\ \end{bmatrix}\\ &=\boldsymbol{\mu} \end{align} \]
\[ \begin{align} V\left(\boldsymbol{X}\right)&=E\left\{\begin{bmatrix} {X}_{1}-E\left({X}_{1}\right)\\ {X}_{2}-E\left({X}_{2}\right)\\ \vdots\\ {X}_{p}-E\left({X}_{p}\right) \end{bmatrix}\begin{bmatrix} {X}_{1}-E\left({X}_{1}\right) & {X}_{2}-E\left({X}_{2}\right) & \cdots & {X}_{p}-E\left({X}_{p}\right) \end{bmatrix}\right\}\\ &=E\left\{\begin{bmatrix} {X}_{1}-{\mu}_{1}\\ {X}_{2}-{\mu}_{2}\\ \vdots\\ {X}_{p}-{\mu}_{p} \end{bmatrix}\begin{bmatrix} {X}_{1}-{\mu}_{1} & {X}_{2}-{\mu}_{2} & \cdots & {X}_{p}-{\mu}_{p} \end{bmatrix}\right\}\\ &=E\left\{\begin{bmatrix} \left({X}_{1}-{\mu}_{1}\right)\left({X}_{1}-{\mu}_{1}\right) & \left({X}_{1}-{\mu}_{1}\right)\left({X}_{2}-{\mu}_{2}\right) & \cdots & \left({X}_{1}-{\mu}_{1}\right)\left({X}_{p}-{\mu}_{p}\right)\\ \left({X}_{2}-{\mu}_{2}\right)\left({X}_{1}-{\mu}_{1}\right) & \left({X}_{2}-{\mu}_{2}\right)\left({X}_{2}-{\mu}_{2}\right) & \cdots & \left({X}_{2}-{\mu}_{2}\right)\left({X}_{p}-{\mu}_{p}\right)\\ \vdots & \vdots & \ddots & \vdots\\ \left({X}_{p}-{\mu}_{p}\right)\left({X}_{1}-{\mu}_{1}\right) & \left({X}_{p}-{\mu}_{p}\right)\left({X}_{2}-{\mu}_{2}\right) & \cdots & \left({X}_{p}-{\mu}_{p}\right)\left({X}_{p}-{\mu}_{p}\right)\\ \end{bmatrix}\right\}\\ &=E\left\{\begin{bmatrix} {\Sigma}_{11} & {\Sigma}_{12} & \cdots & {\Sigma}_{1p}\\ {\Sigma}_{21} & {\Sigma}_{22} & \cdots & {\Sigma}_{2p}\\ \vdots & \vdots & \ddots & \vdots\\ {\Sigma}_{p1} & {\Sigma}_{p2} & \cdots & {\Sigma}_{pp}\\ \end{bmatrix}\right\}\\&=\boldsymbol{\Sigma} \end{align} \]
\[ {X}{\sim}N(\boldsymbol{\mu},\boldsymbol{\Sigma}) \]
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} {10}\\ {5} \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} {10} & {2}\\ {2} & {9} \end{bmatrix} \end{align} \]
library(MASS)
<- 100
sample_size <- c(10, 5)
sample_meanvector <- matrix(c(10, 2, 2, 9),
sample_covariance_matrix ncol = 2)
<- mvrnorm(n = sample_size,
sample_distribution mu = sample_meanvector,
Sigma = sample_covariance_matrix)
head(sample_distribution)
## [,1] [,2]
## [1,] 6.319688 5.8218644
## [2,] 11.378163 -0.6815746
## [3,] 13.881235 6.6938157
## [4,] 8.800203 4.5545226
## [5,] 6.307523 -1.6912667
## [6,] 13.217638 7.2254946
library(ggplot2)
<- sample_meanvector
m <- sample_covariance_matrix
sigma <- expand.grid(s.1 = seq(min(sample_distribution[,1]), max(sample_distribution[,1]), length.out=200), s.2 = seq(min(sample_distribution[,2]), max(sample_distribution[,2]), length.out=200))
data.grid <- cbind(data.grid, prob = mvtnorm::dmvnorm(data.grid, mean = m, sigma = sigma))
q.samp ggplot(q.samp, aes(x=s.1, y=s.2)) +
geom_raster(aes(fill = prob)) +
coord_fixed(xlim = c(min(sample_distribution[,1]), max(sample_distribution[,1])), ylim = c(min(sample_distribution[,2]), max(sample_distribution[,2])), ratio = 1)
library(ggplot2)
<- sample_meanvector
m <- sample_covariance_matrix
sigma <- expand.grid(s.1 = seq(min(sample_distribution[,1]), max(sample_distribution[,1]), length.out=200), s.2 = seq(min(sample_distribution[,2]), max(sample_distribution[,2]), length.out=200))
data.grid <- cbind(data.grid, prob = mvtnorm::dmvnorm(data.grid, mean = m, sigma = sigma))
q.samp ggplot(q.samp, aes(x=s.1, y=s.2, z=prob)) +
geom_contour() +
coord_fixed(xlim = c(min(sample_distribution[,1]), max(sample_distribution[,1])), ylim = c(min(sample_distribution[,2]), max(sample_distribution[,2])), ratio = 1)
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} {7}\\ {5}\\ {9}\\ {20} \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} {5} & {4} & {3} & {2} & {1}\\ {4} & {5} & {4} & {3} & {2}\\ {3} & {4} & {5} & {4} & {3}\\ {2} & {3} & {4} & {5} & {4}\\ {1} & {2} & {3} & {4} & {5} \end{bmatrix} \end{align} \]
library(MASS)
<- 1000
sample_size <- c(10, 5, 7, 9, 20)
sample_meanvector <- matrix(c(5, 4, 3, 2, 1, 4, 5, 4, 3, 2,
sample_covariance_matrix 3, 4, 5, 4, 3, 2, 3, 4, 5, 4, 1,
2, 3, 4, 5), ncol = 5)
<- mvrnorm(n = sample_size,
sample_distribution mu = sample_meanvector,
Sigma = sample_covariance_matrix)
head(sample_distribution)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 9.858567 5.409073 5.633253 9.422846 21.08710
## [2,] 10.543810 3.071987 4.965511 5.443857 17.24773
## [3,] 8.424290 1.182142 2.398722 5.668373 17.35142
## [4,] 12.290150 8.726828 10.608007 11.997714 21.33830
## [5,] 9.331325 1.854857 4.557036 6.740062 19.28182
## [6,] 14.522071 10.408433 12.280664 15.008497 25.05795
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} {0}\\ {0} \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} {2} & {-1}\\ {-1} & {2} \end{bmatrix} \end{align} \]
library(mnormt)
set.seed(0)
<- seq(-4, 4, 0.1)
x1 <- seq(-5, 5, 0.1)
x2 <- c(0, 0)
mean <- matrix(c(2, -1, -1, 2), nrow=2)
cov <- function(x1, x2) dmnorm(cbind(x1, x2), mean, cov)
f <- outer(x1, x2, f) y
a. Gráfico de contorno
contour(x1, x2, y)
b. Gráfico de perspectiva
persp(x1, x2, y, theta=-20, phi=20, col = 'blue',
expand=0.8, ticktype='detailed')
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} {0}\\ {0} \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} {1} & {\frac{1}{2}}\\ {\frac{1}{2}} & {1} \end{bmatrix} \end{align} \]
library(MASS)
# Simulate bivariate normal data
<- c(0,0) # Mean
mu <- matrix(c(1, .5, .5, 1), 2) # Covariance matrix
Sigma # > Sigma
# [,1] [,2]
# [1,] 1.0 0.1
# [2,] 0.1 1.0
# Generate sample from N(mu, Sigma)
<- mvrnorm(5000, mu = mu, Sigma = Sigma ) # from Mass package
bivn head(bivn)
## [,1] [,2]
## [1,] 0.8974715 1.29002952
## [2,] -0.5116497 -0.05340301
## [3,] 1.7614626 0.54181724
## [4,] 1.6630053 0.54090694
## [5,] -0.1375751 0.85575517
## [6,] -0.4170124 -2.25025936
# Calculate kernel density estimate
<- kde2d(bivn[,1], bivn[,2], n = 50) # from MASS package bivn.kde
image(bivn.kde) # from base graphics package
contour(bivn.kde, add = TRUE)
library(ellipse)
##
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
##
## pairs
<- cor(bivn)
rho <- lm(bivn[,2] ~ bivn[,1]) # Regressiion Y ~ X
y_on_x <- lm(bivn[,1] ~ bivn[,2]) # Regression X ~ Y
x_on_y <- c("99% CI green", "95% CI red","90% CI blue",
plot_legend "Y on X black", "X on Y brown")
plot(bivn, xlab = "X", ylab = "Y",
col = "dark blue",
main = "Bivariate Normal with Confidence Intervals")
lines(ellipse(rho), col="red") # ellipse() from ellipse package
lines(ellipse(rho, level = .99), col="green")
lines(ellipse(rho, level = .90), col="blue")
abline(y_on_x)
abline(x_on_y, col="brown")
legend(3,1,legend=plot_legend,cex = .5, bty = "n")
persp(bivn.kde, phi = 45, theta = 30, shade = .1, border = NA) # from base graphics package
# RGL interactive plot
library(rgl)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl.init' failed, running with 'rgl.useNULL = TRUE'.
<- heat.colors(length(bivn.kde$z))[rank(bivn.kde$z)]
col2 persp3d(x=bivn.kde, col = col2)
5 Simular una variable aleatoria multivariada normal con \(p=60\)
<- 60
num.rows <- 60
num.cols
<- function(n.row, n.col) {
simulate # initiate the matrix
<- matrix(0, nrow=num.rows, ncol=num.cols)
prob.n
<- seq(1, n.row)
x.seq <- seq(1, n.col)
y.seq
<- dnorm(x.seq, mean=n.row/2, sd=12)
xx
for (i in 1:n.row) {
<- dnorm(i, mean=n.row/2, sd=12)
y <- y * xx
prob.n[i,]
}
prob.n;
}<- simulate(num.rows, num.cols) res
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
<- plot_ly(z = ~res)
fig.n <- fig.n %>% add_surface()
fig.n fig.n
<- plot_ly(z = ~res,
fig.nc contours = list(
z = list(
show=TRUE,
usecolormap=TRUE,
highlightcolor="#ff0000",
project=list(z=TRUE)
),y = list(
show=TRUE,
usecolormap=TRUE,
highlightcolor="#ff0000",
project=list(y=TRUE)
),
x = list(
show=TRUE,
usecolormap=TRUE,
highlightcolor="#ff0000",
project=list(x=TRUE)
)
)
)<- fig.nc %>% add_surface()
fig.nc fig.nc
<- rev(c("#F2DC5D", "#F2A359", "#DB9065", "#A4031F", "#240B36"))
color.vec2 # color.vec2 <- rev(c("#F7AEF8", "#B388EB", "#8093F1", "#72DDF7", "#F4F4ED"))
<- plot_ly(z = ~res, colors=color.vec2)
fig.n2 <- fig.n2 %>% add_surface()
fig.n2 fig.n2
\[\boldsymbol{W}=Cholesky(\boldsymbol{R})\boldsymbol{X}\]
= matrix(cbind(1,.80,.2, .80,1,.7, .2,.7,1),nrow=3)
R = t(chol(R))
U = dim(U)[1]
nvars = 100000
numobs set.seed(1)
= matrix(rnorm(nvars*numobs,0,1), nrow=nvars, ncol=numobs);
random.normal = U %*% random.normal
X = t(X)
newX = as.data.frame(newX)
raw = as.data.frame(t(random.normal))
orig.raw names(raw) = c("response","predictor1","predictor2")
cor(raw)
## response predictor1 predictor2
## response 1.0000000 0.7997999 0.1998661
## predictor1 0.7997999 1.0000000 0.7000217
## predictor2 0.1998661 0.7000217 1.0000000
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
%>%
raw head(n = 100) %>%
ggpairs(title="correlogram with ggpairs()")
library(GGally)
%>%
orig.raw head(n = 100) %>%
ggpairs(title="correlogram with ggpairs()")
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 15\\ 10\\ 5 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 16 & 1 & -5\\ 1 & 36 & 1\\ -5 & 1 & 36 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 37\\ 40\\ 31 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 100 & 1 & -1\\ 1 & 1 & 4\\ -1 & 4 & 4 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 58\\ 55\\ 49 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 4 & -1 & 1\\ -1 & 25 & -3\\ 1 & -3 & 25 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 65\\ 73\\ 74 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 25 & 4 & 2\\ 4 & 4 & 2\\ 2 & 2 & 49 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\mu}&=\begin{bmatrix} 97\\ 81\\ 89 \end{bmatrix} \end{align} \]
\[ \begin{align} \boldsymbol{\Sigma}&=\begin{bmatrix} 16 & 9 & -2\\ 9 & 9 & 8\\ -2 & 8 & 4 \end{bmatrix} \end{align} \]