library(MASS)
library(tidyverse)
library(mvtnorm)
library(MVN)
library(plotly)
library(RColorBrewer)
library(metR)
library(plot3D)
library(ellipse)
Neka je \(\mathbf{X}=\begin{bmatrix}X_1 & X_2 & \cdots & X_k\end{bmatrix}^T\) slučajni vektor koji ima višedimenzionalnu Gaussovu distribuciju \(\mathbf{X}\sim N_k(\mathbf{\mu},\Sigma)\). Funkcija gustoće slučajnog vektora \(\mathbf{X}\) dana je s \[f(\mathbf{X})=\frac{1}{\sqrt{(2\pi)^k\det{\Sigma}}}\exp{\left(-\frac{1}{2}(\mathbf{X}-\mathbf{\mu})^T\Sigma^{-1}(\mathbf{X}-\mathbf{\mu})\right)}\] pri čemu je \(\mathbf{\mu}\) vektor očekivanja, a \(\Sigma\) pozitivno definitna matrica kovarijanci od slučajnog vektora \(\mathbf{X}\). \[\mathbf{\mu}=\begin{bmatrix}\mu_1\\ \mu_2\\ \vdots\\ \mu_k\end{bmatrix}\qquad \Sigma=\begin{bmatrix}\sigma_{11}&\sigma_{12}&\cdots& \sigma_{1k}\\ \sigma_{21}&\sigma_{22}&\cdots& \sigma_{2k}\\ \vdots&\vdots&\ddots&\vdots\\ \sigma_{k1}&\sigma_{k2}&\cdots& \sigma_{kk}\end{bmatrix}\]
Funkcija distribucije slučajnog vektora \(\mathbf{X}\) definirana je s \[F(x_1,x_2,\dotsc,x_n)=P(X_1\leqslant x_1, X_2\leqslant x_2,\dotsc, X_n\leqslant x_n)=\int\limits_{-\infty}^{x_1}\int\limits_{-\infty}^{x_2}\dots\int\limits_{-\infty}^{x_n}{f(u_1,u_2,\dotsc,u_n)\,\mathrm{d}u_1}\mathrm{d}u_2\cdots\mathrm{d}u_n.\]
Pogledajmo dvodimenzionalni slučaj s parametrima
\[\mathbf{\mu}=\begin{bmatrix}-1\\ 2\end{bmatrix}\qquad \Sigma=\begin{bmatrix}1&0.5\\ 0.5&3\end{bmatrix}\]
mu1 <- c(-1,2)
sigma1 <- matrix(c(1, 0.5, 0.5, 3), nrow = 2)
sigma1
## [,1] [,2]
## [1,] 1.0 0.5
## [2,] 0.5 3.0
Za računanje vrijednosti funkcije gustoće koristimo naredbu
dmvnorm iz biblioteke mvtnorm.
x1 <- seq(-6, 4, length = 50)
y1 <- seq(-5, 9, length = 50)
mreza1 <- expand_grid(x = x1, y = y1)
dens1 <- dmvnorm(as.matrix(mreza1), mean = mu1, sigma = sigma1)
dens1 <- matrix(dens1, nrow = 50, byrow = TRUE)
persp3D(x = x1, y = y1, z = dens1, col = "pink",
shade = NA, border = "black", ticktype = "detailed",
expand = 0.6, bty = "g", theta = 30, phi = 30,
xlab = "x", ylab = "y", zlab = "z",
lighting = list(ambient = 0.5, exponent = 70, specular = 0.2),
ltheta = 50)
plot_ly(x = x1, y = y1, z = dens1) %>% add_surface(colors = "Set2") %>%
layout(scene = list(aspectratio = list(x = 0.7, y = 1, z = 0.5)))
Nivo linije su općenito elipse sa središtem u vektoru očekivanih vrijednosti \((-1,2)\). U nastavku dajemo nekoliko primjera vizualizacija.
plot_ly(x = x1, y = y1, z = dens1,
type = "contour", autocontour = F,
contours = list(start = 0.01, end = 0.09, size = 0.01, showlabels = TRUE),
colors = "Set2") %>% layout(scene = list(aspectration=list(x = 1, y = 1)))
Interaktivna slika s nivo-linijama
mreza2 <- expand_grid(x = seq(-6, 4, length = 150),
y = seq(-5, 9, length = 150))
dens2 <- dmvnorm(as.matrix(mreza2), mean = mu1, sigma = sigma1)
mreza2 <- mreza2 %>% mutate(z = dens2)
ggplot(mreza2, aes(x = x, y = y, z = z)) + geom_contour(bins = 40) +
coord_equal() + scale_x_continuous(breaks = -6:4, limits = c(-6,4)) +
scale_y_continuous(breaks = -3:7, limits = c(-3,7))
ggplot2 slika bez bojanja
ggplot(mreza2, aes(x = x, y = y)) + geom_raster(aes(fill = z)) +
scale_fill_gradientn(colors = brewer.pal(8, "Set2")) + coord_fixed() +
scale_x_continuous(breaks = -6:4, limits = c(-6,4)) +
scale_y_continuous(breaks = -3:7, limits = c(-3,7))
ggplot2 slika s bojanjem
ggplot(mreza2, aes(x, y, z = z)) +
geom_contour_fill(bins = 10) +
geom_contour(color = "black", bins = 10) +
geom_text_contour(skip = 2, stroke = 0.15, label.placer = label_placer_minmax()) +
scale_fill_gradientn(name = "z", colors = brewer.pal(8, "Set2")) + coord_fixed() +
scale_x_continuous(breaks = -6:4, limits = c(-6,4)) +
scale_y_continuous(breaks = -3:7, limits = c(-3,7))
metR slika s oznakama
Pogledajmo kako parametar \(\Sigma\) utječe na izgled nivo-linija.
\[\mathbf{\mu}=\begin{bmatrix}-1\\ 2\end{bmatrix}, \Sigma_1=\begin{bmatrix}1&0\\ 0&1\end{bmatrix}, \Sigma_2=\begin{bmatrix}0.5&0\\ 0&1\end{bmatrix}, \Sigma_3=\begin{bmatrix}1&0.3\\ 0.3&1\end{bmatrix}\\[20pt] \Sigma_4=\begin{bmatrix}1&-0.6\\ -0.6&1\end{bmatrix}, \Sigma_5=\begin{bmatrix}1&0.8\\ 0.8&1\end{bmatrix}, \Sigma_6=\begin{bmatrix}1&0.4\\ 0.4&0.5\end{bmatrix}\]
sigme <- list(sigma1 = matrix(c(1, 0, 0, 1), nrow = 2),
sigma2 = matrix(c(0.5, 0, 0, 1), nrow = 2),
sigma3 = matrix(c(1, 0.3, 0.3, 1), nrow = 2),
sigma4 = matrix(c(1, -0.6, -0.6, 1), nrow = 2),
sigma5 = matrix(c(1, 0.8, 0.8, 1), nrow = 2),
sigma6 = matrix(c(1, 0.4, 0.4, 0.5), nrow = 2))
mreza3 <- expand_grid(x = seq(-4, 3, length = 150),
y = seq(-1, 5, length = 150))
podaci <- map_dfr(sigme, dmvnorm, x = as.matrix(mreza3), mean = c(-1,2))
podaci <- mreza3 %>% bind_cols(podaci) %>%
pivot_longer(-c("x", "y"), names_to = "sigma", values_to = "vrijednost")
ggplot(podaci, aes(x = x, y = y, z = vrijednost)) +
geom_contour2(aes(label = after_stat(level)), color = "blue", label_colour = "black",
binwidth = 0.05, label.placer = label_placer_fraction()) +
facet_wrap(vars(sigma)) + coord_fixed()
Osi elipse imaju smjerove svojstvenih vektora matrice \(\Sigma\), a omjer duljina osi jednak je omjeru svojstvenih vrijednosti matrice \(\Sigma\).
Vrijednost višedimenzionalne Gaussove funkcije distribucije možemo
odrediti pomoću naredbe pmvnorm iz biblioteke
mvtnorm. Pogledajmo opet dvodimenzionalni slučaj s istim
parametrima kao i kod funkcije gustoće.
mreza4 <- mesh(x1, y1)
mreza4 <- tibble(x = as.numeric(mreza4$x), y = as.numeric(mreza4$y))
mreza4 <- mreza4 %>% rowwise() %>%
mutate(rez1 = pmvnorm(upper = c(x,y), mean = mu1, sigma = sigma1, keepAttr = FALSE),
rez2 = pmvnorm(lower = c(x,y), mean = mu1, sigma = sigma1, keepAttr = FALSE))
par(mai = c(0,0.3,0,0.3))
persp3D(x = x1, y = y1, z = matrix(mreza4$rez1, nrow = 50), col = "pink",
shade = NA, border = "black", ticktype = "detailed",
expand = 0.6, bty = "g", theta = 30, phi = 30,
xlab = "x", ylab = "y", zlab = "z",
lighting = list(ambient = 0.5, exponent = 50, specular = 0.2),
ltheta = 30)
Funkcija distribucije \(F(x,y)=P(X\leqslant x, Y\leqslant y)\)
par(mai = c(0,0.3,0,0.3))
persp3D(x = x1, y = y1, z = matrix(mreza4$rez2, nrow = 50), col = "pink",
shade = NA, border = "black", ticktype = "detailed",
expand = 0.6, bty = "g", theta = 60, phi = 30,
xlab = "x", ylab = "y", zlab = "z",
lighting = list(ambient = 0.5, exponent = 50, specular = 0.2),
ltheta = 30)
\(G(x,y)=P(X\geqslant x, Y\geqslant y)\)
plot_ly(x = x1, y = y1, z = matrix(mreza4$rez1, nrow = 50)) %>%
add_surface(colors = "Set2") %>%
layout(scene = list(aspectratio = list(x = 0.7, y = 1, z = 0.5),
camera = list(eye = list(x = -1.25, y = -1.25, z = 1.25))),
title = "Funkcija distribucije F(x, y) = P(X ⩽ x, Y ⩽ y)")
plot_ly(x = x1, y = y1, z = matrix(mreza4$rez2, nrow = 50)) %>%
add_surface(colors = "Set2") %>%
layout(scene = list(aspectratio = list(x = 0.7, y = 1, z = 0.5),
camera = list(eye = list(x = -1.25, y = -1.25, z = 1.25))),
title = "Funkcija G(x, y) = P(X ⩾ x, Y ⩾ y)")
plot_ly(x = x1, y = y1, z = matrix(mreza4$rez1, nrow = 50),
type = "contour", autocontour = F,
contours = list(start = 0.001, end = 1, size = 0.1, showlabels = TRUE),
colors = "Set2") %>%
layout(scene = list(aspectratio = list(x = 1, y = 1)),
title = "Funkcija distribucije F(x, y) = P(X ⩽ x, Y ⩽ y)")
Interaktivna slika s nivo-linijama
plot_ly(x = x1, y = y1, z = matrix(mreza4$rez2, nrow = 50),
type = "contour", autocontour = F,
contours = list(start = 0.001, end = 1, size = 0.1, showlabels = TRUE),
colors = "Set2") %>%
layout(scene = list(aspectratio = list(x = 1, y = 1)),
title = "Funkcija G(x, y) = P(X ⩾ x, Y ⩾ y)")
Interaktivna slika s nivo-linijama
mreza5 <- mesh(seq(-6, 4, length = 150), seq(-5, 9, length = 150))
mreza5 <- tibble(x = as.numeric(mreza5$x), y = as.numeric(mreza5$y))
mreza5 <- mreza5 %>% rowwise() %>%
mutate(rez1 = pmvnorm(upper = c(x,y), mean = mu1, sigma = sigma1, keepAttr = FALSE),
rez2 = pmvnorm(lower = c(x,y), mean = mu1, sigma = sigma1, keepAttr = FALSE))
ggplot(mreza5, aes(x = x, y = y, z = rez1)) + geom_contour(bins = 40) +
coord_equal() + scale_x_continuous(breaks = -6:4, limits = c(-6,4)) +
scale_y_continuous(breaks = -3:7, limits = c(-3,7)) +
ggtitle("Funkcija distribucije F(x, y) = P(X ⩽ x, Y ⩽ y)") +
theme(plot.title = ggtext::element_markdown())
ggplot2 slika bez bojanja
ggplot(mreza5, aes(x = x, y = y, z = rez2)) + geom_contour(bins = 40) +
coord_equal() + scale_x_continuous(breaks = -6:4, limits = c(-6,4)) +
scale_y_continuous(breaks = -3:7, limits = c(-3,7)) +
ggtitle("Funkcija G(x, y) = P(X ⩾ x, Y ⩾ y)") +
theme(plot.title = ggtext::element_markdown())
ggplot2 slika bez bojanja
ggplot(mreza5, aes(x = x, y = y)) + geom_raster(aes(fill = rez1)) +
scale_fill_gradientn(name = "z", colors = brewer.pal(8, "Set2")) + coord_fixed() +
scale_x_continuous(breaks = -6:4, limits = c(-6,4)) +
scale_y_continuous(breaks = -3:7, limits = c(-3,7)) +
ggtitle("Funkcija distribucije F(x, y) = P(X ⩽ x, Y ⩽ y)") +
theme(plot.title = ggtext::element_markdown())
ggplot2 slika s bojanjem
ggplot(mreza5, aes(x = x, y = y)) + geom_raster(aes(fill = rez2)) +
scale_fill_gradientn(name = "z", colors = brewer.pal(8, "Set2")) + coord_fixed() +
scale_x_continuous(breaks = -6:4, limits = c(-6,4)) +
scale_y_continuous(breaks = -3:7, limits = c(-3,7)) +
ggtitle("Funkcija G(x, y) = P(X ⩾ x, Y ⩾ y)") +
theme(plot.title = ggtext::element_markdown())
ggplot2 slika s bojanjem
ggplot(mreza5, aes(x, y, z = rez1)) +
geom_contour_fill(bins = 15) +
geom_contour(color = "black", bins = 15) +
geom_text_contour(skip = 1, stroke = 0.15, label.placer = label_placer_minmax()) +
scale_fill_gradientn(name = "z", colors = brewer.pal(8, "Set2")) + coord_fixed() +
scale_x_continuous(breaks = -6:4, limits = c(-6,4)) +
scale_y_continuous(breaks = -3:7, limits = c(-3,7)) +
ggtitle("Funkcija distribucije F(x, y) = P(X ⩽ x, Y ⩽ y)") +
theme(plot.title = ggtext::element_markdown())
metR slika s oznakama
ggplot(mreza5, aes(x, y, z = rez2)) +
geom_contour_fill(bins = 15) +
geom_contour(color = "black", bins = 15) +
geom_text_contour(skip = 1, stroke = 0.15, label.placer = label_placer_minmax()) +
scale_fill_gradientn(name = "z", colors = brewer.pal(8, "Set2")) + coord_fixed() +
scale_x_continuous(breaks = -6:4, limits = c(-6,4)) +
scale_y_continuous(breaks = -3:7, limits = c(-3,7)) +
ggtitle("Funkcija G(x, y) = P(X ⩾ x, Y ⩾ y)") +
theme(plot.title = ggtext::element_markdown())
metR slika s oznakama
Napomena.
pmvnorm iz biblioteke mvtnorm nam
omogućuje da možemo izračunati vrijednosti svih takvih funkcija u nekoj
točki \((a,b)\) tako da postavimo
odgovarajuće vrijednosti parametara lower i
upper.| Vjerojatnost | postavke za lower i
upper |
|---|---|
| \(P(X\leqslant a, Y\leqslant b)\) | lower = c(-Inf, -Inf),
upper = c(a, b) |
| \(P(X\geqslant a, Y\geqslant b)\) | lower = c(a, b),
upper = c(Inf, Inf) |
| \(P(X\leqslant a, Y\geqslant b)\) | lower = c(-Inf, b),
upper = c(a, Inf) |
| \(P(X\geqslant a, Y\leqslant b)\) | lower = c(a, -Inf),
upper = c(Inf, b) |
lower = c(u, s) i
upper = c(v, t).lower i
upper nisu eksplicitno navedene, tada je
lower = c(-Inf, -Inf) i
upper = c(Inf, Inf).lower i upper moraju
biti \(n\)-dimenzionalni vektori.pmvnorm(upper = c(2, 4), mean = mu1, sigma = sigma1)
## [1] 0.8751033
## attr(,"error")
## [1] 1e-15
## attr(,"msg")
## [1] "Normal Completion"
pmvnorm(lower = c(-Inf, -Inf), upper = c(2, 4), mean = mu1, sigma = sigma1)
## [1] 0.8751033
## attr(,"error")
## [1] 1e-15
## attr(,"msg")
## [1] "Normal Completion"
pmvnorm(upper = c(2, 4), mean = mu1, sigma = sigma1, keepAttr = FALSE)
## [1] 0.8751033
Vjerojatnost \(P(X\leqslant2, Y\leqslant4)\) jednaka je volumenu tijela kojeg funkcija gustoće zatvara s \(xy\)-ravninom na beskonačnom pravokutniku \(\langle -\infty,2]\times\langle -\infty,4]\) kako je prikazano na donjoj slici.
t1 <- seq(-6, 2, length = 30)
t2 <- seq(-5, 4, length = 30)
grid_t1t2 <- mesh(t1, t2)
grid_t1t2 <- tibble(x = as.numeric(grid_t1t2$x), y = as.numeric(grid_t1t2$y))
t12 <- dmvnorm(grid_t1t2, mean = mu1, sigma = sigma1)
t12 <- matrix(t12, nrow = 30)
t3 <- seq(2, 4, length = 15)
t4 <- seq(-5, 9, length = 40)
grid_t3t4 <- mesh(t3, t4)
grid_t3t4 <- tibble(x = as.numeric(grid_t3t4$x), y = as.numeric(grid_t3t4$y))
t34 <- dmvnorm(grid_t3t4, mean = mu1, sigma = sigma1)
t34 <- matrix(t34, nrow = 15)
t5 <- seq(-6, 2, length = 30)
t6 <- seq(4, 9, length = 20)
grid_t5t6 <- mesh(t5, t6)
grid_t5t6 <- tibble(x = as.numeric(grid_t5t6$x), y = as.numeric(grid_t5t6$y))
t56 <- dmvnorm(grid_t5t6, mean = mu1, sigma = sigma1)
t56 <- matrix(t56, nrow = 30)
persp3D(x = t1, y = t2, z = t12, curtain = TRUE, col = "pink",
shade = 0.1, ticktype = "detailed", border = "black",
expand = 0.6, bty = "g", theta = 50, phi = 30,
xlab = "x", ylab = "y", zlab = "z",
xlim = c(-6,4), ylim = c(-5,9), plot = FALSE)
persp3D(x = t3, y = t4, z = t34, col = "lightgreen",
shade = NA, alpha = 0.2,
lighting = list(ambient = 0.5, exponent = 70, specular = 0.2),
ltheta = 50, add = TRUE)
persp3D(x = t5, y = t6, z = t56, col = "lightgreen",
shade = NA, alpha = 0.4,
lighting = list(ambient = 0.5, exponent = 70, specular = 0.2),
ltheta = 50, add = TRUE)
pmvnorm(lower = c(-3, -1), upper = c(2, 3), mean = mu1, sigma = sigma1)
## [1] 0.6586002
## attr(,"error")
## [1] 1e-15
## attr(,"msg")
## [1] "Normal Completion"
pmvnorm(lower = c(-3, -1), upper = c(2, 3), mean = mu1, sigma = sigma1, keepAttr = FALSE)
## [1] 0.6586002
Vjerojatnost \(P(-3\leqslant X\leqslant2, -1\leqslant Y\leqslant3)\) jednaka je volumenu tijela kojeg funkcija gustoće zatvara s \(xy\)-ravninom na pravokutniku \([-3,2]\times[-1,3]\) kako je prikazano na donjoj slici.
x2 <- seq(-3, 2, length = 20)
y2 <- seq(-1, 3, length = 20)
grid <- mesh(x2, y2)
grid <- tibble(x = as.numeric(grid$x), y = as.numeric(grid$y))
z2 <- dmvnorm(grid, mean = mu1, sigma = sigma1)
z2 <- matrix(z2, nrow = 20)
x3 <- seq(-6, 4, length = 40)
y3 <- seq(-5, -1, length = 30)
grid3 <- mesh(x3, y3)
grid3 <- tibble(x = as.numeric(grid3$x), y = as.numeric(grid3$y))
z3 <- dmvnorm(grid3, mean = mu1, sigma = sigma1)
z3 <- matrix(z3, nrow = 40)
x4 <- seq(-6, 4, length = 40)
y4 <- seq(3, 9, length = 30)
grid4 <- mesh(x4, y4)
grid4 <- tibble(x = as.numeric(grid4$x), y = as.numeric(grid4$y))
z4 <- dmvnorm(grid4, mean = mu1, sigma = sigma1)
z4 <- matrix(z4, nrow = 40)
x5 <- seq(2, 4, length = 10)
y5 <- seq(-1, 3, length = 20)
grid5 <- mesh(x5, y5)
grid5 <- tibble(x = as.numeric(grid5$x), y = as.numeric(grid5$y))
z5 <- dmvnorm(grid5, mean = mu1, sigma = sigma1)
z5 <- matrix(z5, nrow = 10)
x6 <- seq(-6, -3, length = 16)
y6 <- seq(-1, 3, length = 20)
grid6 <- mesh(x6, y6)
grid6 <- tibble(x = as.numeric(grid6$x), y = as.numeric(grid6$y))
z6 <- dmvnorm(grid6, mean = mu1, sigma = sigma1)
z6 <- matrix(z6, nrow = 16)
persp3D(x = x2, y = y2, z = z2, curtain = TRUE, col = "pink",
shade = 0.1, ticktype = "detailed", border = "black",
expand = 0.6, bty = "g", theta = 50, phi = 30,
xlab = "x", ylab = "y", zlab = "z",
xlim = c(-6,4), ylim = c(-5,9), plot = FALSE)
persp3D(x = x3, y = y3, z = z3, col = "lightgreen",
shade = NA, alpha = 0.2,
lighting = list(ambient = 0.5, exponent = 70, specular = 0.2),
ltheta = 50, add = TRUE)
persp3D(x = x4, y = y4, z = z4, col = "lightgreen",
shade = NA,alpha = 0.3,
lighting = list(ambient = 0.5, exponent = 70, specular = 0.2),
ltheta = 50, add = TRUE)
persp3D(x = x5, y = y5, z = z5, col = "lightgreen",
shade = NA, alpha = 0.4,
lighting = list(ambient = 0.5, exponent = 70, specular = 0.2),
ltheta = 50, add = TRUE)
persp3D(x = x6, y = y6, z = z6, col = "lightgreen",
shade = NA, alpha = 0.6,
lighting = list(ambient = 0.5, exponent = 70, specular = 0.2),
ltheta = 50, add = TRUE)
Provjerimo da je suma vjerojatnosti \(P(X\leqslant2, Y\leqslant4)\), \(P(X\geqslant2, Y\geqslant4)\), \(P(X\leqslant2, Y\geqslant4)\) i \(P(X\geqslant2, Y\leqslant4)\) jednaka \(1\).
p1 <- pmvnorm(lower = c(-Inf, -Inf), upper = c(2, 4), mean = mu1, sigma = sigma1, keepAttr = FALSE)
p2 <- pmvnorm(lower = c(2, 4), upper = c(Inf, Inf), mean = mu1, sigma = sigma1, keepAttr = FALSE)
p3 <- pmvnorm(lower = c(-Inf, 4), upper = c(2, Inf), mean = mu1, sigma = sigma1, keepAttr = FALSE)
p4 <- pmvnorm(lower = c(2, -Inf), upper = c(Inf, 4), mean = mu1, sigma = sigma1, keepAttr = FALSE)
cat("Vjerojatnost:", c(p1, p2, p3, p4), "\nSuma vjerojatnosti:", sum(c(p1, p2, p3, p4)))
Vjerojatnost: 0.8751033 0.0005597333 0.1235468 0.0007901647
Suma vjerojatnosti: 1
Funkcija distribucije \(F(x)=P(X\leqslant
x)\) Gaussove slučajne varijable \(X\) je bijekcija pa ima inverznu funkciju.
Vrijednosti funkcije distribucije računamo pomoću naredbe
pnorm, a vrijednosti njezine inverzne funkcije pomoću
naredbe qnorm.
U dvodimenzionalnom slučaju funkcija distribucije \(F(x,y)=P(X\leqslant x, Y\leqslant y)\)
Gaussovog slučajnog vektora \((X,Y)\)
nije bijekcija pa nema inverznu funkciju. Naime, kako je prikazano na
donjoj slici, sve točke koje leže na istoj nivo-liniji, funkcija
distribucije ih preslika u istu vrijednost. Usprkos tome, u biblioteki
mvtnorm postoji naredba qmvnorm. Što zapravo
radi ta naredba ako ne postoji inverzna funkcija od funkcije
distribucije?
Za danu vrijednost \(p\in\langle0,1\rangle\) postoji beskonačno
mnogo točaka \((x,y)\) za koje vrijedi
\(F(x,y)=p\). Sve takve točke leže na
odgovarajućoj nivo-liniji funkcije \(F\). Naredba qmvnorm za danu
vrijednost \(p\) ne može vratiti sve
točke na toj nivo-liniji, već vraća onu točku koja je presjek pravca
\(y=x\) i promatrane nivo-linije.
Pripadne točke na grafu funkcije \(F\)
leže na plavoj krivulji kako je prikazano na donjoj slici.
br <- 100
krivulja <- tibble(x = seq(-5, 4, length = br))
krivulja <- krivulja %>% rowwise() %>%
mutate(z = pmvnorm(upper = c(x,x), mean = mu1, sigma = sigma1, keepAttr = FALSE))
broj <- 300
xpl <- seq(-6, 4, length = broj)
ypl <- seq(-5, 9, length = broj)
mreza5 <- mesh(xpl, ypl)
mreza5 <- tibble(x = as.numeric(mreza5$x), y = as.numeric(mreza5$y))
mreza5 <- mreza5 %>% rowwise() %>%
mutate(rez = pmvnorm(upper = c(x,y), mean = mu1, sigma = sigma1, keepAttr = FALSE))
ploha <- mesh(krivulja$x, seq(0, 1, length = 40))
zp <- rep(krivulja$z, times = 40)
x_ploha <- ploha$x
y_ploha <- ploha$x
z_ploha <- (1 - ploha$y) * (-0.78) + ploha$y * zp
persp3D(x = xpl, y = ypl, z = matrix(mreza5$rez, nrow = broj), col = terrain.colors(nrow(mreza5)),
shade = 0.2, ticktype = "detailed", alpha = 0.9, border = NA,
expand = 0.6, bty = "g", theta = 40, phi = 20, colkey = FALSE,
xlab = "x", ylab = "y", zlab = "z", zlim = c(-0.8, 1),
contour = list(side = c("z", "-0.8"),
levels = c(0.0001, 0.001, 0.01, 0.2, 0.4, 0.6, 0.8, 0.9)),
image = list(side = -0.8),
lighting = list(ambient = 0.5, exponent = 50, specular = 0.2),
ltheta = 30, plot = FALSE)
surf3D(x = x_ploha, y = y_ploha, z = z_ploha, col = "magenta", shade = NA, alpha = 0.2, add = TRUE)
scatter3D(seq(-5, 4, length = 40), seq(-5, 4, length = 40), rep(-0.78, 40),
type = "l", lwd = 2, col = "red", add = TRUE)
scatter3D(krivulja$x + 0.02, krivulja$x - 0.02, krivulja$z + 0.02,
type = "l", lwd = 2, col = "blue", add = TRUE)
Nivo linije funkcije distribucije
Tražimo \(a\in\mathbb{R}\) za koji je \(P(X\leqslant a, Y\leqslant a)=0.58\).
(rez <- qmvnorm(0.58, mean = mu1, sigma = sigma1))
## $quantile
## [1] 2.350032
##
## $f.quantile
## [1] 2.309266e-08
##
## attr(,"message")
## [1] "Normal Completion"
Dobivamo da je \(a\approx
2.350032\). Pomoću naredbe pmvnorm možemo napraviti
provjeru. Obje funkcije numeričkim putem dolaze do rezultata pa se može
dogoditi da se ne dobije baš ista početna vrijednost \(p=0.58\). No, naredba pmvnorm
daje rezultat 0.5800001 što je dovoljno blizu početne vrijednosti \(0.58\).
pmvnorm(upper = c(rez$quantile, rez$quantile), mean = mu1, sigma = sigma1)
## [1] 0.5800001
## attr(,"error")
## [1] 1e-15
## attr(,"msg")
## [1] "Normal Completion"
Tražimo \(b\in\mathbb{R}\) za koji je \(P(X\geqslant b, Y\geqslant b)=0.58\).
(rez2 <- qmvnorm(0.58, mean = mu1, sigma = sigma1, tail = "upper.tail"))
## $quantile
## [1] -1.228332
##
## $f.quantile
## [1] -2.462009e-07
##
## attr(,"message")
## [1] "Normal Completion"
Dobivamo da je \(b\approx
-1.228332\). Pomoću naredbe pmvnorm možemo napraviti
provjeru.
pmvnorm(lower = c(rez2$quantile, rez2$quantile), mean = mu1, sigma = sigma1)
## [1] 0.580001
## attr(,"error")
## [1] 1e-15
## attr(,"msg")
## [1] "Normal Completion"
Tražimo \(c\in\mathbb{R}\) za koji je \(P(-c\leqslant X\leqslant c, -c\leqslant Y\leqslant c)=0.58\).
(rez3 <- qmvnorm(0.58, mean = mu1, sigma = sigma1, tail = "both.tails"))
## $quantile
## [1] 2.578307
##
## $f.quantile
## [1] 6.008251e-06
##
## attr(,"message")
## [1] "Normal Completion"
Dobivamo da je \(c\approx
2.578307\). Pomoću naredbe pmvnorm možemo napraviti
provjeru.
pmvnorm(lower = c(-rez3$quantile, -rez3$quantile), upper = c(rez3$quantile, rez3$quantile),
mean = mu1, sigma = sigma1)
## [1] 0.5801342
## attr(,"error")
## [1] 1e-15
## attr(,"msg")
## [1] "Normal Completion"
Kao što smo ranije vidjeli, nivo-linije funkcije gustoće od Gaussove distribucije su elipse koje imaju središte u vektoru očekivanja \(\mathbf{\mu}\), a njihove osi imaju smjerove svojstvenih vektora matrice kovarijanci \(\Sigma\). Svaka takva elipsa definira minimalno područje oko vektora očekivanja tako da funkcije gustoće nad tim područjem zatvara zadani volumen \(p\in\langle0,1\rangle\), tj. da se unutar tog područja nalazi \(p\%\) podataka. Ovo je zapravo dvodimenzionalni analogon intervalima pouzdanosti u jednodimenzionalnom slučaju.
Za dani \(p\in\langle0,1\rangle\), pripadna elipsa ima vektorsku jednadžbu \[\mathbf{r}=\mathbf{\mu}+\sqrt{\alpha\lambda_1}\cos{t}\cdot\mathbf{u}+\sqrt{\alpha\lambda_2}\sin{t}\cdot\mathbf{v},\quad t\in[0,2\pi]\] pri čemu
R-kodu, \(\alpha\) je jednak
qchisq(1-p, df = 2, lower.tail = FALSE).Pomoću naredbe ellipse iz istoimene biblioteke možemo
nacrtati takve elipse za zadanu Gaussovu distribuciju i pozdanost \(p\in\langle0,1\rangle\). Na taj način ne
moramo sami pisati parametarske jednadžbe i računati sve potrebne
podatke.
Na donjoj slici su nacrtane nivo-linije već ranije promatrane funkcije gustoće i osjenčana je elipsa za vrijednost \(p=0.9\). Nad tom elipsom funkcija gustoće zatvara volumen jednak \(0.9\). Ukoliko generiramo uzorak iz dane distribucije, \(90\%\) podataka nalazit će se unutar osjenčane elipse.
elipsa90 <- ellipse(x = sigma1, centre = mu1, npoints = 100, level = 0.9)
ggplot(data.frame(elipsa90)) + geom_polygon(mapping = aes(x = x, y = y), fill = "red", alpha = 0.3) +
geom_contour(data = mreza2, mapping = aes(x = x, y = y, z = z), bins = 40) +
coord_equal() + scale_x_continuous(breaks = -6:4, limits = c(-6,4)) +
scale_y_continuous(breaks = -3:7, limits = c(-3,7))
Nadalje, na donjoj 3D slici je vizualiziran pripadni volumen koji je jednak \(0.9\).
sv_decomp <- eigen(sigma1)
lambda1 <- sv_decomp$values[1]
lambda2 <- sv_decomp$values[2]
vek1 <- sv_decomp$vectors[,1]
vek2 <- sv_decomp$vectors[,2]
hi <- qchisq(0.1, df = 2, lower.tail = FALSE)
LpCurve_x <- function(p, mu_x, t) {
rez <- case_when(
t <= pi / 2 ~ mu_x + 5 * abs(cos(t))^(2/p),
t <= pi ~ mu_x - 5 * abs(cos(t))^(2/p),
t <= 3 * pi / 2 ~ mu_x - 5 * abs(cos(t))^(2/p),
.default = mu_x + 5 * abs(cos(t))^(2/p)
)
return(rez)
}
LpCurve_x <- Vectorize(LpCurve_x)
LpCurve_y <- function(p, mu_y, t) {
rez <- case_when(
t <= pi / 2 ~ mu_y + 7 * abs(sin(t))^(2/p),
t <= pi ~ mu_y + 7 * abs(sin(t))^(2/p),
t <= 3 * pi / 2 ~ mu_y - 7 * abs(sin(t))^(2/p),
.default = mu_y - 7 * abs(sin(t))^(2/p)
)
return(rez)
}
LpCurve_y <- Vectorize(LpCurve_y)
elipsa90_t <- seq(0, 2*pi, length = 40)
par_u <- seq(0, 1, length = 40)
mreza <- mesh(elipsa90_t, par_u)
mreza_tablica <- tibble(u = as.numeric(mreza$y), v = as.numeric(mreza$x)) %>%
mutate(x = mu1[1] + sqrt(hi * lambda1) * u * cos(v+1.338973) * vek1[1] +
sqrt(hi * lambda2) * u * sin(v+1.338973) * vek2[1],
y = mu1[2] + sqrt(hi * lambda1) * u * cos(v+1.338973) * vek1[2] +
sqrt(hi * lambda2) * u * sin(v+1.338973) * vek2[2])
fun_dens <- dmvnorm(mreza_tablica %>% select(x, y), mean = mu1, sigma = sigma1)
fun_dens <- matrix(fun_dens, nrow = 40)
kut <- atan(vek1[2] / vek1[1])
krivulje <- tibble(v = seq(0, 2*pi, length = 100),
x1 = mu1[1] + sqrt(hi * lambda1) * cos(v-kut) * vek1[1] +
sqrt(hi * lambda2) * sin(v-kut) * vek2[1],
y1 = mu1[2] + sqrt(hi * lambda1) * cos(v-kut) * vek1[2] +
sqrt(hi * lambda2) * sin(v-kut) * vek2[2],
x2 = LpCurve_x(10, mu1[1], v),
y2 = LpCurve_y(10, mu1[2], v))
ploha_elipsa <- krivulje %>% select(-v) %>% slice(rep(1:n(), each = 40)) %>%
mutate(t = rep(seq(0, 1, length = 40), times = 100)) %>% rowwise() %>%
mutate(x = (1-t) * x1 + t * x2, y = (1-t) * y1 + t * y2,
z = dmvnorm(c(x, y), mean = mu1, sigma = sigma1)) %>% ungroup()
surf3D(x = matrix(mreza_tablica$x, nrow = 40),
y = matrix(mreza_tablica$y, nrow = 40),
z = fun_dens, col = "pink",
ticktype = "detailed", border = "darkgray",
expand = 0.6, bty = "g", theta = 30, phi = 30,
xlab = "x", ylab = "y", zlab = "z",
xlim = c(-6,4), ylim = c(-5,9), zlim=c(0,0.1), alpha = 0.75, plot = FALSE)
surf3D(x = matrix(ploha_elipsa$x, nrow = 100, byrow = TRUE),
y = matrix(ploha_elipsa$y, nrow = 100, byrow = TRUE),
z = matrix(ploha_elipsa$z, nrow = 100, byrow = TRUE),
col = "lightgreen",
shade = 0.3, ticktype = "detailed",
alpha = 0.3, add = TRUE)
Pomoću naredbe dmvnorm iz biblioteke
mvtnorm možemo generirati uzorak željene veličine iz zadane
višedimenzionalne Gaussove distribucije.
set.seed(178)
uzorak1 <- rmvnorm(1000, mean = mu1, sigma = sigma1)
uzorak1_tibble <- tibble(x = uzorak1[,1], y = uzorak1[,2])
uzorak1_tibble
ggplot(uzorak1_tibble, aes(x = x, y = y)) +
geom_hex(color = "white") + scale_fill_viridis_c()
2d histogram od uzorak1
Pomoću naredbe kde2d iz biblioteke MASS
možemo dobiti funkciju gustoće.
kdens1 <- kde2d(uzorak1[,1], uzorak1[,2], n = 50, lims = c(-6, 4, -5, 9))
kdens2 <- kde2d(uzorak1[,1], uzorak1[,2], n = 25, lims = c(-6, 4, -5, 9))
U nastavku je dano nekoliko različitih prikaza dobivene funkcije gustoće.
plot_ly(x = kdens1$x, y = kdens1$y, z = kdens1$z) %>% add_surface(colors = "Set2") %>%
layout(scene = list(aspectratio = list(x = 0.7, y = 1, z = 0.5)),
title = "Interaktivna 3D slika")
plot_ly(x = kdens1$x, y = kdens1$y, z = kdens1$z, type = "contour", autocontour = F,
contours = list(start = 0.01, end = 0.09, size = 0.01, showlabels = TRUE),
colors = "Set2") %>% layout(scene = list(aspectration = list(x = 1, y = 1)),
title = "Interaktivna slika s nivo-linijama")
par(mai = c(0, 0.35, 0, 0.3))
persp3D(x = kdens1$x, y = kdens1$y, z = kdens1$z, col = "pink",
shade = NA, border = "black", ticktype = "detailed",
expand = 0.6, bty = "g", theta = 30, phi = 30,
xlab = "x", ylab = "y", zlab = "z",
lighting = list(ambient = 0.5, exponent = 70, specular = 0.2),
ltheta = 50)
Statična 3D slika
par(mai = c(0, 0.35, 0, 0.3))
hist3D(x = kdens2$x, y = kdens2$y, z = kdens2$z, col = "pink",
shade = 0.2, border = "black", ticktype = "detailed",
expand = 0.6, bty = "g", theta = 30, phi = 30,
xlab = "x", ylab = "y", zlab = "z",
lighting = list(ambient = 0.65, exponent = 30, specular = 0.1),
ltheta = 170, space = 0.15)
Ploha kao histogram
par(mai = c(0, 0.35, 0, 0.3))
persp3D(x = kdens1$x, y = kdens1$y, z = kdens1$z, col = terrain.colors(nrow(kdens1$z)),
shade = 0.2, ticktype = "detailed", alpha = 0.9, border = NA,
expand = 0.6, bty = "g", theta = 40, phi = 20, colkey = FALSE,
xlab = "x", ylab = "y", zlab = "z",
contour = list(side = c("z"),
levels = c(0.0001, 0.001, seq(0.01, 0.08, by = 0.01))),
lighting = list(ambient = 0.5, exponent = 50, specular = 0.2),
ltheta = 30)
Nivo-linije na plohi
Pomoću naredbe mvn iz biblioteke MVN možemo
testirati ima li naš slučajni uzorak dvodimenzionalnu Gaussovu
distribuciju. Naredba mvn može provesti više različitih
testova i vratiti različite vizualne prikaze. Za detalje pogledajte
dokumentaciju. Ovdje navodimo primjer s Henze-Zirklerovim testom i nekim
opcijama za grafičke prikaze.
mvn(uzorak1, mvnTest = "hz", multivariatePlot = "qq")
## $multivariateNormality
## Test HZ p value MVN
## 1 Henze-Zirkler 0.3779166 0.9940704 YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Anderson-Darling Column1 0.2084 0.8645 YES
## 2 Anderson-Darling Column2 0.3242 0.5241 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th
## 1 1000 -1.032575 1.024339 -1.034071 -4.081815 1.655941 -1.7350115 -0.3442964
## 2 1000 2.008868 1.714611 2.077125 -3.345662 6.747147 0.8436949 3.1672093
## Skew Kurtosis
## 1 -0.0496322 -0.1024630
## 2 -0.1053113 -0.1512159
mvn(uzorak1, mvnTest = "hz", multivariatePlot = "persp", univariatePlot = "histogram")
## $multivariateNormality
## Test HZ p value MVN
## 1 Henze-Zirkler 0.3779166 0.9940704 YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Anderson-Darling Column1 0.2084 0.8645 YES
## 2 Anderson-Darling Column2 0.3242 0.5241 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th
## 1 1000 -1.032575 1.024339 -1.034071 -4.081815 1.655941 -1.7350115 -0.3442964
## 2 1000 2.008868 1.714611 2.077125 -3.345662 6.747147 0.8436949 3.1672093
## Skew Kurtosis
## 1 -0.0496322 -0.1024630
## 2 -0.1053113 -0.1512159
Napomena. Ako slučajni vektor ima (višedimenzionalnu) Gaussovu distribuciju, tada svaka njegova komponenta ima (jednodimenzionalnu) Gaussovu distribuciju. Vrijedi i više, svaka linearna kombinacija njegovih komponenti je slučajna varijabla s Gaussovom distribucijom.
Međutim, obrnuto ne vrijedi. Ako komponente slučajnog vektora imaju (jednodimenzionalne) Gaussove distribucije, slučajni vektor ne mora imati (višedimenzionalnu) Gaussovu distribuciju. U nastavku dajemo jedan primjer.
uzorak2 <- tibble(x = rnorm(1000, mean = 1, sd = 2.5),
y = 2 * x + runif(1000, min = -0.01, max = 0.01))
uzorak2
mvn(as.matrix(uzorak2), mvnTest = "hz", multivariatePlot = "qq", univariatePlot = "histogram")
## $multivariateNormality
## Test HZ p value MVN
## 1 Henze-Zirkler 5.150063 1.713074e-13 NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Anderson-Darling x 0.5051 0.2023 YES
## 2 Anderson-Darling y 0.5043 0.2032 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th
## x 1000 1.042194 2.552291 1.141990 -6.077306 9.133303 -0.6615215 2.818094
## y 1000 2.084452 5.104651 2.283958 -12.151753 18.262111 -1.3191937 5.629993
## Skew Kurtosis
## x -0.1033298 -0.2510197
## y -0.1032893 -0.2510522