# Calcular probabilidades de distribuciones
library(ggplot2)
library(cowplot)
library(ggthemes)
library(cowplot) # Gráficos
library(mosaic)
library(dplyr) # Para proesar filtrar ordenar con arrange
# Desactivar la notación científica para mostrar los números en notación normal
options(scipen=999) # Notación normal
# Draw a discrete distribution as two side-by-side panels.
#
# tabla: data frame with columns `x`, `f.prob` and `f.acum`; these are the
#        columns the two charts read.
# Returns the result of plot_grid(): a column chart of `f.prob` against `x`
# on the left, and a line-plus-points chart of `f.acum` against `x` on the
# right, laid out in a single row.
f.graf.dist <- function(tabla) {
  barras <- ggplot(tabla, aes(x = x, y = f.prob)) +
    geom_col(fill = "lightblue")

  curva <- ggplot(tabla, aes(x = x, y = f.acum)) +
    geom_line(colour = "blue") +
    geom_point(colour = "red")

  plot_grid(barras, curva, nrow = 1)
}
# Binomial distribution with 4 trials and success probability 0.70.
# Probability of exactly 4 successes.
dbinom(4, size = 4, prob = 0.7)
## [1] 0.2401
# Probability of each possible number of successes, 0 through 4.
dbinom(0:4, size = 4, prob = 0.7)
## [1] 0.0081 0.0756 0.2646 0.4116 0.2401
# Cumulative probability P(X <= q) for q = 0 through 4.
pbinom(0:4, size = 4, prob = 0.7)
## [1] 0.0081 0.0837 0.3483 0.7599 1.0000
# Hypergeometric distribution: the total is N = 40, of which m = 10 are bad
# and n = N - m = 40 - 10 = 30 are not; k = 8 items are selected.
# Probability of exactly 3 bad items in the selection.
dhyper(3, m = 10, n = 30, k = 8)
## [1] 0.2223625
# Meaning of the arguments:
#   k is the size of the selection
#   m is the number of bad items, i.e. 10 of the N = 40 total
#   n is the remainder N - m, i.e. 40 - 10
# Probability of each possible count of bad items, 0 through 8.
dhyper(0:8, m = 10, n = 30, k = 8)
## [1] 0.0761062216171 0.2647172925811 0.3474414465127 0.2223625257681
## [5] 0.0748335423258 0.0133037408579 0.0011878340052 0.0000468111923
## [9] 0.0000005851399
# Cumulative probability P(X <= q) for q = 0 through 8.
phyper(0:8, m = 10, n = 30, k = 8)
## [1] 0.07610622 0.34082351 0.68826496 0.91062749 0.98546103 0.99876477 0.99995260
## [8] 0.99999941 1.00000000
# Poisson distribution with rate lambda = 2.
# Probability of observing zero events.
dpois(0, lambda = 2)
## [1] 0.1353353
# Probability of each count from 0 through 15.
dpois(0:15, lambda = 2)
## [1] 0.135335283236613 0.270670566473225 0.270670566473225 0.180447044315484
## [5] 0.090223522157742 0.036089408863097 0.012029802954366 0.003437086558390
## [9] 0.000859271639598 0.000190949253244 0.000038189850649 0.000006943609209
## [13] 0.000001157268201 0.000000178041262 0.000000025434466 0.000000003391262
# Cumulative probability P(X <= q) for q = 0 through 15.
ppois(0:15, lambda = 2)
## [1] 0.1353353 0.4060058 0.6766764 0.8571235 0.9473470 0.9834364 0.9954662
## [8] 0.9989033 0.9997626 0.9999535 0.9999917 0.9999986 0.9999998 1.0000000
## [15] 1.0000000 1.0000000
# Tabulate the Poisson(lambda = 2) distribution for x = 0..15: `f.prob` holds
# the probability of each value and `f.acum` the running total of `f.prob`.
tabla <- data.frame(x = 0:15)
tabla$f.prob <- dpois(tabla$x, lambda = 2)
tabla$f.acum <- cumsum(tabla$f.prob)
tabla
## x f.prob f.acum
## 1 0 0.135335283236613 0.1353353
## 2 1 0.270670566473225 0.4060058
## 3 2 0.270670566473225 0.6766764
## 4 3 0.180447044315484 0.8571235
## 5 4 0.090223522157742 0.9473470
## 6 5 0.036089408863097 0.9834364
## 7 6 0.012029802954366 0.9954662
## 8 7 0.003437086558390 0.9989033
## 9 8 0.000859271639598 0.9997626
## 10 9 0.000190949253244 0.9999535
## 11 10 0.000038189850649 0.9999917
## 12 11 0.000006943609209 0.9999986
## 13 12 0.000001157268201 0.9999998
## 14 13 0.000000178041262 1.0000000
## 15 14 0.000000025434466 1.0000000
## 16 15 0.000000003391262 1.0000000
# Plot the probabilities and the cumulative curve side by side.
f.graf.dist(tabla)
# Parameters of the normal distribution used by the calls below.
media <- 150
desv <- 2
# Five random draws. No set.seed() call precedes this in the visible script,
# so the printed values are those of one particular run.
rnorm(5, mean = media, sd = desv)
## [1] 148.1919 145.3373 147.5247 152.3992 147.9159
# Density of the distribution at 154 and at 150 (the mean).
dnorm(154, mean = media, sd = desv)
## [1] 0.02699548
dnorm(150, mean = media, sd = desv)
## [1] 0.1994711
# Probability between 150 and 154: P(X <= 154) - P(X <= 150).
diff(pnorm(c(150, 154), mean = media, sd = desv))
## [1] 0.4772499
# Draw 1000 values from the normal distribution and evaluate the density at
# each of them, plus at the two reference points 150 and 154.
x <- rnorm(1000, mean = media, sd = desv)
dens.x <- dnorm(x, mean = media, sd = desv)
dens.x.150.154 <- dnorm(c(150, 154), mean = media, sd = desv)
# Red points and line trace the density over the sample; the two black points
# mark the density at 150 and 154.
ggplot(mapping = aes(x = x, y = dens.x)) +
  geom_point(colour = "red") +
  geom_line(colour = "red") +
  geom_point(aes(x = c(150, 154), y = dens.x.150.154), colour = "black") +
  labs(
    title = "Distribución normal",
    subtitle = paste("Media=", media, " Desv.Std = ", desv)
  )
# Reproducible sample of 1000 draws and the density at each draw.
set.seed(2021)
x <- rnorm(n = 1000, mean = media, sd = desv)
dens.x <- dnorm(x, mean = media, sd = desv)
# Plotting window: three standard deviations on each side of the mean.
rango <- c(media - 3 * desv, media + 3 * desv)
datos <- data.frame(
  x = x,
  y = dens.x
)
# Blue: the whole density. Pink: only the rows with 150 < x < 154.
# The highlight layer gets its own filtered data instead of remapping the
# other rows to x = 0 and relying on xlim() to discard them, which is what
# produced the "Removed 519 rows" warning.
ggplot(data = datos, mapping = aes(x = x, y = y)) +
  geom_line(col = "blue") +
  geom_area(fill = "blue") +
  geom_area(data = datos[datos$x > 150 & datos$x < 154, ], fill = "pink") +
  xlim(rango)
# Draws that fall outside `rango` are still dropped from the blue layers:
## Warning: Removed 4 rows containing missing values (position_stack).
## Warning: Removed 4 row(s) containing missing values (geom_path).
# Upper-tail probability P(X > 154).
pnorm(q = 154, mean = media, sd = desv, lower.tail = FALSE)
## [1] 0.02275013
# Equivalent: complement of the lower-tail probability.
1 - pnorm(q = 154, mean = media, sd = desv)
## [1] 0.02275013
# Blue: the whole density. Pink: only the rows with x > 154.
# The highlight layer gets its own filtered data instead of remapping the
# other rows to x = 0 and relying on xlim() to discard them.
ggplot(data = datos, mapping = aes(x = x, y = y)) +
  geom_line(col = "blue") +
  geom_area(fill = "blue") +
  geom_area(data = datos[datos$x > 154, ], fill = "pink") +
  xlim(min(rango), max(rango))
# ¿Cuál es el punto de la curva por debajo del cual queda el 0.95 de la distribución normal con media = 150 y desv = 5?
# Es el cuantil x tal que P(X <= x) = 0.95, es decir, x = qnorm(p = 0.95)
# Reproducible sample from a normal distribution with mean 150 and sd 5.
set.seed(2021)
media <- 150
desv <- 5
x <- rnorm(n = 1000, mean = media, sd = desv)
# Density at each draw (dnorm() returns densities; the name is kept as is).
prob.x <- dnorm(x = x, mean = media, sd = desv)
# Quantile that leaves 0.95 of the distribution below it.
punto.x <- qnorm(p = 0.95, mean = media, sd = desv)
punto.x
## [1] 158.2243
# Density at that quantile, used as the y coordinate of the marker.
dens.punto.x <- dnorm(x = punto.x, mean = media, sd = desv)
dens.punto.x
## [1] 0.02062713
# Light-blue points trace the density; the red point marks the 0.95 quantile.
# The colour is set outside aes() so it is a fixed colour: inside aes() the
# string "red" is treated as data, which gives a default palette colour and a
# legend entry instead of a red point.
ggplot() +
  geom_point(aes(x = x, y = prob.x), col = "lightblue") +
  geom_point(aes(x = punto.x, y = dens.punto.x), col = "red")
# The seed call is kept so code that runs afterwards sees the same RNG state.
set.seed(2021)
media <- 1000
desv <- 100
# Cumulative probabilities at 900 and 1100 for N(1000, 100); xpnorm() also
# prints the summary shown below.
gnormal <- xpnorm(q = c(900, 1100), mean = media, sd = desv)
##
## If X ~ N(1000, 100), then
## P(X <= 900) = P(Z <= -1) = 0.1587 P(X <= 1100) = P(Z <= 1) = 0.8413
## P(X > 900) = P(Z > -1) = 0.8413 P(X > 1100) = P(Z > 1) = 0.1587
##
gnormal
## [1] 0.1586553 0.8413447
# Probability between 900 and 1100: P(X <= 1100) - P(X <= 900).
# The upper bound comes first so the result is a positive probability.
round(gnormal[2] - gnormal[1], 4)
## [1] 0.6827
# The seed call is kept so code that runs afterwards sees the same RNG state.
set.seed(2021)
media <- 0
desv <- 1
# Cumulative probabilities at -1 and 1 for the standard normal N(0, 1);
# xpnorm() also prints the summary shown below.
gnormalstd <- xpnorm(q = c(-1, 1), mean = media, sd = desv)
##
## If X ~ N(0, 1), then
## P(X <= -1) = P(Z <= -1) = 0.1587 P(X <= 1) = P(Z <= 1) = 0.8413
## P(X > -1) = P(Z > -1) = 0.8413 P(X > 1) = P(Z > 1) = 0.1587
##
gnormalstd
## [1] 0.1586553 0.8413447
# Probability between -1 and 1: P(X <= 1) - P(X <= -1).
# Uses gnormalstd, the result computed in this section, with the upper bound
# first so the result is a positive probability.
round(gnormalstd[2] - gnormalstd[1], 4)
## [1] 0.6827
# NOTE(review): gnormal and gnormalstd print as numeric probabilities, so the
# disabled call below would need plot objects instead - confirm before enabling.
# plot_grid(gnormal, gnormalstd, nrow = 2, ncol=1)