Objetivo

Calcular probabilidades de distribuciones

Desarrollo

Cargar librerías

library(ggplot2)
library(cowplot)
library(ggthemes)
library(cowplot) # Gráficos
library(mosaic)
library(dplyr)  # Para procesar, filtrar y ordenar con arrange

Notación normal

# Set a very high penalty on scientific notation so numbers print in fixed
# (decimal) notation
options(scipen=999) # Fixed (non-scientific) notation

Funciones para gráficas

#' Plot a discrete distribution: probabilities and cumulative probabilities
#'
#' @param tabla A data frame with the columns `x` (values of the variable),
#'   `f.prob` (probability of each value) and `f.acum` (cumulative
#'   probability).
#' @return The result of `plot_grid()`: one row with the column chart of
#'   `f.prob` on the left and the line-and-point chart of `f.acum` on the
#'   right.
f.graf.dist <- function(tabla) {
  # Validate up front: ggplot2 evaluates aesthetics lazily, so a missing
  # column would otherwise surface later as a less helpful error.
  if (!is.data.frame(tabla)) {
    stop("`tabla` must be a data frame", call. = FALSE)
  }
  faltantes <- setdiff(c("x", "f.prob", "f.acum"), names(tabla))
  if (length(faltantes) > 0) {
    stop(
      "`tabla` is missing column(s): ", paste(faltantes, collapse = ", "),
      call. = FALSE
    )
  }

  # Left panel: probability of each value as a column chart
  gfrecuencias <- ggplot(data = tabla) +
    geom_col(aes(x = x, y = f.prob), fill = "lightblue")

  # Right panel: cumulative probability as a line with the points marked
  gacumulada <- ggplot(data = tabla) +
    geom_line(aes(x = x, y = f.acum), col = "blue") +
    geom_point(aes(x = x, y = f.acum), col = "red")

  plot_grid(gfrecuencias, gacumulada, nrow = 1)
}

Binomial

P(x=4)

# Binomial probability mass P(X = 4) with size = 4 trials and success
# probability 0.70
dbinom(x = 4, size = 4, prob = 0.70)
## [1] 0.2401

P(x={0,1,2,3,4})

# Binomial probability mass for every possible outcome x = 0, 1, ..., 4
# (vectorised over x; one probability per value)
dbinom(x = 0:4, size = 4, prob = 0.70)
## [1] 0.0081 0.0756 0.2646 0.4116 0.2401

Acumulada

# Cumulative binomial probabilities P(X <= q) for q = 0, 1, ..., 4
pbinom(q = 0:4, size = 4, prob = 0.70)
## [1] 0.0081 0.0837 0.3483 0.7599 1.0000

Hipergeométrica

P(x=3)

# With a population of N = 40: m = 10 is the number of items of interest,
# n = N - m = 40 - 10 = 30 is the number of remaining items, and k = 8 is the
# number of items drawn. The call returns P(X = 3).
dhyper(x = 3, m = 10, n = 30, k = 8)
## [1] 0.2223625

P(x={0,1,2,3…8})

# Meaning of the arguments:
# k is the number of items selected
# m is the number of defective items, i.e. 10 out of the total N = 40
# n is the remainder N - m, i.e. 40 - 10
dhyper(x = 0:8, m = 10, n = 30, k = 8)
## [1] 0.0761062216171 0.2647172925811 0.3474414465127 0.2223625257681
## [5] 0.0748335423258 0.0133037408579 0.0011878340052 0.0000468111923
## [9] 0.0000005851399

Acumulada F(x={0,1,2,3…8})

# Cumulative hypergeometric probabilities P(X <= q) for q = 0, 1, ..., 8
phyper(q = 0:8, m = 10, n = 30, k = 8)
## [1] 0.07610622 0.34082351 0.68826496 0.91062749 0.98546103 0.99876477 0.99995260
## [8] 0.99999941 1.00000000

Poisson

P(x=0)

# Poisson probability mass P(X = 0) with rate lambda = 2
dpois(x = 0, lambda = 2)
## [1] 0.1353353

P(x={0,1,2,3…15})

# Poisson probability mass for x = 0, 1, ..., 15 with rate lambda = 2
dpois(x = 0:15, lambda = 2)
##  [1] 0.135335283236613 0.270670566473225 0.270670566473225 0.180447044315484
##  [5] 0.090223522157742 0.036089408863097 0.012029802954366 0.003437086558390
##  [9] 0.000859271639598 0.000190949253244 0.000038189850649 0.000006943609209
## [13] 0.000001157268201 0.000000178041262 0.000000025434466 0.000000003391262

Acumulada F(x={0,1,2,3…15})

# Cumulative Poisson probabilities P(X <= q) for q = 0, 1, ..., 15
ppois(q = 0:15, lambda = 2)
##  [1] 0.1353353 0.4060058 0.6766764 0.8571235 0.9473470 0.9834364 0.9954662
##  [8] 0.9989033 0.9997626 0.9999535 0.9999917 0.9999986 0.9999998 1.0000000
## [15] 1.0000000 1.0000000

Tabla

# Poisson(lambda = 2) table built column by column: the values 0..15, their
# probability mass, and the running total of that mass
tabla <- data.frame(x = 0:15)
tabla$f.prob <- dpois(x = tabla$x, lambda = 2)
tabla$f.acum <- cumsum(tabla$f.prob)
tabla
##     x            f.prob    f.acum
## 1   0 0.135335283236613 0.1353353
## 2   1 0.270670566473225 0.4060058
## 3   2 0.270670566473225 0.6766764
## 4   3 0.180447044315484 0.8571235
## 5   4 0.090223522157742 0.9473470
## 6   5 0.036089408863097 0.9834364
## 7   6 0.012029802954366 0.9954662
## 8   7 0.003437086558390 0.9989033
## 9   8 0.000859271639598 0.9997626
## 10  9 0.000190949253244 0.9999535
## 11 10 0.000038189850649 0.9999917
## 12 11 0.000006943609209 0.9999986
## 13 12 0.000001157268201 0.9999998
## 14 13 0.000000178041262 1.0000000
## 15 14 0.000000025434466 1.0000000
## 16 15 0.000000003391262 1.0000000

Visualizar probabilidades

# Draw the probability chart and the cumulative-probability chart for tabla
f.graf.dist(tabla)

Distribución normal

# Parameters of the normal distribution used by the calls that follow
media <- 150 # passed as `mean`
desv <- 2 # passed as `sd`

Aleatorios normal

# Draw 5 random values from a normal distribution with mean `media` and
# standard deviation `desv`; no seed is set before this call in the visible
# script, so the values differ between runs
rnorm(n = 5, mean = media, sd = desv)
## [1] 148.1919 145.3373 147.5247 152.3992 147.9159

f(x = 154)

# Height of the normal density curve at x = 154 (a density value, not a
# probability)
dnorm(x = 154, mean = media, sd = desv)
## [1] 0.02699548

f(x = 150)

# Height of the normal density curve at x = 150
dnorm(x = 150, mean = media, sd = desv)
## [1] 0.1994711

Area bajo la curva

# Area under the curve between 150 and 154: evaluate the CDF at both limits
# in one vectorised call and take upper minus lower
diff(pnorm(q = c(150, 154), mean = media, sd = desv))
## [1] 0.4772499

Visualizar curva

# Random sample from the normal distribution and the density at each point
x <- rnorm(n = 1000, mean = media, sd = desv)
dens.x <- dnorm(x = x, mean = media, sd = desv)

# Density at the two reference points x = 150 and x = 154
dens.x.150.154 <- dnorm(x = c(150, 154), mean = media, sd = desv)

# Red points and line trace the sampled curve; black points mark 150 and 154
ggplot() +
  geom_point(mapping = aes(x = x, y = dens.x), colour = "red") +
  geom_line(mapping = aes(x = x, y = dens.x), colour = "red") +
  geom_point(
    mapping = aes(x = c(150, 154), y = dens.x.150.154),
    colour = "black"
  ) +
  labs(
    title = "Distribución normal",
    subtitle = paste("Media=", media, " Desv.Std = ", desv)
  )

Area bajo la curva F(x=154) - F(x=150)

# Reproducible sample from the normal distribution and its density
set.seed(2021)
x <- rnorm(n = 1000, mean = media, sd = desv)
dens.x <- dnorm(x, mean = media, sd = desv)

# Plot window: three standard deviations on each side of the mean
rango <- c(media - 3 * desv, media + 3 * desv)

datos <- data.frame(
  x = x,
  y = dens.x
)

# Whole curve shaded in blue, region 150 < x < 154 shaded in pink.
# The pink layer receives only the rows inside the interval. Remapping the
# other rows to x = 0 would push them outside xlim and make ggplot2 drop them
# with a "Removed ... rows" warning.
ggplot(data = datos, mapping = aes(x = x, y = y)) +
  geom_line(col = "blue") +
  geom_area(fill = "blue") +
  geom_area(data = subset(datos, x > 150 & x < 154), fill = "pink") +
  xlim(min(rango), max(rango))
## Warning: Removed 4 rows containing missing values (position_stack).
## Warning: Removed 4 row(s) containing missing values (geom_path).

F(x>154)

# Upper-tail probability P(X > 154), requested directly with lower.tail = FALSE
pnorm(q = 154, mean = media, sd = desv, lower.tail = FALSE)
## [1] 0.02275013
# or, equivalently, as the complement of the lower-tail probability
1 - pnorm(q = 154, mean = media, sd = desv)
## [1] 0.02275013

Área bajo la curva F(x>154)

# Whole curve shaded in blue, upper tail x > 154 shaded in pink.
# The pink layer receives only the tail rows that fall inside the plot window,
# so no row has to be remapped to an out-of-range x and then dropped by xlim.
ggplot(data = datos, mapping = aes(x = x, y = y)) +
  geom_line(col = "blue") +
  geom_area(fill = "blue") +
  geom_area(data = subset(datos, x > 154 & x <= max(rango)), fill = "pink") +
  xlim(min(rango), max(rango))

Interpretar qnorm()

¿Cuál es el punto de la curva que deja acumulado el 0.95 de la distribución normal con media = 150 y desv = 5?

x in (q = 0.95)

set.seed(2021)
media <- 150
desv <- 5

# Sampled points and the density at each one (prob.x holds density values)
x <- rnorm(n = 1000, mean = media, sd = desv)
prob.x <- dnorm(x = x, mean = media, sd = desv)

# Quantile: the x value that leaves 0.95 of the distribution to its left
punto.x <- qnorm(p = 0.95, mean = media, sd = desv)

punto.x
## [1] 158.2243
# Height of the density curve at that quantile
dens.punto.x <- dnorm(x = punto.x, mean = media, sd = desv)

dens.punto.x
## [1] 0.02062713
# The colour of the quantile point is set outside aes(): inside aes() the
# string "red" is mapped as data, which gives a default palette colour and a
# legend instead of a red point.
ggplot() +
  geom_point(aes(x = x, y = prob.x), col = "lightblue") +
  geom_point(aes(x = punto.x, y = dens.punto.x), col = "red")

Normal Area bajo la curva

set.seed(2021)
media <- 1000
desv <- 100
# xpnorm() prints the probability summary and its value is stored in gnormal:
# the lower-tail probabilities P(X <= 900) and P(X <= 1100)
gnormal <- xpnorm(q = c(900, 1100), mean = media, sd = desv)
## 
## If X ~ N(1000, 100), then
##  P(X <=  900) = P(Z <= -1) = 0.1587  P(X <= 1100) = P(Z <=  1) = 0.8413
##  P(X >   900) = P(Z >  -1) = 0.8413  P(X >  1100) = P(Z >   1) = 0.1587
## 

gnormal
## [1] 0.1586553 0.8413447
# P(900 < X <= 1100) = F(1100) - F(900): subtract the lower CDF value from the
# upper one so the probability is positive
round(gnormal[2] - gnormal[1], 4)
## [1] 0.6827

Normal Estándar. Area bajo la curva

set.seed(2021)
media <- 0
desv <- 1
# Same calculation on the standard normal: gnormalstd stores the lower-tail
# probabilities P(Z <= -1) and P(Z <= 1)
gnormalstd <- xpnorm(q = c(-1, 1), mean = media, sd = desv)
## 
## If X ~ N(0, 1), then
##  P(X <= -1) = P(Z <= -1) = 0.1587    P(X <=  1) = P(Z <=  1) = 0.8413
##  P(X >  -1) = P(Z >  -1) = 0.8413    P(X >   1) = P(Z >   1) = 0.1587
## 

gnormalstd
## [1] 0.1586553 0.8413447
# P(-1 < Z <= 1) = F(1) - F(-1), computed from the standard-normal result
# (gnormalstd) rather than from the N(1000, 100) result
round(gnormalstd[2] - gnormalstd[1], 4)
## [1] 0.6827

Ambas graficas

# NOTE(review): gnormal and gnormalstd print as numeric probability vectors
# earlier in the script, not as plot objects, which is presumably why this
# call is disabled. Confirm before re-enabling.
# plot_grid(gnormal, gnormalstd, nrow = 2, ncol=1)