EJERCICIO DISTRIBUCIONES

DISTRIBUCIÓN EXPONENCIAL

\[ Ho:~Los~datos~se~ajustan~a~una~distribución~exponencial\\ Ha:~Los~datos~no~se~ajustan~a~una~distribución~exponencial \]

require(vcd)

## Loading required package: vcd

## Loading required package: grid

require(MASS)

## Loading required package: MASS

# Data generation
# NOTE(review): no set.seed() call precedes these draws, so the recorded
# output below will not be reproduced exactly on a re-run.
exponencial <- rexp(1000, rate = 1.85) # sample drawn from an exponential distribution
control <- abs(rnorm(1000)) # comparison sample: absolute values of standard normal draws

# Parameter estimation: fit an exponential distribution to each sample
fit1 <- fitdistr(exponencial, "exponential") 
fit2 <- fitdistr(control, "exponential")

# Goodness-of-fit test (one-sample Kolmogorov-Smirnov against "pexp")
# NOTE(review): the rate handed to ks.test() is estimated from the same data
# being tested; the ks.test documentation asks for pre-specified parameters,
# so these p-values should be read as approximate - confirm this is acceptable.
(pr_ex1=ks.test(exponencial, "pexp", fit1$estimate)) # p-value > 0.05 -> Ho not rejected

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  exponencial
## D = 0.025309, p-value = 0.5436
## alternative hypothesis: two-sided

# Decision at the 0.05 significance level
ifelse(pr_ex1$p.value<0.05, "Rechazo Ho", "No rechazo Ho")

## [1] "No rechazo Ho"

(pr_ex2=ks.test(control, "pexp", fit2$estimate)) # significant p-value -> Ho rejected

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  control
## D = 0.084311, p-value = 1.339e-06
## alternative hypothesis: two-sided

# Decision at the 0.05 significance level
ifelse(pr_ex2$p.value<0.05, "Rechazo Ho", "No rechazo Ho")

## [1] "Rechazo Ho"

# Plot: density histogram of the exponential sample (x axis cut at the 99th
# percentile) with the fitted exponential density overlaid in red
hist(exponencial, freq = FALSE, breaks = 100, xlim = c(0, quantile(exponencial, 0.99)))
curve(dexp(x, rate = fit1$estimate), from = 0, col = "red", add = TRUE)

DISTRIBUCIÓN NORMAL

\[ Ho:~Los~datos~se~ajustan~a~una~distribución~normal\\ Ha:~Los~datos~no~se~ajustan~a~una~distribución~normal \]

# Data generation
# NOTE(review): no set.seed() call precedes these draws, so the recorded
# output below will not be reproduced exactly on a re-run.
normal = rnorm(1000) # standard normal sample
control2 = rt(1000, df=3) # comparison sample: Student t with 3 degrees of freedom

# Plot the kernel density estimate of each sample
plot(density(normal));plot(density(control2))

# Run the Shapiro-Wilk normality test on the normal sample
(pr_nor1=shapiro.test(normal))

## 
##  Shapiro-Wilk normality test
## 
## data:  normal
## W = 0.99811, p-value = 0.3307

# Decision at the 0.05 significance level
ifelse(pr_nor1$p.value<0.05, "Rechazo Ho", "No rechazo Ho")

## [1] "No rechazo Ho"

# Same test on the t-distributed comparison sample
(pr_nor2=shapiro.test(control2))

## 
##  Shapiro-Wilk normality test
## 
## data:  control2
## W = 0.90091, p-value < 2.2e-16

# Decision at the 0.05 significance level
ifelse(pr_nor2$p.value<0.05, "Rechazo Ho", "No rechazo Ho")

## [1] "Rechazo Ho"

# Normal Q-Q plot of each sample, with the reference line drawn in colour 2
qqnorm(normal)
qqline(normal, col = 2)

qqnorm(control2)
qqline(control2, col = 2)

DISTRIBUCIÓN UNIFORME

\[ Ho:~Los~datos~se~ajustan~a~una~distribución~uniforme\\ Ha:~Los~datos~no~se~ajustan~a~una~distribución~uniforme\]

#Generación de datos
library(swfscMisc)

## 
## Attaching package: 'swfscMisc'

## The following object is masked from 'package:vcd':
## 
##     odds

# NOTE(review): no set.seed() call precedes these draws, so the recorded
# output below will not be reproduced exactly on a re-run.
uniforme <- runif(1000) # uniform sample on (0, 1)
control3 <- rlnorm(1000) # comparison sample: log-normal (rlnorm), not normal

# Run the test
# uniform.test() receives the object returned by hist(); the recorded output
# names its data as hist.output$counts. With default arguments hist() also
# draws the histogram as a side effect.
(pr_uni1=uniform.test(hist(uniforme), B = 1000))

## 
##  Chi-squared test for given probabilities with simulated p-value (based
##  on 1000 replicates)
## 
## data:  hist.output$counts
## X-squared = 6.72, df = NA, p-value = 0.6523

# Decision at the 0.05 significance level
ifelse(pr_uni1$p.value<0.05, "Rechazo Ho", "No rechazo Ho")

## [1] "No rechazo Ho"

# Same test on the log-normal comparison sample
(pr_uni2=uniform.test(hist(control3), B = 1000))

## 
##  Chi-squared test for given probabilities with simulated p-value (based
##  on 1000 replicates)
## 
## data:  hist.output$counts
## X-squared = 7989.5, df = NA, p-value = 0.000999

# Decision at the 0.05 significance level
ifelse(pr_uni2$p.value<0.05, "Rechazo Ho", "No rechazo Ho")

## [1] "Rechazo Ho"

DISTRIBUCIÓN POISSON

\[ Ho:~Los~datos~se~ajustan~a~una~distribución~poisson\\ Ha:~Los~datos~no~se~ajustan~a~una~distribución~poisson\]

# Dispersion test for count data.
#
# For a Poisson distribution the mean equals the variance, so the statistic
# sum((x - mean(x))^2) / mean(x) computed below is compared against a
# chi-squared distribution with length(x) - 1 degrees of freedom.
#
# x: numeric vector of observations.
#
# Prints a short summary (sample size, mean, variance and the rounded result)
# and invisibly returns the unrounded two-sided value.
dispersion_test <- function(x) {
  n_obs <- length(x)
  x_bar <- mean(x)

  # Dispersion statistic and its upper-tail chi-squared probability.
  stat <- sum((x - x_bar)^2) / x_bar
  upper_tail <- 1 - pchisq(stat, n_obs - 1)

  # Fold the upper-tail probability around 0.5 so that values near either
  # tail both map to a result near 0.
  res <- 1 - 2 * abs(upper_tail - 0.5)

  cat("Dispersion test of count data:\n")
  cat(n_obs, " data points.\n", sep = "")
  cat("Mean: ", x_bar, "\n", sep = "")
  cat("Variance: ", var(x), "\n", sep = "")
  cat("Probability of being drawn from Poisson distribution: ",
      round(res, 3), "\n", sep = "")

  invisible(res)
}
# Create data
# NOTE(review): no set.seed() call precedes these draws, so the recorded
# output below will not be reproduced exactly on a re-run.
poisson=rpois(1000,1) # Poisson counts with lambda = 1
control4=rnorm(1000) # comparison sample: standard normal draws, i.e. not count data

# Try the function on the Poisson sample
dispersion_test(poisson)

## Dispersion test of count data:
## 1000 data points.
## Mean: 0.983
## Variance: 0.9096206
## Probability of being drawn from Poisson distribution: 0.09

# Try the function on the non-Poisson comparison sample
dispersion_test(control4)

## Dispersion test of count data:
## 1000 data points.
## Mean: 0.02478714
## Variance: 1.039307
## Probability of being drawn from Poisson distribution: 0

DISTRIBUCIÓN BINOMIAL

\[ Ho:~Los~datos~se~ajustan~a~una~distribución~binomial\\ Ha:~Los~datos~no~se~ajustan~a~una~distribución~binomial\]

require("fitdistrplus")

## Loading required package: fitdistrplus

## Loading required package: survival

# Create data (seeded so the draws are reproducible)
set.seed(100)
binomial = rbinom(1000,50,0.4) # 1000 draws with size = 50, prob = 0.4

# Fit a binomial distribution: size is held fixed at 50 and prob is estimated
# starting from 0.1
# NOTE(review): this reuses the name fit1, overwriting the exponential fit
# stored under that name earlier in the document.
fit1 = fitdist(binomial, dist="binom", # "mle" method
                   fix.arg=list(size = 50), 
                   start=list(prob = 0.1))

# Print the estimate, its standard error and the information criteria
summary(fit1)

## Fitting of the distribution ' binom ' by maximum likelihood 
## Parameters : 
##       estimate  Std. Error
## prob 0.4044602 0.002194857
## Fixed parameters:
##      value
## size    50
## Loglikelihood:  -2678.442   AIC:  5358.884   BIC:  5363.792

# Diagnostic plots of the fit
plot(fit1)

DISTRIBUCIÓN LOGNORMAL

\[ Ho:~Los~datos~se~ajustan~a~una~distribución~lognormal\\ Ha:~Los~datos~no~se~ajustan~a~una~distribución~lognormal \]

require("fitdistrplus")
# Create data (seeded so the draws are reproducible)
set.seed(100)
lognormal = rlnorm(1000,50,0.3) # 1000 draws with meanlog = 50, sdlog = 0.3

# Fit a log-normal distribution
# NOTE(review): this reuses the name fit2, overwriting the fit stored under
# that name earlier in the document.
fit2 = fitdist(lognormal, dist="lnorm") # "mle" method

# Print the estimates, standard errors and the information criteria
summary(fit2)

## Fitting of the distribution ' lnorm ' by maximum likelihood 
## Parameters : 
##           estimate  Std. Error
## meanlog 50.0050415 0.009772126
## sdlog    0.3090218 0.006909611
## Loglikelihood:  -50249.64   AIC:  100503.3   BIC:  100513.1 
## Correlation matrix:
##         meanlog sdlog
## meanlog       1     0
## sdlog         0     1

# Diagnostic plots of the fit
plot(fit2)

DISTRIBUCIÓN BETA

\[ Ho:~Los~datos~se~ajustan~a~una~distribución~beta\\ Ha:~Los~datos~no~se~ajustan~a~una~distribución~beta \]

require("fitdistrplus")
# Create data (seeded so the draws are reproducible)
set.seed(100)
# NOTE(review): the variable name `beta` is also the name of the base R
# function beta(); a distinct name would be less confusing.
beta <- rbeta(1000,0.9,0.5) # 1000 draws with shape1 = 0.9, shape2 = 0.5

# Fit a beta distribution
fit3 = fitdist(beta, dist="beta")

# Print the estimates, standard errors and the information criteria
summary(fit3)

## Fitting of the distribution ' beta ' by maximum likelihood 
## Parameters : 
##         estimate Std. Error
## shape1 0.9449265 0.04250818
## shape2 0.4809618 0.01824865
## Loglikelihood:  320.3926   AIC:  -636.7851   BIC:  -626.9696 
## Correlation matrix:
##           shape1    shape2
## shape1 1.0000000 0.5452701
## shape2 0.5452701 1.0000000

# Diagnostic plots of the fit
plot(fit3)

DISTRIBUCIÓN GAMMA

\[ Ho:~Los~datos~se~ajustan~a~una~distribución~gamma\\ Ha:~Los~datos~no~se~ajustan~a~una~distribución~gamma \]

require("fitdistrplus")
# Create data (seeded so the draws are reproducible)
set.seed(100)
# NOTE(review): the variable name `gamma` is also the name of the base R
# function gamma(); a distinct name would be less confusing.
gamma <- rgamma(1000,0.9,0.6) # 1000 draws with shape = 0.9, rate = 0.6

# Fit a gamma distribution
fit4 = fitdist(gamma, dist="gamma")

# Print the estimates, standard errors and the information criteria
summary(fit4)

## Fitting of the distribution ' gamma ' by maximum likelihood 
## Parameters : 
##        estimate Std. Error
## shape 0.8553526 0.03320887
## rate  0.5492155 0.02841333
## Loglikelihood:  -1434.735   AIC:  2873.471   BIC:  2883.286 
## Correlation matrix:
##           shape      rate
## shape 1.0000000 0.7504593
## rate  0.7504593 1.0000000

# Diagnostic plots of the fit
plot(fit4)

DISTRIBUCIÓN CHI cuadrado

\[ Ho:~Los~datos~se~ajustan~a~una~distribución~chicuadrado\\ Ha:~Los~datos~no~se~ajustan~a~una~distribución~chicuadrado \]

require("fitdistrplus")
# Create data (seeded so the draws are reproducible)
set.seed(100)
chi <- rchisq(n = 1000,df = 20,ncp = 2) # non-central chi-squared draws (df = 20, ncp = 2)
hist(chi) # histogram of the sample

# Run the test
# NOTE(review): called on a single numeric vector, chisq.test() performs the
# "Chi-squared test for given probabilities" shown in the recorded output,
# i.e. it treats the 1000 raw values as counts and compares them with equal
# proportions. It does not test whether the sample follows a chi-squared
# distribution, so the result does not address the hypotheses stated for this
# section. Something like ks.test(chi, "pchisq", df = 20, ncp = 2) would test
# the stated Ho - confirm intent before changing, since the printed output
# would need to be regenerated.
print(chisq.test(chi))

## 
##  Chi-squared test for given probabilities
## 
## data:  chi
## X-squared = 2047.4, df = 999, p-value < 2.2e-16

```

EJERCICIO DISTRIBUCIONES

Ana María Florián Pulido

11/4/2021