Ayudantía 25 de oct. (RECUPERATIVA)

#install.packages("qcc")
library(qcc)
library(robustbase)
library(nortest)
library(MASS)
library(univariateML)
library(tidyverse)
library(ggplot2)
library(fpp)
library(zoo)

Función de confiabilidad y Tasa de fallas

El tiempo de funcionamiento de una ampolleta es una una VA \(T\) modelada por una distribución exponencial con parámetro \(\lambda = 4.28 \times 10^{-4}\).

Sea \(T \sim Exponencial(\lambda)\) donde

\[ f_{T}(t) = \lambda e^{-\lambda t}, \quad \lambda,t >0 \]

\[ F_{T}(t) = 1 - e^{-\lambda t}. \]

Obtenga la función de confiabilidad y tasa de fallas de la VA \(T\), en general y considerando el parámetro \(\lambda\) del problema.

Por lo que la función de confiabilidad está dada por

\[ \begin{split} R_{T}(t) & = 1- F_{T}(t) \\ & = 1 - (1-e^{-\lambda t}) \\ & = e^{-\lambda t} \end{split} \]

y la tasa de fallas

\[ \begin{split} h_{T}(t) & = \dfrac{f_{T}(t)}{R_{T}(t)} \\ & = \dfrac{\lambda e^{-\lambda t}}{e^{-\lambda t}} \\ & = \lambda. \end{split} \]

Grafique las funciones de confiabilidad y tasa de fallas en R.

h_t <- function(t, lambda){
  set.seed(123)
  dexp(t, rate = lambda)/(1 - pexp(t, rate = lambda))
}

R_t <- function(t, lambda){
  set.seed(123)
  1 - pexp(t, rate = lambda)
}

TMF <- function(lambda){
  1/lambda
}

curve(h_t(x, 4.28*10^-4), xlim = c(0, qexp(0.999, 4.28*10^-4)), ylim = c(0, 2*4.28*10^-4), xlab = 't', ylab = 'h(t)', col = 'blue')
title(main = 'Función de riesgo o tasa de fallas')
legend(8000, 2*4.28*10^-4, c("T ~ exp(4.28E-4)"), lty = 1, col = 'blue')
abline(v = TMF(4.28*10^-4), lty = 2, lwd = 1, col = 'red')

¿Como intrepretaría la función de confiabilidad y tasa de fallas en este caso?

Función de confiabilidad: A medida que pasa el tiempo la confiabilidad o sobrevivencia del funcionamiento de la ampolleta disminuye.
Tasa de fallas: La tasa de fallas es constante, ya que la distribución exponencial no tiene memoria. (El riesgo es cte. a lo largo del tiempo, propiedad de pérdida de memoria).

Obtenga la probabilidad de que la ampolleta funcione 500 horas continuas sin fallar.

R_t(500, 4.28E-4)

## [1] 0.8073484

Obtenga la chance de que la ampolleta falle a las 1000 horas de funcionamiento.

h_t(1000, 4.28E-4)

## [1] 0.000428

Bondad de ajuste

Encontrar la mejor distribución para el conjunto de datos S5, para esto realice todo el proceso del modelado estadistico de datos.

S5 <- c(466.750, 179.8500, 300.6500, 688.3000, 520.1499, 424.6001,                     278.0999, 202.250 , 230.7002, 384.2998, 464.5000, 281.3999,                     387.9502, 601.4502, 805.000 , 330.8496, 613.2500, 426.4004,                     185.5996, 141.2002, 710.2002, 328.799 , 296.5996, 489.8506,                     481.3994, 499.7002, 246.5498, 561.2002, 1028.00 , 481.0498,                     306.8008, 226.9492, 582.2002, 622.5498, 355.1504, 345.200 ,                     542.2998, 489.0508, 477.0488, 229.2012, 316.7500, 534.3984,
       140.900 , 420.4512, 400.7988, 242.5000, 512.7500, 379.2500,                     401.3008, 254.150 , 343.5996, 474.1504, 215.4492, 871.3496,                     354.3008, 130.4492, 479.099 , 548.2500, 266.2500, 888.7500,                     238.1504, 681.7012, 501.7500, 236.949 , 302.6992, 153.8516,                     235.2500, 292.2500, 172.9492, 317.9492, 574.500 , 292.0508,                     194.3008, 137.4492, 537.7500, 186.2500, 213.1504, 289.500 ,                     244.0488, 552.7500, 122.1504, 317.5000, 277.7012, 184.2500,
       329.250 , 550.5000, 152.5977, 346.1016, 212.5508, 610.8008,                     218.8984, 458.148 , 286.9023, 189.8984, 460.0508, 242.6016,                     699.6992, 449.6484, 246.250 , 236.4492,1004.6523, 322.8984,                     945.9023, 727.5977, 626.1016, 323.500 , 666.8008, 445.7500,                     287.3477, 244.5000, 795.3008, 303.2500, 281.949 , 396.6016,                     598.8984, 249.8516, 219.5000, 360.3477, 648.7500, 236.203 ,                     222.0977, 277.6992, 356.9023, 432.1484, 238.1992, 335.7500,
       711.352 , 334.8008, 242.5508, 421.5977, 384.3008, 684.6484,                     635.7500, 382.000 , 248.7500,1109.0508, 166.9492, 224.4023,                     150.8984, 346.4023, 280.000 , 645.1484, 619.8984, 549.4531,                     539.5977, 401.0000, 243.8516, 344.000)

Paso 1: Elección del modelo de probabilidad

# ---------- HISTOGRAMA ---------- #

hist1 <- function(x,
                  boxPlot         = "TRUE",
                  mainTitle       = "Histogram and adjusted boxplot",
                  xLabel          = "data",
                  yLabel          = "density",
                  yRange          = NULL,
                  colourDensity   = "black"){
  
  mat       <- matrix(c(1, 1, 2, 2), 2, 2, byrow = TRUE)
  nf        <- layout(mat, c(0.7, 7.0), c(7.0, 0.7), TRUE)
  layout.show(nf)
  par(mai = c(0, 0, 0, 0), mar = c(5, 5, 8, 5), mgp = c(4, 1, 0))
  minimum   <- min(x) - sd(x)
  maximum   <- max(x) + sd(x)
  axisx     <- seq(minimum, maximum, by = 0.1)
  hist(x,
       freq     = FALSE,
       main     = mainTitle,
       xlab     = xLabel,
       ylab     = yLabel,
       ylim     = yRange,
       cex.main = 1.5,
       las      = 1,
       col      =  "cadetblue2")
  
  rug(x, side = 1)
  
  par(mar = c(1, 5, 0, 5))
  
  if(boxPlot == "TRUE"){
    adjbox(x,
           horizontal = TRUE,
           notch = FALSE,
           col = "#B4EEB4",
           border = "#2F4F4F")
  }
}

hist1(S5, mainTitle = "")

# ---------- ESTADÍSTICAS DESCRIPTIVAS ---------- #

searchMode <- function(x){
  x <- unlist(x)
  x <- x[!is.na(x)]
  u <- unique(x)
  frequencies <- rep(0, length(u))
  for(i in seq(along = u)){count <- 0
  for(j in seq(along = x)){
    if(u[i] == x[j]){
      count <- (count + 1)
    }
  }
  frequencies[i] <- count
  }
  maximumFreq  <- max(frequencies)
  indexMaximum <- which(frequencies == maximumFreq)
  modeResult   <- sort(x = u[indexMaximum], decreasing = FALSE)
  if(length(x) == length(u)){
    modeResult <- maximumFreq <- NA
  }
  results <- list(modalValue  = modeResult, frequencies = maximumFreq)
  return(results)
}


descriptiveSummary <- function(x){
  n            <- length(x)
  skewness     <- (1 / n) * sum(((x - mean(x)) / sd(x)) ^ 3)
  kurtosis     <- ((1 / n) * sum(((x - mean(x)) / sd(x)) ^ 4)) - 3
  cVariation   <- (sd(x) / mean(x))
  modeCalculus <- searchMode(x)$modalValue
  statistics   <- list(mean                 = round(mean(x), 3),
                       median               = round(median(x), 3),
                       mode                 = round(modeCalculus, 3),
                       StandardDeviation    = round(sd(x), 3),
                       CoefficientVariation = round(cVariation * 100, 3),
                       CoefficientSkewness  = round(skewness, 3),
                       CoefficientKurtosis  = round(kurtosis, 3),
                       range                = round(max(x) - min(x), 3),
                       minimum              = round(min(x), 3),
                       maximum              = round(max(x), 3),
                       n                    = length(x))
  return(statistics)
}

summary(S5)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   122.2   244.0   345.7   402.4   523.7  1109.1

descriptiveSummary(S5)

## $mean
## [1] 402.365
## 
## $median
## [1] 345.651
## 
## $mode
## [1] NA
## 
## $StandardDeviation
## [1] 200.447
## 
## $CoefficientVariation
## [1] 49.817
## 
## $CoefficientSkewness
## [1] 1.078
## 
## $CoefficientKurtosis
## [1] 0.982
## 
## $range
## [1] 986.9
## 
## $minimum
## [1] 122.15
## 
## $maximum
## [1] 1109.051
## 
## $n
## [1] 148

Paso 2: Utilizar técnicas de bondad de ajuste

# ---------- FUNCIONES PARA GRÁFICO PP  ---------- #

approx.ksD <- function(n, rho){
  siglevel  = c(0.150, 0.100, 0.050, 0.025, 0.010)
  uppertail = c(0.775, 0.819, 0.895, 0.995, 1.035)
  if(length(uppertail[siglevel == rho]) == 0){
    warning("enter a value of rho = c(0.15, 0.1, 0.05, 0.025, 0.25, 0.01), 
        rho = 0.05 default") 
    rho = 0.05}
  uppertail[siglevel == rho]/(sqrt(n) - 0.01 + 0.85/sqrt(n))
  }

# ---------- PARA DISTRIBUCIÓN WEIBULL ---------- #

kswe <- function(x){
  x  <- sort(x)
  shape.w <- as.numeric(fitdistr(x, "weibull")$estimate[1])
  scale.w <- as.numeric(fitdistr(x, "weibull")$estimate[2])
  v       <- pweibull(x, shape = shape.w, scale = scale.w)
  y       <- qnorm(v)
  y       <- y[is.finite(y) == TRUE]
  u       <- (y - mean(y))/sd(y)
  ks      <- lillie.test(u)  
  return(ks$p.value)
  }

ppwe <- function(x, rho = 0.05,
                 line   = FALSE,
                 xLabel = "Theoretical CDF",
                 yLabel = "Empirical CDF"){
  n       <- length(x)
  shape.w <- as.numeric(fitdistr(x, "weibull")$estimate[1])
  scale.w <- as.numeric(fitdistr(x, "weibull")$estimate[2])
  cdf     <- pweibull(x, shape = shape.w, scale = scale.w)
  y       <- qnorm(cdf)
  y       <- y[is.finite(y) == TRUE]
  u       <- (y - mean(y))/sd(y)
  p         <- pnorm((u - mean(u))/sd(u))
  teoprob <- sort(p)
  k       <- seq(1, n, by = 1)
  empprob <- (k - 0.5) / n
  D       <- approx.ksD(n, rho)  
  plot(teoprob ~ empprob, las = 1,
       xlab = xLabel,
       ylab = yLabel,
       col  = 1,
       xlim = c(0, 1),
       ylim = c(0, 1),
       pch = 20)
  lines(c(0, 1), c(0, 1), lwd = 1.5, lty = 2, col = "gray70")
  
  Dplus <- max(seq(1:n)/n - p)
  Dminus <- max(p - (seq(1:n) - 1)/n)
  K <- max(Dplus, Dminus)
  if (n <= 100) {
    Kd <- K
    nd <- n
  }
  else {
    Kd <- K * ((n/100)^0.49)
    nd <- 100
  }
  
  pvalue <- exp(-7.01256 * Kd^2 * (nd + 2.78019) + 2.99587 * 
                  Kd * sqrt(nd + 2.78019) - 0.122119 + 0.974598/sqrt(nd) + 
                  1.67997/nd)
  logp <- log(rho)
  if (pvalue < 0.10){
    DD <-  polyroot( c( - logp - 0.122119 + 0.974598/sqrt(nd) + 1.67997/nd, 
                        2.99587 * sqrt(nd + 2.78019), 
                        -7.01256 * (nd + 2.78019) ) ) 
    r <- Re(DD)
    DD <- min(r[r>0])
    if (n <= 100) D <- DD
    if (n > 100) D <- DD/(n/100)^0.49 }
  
  if(line){
    lines(c(0, 1), c(D-0.5/n, 1+D-0.5/n), lwd = 2, col = "gray66")
    lines(c(0, 1), c(-D+0.5/n, 1-D+0.5/n), lwd = 2, col = "gray66")
    legend("bottomright", 
           legend = substitute(list("KS p-value" == kspval), 
                               list(kspval=round(kswe(x),4))), bty = 'n')
  }
}

ppwe(S5, line = TRUE)

# ---------- PARA DISTRIBUCIÓN LOG-NORMAL ---------- #

ksln <- function(x){
  x  <- sort(x)
  v         <- plnorm(x, mean(log(x)), sd(log(x)))
  y         <- qnorm(v)
  y         <- y[is.finite(y) == TRUE]
  u         <- (y - mean(y))/sd(y)
  ks        <- lillie.test(u)  
  return(ks$p.value)}

ppln <- function(x, rho = 0.05,
                 line   = FALSE,
                 xLabel = "Theoretical CDF",
                 yLabel = "Empirical CDF"){
  n         <- length(x)
  cdf       <- plnorm(x, mean(log(x)), sd(log(x)))
  y         <- qnorm(cdf)
  y         <- y[is.finite(y) == TRUE]
  u         <- (y - mean(y))/sd(y)
  p           <- pnorm((u - mean(u))/sd(u))
  teoprob   <- sort(p)
  k         <- seq(1, n, by = 1)
  empprob   <- (k - 0.5) / n
  D         <- approx.ksD(n, rho)  
  plot(teoprob ~ empprob, las = 1,
       xlab = xLabel,
       ylab = yLabel,
       col  = 1,
       xlim = c(0, 1),
       ylim = c(0, 1),
       pch = 20)
  lines(c(0, 1), c(0, 1), lwd = 1.5, lty = 2, col = "gray70")
  
  Dplus <- max(seq(1:n)/n - p)
  Dminus <- max(p - (seq(1:n) - 1)/n)
  K <- max(Dplus, Dminus)
  if (n <= 100) {
    Kd <- K
    nd <- n
  }
  else {
    Kd <- K * ((n/100)^0.49)
    nd <- 100
  }
  
  pvalue <- exp(-7.01256 * Kd^2 * (nd + 2.78019) + 2.99587 * 
                  Kd * sqrt(nd + 2.78019) - 0.122119 + 0.974598/sqrt(nd) + 
                  1.67997/nd)
  logp <- log(rho)
  if (pvalue < 0.10){
    DD <-  polyroot( c( - logp - 0.122119 + 0.974598/sqrt(nd) + 1.67997/nd, 
                        2.99587 * sqrt(nd + 2.78019), 
                        -7.01256 * (nd + 2.78019) ) ) 
    r <- Re(DD)
    DD <- min(r[r>0])
    if (n <= 100) D <- DD
    if (n > 100) D <- DD/(n/100)^0.49 }
  
  if(line){
    lines(c(0, 1), c(D-0.5/n, 1+D-0.5/n), lwd = 2, col = "gray66")
    lines(c(0, 1), c(-D+0.5/n, 1-D+0.5/n), lwd = 2, col = "gray66")
    legend("bottomright", 
           legend = substitute(list("KS p-value" == kspval), 
                               list(kspval=round(ksln(x),4))), bty = 'n')
  }
}

ppln(S5, line = TRUE)

# ---------- PARA DISTRIBUCIÓN GAMMA ---------- #

ksga <- function(x){
  x  <- sort(x)
  a    <- as.numeric(fitdistr(x, "gamma")$estimate[1])
  b    <- as.numeric(fitdistr(x, "gamma")$estimate[2])    
  v    <- pgamma(x, a, b)
  y    <- qnorm(v)
  y    <- y[is.finite(y) == TRUE]
  u    <- (y - mean(y))/sd(y)
  ks   <- lillie.test(u)  
  return(ks$p.value)}

ppga <- function(x, rho = 0.05,
                 line   = FALSE,
                 xLabel = "Theoretical CDF",
                 yLabel = "Empirical CDF"){
  n    <- length(x)
  a    <- as.numeric(fitdistr(x, "gamma")$estimate[1])
  b    <- as.numeric(fitdistr(x, "gamma")$estimate[2])    
  cdf  <- pgamma(x, a, b)
  y    <- qnorm(cdf)
  y    <- y[is.finite(y) == TRUE]
  u    <- (y - mean(y))/sd(y)
  p           <- pnorm((u - mean(u))/sd(u))
  teoprob   <- sort(p)
  k         <- seq(1, n, by = 1)
  empprob   <- (k - 0.5) / n
  D         <- approx.ksD(n, rho)  
  plot(teoprob ~ empprob, las = 1,
       xlab = xLabel,
       ylab = yLabel,
       col  = 1,
       xlim = c(0, 1),
       ylim = c(0, 1),
       pch = 20)
  lines(c(0, 1), c(0, 1), lwd = 1.5, lty = 2, col = "gray70")
  
  Dplus <- max(seq(1:n)/n - p)
  Dminus <- max(p - (seq(1:n) - 1)/n)
  K <- max(Dplus, Dminus)
  if (n <= 100) {
    Kd <- K
    nd <- n
  }
  else {
    Kd <- K * ((n/100)^0.49)
    nd <- 100
  }
  
  pvalue <- exp(-7.01256 * Kd^2 * (nd + 2.78019) + 2.99587 * 
                  Kd * sqrt(nd + 2.78019) - 0.122119 + 0.974598/sqrt(nd) + 
                  1.67997/nd)
  logp <- log(rho)
  if (pvalue < 0.10){
    DD <-  polyroot( c( - logp - 0.122119 + 0.974598/sqrt(nd) + 1.67997/nd, 
                        2.99587 * sqrt(nd + 2.78019), 
                        -7.01256 * (nd + 2.78019) ) ) 
    r <- Re(DD)
    DD <- min(r[r>0])
    if (n <= 100) D <- DD
    if (n > 100) D <- DD/(n/100)^0.49 }
  
  if(line){
    lines(c(0, 1), c(D-0.5/n, 1+D-0.5/n), lwd = 2, col = "gray66")
    lines(c(0, 1), c(-D+0.5/n, 1-D+0.5/n), lwd = 2, col = "gray66")
    legend("bottomright", 
           legend = substitute(list("KS p-value" == kspval), 
                               list(kspval=round(ksga(x),4))), bty = 'n')
  }
}

ppga(S5, line = TRUE)

# ---------- PARA DISTRIBUCIÓN GAUSSIANA INVERSA ----------

digt <- function(x,
                 mu     = 1.0,
                 lambda = 1.0,
                 nu     = 1.0,
                 kernel = "normal",
                 log    = FALSE){
  
  quantity <- sqrt(lambda / mu) * (sqrt(x / mu) - sqrt(mu / x))
  jacobian <- sqrt(lambda) / sqrt(x ^ 3)
  density  <- switch(kernel,
                     "normal"   = dnorm(x = quantity, 0, 1) * jacobian,
                     "t"        = dt(x = quantity, df = nu) * jacobian,
                     "laplace"  = dlaplace(x = quantity)    * jacobian,
                     "logistic" = dlogis(x = quantity)      * jacobian
  )
  if(log == TRUE){
    density <- log(density)
  }
  return(density)
}

pigt <- function(q,
                 mu         = 1.0,
                 lambda     = 1.0,
                 nu         = 1.0,
                 kernel     = "normal",
                 lower.tail = TRUE,
                 log.p      = FALSE){    
  cumulative  <- function(value, m, l, n, k){
    integration <- integrate(digt,
                             lower  = 0,
                             upper  = value,
                             mu     = m,
                             lambda = l,
                             nu     = n,
                             kernel = k)$value
    return(integration)
  }
  cdf <- switch(kernel,
                "normal"   = mapply(cumulative, q, l = lambda, m = mu, 
                                    n = 1.0, k = "normal"),
                "t"        = mapply(cumulative, q, l = lambda, m = mu, 
                                    n = nu,  k = "t"),
                "laplace"  = mapply(cumulative, q, l = lambda, m = mu, 
                                    n = 1.0, k = "laplace"),
                "logistic" = mapply(cumulative, q, l = lambda, m = mu, 
                                    n = 1.0, k = "logistic")
  )
  if(lower.tail == FALSE){
    cdf <- (1 - cdf)
  }
  if(log.p == TRUE){
    cdf <- log(cdf)
  }
  return(cdf)
}

mleigt <- function(x, kernel = "normal"){
  
  initials    <- mleig(x)
  thetaStart   <- c(initials$mu, initials$lambda)
  
  if(kernel == "normal"){
    
    logLik <- function(theta, x){
      
      sum(-digt(x, mu = theta[1], lambda = theta[2], nu = 1.0,
                kernel = "normal", log = TRUE))
    }
  }
  
  if(kernel == "logistic"){
    
    logLik <- function(theta, x){
      
      sum(-digt(x, mu = theta[1], lambda = theta[2], nu = 1.0,
                kernel = "logistic", log = TRUE))
    }
  }
  
  if(kernel == "laplace"){
    logLik <- function(theta, x){
      
      sum(-digt(x, mu = theta[1], lambda = theta[2], nu = 1.0,
                kernel = "laplace", log = TRUE))
    }
  }
  maximization <- nlm(f = logLik, p = thetaStart, x = x)
  estimates    <- maximization$estimate
  results      <- list(muEstimate     = estimates[1],
                       lambdaEstimate = estimates[2],
                       logLikelihood   = logLik(estimates, x))
  return(results)
}


mleig <- function(x){
  
  l         <- length(x) / sum((1 / x) - (1 / mean(x)))
  estimates <- list(muEstimate     = mean(x), 
                    lambdaEstimate = l)
  return(estimates)
}


ksig <- function(x){
  x  <- sort(x)
  estimates <- mleigt(x, kernel = "normal")
  a         <- estimates$muEstimate
  b         <- estimates$lambdaEstimate
  v         <- pigt(x, mu = a, lambda = b, nu = 1.0, kernel = "normal")
  y         <- qnorm(v)
  y         <- y[is.finite(y) == TRUE]
  u         <- (y - mean(y))/sd(y)
  ks        <- lillie.test(u)  
  return(ks$p.value)}

ppig <- function(x, rho = 0.05,
                 line   = FALSE,
                 xLabel = "Theoretical CDF",
                 yLabel = "Empirical CDF"){
  n         <- length(x)
  estimates <- mleigt(x, kernel = "normal")
  a         <- estimates$muEstimate
  b         <- estimates$lambdaEstimate
  cdf       <- pigt(x, mu = a, lambda = b, nu = 1.0, kernel = "normal")
  y         <- qnorm(cdf)
  y         <- y[is.finite(y) == TRUE]
  u         <- (y - mean(y))/sd(y)
  p           <- pnorm((u - mean(u))/sd(u))
  teoprob   <- sort(p)
  k         <- seq(1, n, by = 1)
  empprob   <- (k - 0.5) / n
  D         <- approx.ksD(n, rho)  
  plot(teoprob ~ empprob, las = 1,
       xlab = xLabel,
       ylab = yLabel,
       col  = 1,
       xlim = c(0, 1),
       ylim = c(0, 1),
       pch = 20)
  lines(c(0, 1), c(0, 1), lwd = 1.5, lty = 2, col = "gray70")
  
  Dplus <- max(seq(1:n)/n - p)
  Dminus <- max(p - (seq(1:n) - 1)/n)
  K <- max(Dplus, Dminus)
  if (n <= 100) {
    Kd <- K
    nd <- n
  }
  else {
    Kd <- K * ((n/100)^0.49)
    nd <- 100
  }
  
  pvalue <- exp(-7.01256 * Kd^2 * (nd + 2.78019) + 2.99587 * 
                  Kd * sqrt(nd + 2.78019) - 0.122119 + 0.974598/sqrt(nd) + 
                  1.67997/nd)
  logp <- log(rho)
  if (pvalue < 0.10){
    DD <-  polyroot( c( - logp - 0.122119 + 0.974598/sqrt(nd) + 1.67997/nd, 
                        2.99587 * sqrt(nd + 2.78019), 
                        -7.01256 * (nd + 2.78019) ) ) 
    r <- Re(DD)
    DD <- min(r[r>0])
    if (n <= 100) D <- DD
    if (n > 100) D <- DD/(n/100)^0.49 }
  
  if(line){
    lines(c(0, 1), c(D-0.5/n, 1+D-0.5/n), lwd = 2, col = "gray66")
    lines(c(0, 1), c(-D+0.5/n, 1-D+0.5/n), lwd = 2, col = "gray66")
    legend("bottomright", 
           legend = substitute(list("KS p-value" == kspval), 
                               list(kspval=round(ksig(x),4))), bty = 'n')
  }
}

ppig(S5, line = TRUE)

Paso 3: Utilizar técnicas de bondad de ajuste

# ---------- PARA DISTRIBUCIÓN WEIBULL ---------- #

kswe1 <- function(x){
  x  <- sort(x)
  shape.w <- as.numeric(fitdistr(x, "weibull")$estimate[1])
  scale.w <- as.numeric(fitdistr(x, "weibull")$estimate[2])
  v       <- pweibull(x, shape = shape.w, scale = scale.w)
  y       <- qnorm(v)
  y       <- y[is.finite(y) == TRUE]
  u       <- (y - mean(y))/sd(y)
  ks      <- lillie.test(u)  
  return(ks)
  }

kswe1(S5)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  u
## D = 0.097883, p-value = 0.001436

# ---------- PARA DISTRIBUCION LOG-NORMAL ---------- #

ksln1 <- function(x){
  x  <- sort(x)
  v         <- plnorm(x, mean(log(x)), sd(log(x)))
  y         <- qnorm(v)
  y         <- y[is.finite(y) == TRUE]
  u         <- (y - mean(y))/sd(y)
  ks        <- lillie.test(u)  
  return(ks)
  }

ksln1(S5)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  u
## D = 0.062046, p-value = 0.1765

# ---------- PARA DISTRIBUCIÓN GAMMA ---------- #

ksga1 <- function(x){
  x  <- sort(x)
  a    <- as.numeric(fitdistr(x, "gamma")$estimate[1])
  b    <- as.numeric(fitdistr(x, "gamma")$estimate[2])    
  v    <- pgamma(x, a, b)
  y    <- qnorm(v)
  y    <- y[is.finite(y) == TRUE]
  u    <- (y - mean(y))/sd(y)
  ks   <- lillie.test(u)  
  return(ks)
}

ksga1(S5)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  u
## D = 0.071301, p-value = 0.06302

# ---- PARA DISTRIBUCIÓN GAUSSIANA INVERSA ---------- #

ksig1 <- function(x){
  x  <- sort(x)
  estimates <- mleigt(x, kernel = "normal")
  a         <- estimates$muEstimate
  b         <- estimates$lambdaEstimate
  v         <- pigt(x, mu = a, lambda = b, nu = 1.0, kernel = "normal")
  y         <- qnorm(v)
  y         <- y[is.finite(y) == TRUE]
  u         <- (y - mean(y))/sd(y)
  ks        <- lillie.test(u)  
  return(ks)
  }

ksig1(S5)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  u
## D = 0.057427, p-value = 0.272

Paso 4: Selección del modelo de probabilidad

Criterio de información Akaike (para variedad de modelos)

comparacion_aic <- AIC(mlweibull(S5),
                       mllnorm(S5),
                       mlgamma(S5),
                       mlinvgauss(S5)) 
comparacion_aic %>% 
  rownames_to_column(var = "distribucion") %>% 
  arrange(AIC)

##     distribucion df      AIC
## 1 mlinvgauss(S5)  2 1947.814
## 2    mllnorm(S5)  2 1949.340
## 3    mlgamma(S5)  2 1954.401
## 4  mlweibull(S5)  2 1968.234

Histograma de los datos y modelo Gaussiano Inverso

datos <- as.data.frame(S5)
hist(datos$S5,
     main = "",
     freq = FALSE,
     xlab = "S5",
     ylim = c(0,0.0030))
lines(mlinvgauss(datos$S5), lwd = 2, lty = 1, col = "blue")
legend(x = 15000, y = 0.0001, legend = c("lnorm", "invgauss"),
       col = c("blue", "red"), lty = 1:2)
rug(datos$S5)

Estimación de parámetros por máxima verosimilitud

mlinvgauss(S5)

## Maximum likelihood estimates for the Inverse Gaussian model 
##   mean   shape  
##  402.4  1552.6

Transformaciones de datos

nicotina <-c(0.755, 0.77 , 0.775, 0.782, 0.787, 0.793, 0.797, 0.802, 0.806,                  0.814, 0.756, 0.77 , 0.776, 0.782, 0.788, 0.793, 0.798, 0.802,                  0.807, 0.815, 0.76 , 0.77 , 0.777, 0.782, 0.789, 0.793, 0.799,                  0.803, 0.807, 0.815, 0.763, 0.771, 0.778, 0.782,   0.789, 0.795,                    0.799, 0.803, 0.807, 0.817, 0.764, 0.771, 0.778, 0.782,    0.79,                    0.795, 0.8  , 0.803, 0.808, 0.817, 0.765, 0.772,   0.779, 0.783,                     0.79, 0.795, 0.801,   0.804, 0.808,    0.82, 0.767, 0.772, 0.779,                  0.783, 0.79 , 0.795, 0.801, 0.804, 0.808, 0.823, 0.768, 0.773,                   0.78, 0.785, 0.791,   0.795, 0.801,   0.804, 0.809,   0.823, 0.768,                  0.774,    0.78, 0.786, 0.792, 0.796, 0.802, 0.804, 0.81 , 0.824,
             0.768, 0.774, 0.781, 0.787, 0.792, 0.797, 0.802, 0.805, 0.812,                  0.826)

hist(nicotina, freq = FALSE, main = "", ylim = c(0, 31), xlab = "nicotina", col = 4, border = "blue1")
curve(dnorm(x, mean(nicotina), sd(nicotina)), add = T, type = 'l', xlab = 'x', ylab = 'f(x)', lty = 3, col = 2, lwd = 2)
legend(0.8, 32, c("Z ~ Normal"), lty = 3, col = 2, lwd = 2)

Gráficos de bondad de ajuste

qqnorm(nicotina)
qqline(nicotina, col = 2)

Test de bondad de ajuste para normalidad

shapiro.test(nicotina)

## 
##  Shapiro-Wilk normality test
## 
## data:  nicotina
## W = 0.98414, p-value = 0.275

lillie.test(nicotina)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  nicotina
## D = 0.070681, p-value = 0.2527

cvm.test(nicotina)

## 
##  Cramer-von Mises normality test
## 
## data:  nicotina
## W = 0.079197, p-value = 0.2098

TRANSFORMACIONES

Datos

w <- rexp(100, 1)
hist(w, main = "", col = 2)

shapiro.test(w)

## 
##  Shapiro-Wilk normality test
## 
## data:  w
## W = 0.76107, p-value = 1.856e-11

lillie.test(w)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  w
## D = 0.15807, p-value = 1.821e-06

cvm.test(w)

## 
##  Cramer-von Mises normality test
## 
## data:  w
## W = 0.67038, p-value = 9.839e-08

T. Logarítmica

transf1 <- log(w)
shapiro.test(transf1)

## 
##  Shapiro-Wilk normality test
## 
## data:  transf1
## W = 0.91536, p-value = 8.051e-06

T. Raíz Cuadrada

transf2 <- sqrt(w)
shapiro.test(transf2)

## 
##  Shapiro-Wilk normality test
## 
## data:  transf2
## W = 0.9625, p-value = 0.006083

T. Box-Cox

lbda   <- BoxCox.lambda(w, method = "loglik")
transf3 <- BoxCox(w, lbda)
shapiro.test(transf3)

## 
##  Shapiro-Wilk normality test
## 
## data:  transf3
## W = 0.98491, p-value = 0.313

Carta Xbarra y R

muestra <- rep(1:40, each = 5, len = 200)
muestra

##   [1]  1  1  1  1  1  2  2  2  2  2  3  3  3  3  3  4  4  4  4  4  5  5  5  5  5
##  [26]  6  6  6  6  6  7  7  7  7  7  8  8  8  8  8  9  9  9  9  9 10 10 10 10 10
##  [51] 11 11 11 11 11 12 12 12 12 12 13 13 13 13 13 14 14 14 14 14 15 15 15 15 15
##  [76] 16 16 16 16 16 17 17 17 17 17 18 18 18 18 18 19 19 19 19 19 20 20 20 20 20
## [101] 21 21 21 21 21 22 22 22 22 22 23 23 23 23 23 24 24 24 24 24 25 25 25 25 25
## [126] 26 26 26 26 26 27 27 27 27 27 28 28 28 28 28 29 29 29 29 29 30 30 30 30 30
## [151] 31 31 31 31 31 32 32 32 32 32 33 33 33 33 33 34 34 34 34 34 35 35 35 35 35
## [176] 36 36 36 36 36 37 37 37 37 37 38 38 38 38 38 39 39 39 39 39 40 40 40 40 40

diametros <- c(74.030, 74.002, 74.019, 73.992, 74.008, 73.995, 73.992,                         74.001, 74.011, 74.004, 73.988, 74.024, 74.021, 74.005,                         74.002, 74.002, 73.996, 73.993, 74.015, 74.009, 73.992,                         74.007, 74.015, 73.989, 74.014, 74.009, 73.994, 73.997,                         73.985, 73.993, 73.995, 74.006, 73.994, 74.000, 74.005,                         73.985, 74.003, 73.993, 74.015, 73.988, 74.008, 73.995,                         74.009, 74.005, 74.004, 73.998, 74.000, 73.990, 74.007,                         73.995, 73.994, 73.998, 73.994, 73.995, 73.990, 74.004,                         74.000, 74.007, 74.000, 73.996, 73.983, 74.002, 73.998,                         73.997, 74.012, 74.006, 73.967, 73.994, 74.000, 73.984,                         74.012, 74.014, 73.998, 73.999, 74.007, 74.000, 73.984,                         74.005, 73.998, 73.996, 73.994, 74.012, 73.986, 74.005,                         74.007, 74.006, 74.010, 74.018, 74.003, 74.000, 73.984,                         74.002, 74.003, 74.005, 73.997, 74.000, 74.010, 74.013,                         74.020, 74.003, 73.988, 74.001, 74.009, 74.005, 73.996,                         74.004, 73.999, 73.990, 74.006, 74.009, 74.010, 73.989,                         73.990, 74.009, 74.014, 74.015, 74.008, 73.993, 74.000,
               74.010, 73.982, 73.984, 73.995, 74.017, 74.013, 74.012,                         74.015, 74.030, 73.986, 74.000, 73.995, 74.010, 73.990,                         74.015, 74.001, 73.987, 73.999, 73.985, 74.000, 73.990,                         74.008, 74.010, 74.003, 73.991, 74.006, 74.003, 74.000,                         74.001, 73.986, 73.997, 73.994, 74.003, 74.015, 74.020,                         74.004, 74.008, 74.002, 74.018, 73.995, 74.005, 74.001,                         74.004, 73.990, 73.996, 73.998, 74.015, 74.000, 74.016,                         74.025, 74.000, 74.030, 74.005, 74.000, 74.016, 74.012,                         74.001, 73.990, 73.995, 74.010, 74.024, 74.015, 74.020,                         74.024, 74.005, 74.019, 74.035, 74.010, 74.012, 74.015,                         74.026, 74.017, 74.013, 74.036, 74.025, 74.026, 74.010,
               74.005, 74.029, 74.000, 74.020)

dia <- qcc.groups(diametros, muestra)
dia

##      [,1]   [,2]   [,3]   [,4]   [,5]
## 1  74.030 74.002 74.019 73.992 74.008
## 2  73.995 73.992 74.001 74.011 74.004
## 3  73.988 74.024 74.021 74.005 74.002
## 4  74.002 73.996 73.993 74.015 74.009
## 5  73.992 74.007 74.015 73.989 74.014
## 6  74.009 73.994 73.997 73.985 73.993
## 7  73.995 74.006 73.994 74.000 74.005
## 8  73.985 74.003 73.993 74.015 73.988
## 9  74.008 73.995 74.009 74.005 74.004
## 10 73.998 74.000 73.990 74.007 73.995
## 11 73.994 73.998 73.994 73.995 73.990
## 12 74.004 74.000 74.007 74.000 73.996
## 13 73.983 74.002 73.998 73.997 74.012
## 14 74.006 73.967 73.994 74.000 73.984
## 15 74.012 74.014 73.998 73.999 74.007
## 16 74.000 73.984 74.005 73.998 73.996
## 17 73.994 74.012 73.986 74.005 74.007
## 18 74.006 74.010 74.018 74.003 74.000
## 19 73.984 74.002 74.003 74.005 73.997
## 20 74.000 74.010 74.013 74.020 74.003
## 21 73.988 74.001 74.009 74.005 73.996
## 22 74.004 73.999 73.990 74.006 74.009
## 23 74.010 73.989 73.990 74.009 74.014
## 24 74.015 74.008 73.993 74.000 74.010
## 25 73.982 73.984 73.995 74.017 74.013
## 26 74.012 74.015 74.030 73.986 74.000
## 27 73.995 74.010 73.990 74.015 74.001
## 28 73.987 73.999 73.985 74.000 73.990
## 29 74.008 74.010 74.003 73.991 74.006
## 30 74.003 74.000 74.001 73.986 73.997
## 31 73.994 74.003 74.015 74.020 74.004
## 32 74.008 74.002 74.018 73.995 74.005
## 33 74.001 74.004 73.990 73.996 73.998
## 34 74.015 74.000 74.016 74.025 74.000
## 35 74.030 74.005 74.000 74.016 74.012
## 36 74.001 73.990 73.995 74.010 74.024
## 37 74.015 74.020 74.024 74.005 74.019
## 38 74.035 74.010 74.012 74.015 74.026
## 39 74.017 74.013 74.036 74.025 74.026
## 40 74.010 74.005 74.029 74.000 74.020

Carta R

xrango <- qcc(dia[1:25,], type = "R")

Carta Xbarra

xbarra <- qcc(dia[1:25,], type = "xbar")

Capacidad del proceso

process.capability(xbarra, spec.limits = c(73.95, 74.05), confidence.level = 0.95)

## 
## Process Capability Analysis
## 
## Call:
## process.capability(object = xbarra, spec.limits = c(73.95, 74.05),     confidence.level = 0.95)
## 
## Number of obs = 125          Target = 74
##        Center = 74              LSL = 73.95
##        StdDev = 0.009785        USL = 74.05
## 
## Capability indices:
## 
##       Value   2.5%  97.5%
## Cp    1.703  1.491  1.915
## Cp_l  1.743  1.555  1.932
## Cp_u  1.663  1.483  1.844
## Cp_k  1.663  1.448  1.878
## Cpm   1.691  1.480  1.902
## 
## Exp<LSL 0%    Obs<LSL 0%
## Exp>USL 0%    Obs>USL 0%

Carta de obs. individuales, medias móviles y rangos móviles

cloro <-c(49.1336,
          48.7158,
          47.4405,
          48.2146,
          50.3580,
          51.1769,
          51.6364,
          52.0311,
          50.5666,
          51.3640,
          49.6251,
          47.2201,
          49.1065,
          48.7078,
          51.2428,
          51.7740,
          51.6988,
          48.7881,
          51.7291,
          50.2267,
          50.2299,
          50.3402,
          50.2523,
          51.4223)

Carta de obs. individuales

uno <- qcc(cloro, type = "xbar.one")

Carta Rangos móviles

rmovil<-c(0.4178,
          1.2753,
          0.7741,
          2.1434,
          0.8189,
          0.4595,
          0.3947,
          1.4645,
          0.7974,
          1.7389,
          2.4050,
          1.8864,
          0.3987,
          2.5350,
          0.5312,
          0.0752,
          2.9107,
          2.9410,
          1.5024,
          0.0032,
          0.1103,
          0.0879,
          1.1700)

mmovil  <- as.vector(rollmean(cloro, k = 2, fill = NA))
mmovil2 <- mmovil[-length(cloro)]

LCS = 3.267*mean(rmovil)
LCI = 0*mean(rmovil)

plot(rmovil, type = "o", cex = 1, pch = 20, ylim = c(LCI, LCS+1), main = "", xlab = "subgrupo", ylab = "rango móvil")
abline(h = LCS, col = "blue", lty = 2, lwd = 2)
text(length(mmovil)-2, LCS + 0.1, "LCS", lwd = 2, col = "blue", lwd = 2, adj = c(0, -.1))
abline(h = mean(rmovil), col = "blue", lwd = 2)
text(length(mmovil)-1.5, mean(rmovil)+0.1, "LC", col = "blue", lwd = 2,  adj = c(0, -.1))
abline(h = LCI, col = "blue", lty = 2, lwd = 2)
text(length(mmovil)-1.7, LCI + 0.1, "LCI", lwd = 2, col = "blue", adj = c(0, -.1))

Carta Medias móviles

mmovil  <- as.vector(rollmean(cloro, k = 2, fill = NA))
mmovil2 <- mmovil[-length(cloro)]

LCS = mean(mmovil2) + 1.880*mean(rmovil)
LCI = mean(mmovil2) - 1.880*mean(rmovil) 

plot(mmovil2, type = "o", cex = 1, pch = 20, ylim = c(LCI-1, LCS+1), main = "", xlab = "subgrupo", ylab = "media móvil")
abline(h = mean(mmovil2) + 1.880*mean(rmovil), col = "blue", lty = 2, lwd = 2)
text(length(mmovil2)-1, LCS + 0.1, "LCS", lwd = 2, col = "blue", lwd = 2, adj = c(0, -.1))
abline(h = mean(mmovil2),col = "blue", lwd = 2)
text(length(mmovil2)-0.5, mean(mmovil2)+0.1, "LC", col = "blue", lwd = 2,  adj = c(0, -.1))
abline(h = LCI, col = "blue", lty = 2, lwd = 2)
text(length(mmovil2)-1, LCI + 0.1, "LCI", lwd = 2, col = "blue", adj = c(0, -.1))

Carta p

nn    <- c(50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 48,                 50, 50, 49, 50, 48, 48, 50, 50, 50, 50, 50, 50, 50, 50)
disc  <- c(12, 15,  8, 10,  4,  7, 16,  9, 14, 10,  5,  6, 17, 12, 22, 8,                  10,  5, 13, 11, 20, 18, 24, 15,  9, 12,  7, 13,  9, 6)
grap <- qcc(disc, type = "p", sizes = nn, rules = shewhart.rules)

La Carta p nos permite identificar proporción de individuos defectuosos en el proceso. Supuesto distribucional \(X_{i} \sim Binomial (n_{i}, p_{i})\)

Carta np

nn2   <- rep(50, 30)
granp <- qcc(disc, type = "np", sizes = nn2, rules = shewhart.rules)

La Carta np nos permite identificar número de individuos defectuosos en el proceso. Supuesto distribucional \(X \sim Binomial (n, p)\)