Pruebas de Hipótesis

En el presente análisis se prueba cuál de las tres alternativas de mejora diseñadas por el científico de datos se debe implementar, comparando el tiempo medio de cada alternativa con el tiempo medio del sistema actual.

# Load the failure-time data for the machines of a cookie-manufacturing
# process. Each vector holds 30 observations: one vector per improvement
# alternative (classification tree, neural network, regression) plus the
# current system.

# Classification tree
arbol <- c(
  23.81, 22.13, 22.64, 21.69, 23.58, 22.14, 18.73, 21.59, 20.36, 20.53,
  20.11, 20.34, 19.19, 22.92, 18.65, 20.6, 19.83, 20.09, 19.43, 22.06,
  21.15, 19.26, 18.08, 20.24, 18.75, 20.69, 21.62, 23.69, 23.93, 23.19
)

# Neural network
redn <- c(
  23.24, 20.08, 18.01, 23.28, 19.23, 21.22, 21.47, 20.6, 21.11, 21.27,
  21.03, 17.34, 22.8, 21.85, 17.85, 23.15, 19.57, 19.56, 20.79, 18.04,
  20.95, 21.83, 18.17, 22.66, 18.29, 18.89, 19.49, 19.19, 26.47, 25.25
)

# Regression
regresion <- c(
  16.13, 17.84, 18.28, 15.61, 17.62, 16.12, 17.29, 16.13, 16.64, 15.03,
  18.16, 16.82, 17.44, 16.76, 17.26, 15.55, 17.49, 18.42, 17.54, 17.13,
  15.5, 16.8, 18.47, 18.42, 18.43, 15.56, 16.03, 15.39, 15.12, 17.77
)

# Current system
actual <- c(
  17.09, 15.77, 18.45, 16.55, 22.23, 22.11, 18.26, 18.04, 19.66, 19.76,
  18.74, 19.02, 18.54, 16.7, 17.57, 19.89, 19.06, 18.7, 19.39, 19.68,
  19.2, 16.85, 19.91, 19.82, 18.08, 19.38, 20.3, 21.6, 23.39, 19.33
)

# One column per method; row i of each column is the i-th observation of
# the corresponding vector.
data2 <- data.frame(
  ÁrbolC = arbol,
  Red_N = redn,
  Regresion = regresion,
  SActual = actual
)

# Numeric summary: mean, standard deviation, IQR and the 0/25/50/75/100 %
# quantiles of every column of data2.
# numSummary() is provided by the RcmdrMisc package; the call is
# namespace-qualified so it also works when that package has not been
# attached with library().
RcmdrMisc::numSummary(
  data2[, c("ÁrbolC", "Red_N", "Regresion", "SActual"), drop = FALSE],
  statistics = c("mean", "sd", "IQR", "quantiles"),
  quantiles = c(0, 0.25, 0.5, 0.75, 1)
)
##               mean       sd    IQR    0%     25%    50%     75%  100%  n
## ÁrbolC    21.03400 1.705609 2.2425 18.08 19.8950 20.645 22.1375 23.93 30
## Red_N     20.75600 2.224344 2.6450 17.34 19.2000 20.870 21.8450 26.47 30
## Regresion 16.89167 1.103312 1.6800 15.03 16.0525 16.975 17.7325 18.47 30
## SActual   19.10233 1.729648 1.6800 15.77 18.1250 19.130 19.8050 23.39 30
# The mean time of the current system (SActual) is 19.10233; this value is
# used as the reference mean (mu) in the hypothesis tests below.

# Hypothesis tests for the classification tree (ÁrbolC)
# One-sample t-tests of the ÁrbolC mean against 19.10233, the sample mean of
# the current system (SActual), at a 95 % confidence level.
# NOTE(review): mu is itself a sample estimate used as a fixed constant; a
# two-sample test against data2$SActual would also account for its sampling
# variability. Confirm the intended design.

# Lower-tailed test
with(
  data2,
  t.test(ÁrbolC, alternative = "less", mu = 19.10233, conf.level = 0.95)
)
## 
##  One Sample t-test
## 
## data:  ÁrbolC
## t = 6.2032, df = 29, p-value = 1
## alternative hypothesis: true mean is less than 19.10233
## 95 percent confidence interval:
##      -Inf 21.56311
## sample estimates:
## mean of x 
##    21.034
# H0: mu = 19.10233
# H1: mu < 19.10233
# p-value = 1 > 0.05, so H0 is not rejected: there is no evidence that the
# mean time is below 19.10.

# Upper-tailed test
with(
  data2,
  t.test(ÁrbolC, alternative = "greater", mu = 19.10233, conf.level = 0.95)
)
## 
##  One Sample t-test
## 
## data:  ÁrbolC
## t = 6.2032, df = 29, p-value = 4.568e-07
## alternative hypothesis: true mean is greater than 19.10233
## 95 percent confidence interval:
##  20.50489      Inf
## sample estimates:
## mean of x 
##    21.034
# H0: mu = 19.10233
# H1: mu > 19.10233
# p-value = 4.568e-07 < 0.05, so H0 is rejected in favour of H1: the mean
# time is greater than 19.10.


# Hypothesis tests for the neural network (Red_N)
# One-sample t-tests of the Red_N mean against 19.10233, the sample mean of
# the current system (SActual), at a 95 % confidence level.
# NOTE(review): mu is itself a sample estimate used as a fixed constant; a
# two-sample test against data2$SActual would also account for its sampling
# variability. Confirm the intended design.

# Lower-tailed test
with(
  data2,
  t.test(Red_N, alternative = "less", mu = 19.10233, conf.level = 0.95)
)
## 
##  One Sample t-test
## 
## data:  Red_N
## t = 4.072, df = 29, p-value = 0.9998
## alternative hypothesis: true mean is less than 19.10233
## 95 percent confidence interval:
##      -Inf 21.44603
## sample estimates:
## mean of x 
##    20.756
# H0: mu = 19.10233
# H1: mu < 19.10233
# p-value = 0.9998 > 0.05, so H0 is not rejected: there is no evidence that
# the mean time is below 19.10.

# Upper-tailed test
with(
  data2,
  t.test(Red_N, alternative = "greater", mu = 19.10233, conf.level = 0.95)
)
## 
##  One Sample t-test
## 
## data:  Red_N
## t = 4.072, df = 29, p-value = 0.0001645
## alternative hypothesis: true mean is greater than 19.10233
## 95 percent confidence interval:
##  20.06597      Inf
## sample estimates:
## mean of x 
##    20.756
# H0: mu = 19.10233
# H1: mu > 19.10233
# p-value = 0.0001645 < 0.05, so H0 is rejected in favour of H1: the mean
# time is greater than 19.10.

# Hypothesis tests for the regression method (Regresion)
# One-sample t-tests of the Regresion mean against 19.10233, the sample mean
# of the current system (SActual), at a 95 % confidence level.
# NOTE(review): mu is itself a sample estimate used as a fixed constant; a
# two-sample test against data2$SActual would also account for its sampling
# variability. Confirm the intended design.

# Lower-tailed test
with(
  data2,
  t.test(Regresion, alternative = "less", mu = 19.10233, conf.level = 0.95)
)
## 
##  One Sample t-test
## 
## data:  Regresion
## t = -10.975, df = 29, p-value = 3.842e-12
## alternative hypothesis: true mean is less than 19.10233
## 95 percent confidence interval:
##      -Inf 17.23393
## sample estimates:
## mean of x 
##  16.89167
# H0: mu = 19.10233
# H1: mu < 19.10233
# p-value = 3.842e-12 < 0.05, so H0 is rejected in favour of H1: the mean
# time is less than 19.10.

# Upper-tailed test
with(
  data2,
  t.test(Regresion, alternative = "greater", mu = 19.10233, conf.level = 0.95)
)
## 
##  One Sample t-test
## 
## data:  Regresion
## t = -10.975, df = 29, p-value = 1
## alternative hypothesis: true mean is greater than 19.10233
## 95 percent confidence interval:
##  16.5494     Inf
## sample estimates:
## mean of x 
##  16.89167
# H0: mu = 19.10233
# H1: mu > 19.10233
# p-value = 1 > 0.05, so H0 is not rejected: there is no evidence that the
# mean time is above 19.10.

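Conclusión: El científico debería implementar el método de Regresión, ya que es la única alternativa cuyo tiempo promedio (16.89) es significativamente menor que el del sistema actual (19.10; valor p = 3.842e-12). En cambio, el Árbol de Clasificación (21.03) y la Red Neuronal (20.76) presentan tiempos promedio significativamente mayores que el actual, por lo que no representan una mejora.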