diseño

ejercicio 1

# Exercise 1 data: 12 responses observed at four pressure levels.
# The responses are listed cycling through the levels in order
# (p_0.05, p_0.1, p_0.2, p_0.25, p_0.05, ...).
t_factura <- c(
  0.88, 0.92, 0.95, 0.98,
  0.87, 0.94, 0.94, 0.99,
  0.89, 0.93, 0.95, 0.98
)
# Build the factor at the full length of the response instead of relying on
# data.frame() silently recycling a length-4 factor.
Presion <- gl(
  4, 1, length(t_factura),
  labels = paste0("p_", c(0.05, 0.10, 0.20, 0.25))
)
df1 <- data.frame(Presion, t_factura)
df1

##    Presion t_factura
## 1   p_0.05      0.88
## 2    p_0.1      0.92
## 3    p_0.2      0.95
## 4   p_0.25      0.98
## 5   p_0.05      0.87
## 6    p_0.1      0.94
## 7    p_0.2      0.94
## 8   p_0.25      0.99
## 9   p_0.05      0.89
## 10   p_0.1      0.93
## 11   p_0.2      0.95
## 12  p_0.25      0.98

library(collapsibleTree)

## Warning: package 'collapsibleTree' was built under R version 4.2.2

# Collapsible tree of df1: pressure level first, then the observed responses.
collapsibleTreeSummary(
  df1,
  c("Presion", "t_factura"),
  collapsed = FALSE
)

library(ggplot2)

# Bar chart of the mean response at each pressure level.
ggplot(
  data = df1,
  mapping = aes(x = Presion, y = t_factura, fill = Presion)
) +
  stat_summary(geom = "bar", fun = mean)

# Boxplot of the response at each pressure level.
ggplot(
  data = df1,
  mapping = aes(x = Presion, y = t_factura, fill = Presion)
) +
  geom_boxplot()

# One-way ANOVA: response explained by pressure level.
mod1 <- aov(t_factura ~ Presion, data = df1)
summary(mod1)

##             Df   Sum Sq  Mean Sq F value  Pr(>F)    
## Presion      3 0.016567 0.005522   82.83 2.3e-06 ***
## Residuals    8 0.000533 0.000067                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Como el p-valor (2.3e-06) es menor que 0.05, se rechaza la hipótesis nula: al menos una de las presiones tiene un efecto sobre el tiempo de factura.

# Shapiro-Wilk normality test on the residuals of mod1.
shapiro.test(mod1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod1$residuals
## W = 0.94102, p-value = 0.5114

Hay normalidad de los residuos, ya que el p-valor (0.5114) es mayor que 0.05.

# Bartlett test of homogeneity of variances for the mod1 residuals,
# grouped by pressure level.
bartlett.test(mod1$residuals, df1$Presion)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod1$residuals and df1$Presion
## Bartlett's K-squared = 0.95233, df = 3, p-value = 0.8128

Se cumple el supuesto de homocedasticidad, ya que el p-valor (0.8128) es mayor que 0.05.

# Tukey HSD pairwise comparisons between the pressure levels of mod1.
pt1 <- TukeyHSD(mod1)
pt1

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = t_factura ~ Presion, data = df1)
## 
## $Presion
##                     diff          lwr        upr     p adj
## p_0.1-p_0.05  0.05000000  0.028650987 0.07134901 0.0003178
## p_0.2-p_0.05  0.06666667  0.045317653 0.08801568 0.0000396
## p_0.25-p_0.05 0.10333333  0.081984320 0.12468235 0.0000014
## p_0.2-p_0.1   0.01666667 -0.004682347 0.03801568 0.1344163
## p_0.25-p_0.1  0.05333333  0.031984320 0.07468235 0.0002012
## p_0.25-p_0.2  0.03666667  0.015317653 0.05801568 0.0025538

Existen diferencias significativas entre todos los pares de tratamientos, excepto entre las presiones 0.1 y 0.2 (p ajustado = 0.134).

library(outliers)
# Grubbs test for a single outlier, applied to the raw response values.
grubbs.test(df1$t_factura)

## 
##  Grubbs test for one outlier
## 
## data:  df1$t_factura
## G = 1.64859, U = 0.73046, p-value = 0.5021
## alternative hypothesis: lowest value 0.87 is an outlier

No se detectaron valores atípicos en los datos (p-valor = 0.5021 > 0.05).

ejercicio 2

# Exercise 2 data: 24 yield values observed with four catalysts.
# The values are listed cycling through the catalysts in order
# (A, B, C, D, A, ...).
Rendimiento <- c(
  60, 65, 69, 58,
  63, 67, 66, 63,
  62, 70, 73, 61,
  61, 68, 68, 63,
  63, 66, 66, 62,
  62, 65, 67, 65
)
# Repeat the labels explicitly to the length of the response instead of
# relying on data.frame() silently recycling a length-4 vector.
Catalizador <- rep(c("A", "B", "C", "D"), length.out = length(Rendimiento))
df2 <- data.frame(Catalizador, Rendimiento)
df2

##    Catalizador Rendimiento
## 1            A          60
## 2            B          65
## 3            C          69
## 4            D          58
## 5            A          63
## 6            B          67
## 7            C          66
## 8            D          63
## 9            A          62
## 10           B          70
## 11           C          73
## 12           D          61
## 13           A          61
## 14           B          68
## 15           C          68
## 16           D          63
## 17           A          63
## 18           B          66
## 19           C          66
## 20           D          62
## 21           A          62
## 22           B          65
## 23           C          67
## 24           D          65

# Collapsible tree of df2: catalyst first, then the observed yields.
collapsibleTreeSummary(
  df2,
  c("Catalizador", "Rendimiento"),
  collapsed = FALSE
)

# Bar chart of the mean yield for each catalyst.
ggplot(
  data = df2,
  mapping = aes(x = Catalizador, y = Rendimiento, fill = Catalizador)
) +
  stat_summary(geom = "bar", fun = mean)

# Boxplot of yield for each catalyst. This call previously re-plotted the
# exercise 1 data (df1 / Presion / t_factura), a copy-paste slip; exercise 2
# must plot df2.
ggplot(df2, aes(x = Catalizador, y = Rendimiento, fill = Catalizador)) +
  geom_boxplot()

# One-way ANOVA: yield explained by catalyst.
mod2 <- aov(Rendimiento ~ Catalizador, data = df2)
summary(mod2)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Catalizador  3  192.5   64.15    14.5 3.01e-05 ***
## Residuals   20   88.5    4.43                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Como el p-valor (3.01e-05) es menor que 0.05, al menos un catalizador tiene efecto sobre el rendimiento.

# Shapiro-Wilk normality test on the residuals of mod2.
shapiro.test(mod2$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod2$residuals
## W = 0.96494, p-value = 0.5454

# Bartlett test of homogeneity of variances for the mod2 residuals,
# grouped by catalyst.
bartlett.test(mod2$residuals, df2$Catalizador)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod2$residuals and df2$Catalizador
## Bartlett's K-squared = 2.9844, df = 3, p-value = 0.394

# Tukey HSD pairwise comparisons between the catalysts of mod2.
pt2 <- TukeyHSD(mod2)
pt2

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Rendimiento ~ Catalizador, data = df2)
## 
## $Catalizador
##           diff       lwr       upr     p adj
## B-A  5.0000000  1.600704  8.399296 0.0027711
## C-A  6.3333333  2.934037  9.732629 0.0002282
## D-A  0.1666667 -3.232629  3.565963 0.9990452
## C-B  1.3333333 -2.065963  4.732629 0.6948686
## D-B -4.8333333 -8.232629 -1.434037 0.0037860
## D-C -6.1666667 -9.565963 -2.767371 0.0003110

# Grubbs test for a single outlier, applied to the raw yield values.
grubbs.test(df2$Rendimiento)

## 
##  Grubbs test for one outlier
## 
## data:  df2$Rendimiento
## G = 2.37238, U = 0.74466, p-value = 0.1413
## alternative hypothesis: highest value 73 is an outlier

ejercicio 3

# Exercise 3 data: 9 duration values, three sites within each of three blocks.
# Within every block the values are listed in the order Norte, Centro, Sur.
Duracion <- c(
  96, 85, 80,
  90, 88, 76,
  85, 82, 78
)
# Repeat the site labels explicitly to the length of the response instead of
# relying on data.frame() silently recycling a length-3 vector.
Sitio <- rep(c("Norte", "Centro", "Sur"), length.out = length(Duracion))
# Blocking factor: three consecutive observations per block.
Bloque <- gl(3, 3, length(Duracion), labels = paste0("B_", 1:3))
df3 <- data.frame(Bloque, Sitio, Duracion)
df3

##   Bloque  Sitio Duracion
## 1    B_1  Norte       96
## 2    B_1 Centro       85
## 3    B_1    Sur       80
## 4    B_2  Norte       90
## 5    B_2 Centro       88
## 6    B_2    Sur       76
## 7    B_3  Norte       85
## 8    B_3 Centro       82
## 9    B_3    Sur       78

# Collapsible tree of df3: site, then block, then the observed durations.
collapsibleTreeSummary(
  df3,
  c("Sitio", "Bloque", "Duracion"),
  collapsed = FALSE
)

# Bar chart of the mean duration at each site.
ggplot(
  data = df3,
  mapping = aes(x = Sitio, y = Duracion, fill = Sitio)
) +
  stat_summary(geom = "bar", fun = mean)

library(lattice)
# Lattice box-and-whisker plot of duration by site, one panel per block.
bwplot(
  Duracion ~ Sitio | Bloque,
  data = df3,
  xlab = "Sitio",
  ylab = "Duracion"
)

# ANOVA with a blocking term: duration explained by block and site.
mod3 <- aov(Duracion ~ Bloque + Sitio, data = df3)
summary(mod3)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## Bloque       2  42.89   21.44   1.959 0.2552  
## Sitio        2 229.56  114.78  10.487 0.0257 *
## Residuals    4  43.78   10.94                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Shapiro-Wilk normality test on the residuals of mod3.
shapiro.test(mod3$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod3$residuals
## W = 0.84657, p-value = 0.0683

# Bartlett test of homogeneity of variances for the mod3 residuals,
# grouped by site.
bartlett.test(mod3$residuals, df3$Sitio)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod3$residuals and df3$Sitio
## Bartlett's K-squared = 0.024485, df = 2, p-value = 0.9878

# Tukey HSD pairwise comparisons for every term of mod3 (block and site).
pt3 <- TukeyHSD(mod3)
pt3

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Duracion ~ Bloque + Sitio, data = df3)
## 
## $Bloque
##              diff       lwr      upr     p adj
## B_2-B_1 -2.333333 -11.96026 7.293591 0.6883491
## B_3-B_1 -5.333333 -14.96026 4.293591 0.2337058
## B_3-B_2 -3.000000 -12.62692 6.626924 0.5576971
## 
## $Sitio
##                    diff        lwr       upr     p adj
## Norte-Centro   5.333333  -4.293591 14.960258 0.2337058
## Sur-Centro    -7.000000 -16.626924  2.626924 0.1241404
## Sur-Norte    -12.333333 -21.960258 -2.706409 0.0223133

# Grubbs test for a single outlier, applied to the raw duration values.
grubbs.test(df3$Duracion)

## 
##  Grubbs test for one outlier
## 
## data:  df3$Duracion
## G = 1.83798, U = 0.52495, p-value = 0.18
## alternative hypothesis: highest value 96 is an outlier

ejercicio 4

# Exercise 4 data: 9 sales values classified by region, product and season.
Ventas <- c(
  220, 410, 265,
  280, 384, 300,
  240, 360, 251
)
# Region factor: three consecutive observations per region.
Region <- gl(3, 3, length(Ventas), labels = paste0("R_", 1:3))
# Repeat the product labels explicitly to the length of the response instead
# of relying on data.frame() silently recycling a length-3 vector.
Producto <- rep(c("A", "B", "C"), length.out = length(Ventas))
Estacion <- c(
  "E_3", "E_2", "E_1",
  "E_1", "E_3", "E_2",
  "E_2", "E_1", "E_3"
)
df4 <- data.frame(Region, Producto, Estacion, Ventas)
df4

##   Region Producto Estacion Ventas
## 1    R_1        A      E_3    220
## 2    R_1        B      E_2    410
## 3    R_1        C      E_1    265
## 4    R_2        A      E_1    280
## 5    R_2        B      E_3    384
## 6    R_2        C      E_2    300
## 7    R_3        A      E_2    240
## 8    R_3        B      E_1    360
## 9    R_3        C      E_3    251

# Collapsible tree of df4: product, region, season, then the observed sales.
collapsibleTreeSummary(
  df4,
  c("Producto", "Region", "Estacion", "Ventas"),
  collapsed = FALSE
)

# Bar chart of the mean sales for each product.
ggplot(
  data = df4,
  mapping = aes(x = Producto, y = Ventas, fill = Producto)
) +
  stat_summary(geom = "bar", fun = mean)

# Lattice box-and-whisker plot of sales by product, conditioned on
# region and season.
bwplot(
  Ventas ~ Producto | Region + Estacion,
  data = df4,
  xlab = "Producto",
  ylab = "Ventas"
)

# ANOVA of sales on product, with region and season as additional terms.
mod4 <- aov(Ventas ~ Producto + Region + Estacion, data = df4)
summary(mod4)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## Producto     2  32380   16190  44.792 0.0218 *
## Region       2   2163    1081   2.992 0.2505  
## Estacion     2   1506     753   2.083 0.3244  
## Residuals    2    723     361                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Shapiro-Wilk normality test on the residuals of mod4.
# NOTE(review): the rendered output reports p-value = 0.001671, i.e. normality
# is rejected at the 0.05 level; the report does not comment on this.
shapiro.test(mod4$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod4$residuals
## W = 0.70608, p-value = 0.001671

# Bartlett test of homogeneity of variances for the mod4 residuals,
# grouped by product.
# NOTE(review): the rendered output reports K-squared = 0 and p-value = 1;
# with only 2 residual degrees of freedom this result is presumably an
# artifact of the design rather than evidence of equal variances (confirm).
bartlett.test(mod4$residuals, df4$Producto)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod4$residuals and df4$Producto
## Bartlett's K-squared = 0, df = 2, p-value = 1

# Tukey HSD pairwise comparisons for every term of mod4
# (product, region and season).
pt4 <- TukeyHSD(mod4)
pt4

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Ventas ~ Producto + Region + Estacion, data = df4)
## 
## $Producto
##           diff        lwr       upr     p adj
## B-A  138.00000   46.55795 229.44205 0.0225911
## C-A   25.33333  -66.10872 116.77538 0.4061820
## C-B -112.66667 -204.10872 -21.22462 0.0335115
## 
## $Region
##              diff        lwr       upr     p adj
## R_2-R_1  23.00000  -68.44205 114.44205 0.4534211
## R_3-R_1 -14.66667 -106.10872  76.77538 0.6707084
## R_3-R_2 -37.66667 -129.10872  53.77538 0.2365160
## 
## $Estacion
##              diff        lwr       upr     p adj
## E_2-E_1  15.00000  -76.44205 106.44205 0.6607232
## E_3-E_1 -16.66667 -108.10872  74.77538 0.6121017
## E_3-E_2 -31.66667 -123.10872  59.77538 0.3046323

# Grubbs test for a single outlier, applied to the raw sales values.
grubbs.test(df4$Ventas)

## 
##  Grubbs test for one outlier
## 
## data:  df4$Ventas
## G = 1.60611, U = 0.63724, p-value = 0.3875
## alternative hypothesis: highest value 410 is an outlier

diseño

Danilo Patarroyo

2023-05-16

ejercicio 1

ejercicio 2

ejercicio 3

ejercicio 4