El ANOVA consiste en el análisis de las diferencias entre medias de tratamientos. Separa la cariación total en la variabilidad debida a los tartamientos y la variabilidad debida a el error.
factor con dos o más niveles -> hipótesis -> ANOVA
La variable respuesta se distribuye normal
Mediciones independientes entre sí
Homogeneidad de varianza
Predomina la variabilidad debida a los tratamientos -> los tratamientos tienen efecto = las medias son diferentes
La variabilidad debida a los tratamientos contribuye igual o menos que el error -> las medias son iguales
set.seed(123)
# 10 muestras
# 15 granos
# 0:entero, 1:partido
df = replicate(10, round(runif(15, 0, 1)))
# Conteo partidos por muestra
part_col = colSums(df); part_col## [1] 9 10 5 5 8 6 6 9 8 6
# Variabilidad de grano partido
var(part_col) # Variabilidad ENTRE-columnas## [1] 3.288889
# Varianza por columna
var_col = apply(df, 2, var); var_col## [1] 0.2571429 0.2380952 0.2380952 0.2380952 0.2666667 0.2571429 0.2571429
## [8] 0.2571429 0.2666667 0.2571429
sum(var_col) # Variabilidad INTRA-columnas## [1] 2.533333
# 200 simulaciones
# 4 tiempos (columnas)
# 15 muestras (filas)
set.seed(123)
nsim = 2000
# porcen = replicate(200, replicate(4, runif(15,11,14))) # Forma 1
# porcen = array(round(runif(12000,11,14)), c(15, 4, 200)) # Forma 2
porcen = replicate(nsim, cbind(
round(runif(15, 9, 11)),
round(runif(15, 11, 13)),
round(runif(15, 13, 14)),
round(runif(15, 14, 16))
))
class(porcen)## [1] "array"
dim(porcen)## [1] 15 4 2000
Cociente >1: los tratamientos causan la variabilidad
Cociente <1: las respeticiones causan la variabilidad
medias_col = matrix(NA, nsim, 4)
var_muestras = NULL
var_col = NULL
for(i in seq(nsim)){
medias_col[i,] = colMeans(porcen[,,i])
var_muestras[i] = sum(apply(porcen[,,i], 2, var))
var_col[i] = var(medias_col[i,])
}
head(medias_col)## [,1] [,2] [,3] [,4]
## [1,] 10.200000 12.06667 13.33333 14.86667
## [2,] 10.200000 11.93333 13.40000 15.13333
## [3,] 9.866667 11.86667 13.26667 15.00000
## [4,] 10.333333 11.86667 13.46667 15.00000
## [5,] 10.000000 12.20000 13.40000 14.86667
## [6,] 9.866667 12.06667 13.53333 15.06667
head(var_col)## [1] 3.906296 4.414815 4.724444 4.056296 4.232222 4.902222
head(var_muestras)## [1] 1.885714 1.904762 1.600000 1.342857 1.838095 1.952381
f_cociente = var_col/var_muestras
head(f_cociente)## [1] 2.071521 2.317778 2.952778 3.020646 2.302504 2.510894
hist(f_cociente, breaks = 30, probability = T, xlim = c(0,6),col = "#CAFF70", border = "#BCEE68")
abline(v = 1, col = '#BCEE68')
curve(df(x, 4-1, 15-1-2), add = T)curve(df(x, 3, 12), xlim = c(0, 7), col= '#838B8B')
x = seq(1,6, 0.01)
y = df(x, 3, 12)
polygon(c(x,6,1), c(y,0,0), col='#FFF68F')
abline(v=1, col = 'darkorange', lty = 2)
text(1.5, 0.1, round(pf(1, 3, 12, lower.tail = F),2))
x = seq(qf(0.95,3,12),6, 0.01)
y = df(x, 3, 12)
polygon(c(x,6,qf(0.95,3,12)), c(y,0,0), col='darkolivegreen2')
abline(v = qf(0.95,3,12), col='#76EE00', lty = 2)
text(3.5, 0.02, expression(alpha), pos = 4)
text(2, 0.6, paste0('Zona donde\nlas diferencias\nse atribuyen\na las muestras'))
text(5, 0.6, paste0('Zona donde\nlas diferencias\nse atribuyen\na los tratamientos'))
arrows(6.41, 0.15, 6.41, 0)
text(6.41, 0.15, 'F = 6.41')
text(6, 0.3, 'p-value:')
arrows(7, 0.3, 6.41, 0.3, angle = 90)
arrows(6.41, 0.3, 7, 0.3,)# RESPUESTA: Porcentaje de Arroz Partido
set.seed(123)
pap = c(
rnorm(15, 9, 0.5),
rnorm(15, 9.2, 0.5),
rnorm(15, 9.4, 0.5),
rnorm(15, 9.6, 0.5)
)
# FACTOR: Tiempo en el microondas
tiempo = gl(4, 15, 60, c('5seg','10seg','15seg','20seg'))
df1 = data.frame(tiempo, pap)
df1## tiempo pap
## 1 5seg 8.719762
## 2 5seg 8.884911
## 3 5seg 9.779354
## 4 5seg 9.035254
## 5 5seg 9.064644
## 6 5seg 9.857532
## 7 5seg 9.230458
## 8 5seg 8.367469
## 9 5seg 8.656574
## 10 5seg 8.777169
## 11 5seg 9.612041
## 12 5seg 9.179907
## 13 5seg 9.200386
## 14 5seg 9.055341
## 15 5seg 8.722079
## 16 10seg 10.093457
## 17 10seg 9.448925
## 18 10seg 8.216691
## 19 10seg 9.550678
## 20 10seg 8.963604
## 21 10seg 8.666088
## 22 10seg 9.091013
## 23 10seg 8.686998
## 24 10seg 8.835554
## 25 10seg 8.887480
## 26 10seg 8.356653
## 27 10seg 9.618894
## 28 10seg 9.276687
## 29 10seg 8.630932
## 30 10seg 9.826907
## 31 15seg 9.613232
## 32 15seg 9.252464
## 33 15seg 9.847563
## 34 15seg 9.839067
## 35 15seg 9.810791
## 36 15seg 9.744320
## 37 15seg 9.676959
## 38 15seg 9.369044
## 39 15seg 9.247019
## 40 15seg 9.209764
## 41 15seg 9.052647
## 42 15seg 9.296041
## 43 15seg 8.767302
## 44 15seg 10.484478
## 45 15seg 10.003981
## 46 20seg 9.038446
## 47 20seg 9.398558
## 48 20seg 9.366672
## 49 20seg 9.989983
## 50 20seg 9.558315
## 51 20seg 9.726659
## 52 20seg 9.585727
## 53 20seg 9.578565
## 54 20seg 10.284301
## 55 20seg 9.487115
## 56 20seg 10.358235
## 57 20seg 8.825624
## 58 20seg 9.892307
## 59 20seg 9.661927
## 60 20seg 9.707971
boxplot(df1$pap ~ df1$tiempo, col= c('#CAFF70','#BCEE68','#A2CD5A','#6E8B3D'))# Analysis Of Variance
mod1 = aov(pap ~ tiempo, df1)
summary(mod1)## Df Sum Sq Mean Sq F value Pr(>F)
## tiempo 3 3.995 1.3317 6.41 0.000824 ***
## Residuals 56 11.633 0.2077
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Hipotesis
\[H_0: \mu_{5seg}=\mu_{10seg}=\mu_{15seg}=\mu_{20seg}\\ H_a: H_0\text{ falsa}\]
Si el \(Pvalue < 0.05\) rechazo \(H_0\)
DE_231121.xlsx pestaña EXPERIMENTO 1DE_231121 <- read_excel("E:/Diseno de experimentos/Clases 2021-2/clases/DE_231121.xlsx",
sheet = "EXPERIMENTO 1")
head(DE_231121)## # A tibble: 6 x 2
## rto dosis
## <dbl> <dbl>
## 1 1.77 0
## 2 1.97 0
## 3 2.08 0
## 4 2.12 0
## 5 2.13 0
## 6 2.41 0
mean_var <- DE_231121 %>%
group_by(dosis) %>%
summarize(media_col=mean(rto),
var_col= var(rto))
mean_var## # A tibble: 5 x 3
## dosis media_col var_col
## <dbl> <dbl> <dbl>
## 1 0 2.31 0.111
## 2 5 2.83 0.00210
## 3 10 3.00 0.00569
## 4 15 3.18 0.00493
## 5 20 3.60 0.120
var_medias<- var(mean_var$media_col)
var_medias## [1] 0.2253678
sum_var<-sum(mean_var$var_col)
sum_var## [1] 0.2437277
co <- var_medias/sum_var
co## [1] 0.9246704
Conclusión: las repeticiones causan la variabilidad
DE_231121$dosis<- as.factor(DE_231121$dosis)
levels(DE_231121$dosis)## [1] "0" "5" "10" "15" "20"
anova <- aov(rto~dosis, data = DE_231121)
summary(anova)## Df Sum Sq Mean Sq F value Pr(>F)
## dosis 4 9.015 2.2537 46.23 2.2e-15 ***
## Residuals 45 2.194 0.0487
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
se rechaza la \(H_0\): por lo menos una media es diferente -> la variabilidad se debe a los tratamientos
pf(q = 46.23,df1 =4,df2 = 45 )## [1] 1
curve(df(x, 4, 45), xlim = c(0, 50), col= '#838B8B')
abline(v = qf(0.95,4,45), col='#76EE00', lty = 2)
abline(v = qf(1-2.2e-15,4,45), col='6', lty = 2)
text(4, 0.3, 'F0.05', col = "#76EE00",font = 2,cex = 0.7)
text(44, 0.3, 'Fvalue', col = "6",font = 2,cex = 0.7) \(F_{value}\) > \(F_{0.05}\) -> Rechazo \(H_0\)
dist_f(p = 0.05,deg.f1 = 4, deg.f2 = 45) #library(sjPlot)