#Diseño 1: Factorial simple en arreglo completamemte al azar.
#Único factor
#Sin razón para bloquear
#Imaginado del experimento en el campo: Creamos un dataframe a partir de todas las combinaciones posibles de la variable FACTOR
xy = expand.grid(x = seq(0,5), y = seq(0,5))
plot(xy, pch = 15, cex = 3, asp = 1)
#FACTOR
genotipo = gl(n = 6, k = 6, length = 36,
labels= paste0("gen", 1:6))
#VARIABLE RESPUESTA
set.seed(123)
PS= c(
rnorm(12, 1200, 100),
rnorm(12, 1500, 80),
rnorm(12, 1420, 90)
)
aleat=sample(36)
datos= data.frame(xy[aleat,], genotipo, PS)
head(datos)
## x y genotipo PS
## 15 2 2 gen1 1143.952
## 26 1 4 gen1 1176.982
## 31 0 5 gen1 1355.871
## 16 3 2 gen1 1207.051
## 20 1 3 gen1 1212.929
## 30 5 4 gen1 1371.506
library(ggplot2)
ggplot(datos)+
aes(x,y, fill = genotipo)+
geom_tile()
#Análisis descriptivo
ggplot(datos)+
aes(genotipo, PS)+
geom_boxplot()
ggplot(datos)+
aes(genotipo, PS)+
geom_violin()
#Análisis inferencial
\[H_0: \mu_{g_1}= \mu_{g_2}= \mu_{g_3}= \mu_{g_4}= \mu_{g_5}= \mu_{g_6}\\ H_a: H_0\text{ es falsa}\]
Modelo
\[y_{i j} = \mu_i + \epsilon_{ij}\\ i=1,2,3,4,5,6~; j=1,2,3,4,5,6\]
\[y-{ij} = \text{Peso seco i-esimo genotipo y j-isema repetición}\] \[\mu:_i=\text{ La media de cada i_esimo genotipo}\] \[\epsilon_{i} = \text{residuals}\]
Modelo en forma de efectos
\[y_{ij} = \mu + \tau_i + \epsilon{ij} \] \(\mu\) media global \(\tau_i\) efecto de cada genotipo
modelo en forma matricial
\[Y = X\beta + E \] \(X\) matriz del diseño 36 filas y 7 columnas (1 columna representa la media y las 6 restantes una por genotipo)
\(\beta\) vector de parametros (\[{\mu; \tau_1; \tau_2; \tau_3; \tau_4; \tau_5; \tau_6}\])
\[H_0:\tau_1=\tau_2= \tau_3= \tau_4= \tau_5= \tau_6=0 \]
mod1 = aov(PS ~ genotipo, data =datos)
smod1 = summary(mod1)
pv1 = smod1[[1]][1,5]
ifelse(pv1 < 0.05, 'Rechazo Ho', 'No rechazo Ho')
## [1] "Rechazo Ho"
#Como el valor de F es 14.22, la variabilidad causada por los genotipos es 14.22 veces más grande que la causada por el error.
#p-value: Se rechaza la hipótesis nula, lo que sugiere que existen diferencias en al menos uno de los tratamientos en cuánto al peso seco.
#ESTIMANDO LOS EFECTOS
#Media global
mu = mean(datos$PS)
#Media por genotipo
mu_i = tapply(datos$PS, datos$genotipo, mean)
#Efecto por genotipo
tau_i = mu_i - mu
tau_i
## gen1 gen2 gen3 gen4 gen5 gen6
## -134.97483 -185.56951 123.96002 82.81483 22.23648 91.53300
boxplot(PS ~ genotipo, datos, ylim=c(1000, 1800), las=1)
points(1:6, mu_i, pch=16, col='red')
abline(h = mu, lty=2, col='red')
segments(1:6-.2, mu_i, 1:6-.2, mu, col='blue', lwd=2, lty=2)
text(1:6, rep(1700,6), round(tau_i, 2))
text(1:6, rep(1000,6), round(tau_i, 2))
#REVISIÓN DE SUPUESTOS \[H_O:
\sigma^2_{g1}=\sigma^2_{g2}=\sigma^2_{g3}=\sigma^2_{g4}=\sigma^2_{g5}=\sigma^2_{g6}\]
\[H_O:\epsilon \sim N(0, \sigma^2_e)\]
#VARIANZA:
hist(mod1$residuals)
var_res = tapply(mod1$residuals, datos$genotipo, var)
# Igualdad de varianzas
bartlett.test(mod1$residuals, datos$genotipo)
##
## Bartlett test of homogeneity of variances
##
## data: mod1$residuals and datos$genotipo
## Bartlett's K-squared = 5.5895, df = 5, p-value = 0.3482
#Normalidad de residuos
shapiro.test(mod1$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod1$residuals
## W = 0.97311, p-value = 0.5164
#Como el p-value en la prueba de igualdad de varianzas es mayor al 5% estadisticamente se pueden considerar iguales. Como el p-value en la prueba de normalidad es 51.64% (> 5%) se considera que los residuales siguen una distribución normal
#COMPARACIÓN DE MEDIAS POSTERIOR AL ANÁLISIS DE VARIANZA
#Prueba de máxima diferencia de Tukeypar(mar=c(4, 6, 3, 1))
tt = TukeyHSD(mod1, 'genotipo')
plot(tt, las=1)
abline(v=0, lty=2, col='red',lwd=2)
tt
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = PS ~ genotipo, data = datos)
##
## $genotipo
## diff lwr upr p adj
## gen2-gen1 -50.594675 -198.214230 97.02488 0.8995877
## gen3-gen1 258.934855 111.315300 406.55441 0.0001225
## gen4-gen1 217.789664 70.170110 365.40922 0.0012678
## gen5-gen1 157.211312 9.591757 304.83087 0.0316170
## gen6-gen1 226.507827 78.888272 374.12738 0.0007764
## gen3-gen2 309.529530 161.909976 457.14908 0.0000068
## gen4-gen2 268.384340 120.764785 416.00389 0.0000713
## gen5-gen2 207.805987 60.186433 355.42554 0.0022109
## gen6-gen2 277.102502 129.482947 424.72206 0.0000433
## gen4-gen3 -41.145191 -188.764745 106.47436 0.9557570
## gen5-gen3 -101.723543 -249.343098 45.89601 0.3163216
## gen6-gen3 -32.427028 -180.046583 115.19253 0.9841426
## gen5-gen4 -60.578352 -208.197907 87.04120 0.8096942
## gen6-gen4 8.718162 -138.901392 156.33772 0.9999711
## gen6-gen5 69.296515 -78.323040 216.91607 0.7103650
library(agricolae)
dt = duncan.test(mod1, 'genotipo', console = T)
##
## Study: mod1 ~ "genotipo"
##
## Duncan's new multiple range test
## for PS
##
## Mean Square Error: 7066.534
##
## genotipo, means
##
## PS std r Min Max
## gen1 1244.715 95.50024 6 1143.952 1371.506
## gen2 1194.121 90.44675 6 1073.494 1322.408
## gen3 1503.650 99.77789 6 1342.671 1642.953
## gen4 1462.505 52.69259 6 1414.574 1556.108
## gen5 1401.927 103.29604 6 1268.198 1532.843
## gen6 1471.223 41.18360 6 1393.444 1500.561
##
## Alpha: 0.05 ; DF Error: 30
##
## Critical Range
## 2 3 4 5 6
## 99.11886 104.16376 107.43409 109.76839 111.53078
##
## Means with the same letter are not significantly different.
##
## PS groups
## gen3 1503.650 a
## gen6 1471.223 a
## gen4 1462.505 a
## gen5 1401.927 a
## gen1 1244.715 b
## gen2 1194.121 b
plot(dt)
#DISEÑO NÚMERO 1. INCUMPLIENDO SUPUESTOS.
# Único factor
# Sin razón de bloquear
xy = expand.grid(x = seq(0,5), y = seq(0,5))
plot(xy, pch = 15, cex = 3, asp = 1)
genotipo = gl(n = 6, k = 6, length = 36,
labels = paste0('gen', 1:6))
genotipo
## [1] gen1 gen1 gen1 gen1 gen1 gen1 gen2 gen2 gen2 gen2 gen2 gen2 gen3 gen3 gen3
## [16] gen3 gen3 gen3 gen4 gen4 gen4 gen4 gen4 gen4 gen5 gen5 gen5 gen5 gen5 gen5
## [31] gen6 gen6 gen6 gen6 gen6 gen6
## Levels: gen1 gen2 gen3 gen4 gen5 gen6
#Variable respuesta
set.seed(123)
PS = c(
rnorm(12, 1200, 120),
rnorm(12, 1500, 100),
rnorm(12, 1420, 250)
)
datos = data.frame(xy[aleat,], genotipo, PS)
head(datos)
## x y genotipo PS
## 15 2 2 gen1 1132.743
## 26 1 4 gen1 1172.379
## 31 0 5 gen1 1387.045
## 16 3 2 gen1 1208.461
## 20 1 3 gen1 1215.515
## 30 5 4 gen1 1405.808
ggplot(datos)+
aes(genotipo, PS)+
geom_boxplot()
mod1b = aov(PS~genotipo, datos)
smod1b = summary(mod1b)
smod1b
## Df Sum Sq Mean Sq F value Pr(>F)
## genotipo 5 627712 125542 5.372 0.00121 **
## Residuals 30 701126 23371
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
shapiro.test(mod1b$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod1b$residuals
## W = 0.98349, p-value = 0.8558
bartlett.test(mod1b$residuals, datos$genotipo)
##
## Bartlett test of homogeneity of variances
##
## data: mod1b$residuals and datos$genotipo
## Bartlett's K-squared = 12.401, df = 5, p-value = 0.02969
Se rechaza la hipótesis de igualdad de varianzas, se incumple el supuesto y esto complica la interpretación
#ANÁLISIS DE VARIANZA PARA UN DISEÑO FACTORIAL SIMPLE EN ARREGLO COMPLETAMENTE AL AZAR, EN PRESENCIA DE HETEROCEDASTICIDAD
mod1c = oneway.test(PS~genotipo, datos)
mod1c
##
## One-way analysis of means (not assuming equal variances)
##
## data: PS and genotipo
## F = 8.6764, num df = 5.000, denom df = 13.702, p-value = 0.0006918
Cuando se incumple normalidad e igualdad de varianzas, entonces…
Análisis de varianza no parametrico para un diseño en arreglo factorial simple en arreglo completamente al azar
#Prueba de Kruskal-Walles –> Cuando no se cumplen dos supuestos. Solo es válido para este experimento
\[H_0: R_1=R_2=R_3=R_4=R_5=R_6\]
mod1d = kruskal.test(PS, genotipo)
mod1d
##
## Kruskal-Wallis rank sum test
##
## data: PS and genotipo
## Kruskal-Wallis chi-squared = 17.204, df = 5, p-value = 0.004128
comparacion de rangos promedios posterior a kruskal. wallis
#library(PMCMR)
#posthoc.kruskal.nemeyi.test(PS, genotipo)
library(PMCMRplus)
library(FSA)
## ## FSA v0.9.4. See citation('FSA') if used in publication.
## ## Run fishR() for related website and fishR('IFAR') for related book.
dunnTest(PS, genotipo)
## Dunn (1964) Kruskal-Wallis multiple comparison
## p-values adjusted with the Holm method.
## Comparison Z P.unadj P.adj
## 1 gen1 - gen2 0.4383973 0.6610983037 0.66109830
## 2 gen1 - gen3 -2.3563855 0.0184537565 0.22144508
## 3 gen2 - gen3 -2.7947828 0.0051934597 0.06751498
## 4 gen1 - gen4 -1.8357887 0.0663889146 0.66388915
## 5 gen2 - gen4 -2.2741860 0.0229548062 0.25250287
## 6 gen3 - gen4 0.5205968 0.6026476838 1.00000000
## 7 gen1 - gen5 -1.2603922 0.2075279011 1.00000000
## 8 gen2 - gen5 -1.6987895 0.0893588460 0.80422961
## 9 gen3 - gen5 1.0959932 0.2730817290 1.00000000
## 10 gen4 - gen5 0.5753965 0.5650232007 1.00000000
## 11 gen1 - gen6 -2.8769823 0.0040149814 0.05620974
## 12 gen2 - gen6 -3.3153796 0.0009151876 0.01372781
## 13 gen3 - gen6 -0.5205968 0.6026476838 1.00000000
## 14 gen4 - gen6 -1.0411936 0.2977857118 1.00000000
## 15 gen5 - gen6 -1.6165900 0.1059668029 0.84773442
rangos = rank(PS, ties.method = "average")
rangos
## [1] 4 7 16 8 9 19 11 2 3 6 15 10 27 25 21 35 28 13 29 22 17 24 18 20 12
## [26] 1 32 23 5 36 26 14 34 33 31 30
boxplot(rangos ~ genotipo)
library(RVAideMemoire)
## *** Package RVAideMemoire v 0.9-81-2 ***
##
## Attaching package: 'RVAideMemoire'
## The following object is masked from 'package:FSA':
##
## se
perm.anova(PS ~ genotipo, data = datos, nperm =10000)
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Permutation Analysis of Variance Table
##
## Response: PS
## 10000 permutations
## Sum Sq Df Mean Sq F value Pr(>F)
## genotipo 627712 5 125542 5.3717 0.0013 **
## Residuals 701126 30 23371
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1