#Solo un factor
#Sin razón de bloquear
xy = expand.grid(x=seq(0,5), y= seq(0,5))
plot(xy)
#Factor
genotipo = gl(n=6,k=6, length = 36,
labels= paste0('gen', 1:6))
genotipo
## [1] gen1 gen1 gen1 gen1 gen1 gen1 gen2 gen2 gen2 gen2 gen2 gen2 gen3 gen3 gen3
## [16] gen3 gen3 gen3 gen4 gen4 gen4 gen4 gen4 gen4 gen5 gen5 gen5 gen5 gen5 gen5
## [31] gen6 gen6 gen6 gen6 gen6 gen6
## Levels: gen1 gen2 gen3 gen4 gen5 gen6
#Variable respuesta
set.seed(123)
ps = c(
rnorm(12, 1200, 100),
rnorm(12, 1500, 80),
rnorm(12, 1420, 90)
)
aleat = sample(36)
datos = data.frame(xy[aleat,], genotipo, ps)
head(datos)
## x y genotipo ps
## 15 2 2 gen1 1143.952
## 26 1 4 gen1 1176.982
## 31 0 5 gen1 1355.871
## 16 3 2 gen1 1207.051
## 20 1 3 gen1 1212.929
## 30 5 4 gen1 1371.506
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.3
ggplot(datos)+
aes(x,y, fill=genotipo)+
geom_tile()
#Analisis descriptivo
ggplot(datos)+
aes(genotipo, ps)+
geom_boxplot()
ggplot(datos)+
aes(genotipo, ps)+
geom_violin()
###Analisis inferencial
Hipotesis \[H_0 : \mu_{g_1} = \mu_{g_2} = \mu_{g_2} = \mu_{g_3} = \mu_{g_4} = \mu_{g_5} = \mu_{g_6}\\ H_a : H_0\text{ es falsa}\] Modelo \[y_{ij} = \mu_i + \epsilon_{ij}\\ i=1,2,3,4,5,6\] \[y_{ij} = \text{Peso seco i-esimo genotipo y j-esima repetición}\] \[\mu_i = \text{La media de cada i-esimo genoripo}\] \[\epsilon_{ij} = \text{Residuales}\] Modelo en forma de efectos
\[y_{ij} = \mu + \tau_i + \epsilon_{ij}\] \(\mu\) media global \(\tau_i\) efecto de cada genotipo
Modelo en forma matricial
\[ t = x\beta + E\] \(x\) matriz del diseño 36 filas y 7 columnas (1 columna representa la media y 6 una por genotipo)
\(\beta\) vector de parametros ($ ;_1 ;_2;_3;_4;_5;_6 $)
Otra forma de plantear la hipotesis \[H_0 : \tau_1 = \tau_2 = \tau_3 = \tau_4 = \tau_5 = \tau_6 = 0\]
mod1 = aov(ps ~ genotipo, data=datos)
smod1 = summary(mod1)
pv1 = smod1[[1]][1,5]
ifelse(pv1 < 0.05,'Reschazo H0','NO reschazo H0')
## [1] "Reschazo H0"
Como el valor de f es de 12,22 lo que quiere decir que la variabilidad es causada por los genotipos es 14,22 veces más grande que la causada por el error
Interpretando el p-value: se rechaza la hipotesis nula, lo que sugiere que existen diferencias en al menos uno de los tratamientos en cuanto al peso seco. Los datos proporcionan evidencia en contra de la hipotesis nula (a favor de la alterna)
#Media global
mu = mean(datos$ps)
#Media por genotipo
mu_i = tapply(datos$ps,datos$genotipo, mean)
#Efecto genotipo
Tau_i = mu_i - mu
Tau_i
## gen1 gen2 gen3 gen4 gen5 gen6
## -134.97483 -185.56951 123.96002 82.81483 22.23648 91.53300
var_i = tapply(datos$ps,datos$genotipo, var)
boxplot( ps ~ genotipo, datos, ylim=c(1000, 1800), las=1)
points(1:6, mu_i, pch=16, col='purple')
abline(h=mu, lty=2, col= 'purple')
segments(1:6-.2, mu_i, 1:6-.2, mu, col = 'green', lwd = 2, lty = 2)
text(1:6, rep(1700, 6), round(Tau_i, 2))
text(1:6, rep(1000, 6), round(var_i, 2))
#Revision de residuales
\[H_0: \sigma^2_{g1} = \sigma^2_{g2} = \sigma^2_{g3} = \sigma^2_{g4} = \sigma^2_{g5} = \sigma^2_{g6} \]
\[H_0: \epsilon \sim n(0, \sigma^2_e) \]
hist(mod1$residuals)
tapply(mod1$residuals,datos$genotipo, var)
## gen1 gen2 gen3 gen4 gen5 gen6
## 9120.295 8180.614 9955.627 2776.509 10670.073 1696.089
# Igualdad de varianzas
bartlett.test(mod1$residuals, datos$genotipo)
##
## Bartlett test of homogeneity of variances
##
## data: mod1$residuals and datos$genotipo
## Bartlett's K-squared = 5.5895, df = 5, p-value = 0.3482
#Normalidad de residuos
shapiro.test(mod1$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod1$residuals
## W = 0.97311, p-value = 0.5164
Como el p-value en la prueba de igualdad de varianzas es mayor al 5% estadisticamente se consideran iguales
####Comparación de medias posterior al analisis de varianzas
#Prueba de máxima deiferencia Tukey
par(mar=c(5,6,3,1))
tt = TukeyHSD(mod1, 'genotipo')
plot(tt, las=1)
abline(v=0, lty=2, col='red', lwd=2)
library(agricolae)
## Warning: package 'agricolae' was built under R version 4.1.3
dt = duncan.test(mod1,'genotipo', console = T)
##
## Study: mod1 ~ "genotipo"
##
## Duncan's new multiple range test
## for ps
##
## Mean Square Error: 7066.534
##
## genotipo, means
##
## ps std r Min Max
## gen1 1244.715 95.50024 6 1143.952 1371.506
## gen2 1194.121 90.44675 6 1073.494 1322.408
## gen3 1503.650 99.77789 6 1342.671 1642.953
## gen4 1462.505 52.69259 6 1414.574 1556.108
## gen5 1401.927 103.29604 6 1268.198 1532.843
## gen6 1471.223 41.18360 6 1393.444 1500.561
##
## Alpha: 0.05 ; DF Error: 30
##
## Critical Range
## 2 3 4 5 6
## 99.11886 104.16376 107.43409 109.76839 111.53078
##
## Means with the same letter are not significantly different.
##
## ps groups
## gen3 1503.650 a
## gen6 1471.223 a
## gen4 1462.505 a
## gen5 1401.927 a
## gen1 1244.715 b
## gen2 1194.121 b
plot(dt)
#Factor
genotipo = gl(n=6, k=6, length =36,
labels = paste0('gen', 1:16))
#Variable respuesta
set.seed(123)
ps = c(rnorm(12, 1200, 120),
rnorm(12, 1500, 100),
rnorm(12, 1420, 250))
datos = data.frame(xy[aleat,],genotipo, ps)
head(datos)
## x y genotipo ps
## 15 2 2 gen1 1132.743
## 26 1 4 gen1 1172.379
## 31 0 5 gen1 1387.045
## 16 3 2 gen1 1208.461
## 20 1 3 gen1 1215.515
## 30 5 4 gen1 1405.808
ggplot(datos)+
aes(genotipo, ps)+
geom_boxplot()
mod1b = aov(ps ~ genotipo, datos)
smod1b= summary(mod1b)
shapiro.test(mod1b$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod1b$residuals
## W = 0.98349, p-value = 0.8558
bartlett.test(mod1b$residuals, datos$genotipo)
##
## Bartlett test of homogeneity of variances
##
## data: mod1b$residuals and datos$genotipo
## Bartlett's K-squared = 12.401, df = 5, p-value = 0.02969
Como el p-value es <5% se rechaza el supuesto de igual de varianzas, lo que quiere decir que se incumbe el supuesto lo cual complica la interpretación.
mod1c = oneway.test(ps ~ genotipo, datos)
mod1c
##
## One-way analysis of means (not assuming equal variances)
##
## data: ps and genotipo
## F = 8.6764, num df = 5.000, denom df = 13.702, p-value = 0.0006918
Cuando se incumble normalidad e igualdad de varianzas ?
Analisis de varianza no paremetrico para un diseño factorial simple en arreglo completamente al azar
\[H_0: R_1 = R_2 = R_3 = R_4 = R_5 = R_6\]
mod1d = kruskal.test(ps , genotipo)
mod1d
##
## Kruskal-Wallis rank sum test
##
## data: ps and genotipo
## Kruskal-Wallis chi-squared = 17.204, df = 5, p-value = 0.004128
Comparación de rango promedio posterior a kruskal-wallis
library(PMCMRplus)
## Warning: package 'PMCMRplus' was built under R version 4.1.3
kwAllPairsNemenyiTest(ps, genotipo)
##
## Pairwise comparisons using Tukey-Kramer-Nemenyi all-pairs test with Tukey-Dist approximation
## data: ps and genotipo
## gen1 gen2 gen3 gen4 gen5
## gen2 0.998 - - - -
## gen3 0.172 0.058 - - -
## gen4 0.443 0.205 0.995 - -
## gen5 0.807 0.533 0.883 0.993 -
## gen6 0.046 0.012 0.995 0.904 0.587
##
## P value adjustment method: single-step
## alternative hypothesis: two.sided
library(FSA)
## Warning: package 'FSA' was built under R version 4.1.3
## ## FSA v0.9.4. See citation('FSA') if used in publication.
## ## Run fishR() for related website and fishR('IFAR') for related book.
dunnTest(ps, genotipo)
## Dunn (1964) Kruskal-Wallis multiple comparison
## p-values adjusted with the Holm method.
## Comparison Z P.unadj P.adj
## 1 gen1 - gen2 0.4383973 0.6610983037 0.66109830
## 2 gen1 - gen3 -2.3563855 0.0184537565 0.22144508
## 3 gen2 - gen3 -2.7947828 0.0051934597 0.06751498
## 4 gen1 - gen4 -1.8357887 0.0663889146 0.66388915
## 5 gen2 - gen4 -2.2741860 0.0229548062 0.25250287
## 6 gen3 - gen4 0.5205968 0.6026476838 1.00000000
## 7 gen1 - gen5 -1.2603922 0.2075279011 1.00000000
## 8 gen2 - gen5 -1.6987895 0.0893588460 0.80422961
## 9 gen3 - gen5 1.0959932 0.2730817290 1.00000000
## 10 gen4 - gen5 0.5753965 0.5650232007 1.00000000
## 11 gen1 - gen6 -2.8769823 0.0040149814 0.05620974
## 12 gen2 - gen6 -3.3153796 0.0009151876 0.01372781
## 13 gen3 - gen6 -0.5205968 0.6026476838 1.00000000
## 14 gen4 - gen6 -1.0411936 0.2977857118 1.00000000
## 15 gen5 - gen6 -1.6165900 0.1059668029 0.84773442
rangos = rank(ps, ties.method = 'average')
rangos
## [1] 4 7 16 8 9 19 11 2 3 6 15 10 27 25 21 35 28 13 29 22 17 24 18 20 12
## [26] 1 32 23 5 36 26 14 34 33 31 30
boxplot(rangos ~ genotipo)
###Analisis de varianza permutacional
library(RVAideMemoire)
## Warning: package 'RVAideMemoire' was built under R version 4.1.3
## *** Package RVAideMemoire v 0.9-81-2 ***
##
## Attaching package: 'RVAideMemoire'
## The following object is masked from 'package:FSA':
##
## se
perm.anova(ps ~ genotipo, data = datos, nperm = 10000)
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Permutation Analysis of Variance Table
##
## Response: ps
## 10000 permutations
## Sum Sq Df Mean Sq F value Pr(>F)
## genotipo 627712 5 125542 5.3717 0.0013 **
## Residuals 701126 30 23371
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1