Análisis de covarianza Análisis de varianza y análisis de regresión
Diseño Factorial completo en arreglo completamente al azar
Factor 1: Genotipo(criolla, pastusa, betania) Factor 2: Ambiente (2500 y 1700 msnm)
Covariable 1: Precipitación acumulada en 3 meses Covariable 2: Materia Orgánica
rto = Respuesta: Rendimiento kg/ha
set.seed(2022)
rto<-c(sort(rnorm(10,3,0.3)),
sort(rnorm(10,2.5,0.3)),
sort(rnorm(10,3.1,0.3)),
sort(rnorm(10,2,0.3)),
sort(rnorm(10,3.5,0.3)),
sort(rnorm(10,2.7,0.3)))
rto=sort(rto)
mo<-c(sort(rnorm(10,1.5,0.3)),
sort(rnorm(10,1,0.3)),
sort(rnorm(10,0.8,0.3)),
sort(rnorm(10,1.3,0.3)),
sort(rnorm(10,0.9,0.3)),
sort(rnorm(10,1.2,0.3)))
mo=sort(mo)
pcp<-c(rnorm(10,500,20),
rnorm(10,450,20),
rnorm(10,600,20),
rnorm(10,450,20),
rnorm(10,550,20),
rnorm(10,610,20))
genotipo<- gl(3,20,60,c("criolla","pastusa","betania"))
localidad = gl(2,10,60,c("2500","1700"))
df = data.frame(genotipo,localidad,rto,mo,pcp)
Arbol colapsable
library(collapsibleTree)
collapsibleTree(df,hierarchy = c("localidad","genotipo","rto"))
Análisis descriptivo
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df |>
ggplot(aes(y=rto,x=mo))+
geom_point(size=4,color="orange")
df |>
ggplot(aes(y=rto,x=pcp))+
geom_point(size=4,color="blue")
df |>
ggplot(aes(y=rto,x=genotipo,fill=genotipo))+
geom_boxplot()
df |>
ggplot(aes(y=rto,x=localidad,fill=localidad))+
geom_boxplot()
df |>
ggplot(aes(y=rto,x=genotipo,fill=genotipo))+
facet_wrap(~localidad)+
geom_boxplot()
Modelo de diseño \[
y_{ijk}=\mu+\tau_i+\theta_j+(\tau\theta)_{ij}+\delta(x_{ijk}-\bar{x})+\phi(z_{ijk}-\bar{z})+\epsilon_{ijk}
\] \[
y_{ijk}: \text{Rendimiento} \\
\mu: \text{Promedio general de rendimiento} \\
\tau_{i}: \text{Efecto del i-esimo genotipo} ~i: 1\cdots 3\\
\theta_{j}: \text{Efecto del j-esimo ambiente j: 1, 2 }\\
(\tau\theta)_{ij}: \text{Efecto de la interacción Genotipo x ambiente}\\
\delta: \text{Pendiente entre respuesta y la covariable (MO)} \\
\phi: \text{Pendiente entre respuesta y la covariable (PCP)} \\
x_{ijk}: \text{Covariable MO}\\
\bar{x}: \text{Media de MO}\\
z_{ijk}: \text{Covariable PCP}\\
\bar{z}: \text{Media de PCP}\\
\epsilon_{ijk}: \text{Error}
\] Hipótesis
\[ H_{01}: (\tau\theta)_{ij}=0 \\ H_{02}: \tau_1=\tau_2=\tau_3=0 \\ H_{03}: \theta_1=\theta_2= 0 \\ H_{04}: \delta=0 \\ H_{05}: \phi=0 \]
Análisis de Covarianza
mod1= aov(rto~mo+pcp+genotipo+localidad+genotipo*localidad,data = df)
summary(mod1)
## Df Sum Sq Mean Sq F value Pr(>F)
## mo 1 14.404 14.404 3030.216 < 2e-16 ***
## pcp 1 0.001 0.001 0.142 0.708
## genotipo 2 0.472 0.236 49.614 8.82e-13 ***
## localidad 1 0.085 0.085 17.835 9.72e-05 ***
## genotipo:localidad 2 0.014 0.007 1.474 0.238
## Residuals 52 0.247 0.005
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Comparaciones de medidas
TukeyHSD(mod1,"genotipo")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = rto ~ mo + pcp + genotipo + localidad + genotipo * localidad, data = df)
##
## $genotipo
## diff lwr upr p adj
## pastusa-criolla 0.09288386 0.04028376 0.14548395 0.0002501
## betania-criolla 0.09838406 0.04578396 0.15098415 0.0001081
## betania-pastusa 0.00550020 -0.04709989 0.05810029 0.9655476
TukeyHSD(mod1,"localidad")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = rto ~ mo + pcp + genotipo + localidad + genotipo * localidad, data = df)
##
## $localidad
## diff lwr upr p adj
## 1700-2500 0.04872784 0.01300653 0.08444915 0.0084594
Revisión de supuestos
Normalidad de residuales
shapiro.test(mod1$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod1$residuals
## W = 0.94692, p-value = 0.01119
library(nortest)
ad.test(mod1$residuals)
##
## Anderson-Darling normality test
##
## data: mod1$residuals
## A = 1.067, p-value = 0.00778
lillie.test(mod1$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: mod1$residuals
## D = 0.12393, p-value = 0.02264
cvm.test(mod1$residuals)
##
## Cramer-von Mises normality test
##
## data: mod1$residuals
## W = 0.19377, p-value = 0.006161
hist(mod1$residuals)
Detectando atípicos
library(outliers)
grubbs.test(mod1$residuals)
##
## Grubbs test for one outlier
##
## data: mod1$residuals
## G.59 = 3.1276, U = 0.8314, p-value = 0.03358
## alternative hypothesis: lowest value -0.202435191246066 is an outlier
which.min(mod1$residuals)
## 59
## 59
df[59,]
## genotipo localidad rto mo pcp
## 59 betania 1700 3.610452 2.166227 630.763
boxplot(df$rto[df$genotipo=="betania" & df$localidad=="1700"])
View(df[df$genotipo=="betania" & df$localidad=="1700",])
Volviendo a correr el análisis sin la covariable PCP
mod2= aov(rto~mo+genotipo+localidad+genotipo*localidad,data = df)
summary(mod2)
## Df Sum Sq Mean Sq F value Pr(>F)
## mo 1 14.404 14.404 3038.692 < 2e-16 ***
## genotipo 2 0.389 0.194 40.992 1.74e-11 ***
## localidad 1 0.158 0.158 33.270 4.23e-07 ***
## genotipo:localidad 2 0.021 0.010 2.191 0.122
## Residuals 53 0.251 0.005
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
shapiro.test(mod2$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod2$residuals
## W = 0.93343, p-value = 0.002784
nortest::ad.test(mod2$residuals)
##
## Anderson-Darling normality test
##
## data: mod2$residuals
## A = 1.3102, p-value = 0.001927
nortest::cvm.test(mod2$residuals)
##
## Cramer-von Mises normality test
##
## data: mod2$residuals
## W = 0.25149, p-value = 0.001174
grubbs.test(x = mod2$residuals)
##
## Grubbs test for one outlier
##
## data: mod2$residuals
## G.59 = 3.1891, U = 0.8247, p-value = 0.02612
## alternative hypothesis: lowest value -0.208099949457393 is an outlier
which.min(mod2$residuals)
## 59
## 59
df$residuales2 = mod2$residuals
df
## genotipo localidad rto mo pcp residuales2
## 1 criolla 2500 1.532324 0.1413272 493.5911 -0.1210368423
## 2 criolla 2500 1.754842 0.4776777 513.4605 -0.1428329505
## 3 criolla 2500 1.933844 0.4952273 474.6874 0.0234222173
## 4 criolla 2500 1.949384 0.5347057 489.6296 0.0102861353
## 5 criolla 2500 2.035334 0.5993105 522.4499 0.0493091808
## 6 criolla 2500 2.042638 0.6737764 493.6804 0.0025244217
## 7 criolla 2500 2.047094 0.7362404 464.8340 -0.0383917999
## 8 criolla 2500 2.129811 0.7426894 524.7451 0.0396413121
## 9 criolla 2500 2.205452 0.7856594 466.7210 0.0840700479
## 10 criolla 2500 2.214795 0.7862166 537.1147 0.0930082775
## 11 criolla 1700 2.249456 0.8346335 471.1599 -0.0910354158
## 12 criolla 1700 2.303769 0.9009712 450.7454 -0.0849075018
## 13 criolla 1700 2.323003 0.9219713 450.0921 -0.0809269230
## 14 criolla 1700 2.323899 0.9372221 411.1233 -0.0911086246
## 15 criolla 1700 2.444456 0.9573691 418.6075 0.0148143388
## 16 criolla 1700 2.475902 0.9589183 444.5341 0.0451344682
## 17 criolla 1700 2.484165 0.9603634 459.3771 0.0523478592
## 18 criolla 1700 2.527872 0.9792048 470.6508 0.0823698095
## 19 criolla 1700 2.563936 1.0199286 421.6321 0.0888530940
## 20 criolla 1700 2.566650 1.0572483 448.2424 0.0644588955
## 21 pastusa 2500 2.647996 1.0984673 631.5726 -0.0684732303
## 22 pastusa 2500 2.682223 1.1086423 604.8925 -0.0416370188
## 23 pastusa 2500 2.706955 1.1124076 629.9419 -0.0196402682
## 24 pastusa 2500 2.730754 1.1178782 618.3181 0.0001854644
## 25 pastusa 2500 2.756026 1.1228595 606.4168 0.0218384544
## 26 pastusa 2500 2.757714 1.1309607 611.2545 0.0176423231
## 27 pastusa 2500 2.792812 1.1587945 610.7859 0.0325228468
## 28 pastusa 2500 2.801856 1.1895224 578.5319 0.0192469214
## 29 pastusa 2500 2.805869 1.2031200 562.3368 0.0133828695
## 30 pastusa 2500 2.824778 1.2132540 618.2278 0.0249316376
## 31 pastusa 1700 2.842134 1.2141848 450.9919 -0.1003531817
## 32 pastusa 1700 2.890715 1.2155011 425.2869 -0.0527278354
## 33 pastusa 1700 2.900696 1.2269485 424.7250 -0.0510623244
## 34 pastusa 1700 2.944616 1.2498250 427.1427 -0.0237588968
## 35 pastusa 1700 2.995502 1.2661920 463.0882 0.0152390207
## 36 pastusa 1700 2.998751 1.2672569 436.4636 0.0177142329
## 37 pastusa 1700 3.028305 1.2723601 428.5218 0.0435616758
## 38 pastusa 1700 3.053359 1.2742559 430.4868 0.0672383730
## 39 pastusa 1700 3.070764 1.3054759 427.4745 0.0619657390
## 40 pastusa 1700 3.071501 1.3612607 410.6137 0.0221831969
## 41 betania 2500 3.072475 1.3754277 538.3602 -0.0794195724
## 42 betania 2500 3.083386 1.3781274 514.7499 -0.0704689089
## 43 betania 2500 3.108545 1.3913979 539.4435 -0.0549491849
## 44 betania 2500 3.162585 1.3929892 522.5134 -0.0020655762
## 45 betania 2500 3.198418 1.3985190 511.3257 0.0297506883
## 46 betania 2500 3.207957 1.4072777 569.5957 0.0329278780
## 47 betania 2500 3.209338 1.4098105 520.0862 0.0324694034
## 48 betania 2500 3.215095 1.4284846 584.0162 0.0246622563
## 49 betania 2500 3.224846 1.4401069 521.9795 0.0259706770
## 50 betania 2500 3.262105 1.4430088 540.3107 0.0611223395
## 51 betania 1700 3.270043 1.4584437 585.6764 -0.0343998977
## 52 betania 1700 3.295008 1.4926352 612.0954 -0.0342698919
## 53 betania 1700 3.419289 1.5155070 605.1493 0.0733973864
## 54 betania 1700 3.422159 1.5229256 581.6844 0.0708794363
## 55 betania 1700 3.434022 1.5469536 589.0108 0.0652886374
## 56 betania 1700 3.463453 1.6240780 615.3811 0.0386993393
## 57 betania 1700 3.518107 1.6268919 600.4257 0.0913094870
## 58 betania 1700 3.602083 1.8216163 604.0110 0.0338441878
## 59 betania 1700 3.610452 2.1662270 630.7630 -0.2080999495
## 60 betania 1700 3.742331 2.1943496 630.5785 -0.0966487352
Eliminando la fila 59 atípica en el residual
df[59,3]=NA
# Desbalanceado
mod3=lm(aov(rto~mo+genotipo+localidad+genotipo*localidad,data = df))
anova(mod3)
## Analysis of Variance Table
##
## Response: rto
## Df Sum Sq Mean Sq F value Pr(>F)
## mo 1 14.0139 14.0139 3847.0268 < 2.2e-16 ***
## genotipo 2 0.2336 0.1168 32.0629 8.471e-10 ***
## localidad 1 0.1079 0.1079 29.6200 1.439e-06 ***
## genotipo:localidad 2 0.0075 0.0038 1.0356 0.3622
## Residuals 52 0.1894 0.0036
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
shapiro.test(mod3$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod3$residuals
## W = 0.92944, p-value = 0.002079
nortest::ad.test(mod3$residuals)
##
## Anderson-Darling normality test
##
## data: mod3$residuals
## A = 1.2443, p-value = 0.002807
nortest::cvm.test(mod3$residuals)
##
## Cramer-von Mises normality test
##
## data: mod3$residuals
## W = 0.22592, p-value = 0.002419
nortest::lillie.test(mod3$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: mod3$residuals
## D = 0.15409, p-value = 0.001327
nortest::pearson.test(mod3$residuals)
##
## Pearson chi-square normality test
##
## data: mod3$residuals
## P = 13.525, p-value = 0.095
Imputando por la media
media=mean(df$rto[df$genotipo=="betania" & df$localidad=="1700"],na.rm = T)
df$rto[59]=media
#View(df[df$genotipo=="betania" & df$localidad=="1700",])
mod4=aov(rto~genotipo+localidad+genotipo*localidad,data = df)
summary(mod4)
## Df Sum Sq Mean Sq F value Pr(>F)
## genotipo 2 12.536 6.268 415.480 < 2e-16 ***
## localidad 1 1.533 1.533 101.641 5.12e-14 ***
## genotipo:localidad 2 0.121 0.060 3.996 0.0241 *
## Residuals 54 0.815 0.015
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
shapiro.test(mod4$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod4$residuals
## W = 0.94997, p-value = 0.01552
plot(mod4$residuals)
ad.test(mod4$residuals)
##
## Anderson-Darling normality test
##
## data: mod4$residuals
## A = 0.70546, p-value = 0.06222
df$interaccion=interaction(df$genotipo,df$localidad)
df |>
ggplot(aes(y=rto,x=mo,color=interaccion))+
geom_point(size=4)
Gráfico de interacción
df |>
group_by(genotipo,localidad) |>
summarise(media=mean(rto)) |>
ggplot(aes(x=localidad,y=media,group=genotipo,color=genotipo))+
geom_point(size=4)+
geom_line()
## `summarise()` has grouped output by 'genotipo'. You can override using the
## `.groups` argument.