Clase 13

Análisis de covarianza Análisis de varianza y análisis de regresión

Diseño Factorial completo en arreglo completamente al azar

Factor 1: Genotipo(criolla, pastusa, betania) Factor 2: Ambiente (2500 y 1700 msnm)

Covariable 1: Precipitación acumulada en 3 meses Covariable 2: Materia Orgánica

rto = Respuesta: Rendimiento kg/ha

set.seed(2022)
rto<-c(sort(rnorm(10,3,0.3)),
       sort(rnorm(10,2.5,0.3)),
       sort(rnorm(10,3.1,0.3)),
       sort(rnorm(10,2,0.3)),
       sort(rnorm(10,3.5,0.3)),
       sort(rnorm(10,2.7,0.3)))
rto=sort(rto)

mo<-c(sort(rnorm(10,1.5,0.3)),
       sort(rnorm(10,1,0.3)),
       sort(rnorm(10,0.8,0.3)),
       sort(rnorm(10,1.3,0.3)),
       sort(rnorm(10,0.9,0.3)),
       sort(rnorm(10,1.2,0.3)))
mo=sort(mo)

pcp<-c(rnorm(10,500,20),
       rnorm(10,450,20),
       rnorm(10,600,20),
       rnorm(10,450,20),
       rnorm(10,550,20),
       rnorm(10,610,20))

genotipo<- gl(3,20,60,c("criolla","pastusa","betania"))

localidad = gl(2,10,60,c("2500","1700"))

df = data.frame(genotipo,localidad,rto,mo,pcp)

Arbol colapsable

library(collapsibleTree)
collapsibleTree(df,hierarchy = c("localidad","genotipo","rto"))

Análisis descriptivo

library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

df |> 
  ggplot(aes(y=rto,x=mo))+
  geom_point(size=4,color="orange")

df |> 
  ggplot(aes(y=rto,x=pcp))+
  geom_point(size=4,color="blue")

df |> 
  ggplot(aes(y=rto,x=genotipo,fill=genotipo))+
  geom_boxplot()

df |> 
  ggplot(aes(y=rto,x=localidad,fill=localidad))+
  geom_boxplot()

df |> 
  ggplot(aes(y=rto,x=genotipo,fill=genotipo))+
  facet_wrap(~localidad)+
  geom_boxplot()

Modelo de diseño \[ y_{ijk}=\mu+\tau_i+\theta_j+(\tau\theta)_{ij}+\delta(x_{ijk}-\bar{x})+\phi(z_{ijk}-\bar{z})+\epsilon_{ijk} \] \[ y_{ijk}: \text{Rendimiento} \\ \mu: \text{Promedio general de rendimiento} \\ \tau_{i}: \text{Efecto del i-esimo genotipo} ~i: 1\cdots 3\\ \theta_{j}: \text{Efecto del j-esimo ambiente j: 1, 2 }\\ (\tau\theta)_{ij}: \text{Efecto de la interacción Genotipo x ambiente}\\ \delta: \text{Pendiente entre respuesta y la covariable (MO)} \\ \phi: \text{Pendiente entre respuesta y la covariable (PCP)} \\ x_{ijk}: \text{Covariable MO}\\ \bar{x}: \text{Media de MO}\\ z_{ijk}: \text{Covariable PCP}\\ \bar{z}: \text{Media de PCP}\\ \epsilon_{ijk}: \text{Error} \] Hipótesis

\[ H_{01}: (\tau\theta)_{ij}=0 \\ H_{02}: \tau_1=\tau_2=\tau_3=0 \\ H_{03}: \theta_1=\theta_2= 0 \\ H_{04}: \delta=0 \\ H_{05}: \phi=0 \]

Análisis de Covarianza

mod1= aov(rto~mo+pcp+genotipo+localidad+genotipo*localidad,data = df)
summary(mod1)

##                    Df Sum Sq Mean Sq  F value   Pr(>F)    
## mo                  1 14.404  14.404 3030.216  < 2e-16 ***
## pcp                 1  0.001   0.001    0.142    0.708    
## genotipo            2  0.472   0.236   49.614 8.82e-13 ***
## localidad           1  0.085   0.085   17.835 9.72e-05 ***
## genotipo:localidad  2  0.014   0.007    1.474    0.238    
## Residuals          52  0.247   0.005                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Comparaciones de medidas

TukeyHSD(mod1,"genotipo")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = rto ~ mo + pcp + genotipo + localidad + genotipo * localidad, data = df)
## 
## $genotipo
##                       diff         lwr        upr     p adj
## pastusa-criolla 0.09288386  0.04028376 0.14548395 0.0002501
## betania-criolla 0.09838406  0.04578396 0.15098415 0.0001081
## betania-pastusa 0.00550020 -0.04709989 0.05810029 0.9655476

TukeyHSD(mod1,"localidad")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = rto ~ mo + pcp + genotipo + localidad + genotipo * localidad, data = df)
## 
## $localidad
##                 diff        lwr        upr     p adj
## 1700-2500 0.04872784 0.01300653 0.08444915 0.0084594

Revisión de supuestos

Normalidad de residuales

shapiro.test(mod1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod1$residuals
## W = 0.94692, p-value = 0.01119

library(nortest)
ad.test(mod1$residuals)

## 
##  Anderson-Darling normality test
## 
## data:  mod1$residuals
## A = 1.067, p-value = 0.00778

lillie.test(mod1$residuals)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  mod1$residuals
## D = 0.12393, p-value = 0.02264

cvm.test(mod1$residuals)

## 
##  Cramer-von Mises normality test
## 
## data:  mod1$residuals
## W = 0.19377, p-value = 0.006161

hist(mod1$residuals)

Detectando atípicos

library(outliers)
grubbs.test(mod1$residuals)

## 
##  Grubbs test for one outlier
## 
## data:  mod1$residuals
## G.59 = 3.1276, U = 0.8314, p-value = 0.03358
## alternative hypothesis: lowest value -0.202435191246066 is an outlier

which.min(mod1$residuals)

## 59 
## 59

df[59,]

##    genotipo localidad      rto       mo     pcp
## 59  betania      1700 3.610452 2.166227 630.763

boxplot(df$rto[df$genotipo=="betania" & df$localidad=="1700"])

View(df[df$genotipo=="betania" & df$localidad=="1700",])

Volviendo a correr el análisis sin la covariable PCP

mod2= aov(rto~mo+genotipo+localidad+genotipo*localidad,data = df)
summary(mod2)

##                    Df Sum Sq Mean Sq  F value   Pr(>F)    
## mo                  1 14.404  14.404 3038.692  < 2e-16 ***
## genotipo            2  0.389   0.194   40.992 1.74e-11 ***
## localidad           1  0.158   0.158   33.270 4.23e-07 ***
## genotipo:localidad  2  0.021   0.010    2.191    0.122    
## Residuals          53  0.251   0.005                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

shapiro.test(mod2$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod2$residuals
## W = 0.93343, p-value = 0.002784

nortest::ad.test(mod2$residuals)

## 
##  Anderson-Darling normality test
## 
## data:  mod2$residuals
## A = 1.3102, p-value = 0.001927

nortest::cvm.test(mod2$residuals)

## 
##  Cramer-von Mises normality test
## 
## data:  mod2$residuals
## W = 0.25149, p-value = 0.001174

grubbs.test(x = mod2$residuals)

## 
##  Grubbs test for one outlier
## 
## data:  mod2$residuals
## G.59 = 3.1891, U = 0.8247, p-value = 0.02612
## alternative hypothesis: lowest value -0.208099949457393 is an outlier

which.min(mod2$residuals)

## 59 
## 59

df$residuales2 = mod2$residuals
df

##    genotipo localidad      rto        mo      pcp   residuales2
## 1   criolla      2500 1.532324 0.1413272 493.5911 -0.1210368423
## 2   criolla      2500 1.754842 0.4776777 513.4605 -0.1428329505
## 3   criolla      2500 1.933844 0.4952273 474.6874  0.0234222173
## 4   criolla      2500 1.949384 0.5347057 489.6296  0.0102861353
## 5   criolla      2500 2.035334 0.5993105 522.4499  0.0493091808
## 6   criolla      2500 2.042638 0.6737764 493.6804  0.0025244217
## 7   criolla      2500 2.047094 0.7362404 464.8340 -0.0383917999
## 8   criolla      2500 2.129811 0.7426894 524.7451  0.0396413121
## 9   criolla      2500 2.205452 0.7856594 466.7210  0.0840700479
## 10  criolla      2500 2.214795 0.7862166 537.1147  0.0930082775
## 11  criolla      1700 2.249456 0.8346335 471.1599 -0.0910354158
## 12  criolla      1700 2.303769 0.9009712 450.7454 -0.0849075018
## 13  criolla      1700 2.323003 0.9219713 450.0921 -0.0809269230
## 14  criolla      1700 2.323899 0.9372221 411.1233 -0.0911086246
## 15  criolla      1700 2.444456 0.9573691 418.6075  0.0148143388
## 16  criolla      1700 2.475902 0.9589183 444.5341  0.0451344682
## 17  criolla      1700 2.484165 0.9603634 459.3771  0.0523478592
## 18  criolla      1700 2.527872 0.9792048 470.6508  0.0823698095
## 19  criolla      1700 2.563936 1.0199286 421.6321  0.0888530940
## 20  criolla      1700 2.566650 1.0572483 448.2424  0.0644588955
## 21  pastusa      2500 2.647996 1.0984673 631.5726 -0.0684732303
## 22  pastusa      2500 2.682223 1.1086423 604.8925 -0.0416370188
## 23  pastusa      2500 2.706955 1.1124076 629.9419 -0.0196402682
## 24  pastusa      2500 2.730754 1.1178782 618.3181  0.0001854644
## 25  pastusa      2500 2.756026 1.1228595 606.4168  0.0218384544
## 26  pastusa      2500 2.757714 1.1309607 611.2545  0.0176423231
## 27  pastusa      2500 2.792812 1.1587945 610.7859  0.0325228468
## 28  pastusa      2500 2.801856 1.1895224 578.5319  0.0192469214
## 29  pastusa      2500 2.805869 1.2031200 562.3368  0.0133828695
## 30  pastusa      2500 2.824778 1.2132540 618.2278  0.0249316376
## 31  pastusa      1700 2.842134 1.2141848 450.9919 -0.1003531817
## 32  pastusa      1700 2.890715 1.2155011 425.2869 -0.0527278354
## 33  pastusa      1700 2.900696 1.2269485 424.7250 -0.0510623244
## 34  pastusa      1700 2.944616 1.2498250 427.1427 -0.0237588968
## 35  pastusa      1700 2.995502 1.2661920 463.0882  0.0152390207
## 36  pastusa      1700 2.998751 1.2672569 436.4636  0.0177142329
## 37  pastusa      1700 3.028305 1.2723601 428.5218  0.0435616758
## 38  pastusa      1700 3.053359 1.2742559 430.4868  0.0672383730
## 39  pastusa      1700 3.070764 1.3054759 427.4745  0.0619657390
## 40  pastusa      1700 3.071501 1.3612607 410.6137  0.0221831969
## 41  betania      2500 3.072475 1.3754277 538.3602 -0.0794195724
## 42  betania      2500 3.083386 1.3781274 514.7499 -0.0704689089
## 43  betania      2500 3.108545 1.3913979 539.4435 -0.0549491849
## 44  betania      2500 3.162585 1.3929892 522.5134 -0.0020655762
## 45  betania      2500 3.198418 1.3985190 511.3257  0.0297506883
## 46  betania      2500 3.207957 1.4072777 569.5957  0.0329278780
## 47  betania      2500 3.209338 1.4098105 520.0862  0.0324694034
## 48  betania      2500 3.215095 1.4284846 584.0162  0.0246622563
## 49  betania      2500 3.224846 1.4401069 521.9795  0.0259706770
## 50  betania      2500 3.262105 1.4430088 540.3107  0.0611223395
## 51  betania      1700 3.270043 1.4584437 585.6764 -0.0343998977
## 52  betania      1700 3.295008 1.4926352 612.0954 -0.0342698919
## 53  betania      1700 3.419289 1.5155070 605.1493  0.0733973864
## 54  betania      1700 3.422159 1.5229256 581.6844  0.0708794363
## 55  betania      1700 3.434022 1.5469536 589.0108  0.0652886374
## 56  betania      1700 3.463453 1.6240780 615.3811  0.0386993393
## 57  betania      1700 3.518107 1.6268919 600.4257  0.0913094870
## 58  betania      1700 3.602083 1.8216163 604.0110  0.0338441878
## 59  betania      1700 3.610452 2.1662270 630.7630 -0.2080999495
## 60  betania      1700 3.742331 2.1943496 630.5785 -0.0966487352

Eliminando la fila 59 atípica en el residual

df[59,3]=NA

# Desbalanceado
mod3=lm(aov(rto~mo+genotipo+localidad+genotipo*localidad,data = df))
anova(mod3)

## Analysis of Variance Table
## 
## Response: rto
##                    Df  Sum Sq Mean Sq   F value    Pr(>F)    
## mo                  1 14.0139 14.0139 3847.0268 < 2.2e-16 ***
## genotipo            2  0.2336  0.1168   32.0629 8.471e-10 ***
## localidad           1  0.1079  0.1079   29.6200 1.439e-06 ***
## genotipo:localidad  2  0.0075  0.0038    1.0356    0.3622    
## Residuals          52  0.1894  0.0036                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

shapiro.test(mod3$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod3$residuals
## W = 0.92944, p-value = 0.002079

nortest::ad.test(mod3$residuals)

## 
##  Anderson-Darling normality test
## 
## data:  mod3$residuals
## A = 1.2443, p-value = 0.002807

nortest::cvm.test(mod3$residuals)

## 
##  Cramer-von Mises normality test
## 
## data:  mod3$residuals
## W = 0.22592, p-value = 0.002419

nortest::lillie.test(mod3$residuals)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  mod3$residuals
## D = 0.15409, p-value = 0.001327

nortest::pearson.test(mod3$residuals)

## 
##  Pearson chi-square normality test
## 
## data:  mod3$residuals
## P = 13.525, p-value = 0.095

Imputando por la media

media=mean(df$rto[df$genotipo=="betania" & df$localidad=="1700"],na.rm = T)

df$rto[59]=media

#View(df[df$genotipo=="betania" & df$localidad=="1700",])

mod4=aov(rto~genotipo+localidad+genotipo*localidad,data = df)
summary(mod4)

##                    Df Sum Sq Mean Sq F value   Pr(>F)    
## genotipo            2 12.536   6.268 415.480  < 2e-16 ***
## localidad           1  1.533   1.533 101.641 5.12e-14 ***
## genotipo:localidad  2  0.121   0.060   3.996   0.0241 *  
## Residuals          54  0.815   0.015                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

shapiro.test(mod4$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod4$residuals
## W = 0.94997, p-value = 0.01552

plot(mod4$residuals)

ad.test(mod4$residuals)

## 
##  Anderson-Darling normality test
## 
## data:  mod4$residuals
## A = 0.70546, p-value = 0.06222

df$interaccion=interaction(df$genotipo,df$localidad)
df |> 
  ggplot(aes(y=rto,x=mo,color=interaccion))+
  geom_point(size=4)

Gráfico de interacción

df |> 
  group_by(genotipo,localidad) |> 
  summarise(media=mean(rto)) |> 
  ggplot(aes(x=localidad,y=media,group=genotipo,color=genotipo))+
  geom_point(size=4)+
  geom_line()

## `summarise()` has grouped output by 'genotipo'. You can override using the
## `.groups` argument.

Clase 13

Cesar Rincón

2022-11-27