18/04/2023

#Diseño 1 - Factorial simple en arreglo completamente al azar (un sólo factor)

# Único factor
# Sin razón de bloquear

# Field layout: a 6 x 6 grid of plot coordinates (x and y run from 0 to 5).
xy <- expand.grid(x = 0:5, y = 0:5)
# Draw every plot as a filled square; asp = 1 keeps both axes on the same scale.
plot(xy, pch = 15, cex = 3, asp = 1)

# Treatment factor: six genotypes with six consecutive replicates each
# (36 values in total; gl() defaults the length to n * k).
genotipo <- gl(6, 6, labels = paste0("gen", seq_len(6)))
print(genotipo)

##  [1] gen1 gen1 gen1 gen1 gen1 gen1 gen2 gen2 gen2 gen2 gen2 gen2 gen3 gen3 gen3
## [16] gen3 gen3 gen3 gen4 gen4 gen4 gen4 gen4 gen4 gen5 gen5 gen5 gen5 gen5 gen5
## [31] gen6 gen6 gen6 gen6 gen6 gen6
## Levels: gen1 gen2 gen3 gen4 gen5 gen6

#Variable respuesta
# Simulate the response PS as three batches of 12 normal draws
# (means 1200 / 1500 / 1420, standard deviations 100 / 80 / 90). With six
# replicates per genotype, each batch covers two consecutive genotypes.
# The fixed seed makes the draws repeatable.
set.seed(123)
PS <- c(
  rnorm(n = 12, mean = 1200, sd = 100),
  rnorm(n = 12, mean = 1500, sd = 80),
  rnorm(n = 12, mean = 1420, sd = 90)
)
# Random permutation of the 36 grid cells: treatments are assigned to plots
# completely at random.
aleat <- sample.int(36)
datos <- data.frame(xy[aleat, ], genotipo = genotipo, PS = PS)
head(datos)

##    x y genotipo       PS
## 15 2 2     gen1 1143.952
## 26 1 4     gen1 1176.982
## 31 0 5     gen1 1355.871
## 16 3 2     gen1 1207.051
## 20 1 3     gen1 1212.929
## 30 5 4     gen1 1371.506

library(ggplot2)

# Map of the field: one tile per plot, filled by the genotype assigned to it.
ggplot(datos, aes(x = x, y = y, fill = genotipo)) +
  geom_tile()

#Análisis descriptivo
# Distribution of PS within each genotype, first as box plots ...
ggplot(datos, aes(x = genotipo, y = PS)) +
  geom_boxplot()

# ... and then as violin plots.
ggplot(datos, aes(x = genotipo, y = PS)) +
  geom_violin()

#Análisis inferencial #Hipótesis

\[H_0:\mu_{g_1}=\mu_{g_2}=\mu_{g_3}=\mu_{g_4}=\mu_{g_5}=\mu_{g_6}\\H_a: H_0\text{ es falsa}\] #Modelo

\[y_{ij} = \mu_i + \epsilon_{ij}\\ i = 1,2,3,4,5,6~;j=1,2,3,4,5,6\]

\[y_{ij} = \text{Peso seco del i-ésimo genotipo en la j-ésima repetición}\] \[\mu_i = \text{La media de cada i-ésimo genotipo}\] Modelo en forma de efectos \[y_{ij} = \mu + \tau_i + \epsilon_{ij}\] \(\mu\) Media global \(\tau_i\) Efecto de cada genotipo

Modelo en forma matricial

\[y = x\beta + E\]

\(x\) Matriz del diseño 36 filas y 7 columnas (1 columna representa la media y 6 una por genotipo)

\(\beta\) vector de parámetros (\(\mu; \tau_1; \tau_2;\tau_3;\tau_4; \tau_5;\tau_6\))

Otra forma de plantear la hipótesis

\[H_0: \tau_1=\tau_2=\tau_3=\tau_4=\tau_5=\tau_6=0\]

# One-way ANOVA of PS on genotype.
mod1 <- aov(PS ~ genotipo, data = datos)
smod1 <- summary(mod1)
# p-value of the genotype term: first row of the "Pr(>F)" column, looked up by
# column name instead of by position so it does not depend on column order.
pv1 <- smod1[[1]][["Pr(>F)"]][1]

# pv1 is a single number, so a plain if/else is used rather than the
# vectorised ifelse(); the decision is taken at the 5 % significance level.
if (pv1 < 0.05) "Rechazo Ho" else "No rechazo Ho"

## [1] "Rechazo Ho"

Como el valor de F es 14.22, esto quiere decir que la variabilidad causada por los genotipos es 14.22 veces más grande que la causada por el error.

Interpretando el p-value: Se rechaza la hipótesis nula, lo que sugiere que al menos uno de los tratamientos difiere de los demás en cuanto al peso seco. Los datos proporcionan evidencia en contra de la hipótesis nula (a favor de la alterna).

#Estimando los efectos

# Grand mean of the response
mu <- mean(datos[["PS"]])
# Mean of each genotype
mu_i <- with(datos, tapply(PS, genotipo, mean))
# Effect of each genotype: deviation of its mean from the grand mean
tau_i <- mu_i - mu
print(tau_i)

##       gen1       gen2       gen3       gen4       gen5       gen6 
## -134.97483 -185.56951  123.96002   82.81483   22.23648   91.53300

# Box plot of PS by genotype, annotated with the estimated effects.
boxplot(PS ~ genotipo, data = datos, ylim = c(1000, 1800), las = 1)
# Red dots mark the genotype means; the dashed red line is the grand mean.
points(x = 1:6, y = mu_i, pch = 16, col = "red")
abline(h = mu, col = "red", lty = 2)
# Dashed blue segments, shifted 0.2 to the left of each box, join every
# genotype mean to the grand mean, i.e. they draw the effect tau_i.
segments(x0 = 1:6 - 0.2, y0 = mu_i, x1 = 1:6 - 0.2, y1 = mu,
         col = "blue", lwd = 2, lty = 2)
# The rounded effects are written twice: at y = 1700 and at y = 1000
# (the lower limit of the y axis).
text(x = 1:6, y = rep(1700, 6), labels = round(tau_i, 2))
text(x = 1:6, y = rep(1000, 6), labels = round(tau_i, 2))

#Revisión de supuestos #Varianza \[H_0: \sigma^2_{g1}=\sigma^2_{g2}=\sigma^2_{g3}=\sigma^2_{g4}=\sigma^2_{g5}=\sigma^2_{g6}\]

# Histogram of the ANOVA residuals (visual look at their distribution).
hist(mod1$residuals)

# Variance of the residuals within each genotype (stored in var_res).
var_res = tapply(mod1$residuals, datos$genotipo, var)

# Variance: Bartlett test of homogeneity of the residual variances across
# genotypes.
bartlett.test(mod1$residuals, datos$genotipo)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod1$residuals and datos$genotipo
## Bartlett's K-squared = 5.5895, df = 5, p-value = 0.3482

# Residuals: Shapiro-Wilk normality test on the ANOVA residuals.
shapiro.test(mod1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod1$residuals
## W = 0.97311, p-value = 0.5164

Como el p-value en la prueba de igualdad de varianzas es mayor al 5%, estadísticamente las varianzas se pueden considerar iguales.

Como el p-value en la prueba de normalidad es 51.64%(>5%) se considera que los residuos siguen una distribución normal

#Comparación de medias posterior al análisis de varianza

# Tukey's HSD: pairwise comparison of the genotype means after the ANOVA.
# The left margin is widened before plotting.
par(mar = c(5, 6, 3, 1))
tt <- TukeyHSD(mod1, which = "genotipo")
plot(tt, las = 1)
# Dashed red reference line at a difference of zero.
abline(v = 0, col = "red", lty = 2, lwd = 2)

library(agricolae)

## Warning: package 'agricolae' was built under R version 4.2.3

# Duncan's multiple range test on the genotype means; console = TRUE prints
# the report. TRUE is spelled out because T is an ordinary variable that can
# be reassigned.
dt <- duncan.test(mod1, "genotipo", console = TRUE)

## 
## Study: mod1 ~ "genotipo"
## 
## Duncan's new multiple range test
## for PS 
## 
## Mean Square Error:  7066.534 
## 
## genotipo,  means
## 
##            PS       std r      Min      Max
## gen1 1244.715  95.50024 6 1143.952 1371.506
## gen2 1194.121  90.44675 6 1073.494 1322.408
## gen3 1503.650  99.77789 6 1342.671 1642.953
## gen4 1462.505  52.69259 6 1414.574 1556.108
## gen5 1401.927 103.29604 6 1268.198 1532.843
## gen6 1471.223  41.18360 6 1393.444 1500.561
## 
## Alpha: 0.05 ; DF Error: 30 
## 
## Critical Range
##         2         3         4         5         6 
##  99.11886 104.16376 107.43409 109.76839 111.53078 
## 
## Means with the same letter are not significantly different.
## 
##            PS groups
## gen3 1503.650      a
## gen6 1471.223      a
## gen4 1462.505      a
## gen5 1401.927      a
## gen1 1244.715      b
## gen2 1194.121      b

# Plot the object returned by duncan.test().
plot(dt)

#Diseño 1 (Incumpliendo supuestos)

# Único factor
# Sin razón de bloquear

# Rebuild the 6 x 6 field layout (x and y from 0 to 5) and draw it.
xy <- expand.grid(x = 0:5, y = 0:5)
plot(xy, pch = 15, cex = 3, asp = 1)

# Six genotypes with six consecutive replicates each (36 values in total).
genotipo <- gl(6, 6, labels = paste0("gen", seq_len(6)))
print(genotipo)

##  [1] gen1 gen1 gen1 gen1 gen1 gen1 gen2 gen2 gen2 gen2 gen2 gen2 gen3 gen3 gen3
## [16] gen3 gen3 gen3 gen4 gen4 gen4 gen4 gen4 gen4 gen5 gen5 gen5 gen5 gen5 gen5
## [31] gen6 gen6 gen6 gen6 gen6 gen6
## Levels: gen1 gen2 gen3 gen4 gen5 gen6

#Variable respuesta
# Same seed and batch means as in the first design, but the three batches now
# have clearly different spreads (sd 120, 100 and 250), which is what breaks
# the equal-variance assumption in this section.
set.seed(123)
PS <- c(
  rnorm(n = 12, mean = 1200, sd = 120),
  rnorm(n = 12, mean = 1500, sd = 100),
  rnorm(n = 12, mean = 1420, sd = 250)
)
# The plot permutation `aleat` is the one drawn earlier in the script; it is
# not redrawn here.
datos <- data.frame(xy[aleat, ], genotipo = genotipo, PS = PS)
head(datos)

##    x y genotipo       PS
## 15 2 2     gen1 1132.743
## 26 1 4     gen1 1172.379
## 31 0 5     gen1 1387.045
## 16 3 2     gen1 1208.461
## 20 1 3     gen1 1215.515
## 30 5 4     gen1 1405.808

# Box plot of the new response by genotype.
ggplot(datos, aes(x = genotipo, y = PS)) +
  geom_boxplot()

# One-way ANOVA on the new data; its summary table is printed.
mod1b <- aov(PS ~ genotipo, data = datos)
smod1b <- summary(mod1b)
print(smod1b)

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## genotipo     5 627712  125542   5.372 0.00121 **
## Residuals   30 701126   23371                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Shapiro-Wilk normality test on the residuals of the second ANOVA.
shapiro.test(mod1b$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod1b$residuals
## W = 0.98349, p-value = 0.8558

# Bartlett test of homogeneity of the residual variances across genotypes
# for the second ANOVA.
bartlett.test(mod1b$residuals, datos$genotipo)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod1b$residuals and datos$genotipo
## Bartlett's K-squared = 12.401, df = 5, p-value = 0.02969

Como se rechaza la hipótesis de igualdad de varianzas, se incumple el supuesto, lo cual complica la interpretación.

#Análisis de varianza para un diseño factorial simple en arreglo completamente al azar, en presencia de heterocedasticidad

# One-way analysis of means without assuming equal variances across
# genotypes (var.equal = FALSE is the default, written out for clarity).
mod1c <- oneway.test(PS ~ genotipo, data = datos, var.equal = FALSE)
print(mod1c)

## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  PS and genotipo
## F = 8.6764, num df = 5.000, denom df = 13.702, p-value = 0.0006918

Cuando se incumple normalidad e igualdad de varianzas

Análisis de varianza no paramétrico para un diseño factorial simple en arreglo completamente al azar

#Prueba de Kruskal-Wallis –> Cuando no se cumplen los dos supuestos (normalidad e igualdad de varianzas). Solo es válido para este experimento.

\[H_o: R_1=R_2=R_3=R_4=R_5=R_6\]

# Kruskal-Wallis rank sum test of PS across genotypes. It is called on the
# PS and genotipo vectors in the workspace, the same ones used to build datos.
mod1d <- kruskal.test(x = PS, g = genotipo)
print(mod1d)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  PS and genotipo
## Kruskal-Wallis chi-squared = 17.204, df = 5, p-value = 0.004128

Comparación de rangos promedio posterior a Kruskal-Wallis

18/04/2023

Stefania Hernández Azuero

2023-04-18