#Diseño 1

# Unico factor
# Sin razon de bloquear

# IMAGINADO EL ARREGLO EN CAMPO DEL EXPERIMENTO
xy = expand.grid(x=seq(0,5), y=seq(0,5))
plot(xy, pch=15, cex=3, asp=1)

#Parcelas de 3 metros por 3 metros, en total 36 parcelas

# FACTOR
genotipo = gl(n = 6, k = 6, length = 36,
              labels = paste0('gen', 1:6));genotipo

##  [1] gen1 gen1 gen1 gen1 gen1 gen1 gen2 gen2 gen2 gen2 gen2 gen2 gen3 gen3 gen3
## [16] gen3 gen3 gen3 gen4 gen4 gen4 gen4 gen4 gen4 gen5 gen5 gen5 gen5 gen5 gen5
## [31] gen6 gen6 gen6 gen6 gen6 gen6
## Levels: gen1 gen2 gen3 gen4 gen5 gen6

# VARIABLE RESPUESTA
#en este caso se mide el Peso Seco
set.seed(123)
PS = c(
  rnorm(12, 1200, 100), #rnorm (media, desviacion estandar (sd))
  rnorm(12, 1500, 80),
  rnorm(12, 1402, 90)
);PS

##  [1] 1143.952 1176.982 1355.871 1207.051 1212.929 1371.506 1246.092 1073.494
##  [9] 1131.315 1155.434 1322.408 1235.981 1532.062 1508.855 1455.533 1642.953
## [17] 1539.828 1342.671 1556.108 1462.177 1414.574 1482.562 1417.920 1441.689
## [25] 1345.746 1250.198 1477.401 1415.804 1299.568 1514.843 1440.382 1375.444
## [33] 1482.561 1481.032 1475.942 1463.978

aleat = sample(36)
datos = data.frame(xy[aleat,], genotipo, PS)
head(datos)

##    x y genotipo       PS
## 15 2 2     gen1 1143.952
## 26 1 4     gen1 1176.982
## 31 0 5     gen1 1355.871
## 16 3 2     gen1 1207.051
## 20 1 3     gen1 1212.929
## 30 5 4     gen1 1371.506

library(ggplot2)

ggplot(datos)+
  aes(x,y, fill=genotipo)+
  geom_tile()

#Analisis descriptivo

ggplot(datos)+
  aes(genotipo, PS)+
  geom_boxplot()

ggplot(datos)+
  aes(genotipo, PS)+
  geom_violin()

#Se observa como se comportan todos los genotipos, los genotipos 1 y 2 parecen no ser los mejores mientras que los genotipos 3, 4, 5 y 6 son los mejores #Los puntos que se salen de las cajas no necesariamente son atipicos #Los genotipos 6 y 4 tiene mayor varibilidad, #——————————————————————————-

analisis inferencial

\[H_O: \mu_{g_1}=\mu_{g_2}=\mu_{g_3}=\mu_{g_4}=\mu_{g_5}=\mu_{g6}\\ H_a: H_O\text{ es falsa}\]

Modelo

\[y_{ij} = \mu_i + \epsilon_{ij}\\ i=1,2,3,4,5,6~; j=1,2,3,4,5,6\] \[y_{ij} = \text{Peso Seco i-esimo genotipo y j-esima repeticion}\] \[\mu_i = \text{La media de cada i-esimo genotipo }\] Modelo en forma de efectos

\[y_{ij} = \mu + \tau_i + \epsilon_{ij}\] $mu \text {media global}$ media global

$\tau_i\ text {efecto de cada genotipo}$ efecto de cada genotipo

Modelo en forma matricial

\[Y = X\beta + E\] $X$ matriz del diseño 36 filas y 7 columnas (1 columna representa la media y 6 una por genotipo)

$\beta$ vector de parametros ($; _1; _2; _3; _4; _5; _6 $)

Otra forma de plantear la hipotesis \[H_O: \tau_1; \tau_2; \tau_3; \tau_4; \tau_5; \tau_6 = 0\]

mod1 =aov(PS ~ genotipo, data= datos)
summary(mod1)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## genotipo     5 480526   96105    13.6 5.76e-07 ***
## Residuals   30 211996    7067                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

mod1 =aov(PS ~ genotipo, data= datos)
smod1 =summary(mod1)
pvl =smod1 [[1]][1,5]

ifelse(pvl < 0.05, 'rechazo HO', 'no rechazo HO')

## [1] "rechazo HO"

#Como el valor de F es 14.22, esto quiere decir que la variabilidad causada por los genotipos es 14.22 veces mas grande que la causada por el error

#Interpretando el P-value: Se rechaza la hipotesis nula, lo que sugiere que exixten diferencias en almenos uno de los tratamientos en cuanto alp peso seco. Los datos proporcionan evidencia en contra de la hipotesis nula (A favor de la alterna)

###Estimacion de efectos

#media global 
mu = mean(datos$PS)
#media por genotipo
mu_i = tapply(datos$PS, datos$genotipo, mean)
#efecto por genotipo
tau_i = mu_i - mu
tau_i

##       gen1       gen2       gen3       gen4       gen5       gen6 
## -128.97483 -179.56951  129.96002   88.81483   10.23648   79.53300

var_i = tapply(datos$PS, datos$genotipo, var)

boxplot(PS ~ genotipo, datos, yline=c(1000, 1800), las=1)
points(1:6, mu_i, pch=16, col='red')
abline(h =mu, lty=2, col='red')
segments(1:6-.2, mu_i, 1:6-.2, mu, col='blue', lwd=2, lty=2)
text(1:6, rep(1600, 6), round(tau_i, 2))
text(1:6, rep(1000, 6), round(var_i, 2))

Revicion de suspuestos

\[H_O: \sigma^2_{g1}=\sigma^2_{g2}=\sigma^2_{g3}=\sigma^2_{g4}=\sigma^2_{g5}=\sigma^2_{g6}=\] \[H_O: \epsilon \sim N(0, \sigma^2_e)\]

hist(mod1$residuals)

var_res = tapply(mod1$residuals, datos$genotipo, var)

#Igualdad de varianzas
bartlett.test(mod1$residuals, datos$genotipo)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod1$residuals and datos$genotipo
## Bartlett's K-squared = 5.5895, df = 5, p-value = 0.3482

#Normalidad de residuos
shapiro.test(mod1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod1$residuals
## W = 0.97311, p-value = 0.5164

Como el p-value en la prueba de igualdad de varianza es mayor al 5% (30%) estadisticamente se pueden considerar iguales

Como el p-value en la prueba de normalidad es de 50 % (>5%) se considdera que los residuales siguen una distribucion normal

#Comparacion de medias posterior al analisis de varianza

#Prueba de maxima direfencia de Tukey
par(mar=c(5,6,3,1))
tt = TukeyHSD(mod1, 'genotipo') 
plot(tt, las=1)
abline(v=0, lty=2, col='red', lwd=2)

tt

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = PS ~ genotipo, data = datos)
## 
## $genotipo
##                  diff         lwr       upr     p adj
## gen2-gen1  -50.594675 -198.214230  97.02488 0.8995877
## gen3-gen1  258.934855  111.315300 406.55441 0.0001225
## gen4-gen1  217.789664   70.170110 365.40922 0.0012678
## gen5-gen1  139.211312   -8.408243 286.83087 0.0734515
## gen6-gen1  208.507827   60.888272 356.12738 0.0021266
## gen3-gen2  309.529530  161.909976 457.14908 0.0000068
## gen4-gen2  268.384340  120.764785 416.00389 0.0000713
## gen5-gen2  189.805987   42.186433 337.42554 0.0059058
## gen6-gen2  259.102502  111.482947 406.72206 0.0001213
## gen4-gen3  -41.145191 -188.764745 106.47436 0.9557570
## gen5-gen3 -119.723543 -267.343098  27.89601 0.1663446
## gen6-gen3  -50.427028 -198.046583  97.19253 0.9008350
## gen5-gen4  -78.578352 -226.197907  69.04120 0.5929467
## gen6-gen4   -9.281838 -156.901392 138.33772 0.9999606
## gen6-gen5   69.296515  -78.323040 216.91607 0.7103650

#Genotipos que difieren y no difieren

library(agricolae)

dt = duncan.test(mod1, 'genotipo', console = T)

## 
## Study: mod1 ~ "genotipo"
## 
## Duncan's new multiple range test
## for PS 
## 
## Mean Square Error:  7066.534 
## 
## genotipo,  means
## 
##            PS       std r      Min      Max
## gen1 1244.715  95.50024 6 1143.952 1371.506
## gen2 1194.121  90.44675 6 1073.494 1322.408
## gen3 1503.650  99.77789 6 1342.671 1642.953
## gen4 1462.505  52.69259 6 1414.574 1556.108
## gen5 1383.927 103.29604 6 1250.198 1514.843
## gen6 1453.223  41.18360 6 1375.444 1482.561
## 
## Alpha: 0.05 ; DF Error: 30 
## 
## Critical Range
##         2         3         4         5         6 
##  99.11886 104.16376 107.43409 109.76839 111.53078 
## 
## Means with the same letter are not significantly different.
## 
##            PS groups
## gen3 1503.650      a
## gen4 1462.505     ab
## gen6 1453.223     ab
## gen5 1383.927      b
## gen1 1244.715      c
## gen2 1194.121      c

plot(dt)

#Diseño 1 (con incumplimiento de suspuestos)

# FACTOR
genotipo = gl(n = 6, k = 6, length = 36,
              labels = paste0('gen', 1:6));genotipo

##  [1] gen1 gen1 gen1 gen1 gen1 gen1 gen2 gen2 gen2 gen2 gen2 gen2 gen3 gen3 gen3
## [16] gen3 gen3 gen3 gen4 gen4 gen4 gen4 gen4 gen4 gen5 gen5 gen5 gen5 gen5 gen5
## [31] gen6 gen6 gen6 gen6 gen6 gen6
## Levels: gen1 gen2 gen3 gen4 gen5 gen6

# VARIABLE RESPUESTA
#en este caso se mide el Peso Seco
set.seed(123)
PS = c(
  rnorm(12, 1200, 120), #rnorm (media, desviacion estandar (sd))
  rnorm(12, 1500, 100),
  rnorm(12, 1420, 250)
);PS

##  [1] 1132.7429 1172.3787 1387.0450 1208.4610 1215.5145 1405.8078 1255.3099
##  [8] 1048.1927 1117.5777 1146.5206 1346.8898 1243.1777 1540.0771 1511.0683
## [15] 1444.4159 1678.6913 1549.7850 1303.3383 1570.1356 1452.7209 1393.2176
## [22] 1478.2025 1397.3996 1427.1109 1263.7402  998.3267 1629.4468 1458.3433
## [29] 1135.4658 1733.4537 1526.6161 1346.2321 1643.7814 1639.5334 1625.3953
## [36] 1592.1601

aleat = sample(36)
datos = data.frame(xy[aleat,], genotipo, PS)
head(datos)

##    x y genotipo       PS
## 15 2 2     gen1 1132.743
## 26 1 4     gen1 1172.379
## 31 0 5     gen1 1387.045
## 16 3 2     gen1 1208.461
## 20 1 3     gen1 1215.515
## 30 5 4     gen1 1405.808

ggplot(datos)+
  aes(genotipo, PS)+
  geom_boxplot()

mod1b = aov(PS ~genotipo, datos)
smod1b = summary(mod1b);smod1b

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## genotipo     5 627712  125542   5.372 0.00121 **
## Residuals   30 701126   23371                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

smod1b

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## genotipo     5 627712  125542   5.372 0.00121 **
## Residuals   30 701126   23371                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

shapiro.test(mod1b$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  mod1b$residuals
## W = 0.98349, p-value = 0.8558

bartlett.test(mod1b$residuals, datos$genotipo)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  mod1b$residuals and datos$genotipo
## Bartlett's K-squared = 12.401, df = 5, p-value = 0.02969

Como se rechaza la hipotesis de igualdad de varianzas, se incumple el suspuesto lo cual complica la interpretacion

#Analis factorial para un diseño factorial simple en arreglo completamente al azar heterocedasticidad

mod1c = oneway.test(PS ~ genotipo, datos); mod1c

## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  PS and genotipo
## F = 8.6764, num df = 5.000, denom df = 13.702, p-value = 0.0006918

Cuando se incumple la normalidad y la igualdad de varianzas

#Analsis de varianza no parametrico para un diseño en arreglo factorial simple en arreglo completamente al azar

\[H_0: R_1=R_2=R_3=R_4=R_5=R_6\]

mod1d = kruskal.test(PS, genotipo); mod1d #Kruskal no es parametrico por que no importan los parametros de la distribucion normal (no importa la normalidad) se comparan los modelos

## 
##  Kruskal-Wallis rank sum test
## 
## data:  PS and genotipo
## Kruskal-Wallis chi-squared = 17.204, df = 5, p-value = 0.004128

Comparacion de rasgos promedios posterior a kruskal.waliis

#library(PMCMR)
#posthoc.kruskal.nemenyi.test(PS, genotipo)
library(PMCMRplus)

## Warning: package 'PMCMRplus' was built under R version 4.2.3

kwAllPairsNemenyiTest(PS, genotipo) #Se observa que genotipo difiere de que genotipo

## 
##  Pairwise comparisons using Tukey-Kramer-Nemenyi all-pairs test with Tukey-Dist approximation

## data: PS and genotipo

##      gen1  gen2  gen3  gen4  gen5 
## gen2 0.998 -     -     -     -    
## gen3 0.172 0.058 -     -     -    
## gen4 0.443 0.205 0.995 -     -    
## gen5 0.807 0.533 0.883 0.993 -    
## gen6 0.046 0.012 0.995 0.904 0.587

## 
## P value adjustment method: single-step

## alternative hypothesis: two.sided

library(FSA)

## Warning: package 'FSA' was built under R version 4.2.3

## ## FSA v0.9.4. See citation('FSA') if used in publication.
## ## Run fishR() for related website and fishR('IFAR') for related book.

dunnTest(PS, genotipo) #Se menciona que el unico genotipo que difiere es el 2 del 6 (N E)

## Dunn (1964) Kruskal-Wallis multiple comparison

##   p-values adjusted with the Holm method.

##     Comparison          Z      P.unadj      P.adj
## 1  gen1 - gen2  0.4383973 0.6610983037 0.66109830
## 2  gen1 - gen3 -2.3563855 0.0184537565 0.22144508
## 3  gen2 - gen3 -2.7947828 0.0051934597 0.06751498
## 4  gen1 - gen4 -1.8357887 0.0663889146 0.66388915
## 5  gen2 - gen4 -2.2741860 0.0229548062 0.25250287
## 6  gen3 - gen4  0.5205968 0.6026476838 1.00000000
## 7  gen1 - gen5 -1.2603922 0.2075279011 1.00000000
## 8  gen2 - gen5 -1.6987895 0.0893588460 0.80422961
## 9  gen3 - gen5  1.0959932 0.2730817290 1.00000000
## 10 gen4 - gen5  0.5753965 0.5650232007 1.00000000
## 11 gen1 - gen6 -2.8769823 0.0040149814 0.05620974
## 12 gen2 - gen6 -3.3153796 0.0009151876 0.01372781
## 13 gen3 - gen6 -0.5205968 0.6026476838 1.00000000
## 14 gen4 - gen6 -1.0411936 0.2977857118 1.00000000
## 15 gen5 - gen6 -1.6165900 0.1059668029 0.84773442

#cuando hay datos empatados 
rangos = rank(PS, ties.method = 'average');rangos

##  [1]  4  7 16  8  9 19 11  2  3  6 15 10 27 25 21 35 28 13 29 22 17 24 18 20 12
## [26]  1 32 23  5 36 26 14 34 33 31 30

boxplot(rangos ~ genotipo)

##Analisis de varianza permutacional

#library(RVAideMemoire)
#perm.anova(PS ~ genotipo, data = datos, nperm = 10000, progress = F)

Clase 18 de Abril Analisis descriptivo

Luisa Fernanda Perea Peña

2023-04-18

analisis inferencial