Crear un vector de paquetes

pacotes <- c("agricolae", "performance", "ggplot2",
             "outliers","car", "readxl","tidyverse", 
             "RColorBrewer","nortest", "stats", 
             "agridat", "performance")

Script para instalar y cargar librerias y dependencias

if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
  instalador <- pacotes[!pacotes %in% installed.packages()]
  for(i in 1:length(instalador)) {
    install.packages(instalador, dependencies = T)
    break()}
  sapply(pacotes, require, character = T) 
} else {
  sapply(pacotes, require, character = T) 
}
## Loading required package: agricolae
## Loading required package: performance
## Loading required package: ggplot2
## Loading required package: outliers
## Loading required package: car
## Loading required package: carData
## Loading required package: readxl
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some()   masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: RColorBrewer
## 
## Loading required package: nortest
## 
## Loading required package: agridat
## Warning: package 'agridat' was built under R version 4.3.2
##    agricolae  performance      ggplot2     outliers          car       readxl 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##    tidyverse RColorBrewer      nortest        stats      agridat  performance 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE

DATOS

library(agricolae)
f <- system.file("external/ssp.csv", package="agricolae")
ssp<-read.csv(f)
view (ssp)
str(ssp)
## 'data.frame':    135 obs. of  5 variables:
##  $ block     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ nitrogen  : int  0 0 0 50 50 50 80 80 80 110 ...
##  $ management: chr  "m1" "m2" "m3" "m1" ...
##  $ variety   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ yield     : num  3.32 3.77 4.66 3.19 3.62 ...
summary(ssp)
##      block      nitrogen    management           variety      yield       
##  Min.   :1   Min.   :  0   Length:135         Min.   :1   Min.   : 3.132  
##  1st Qu.:1   1st Qu.: 50   Class :character   1st Qu.:1   1st Qu.: 5.301  
##  Median :2   Median : 80   Mode  :character   Median :2   Median : 6.509  
##  Mean   :2   Mean   : 76                      Mean   :2   Mean   : 6.554  
##  3rd Qu.:3   3rd Qu.:110                      3rd Qu.:3   3rd Qu.: 7.644  
##  Max.   :3   Max.   :140                      Max.   :3   Max.   :10.360

Valores atípicos

ssp%>%
  select_if(is.numeric)%>%
  outlier()
##    block nitrogen  variety    yield 
##     3.00     0.00     3.00    10.36

##ANOVA DE 3 FACTORES PARA DCA: para lo cual se verifica la estructura de la base de datos

str (ssp)
## 'data.frame':    135 obs. of  5 variables:
##  $ block     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ nitrogen  : int  0 0 0 50 50 50 80 80 80 110 ...
##  $ management: chr  "m1" "m2" "m3" "m1" ...
##  $ variety   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ yield     : num  3.32 3.77 4.66 3.19 3.62 ...
ssp$block <- as.factor(ssp$block)
ssp$nitrogen <- as.factor(ssp$nitrogen)
ssp$variety <- as.factor(ssp$variety)
str (ssp)
## 'data.frame':    135 obs. of  5 variables:
##  $ block     : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nitrogen  : Factor w/ 5 levels "0","50","80",..: 1 1 1 2 2 2 3 3 3 4 ...
##  $ management: chr  "m1" "m2" "m3" "m1" ...
##  $ variety   : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ yield     : num  3.32 3.77 4.66 3.19 3.62 ...
model_DCA <- aov(yield ~ management * variety * nitrogen, data = ssp)
summary(model_DCA)
##                             Df Sum Sq Mean Sq F value   Pr(>F)    
## management                   2  42.94   21.47  48.120 6.13e-15 ***
## variety                      2 206.01  103.01 230.886  < 2e-16 ***
## nitrogen                     4  61.64   15.41  34.542  < 2e-16 ***
## management:variety           4   3.85    0.96   2.158 0.080061 .  
## management:nitrogen          8   1.10    0.14   0.309 0.960824    
## variety:nitrogen             8  14.14    1.77   3.963 0.000468 ***
## management:variety:nitrogen 16   3.70    0.23   0.518 0.931339    
## Residuals                   90  40.15    0.45                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cv.model(model_DCA)
## [1] 10.19059

De lo anterior se puede decir que la variedad (variety), dosis de nitogeno (nitrogen) y gestion o manejo (management) tienen un alto valor de significancia frente a la variable de respuesta que es Rendimiento (Yield), de igual manera, se destaca la interacción entre la dosis de nitrogeno y la variedad de cereal que se maneja en el ensayo, la cual, es la unica de las interacciones que tiene un alto valor de significancia frente al rendimiento. Cabe destacar que los 4 items anteriores tienen una significancia de 0.001, es decir que si se replica el ensayo unas mil veces, solo una de esas mil va a ser diferente. Hablando en terminos de porcentaje y confianza, el experimento tendrá un 99,9% de confiabilidad.


Se verifica la normalidad mediante una prueba de Shapiro-Wilk

shapiro.test(residuals(model_DCA))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_DCA)
## W = 0.99198, p-value = 0.6403

Homogeneidad de varianzas ——————-

Calcular los residuos del modelo ANOVA

residuos <- residuals(model_DCA)

Agrupar los residuos por los niveles de las interacciones

grupo <- paste(ssp$variety, ssp$nitrogen, sep = "_")
grupos_residuos <- split(residuos, grupo)

Prueba de homogeneidad de varianzas con fligner.test

fligner.test(grupos_residuos)
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  grupos_residuos
## Fligner-Killeen:med chi-squared = 17.786, df = 14, p-value = 0.2167

Gráfico

residuos_estandarizados <- rstandard(model_DCA)

Obtener los valores ajustados (fitted values)

valores_ajustados <- fitted(model_DCA)

Graficar residuos estandarizados vs. valores ajustados

plot(valores_ajustados, residuos_estandarizados, pch = 14    , 
     col = "black",
     xlab = "Valores Ajustados", ylab = "Residuos Estandarizados",
     main = "Gráfico de Residuos Est. vs. Valores Ajustados")

Otra forma de verificar los supuestos:

plot(model_DCA)

check_model(model_DCA)

EVALUAR LOS EFECTOS COMPUESTOS POR LA DIF. SIGNIF. EN LA INTERACCIÓN

resultados_lsd<-LSD.test(model_DCA,c("nitrogen","variety"),
                         console=T)
## 
## Study: model_DCA ~ c("nitrogen", "variety")
## 
## LSD t Test for yield 
## 
## Mean Square Error:  0.4461352 
## 
## nitrogen:variety,  means and individual ( 95 %) CI
## 
##          yield       std r        se      LCL      UCL   Min    Max   Q25   Q50
## 0:1   4.513111 0.8176433 9 0.2226445 4.070789 4.955433 3.320  5.915 3.864 4.507
## 0:2   5.162889 0.7955654 9 0.2226445 4.720567 5.605211 4.166  6.573 4.815 5.096
## 0:3   6.478111 1.0853742 9 0.2226445 6.035789 6.920433 5.244  8.020 5.536 6.462
## 110:1 5.444667 0.7656558 9 0.2226445 5.002344 5.886989 4.246  6.829 4.863 5.345
## 110:2 6.924556 0.7410263 9 0.2226445 6.482233 7.366878 5.779  7.856 6.209 6.992
## 110:3 8.442889 0.9350460 9 0.2226445 8.000567 8.885211 6.902  9.660 7.778 8.966
## 140:1 5.077778 1.1663266 9 0.2226445 4.635456 5.520100 3.132  7.309 4.375 5.217
## 140:2 7.288444 0.7236999 9 0.2226445 6.846122 7.730767 6.573  8.950 6.860 6.974
## 140:3 9.335556 0.7091084 9 0.2226445 8.893233 9.777878 8.032 10.360 9.224 9.314
## 50:1  4.763667 0.8726879 9 0.2226445 4.321344 5.205989 3.188  6.046 4.752 4.809
## 50:2  6.016222 0.9620228 9 0.2226445 5.573900 6.458544 4.478  7.442 5.390 5.925
## 50:3  7.881111 1.1232814 9 0.2226445 7.438789 8.323433 6.546  9.942 7.092 7.646
## 80:1  5.834889 0.7497605 9 0.2226445 5.392567 6.277211 4.422  7.106 5.468 5.788
## 80:2  6.588556 0.7006203 9 0.2226445 6.146233 7.030878 5.442  7.991 6.398 6.533
## 80:3  8.563778 0.7680380 9 0.2226445 8.121456 9.006100 6.698  9.320 8.514 8.650
##         Q75
## 0:1   4.875
## 0:2   5.495
## 0:3   7.442
## 110:1 5.869
## 110:2 7.565
## 110:3 9.080
## 140:1 5.389
## 140:2 7.422
## 140:3 9.712
## 50:1  5.232
## 50:2  6.780
## 50:3  8.592
## 80:1  6.215
## 80:2  6.914
## 80:3  9.112
## 
## Alpha: 0.05 ; DF Error: 90
## Critical Value of t: 1.986675 
## 
## least Significant Difference: 0.625538 
## 
## Treatments with the same letter are not significantly different.
## 
##          yield groups
## 140:3 9.335556      a
## 80:3  8.563778      b
## 110:3 8.442889     bc
## 50:3  7.881111     cd
## 140:2 7.288444     de
## 110:2 6.924556     ef
## 80:2  6.588556     fg
## 0:3   6.478111     fg
## 50:2  6.016222     gh
## 80:1  5.834889      h
## 110:1 5.444667     hi
## 0:2   5.162889     ij
## 140:1 5.077778    ijk
## 50:1  4.763667     jk
## 0:1   4.513111      k

Crear un data frame con los resultados de la prueba LSD

resultados_lsd_df <- data.frame(
  nitrogen_variety = rownames(resultados_lsd$groups),
  yield = resultados_lsd$groups$yield,
  groups = resultados_lsd$groups$groups
)

Separar la columna nitrogen_variety en nitrogen y variety

resultados_lsd_df <- separate(resultados_lsd_df, nitrogen_variety, into = c("nitrogen", "variety"), sep = ":")

resultados_lsd_df$nitrogen<-
  factor(resultados_lsd_df$nitrogen, 
         levels = c("0",
                    "50", "80", "110", "140"))

Crear un gráfico de barras agrupadas

ggplot(resultados_lsd_df, aes(x = variety, y = yield, fill = nitrogen, label = groups)) +
  geom_bar(position = "dodge", stat = "identity") +
  geom_text(position = position_dodge(width = 0.9),    
            vjust = -0.5,                              
            aes(group = nitrogen),                     
            size = 4) +                                
  labs(title = "Comparación de Medias Significativas",
       x = "Variedad", y = "Producción") +
  scale_fill_brewer(palette = "Spectral") +  
  theme_minimal() +
  theme(legend.position = "top") 

Crea un gráfico de dispersión tridimensional

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
plot_ly(ssp, x = ~nitrogen, y = ~management, z = ~variety, 
        marker = list(color = ~yield, colorscale = "YlGnBu", size = 5),
        type = "scatter3d") %>%
  layout(scene = list(
    xaxis = list(title = "Nitrogen"),
    yaxis = list(title = "Management"),
    zaxis = list(title = "Variety"),
    bgcolor = "black"  # Color oscuro para el fondo
  ))
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

GRACIAS!!