Crear un vector de paquetes

# Packages required by this analysis. `car` is included because leveneTest()
# is used further down; without it a fresh installation stops at library(car).
pacotes <- c(
  "agricolae", "ggplot2", "outliers", "tidyverse", "nortest", "stats",
  "performance", "car"
)

Script para instalar y cargar librerías y dependencias

# Install whichever packages listed in `pacotes` are missing, then attach all
# of them.
# Names are compared against the row names of installed.packages() only:
# testing membership in the whole matrix would also match cells of its other
# columns (for instance a dependency field that holds just that name).
instalador <- setdiff(pacotes, rownames(installed.packages()))
if (length(instalador) > 0) {
  # install.packages() is vectorised, so one call covers every missing package.
  install.packages(instalador, dependencies = TRUE)
}
# Attach every package; the printed named logical vector reports, per package,
# whether it could be loaded.
vapply(pacotes, require, logical(1), character.only = TRUE)
## Loading required package: agricolae
## Loading required package: ggplot2
## Loading required package: outliers
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: nortest
## 
## Loading required package: performance
##   agricolae     ggplot2    outliers   tidyverse     nortest       stats 
##        TRUE        TRUE        TRUE        TRUE        TRUE        TRUE 
## performance 
##        TRUE
# Load the `potato` dataset into the workspace and open its help page.
data("potato")
help("potato")
## starting httpd help server ... done

Análisis exploratorio ——

Crear un gráfico de cajas (boxplot) de las variedades por “corte”

# Box plot of `cutting` for each level of `variety`.
ggplot(potato, aes(variety, cutting)) +
  geom_boxplot(fill = "lightblue") +
  labs(
    title = "Rendimiento de Dos variedades de Papa",
    x = "Variedad",
    y = "Corte"
  ) +
  theme_bw()

Valores atípicos (outlier)

# Apply outliers::outlier() to the numeric columns of `potato`; the result has
# one value per numeric column (per the package documentation, the observation
# farthest from that column's mean).
# select(where(...)) replaces the superseded scoped verb select_if().
potato %>%
  select(where(is.numeric)) %>%
  outlier()
##    date harvest cutting 
## 18.0000  3.0000  9.8125
# View() opens a data viewer window, which is only meaningful in an
# interactive session; skip it when the script runs non-interactively.
if (interactive()) {
  View(potato)
}

ANOVA DCA ——— aov = Analysis of Variance (análisis de varianza)

# Fit the one-way ANOVA of `cutting` on `variety` and print its ANOVA table.
model_DCA <- aov(formula = cutting ~ variety, data = potato)
summary(object = model_DCA)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## variety      1  25.09  25.087   6.665 0.0201 *
## Residuals   16  60.22   3.764                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient of variation of the fitted model.
agricolae::cv.model(model_DCA)
## [1] 37.76287

Verificación de supuestos ————

Normalidad ——————–

Verificar la normalidad de los residuos (Prueba de Shapiro-Wilk)

# Print the residuals of the fitted model, one per observation.
resid(model_DCA)
##           1           2           3           4           5           6 
## -1.18194444 -1.60694444  0.21805556 -1.39444444 -0.39444444 -0.58194444 
##           7           8           9          10          11          12 
##  0.79305556  1.51180556  2.63680556 -3.71805556 -1.04305556 -0.51805556 
##          13          14          15          16          17          18 
## -1.81805556  0.86944444 -0.66180556 -0.06805556  3.49444444  3.46319444
# Shapiro-Wilk normality test on the model residuals.
shapiro.test(x = residuals(model_DCA))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_DCA)
## W = 0.94913, p-value = 0.4116

Prueba de normalidad de Kolmogorov-Smirnov

# One-sample Kolmogorov-Smirnov test of the residuals against a normal
# distribution whose mean and standard deviation are taken from the residuals.
# NOTE(review): the ks.test documentation asks for pre-specified parameters
# rather than ones estimated from the sample; consider nortest::lillie.test()
# if a corrected p-value is needed -- confirm with the analyst.
ks.test(
  x = residuals(model_DCA),
  y = "pnorm",
  mean = mean(residuals(model_DCA)),
  sd = sd(residuals(model_DCA))
)
## 
##  Exact one-sample Kolmogorov-Smirnov test
## 
## data:  residuals(model_DCA)
## D = 0.13855, p-value = 0.8343
## alternative hypothesis: two-sided

Prueba de normalidad de Anderson-Darling. Cargar la librería ‘nortest’ para la función ‘ad.test’

# Attach nortest (provides ad.test) and run the Anderson-Darling normality
# test on the model residuals.
library("nortest")
ad.test(x = residuals(model_DCA))
## 
##  Anderson-Darling normality test
## 
## data:  residuals(model_DCA)
## A = 0.42351, p-value = 0.2852

Crear un gráfico de qq-plot para evaluar la normalidad

# Normal Q-Q plot of the residuals on its own.
qqnorm(y = residuals(model_DCA))

# Same Q-Q plot redrawn, this time with the reference line added on top.
qqnorm(y = residuals(model_DCA))
qqline(y = residuals(model_DCA))

Homogeneidad de varianza (Prueba de Levene) ———–

Cargar la librería ‘car’ para la función ‘leveneTest’

# Attach car, which provides leveneTest().
library("car")
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
# Levene's test for homogeneity of variance across the groups of the model.
car::leveneTest(model_DCA)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  0.8149 0.3801
##       16

Crear un gráfico de dispersión de residuos frente a los niveles del factor

# Scatter plot of the model residuals for each level of the factor.
# The residuals are bound to the model frame (the rows actually used in the
# fit) so that every residual is paired with its own `variety` value instead
# of relying on an external vector inside aes().
# The title uses an explicit "\n": a line break typed inside the literal also
# carried the source indentation into the rendered title.
ggplot(
  data = cbind(model.frame(model_DCA), residuos = residuals(model_DCA)),
  aes(x = variety, y = residuos)
) +
  geom_point() +
  labs(x = "Variedad", y = "residuos") +
  ggtitle("Gráfico de Dispersión de Residuos Según\nlas Variedades de Papa") +
  theme_bw()

Verificación de los supuestos del modelo en una sola llamada

# Diagnostic plots for the fitted model produced by a single call.
performance::check_model(model_DCA)

ANOVA DCA ———

# Refit the same one-way ANOVA (cutting explained by variety) and show its
# ANOVA table again before the mean comparisons.
model_DCA <- aov(cutting ~ variety, data = potato)
summary(model_DCA)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## variety      1  25.09  25.087   6.665 0.0201 *
## Residuals   16  60.22   3.764                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient of variation of the refitted model.
agricolae::cv.model(model_DCA)
## [1] 37.76287

Comparación de medias ————

Tukey ————

Realizar la prueba de Tukey

# Tukey honest significant differences for the fitted model, stored and then
# printed.
tukey_result <- TukeyHSD(x = model_DCA)
print(tukey_result)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = cutting ~ variety, data = potato)
## 
## $variety
##                    diff       lwr        upr     p adj
## Unica-Canchan -2.361111 -4.299884 -0.4223378 0.0200726

Realizar la prueba de Tukey con HSD.test() de agricolae (alfa = 0.05 por defecto) para obtener los grupos por letras

# Tukey test via agricolae::HSD.test for the `variety` factor.
# This reassigns `tukey_result`, replacing the TukeyHSD() object stored above.
tukey_result <- HSD.test(y = model_DCA, trt = "variety")

Muestra los grupos caracterizados por letras

# Print the HSD.test result, including the letter groups in `$groups`.
print(tukey_result)
## $statistics
##    MSerror Df   Mean       CV      MSD
##   3.763862 16 5.1375 37.76287 1.938773
## 
## $parameters
##    test  name.t ntr StudentizedRange alpha
##   Tukey variety   2         2.997999  0.05
## 
## $means
##          cutting      std r        se  Min     Max   Q25    Q50    Q75
## Canchan 6.318056 2.342248 9 0.6466892 2.60 9.81250 5.275 5.8000 7.1875
## Unica   3.956944 1.428844 9 0.6466892 2.35 6.59375 2.775 3.5625 4.7500
## 
## $comparison
## NULL
## 
## $groups
##          cutting groups
## Canchan 6.318056      a
## Unica   3.956944      b
## 
## attr(,"class")
## [1] "group"

Repetir la prueba de Tukey con HSD.test() usando un nivel de significancia más estricto (alfa = 0.01)

# Same Tukey test for `variety`, run at the 1% significance level.
tukey_result_alfa_0.01 <- HSD.test(
  y = model_DCA,
  trt = "variety",
  alpha = 0.01
)

Muestra los grupos caracterizados por letras

# Print the alpha = 0.01 result, including the letter groups in `$groups`.
print(tukey_result_alfa_0.01)
## $statistics
##    MSerror Df   Mean       CV     MSD
##   3.763862 16 5.1375 37.76287 2.67122
## 
## $parameters
##    test  name.t ntr StudentizedRange alpha
##   Tukey variety   2         4.130609  0.01
## 
## $means
##          cutting      std r        se  Min     Max   Q25    Q50    Q75
## Canchan 6.318056 2.342248 9 0.6466892 2.60 9.81250 5.275 5.8000 7.1875
## Unica   3.956944 1.428844 9 0.6466892 2.35 6.59375 2.775 3.5625 4.7500
## 
## $comparison
## NULL
## 
## $groups
##          cutting groups
## Canchan 6.318056      a
## Unica   3.956944      a
## 
## attr(,"class")
## [1] "group"

Gráfico de barras ———————–

# Base-graphics bar chart built from the groups table of the alpha = 0.01
# Tukey result, then framed with a box.
bar.group(
  x = tukey_result_alfa_0.01$groups,
  ylim = c(0, 10),
  col = 9:6,
  ylab = "Cortes",
  main = "Evaluación de Dos Variedades de Papa"
)
box()

# Copy the groups table of the alpha = 0.01 Tukey result and expose its row
# names (the variety names) as a regular column called `variedad`.
data <- tukey_result_alfa_0.01$groups
data$variedad <- rownames(data)

Crear el gráfico de barras

# Attach plotly.
# NOTE(review): no plotly function is called in the visible part of this
# script -- confirm whether this dependency is still needed.
library("plotly")
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggrepel)
# Bar chart of the mean `cutting` per variety, filled by Tukey group.
# The bars are drawn from `data` (one row per variety with columns cutting,
# groups and variedad). Plotting the raw `potato` rows with a continuous
# `fill = harvest` stacked nine overlapping bars per variety and put harvest
# values in a legend titled "Grupos".
ggplot(data = data, aes(x = variedad, y = cutting, fill = groups)) +
  geom_col() +
  labs(x = "Variedad", y = "corte", fill = "Grupos") +
  ggtitle("Evaluación de Dos Variedades de Papa") +
  theme_bw()

Etiquetas personalizadas

# Custom x-axis labels, one per variety.
etiquetas <- c(
  "Variedad Canchan",
  "Variedad Única"
)

Crear el gráfico de barras con etiquetas personalizadas

# Bar chart of the mean `cutting` per variety with custom x-axis labels.
# The bars are drawn from `data` (one row per variety with columns cutting,
# groups and variedad) rather than from the raw `potato` rows, whose nine
# observations per variety were drawn on top of each other and whose
# continuous `harvest` fill did not match the "Grupos" legend title.
# `variedad` sorts as Canchan, Unica, which is the order of `etiquetas`.
ggplot(data = data, aes(x = variedad, y = cutting, fill = groups)) +
  geom_col() +
  scale_x_discrete(labels = etiquetas) +
  labs(x = "Variedad", y = "corte", fill = "Grupos") +
  ggtitle("Evaluación de Dos Variedades de Papa") +
  theme_bw()

# Print the table of means, Tukey groups and variety names.
print(data)
##          cutting groups variedad
## Canchan 6.318056      a  Canchan
## Unica   3.956944      a    Unica

Crear el gráfico de barras con etiquetas personalizadas y letras en la parte superior

# Bar chart of the mean `cutting` per variety with custom x-axis labels and
# the Tukey group letter printed above each bar.
# The bars and letters come from `data` (one row per variety with columns
# cutting, groups and variedad). The previous version labelled the raw
# `potato` rows with their `harvest` value and placed the text inside the
# bars, so no group letter was shown.
ggplot(data = data, aes(x = variedad, y = cutting, fill = groups)) +
  geom_col() +
  scale_x_discrete(labels = etiquetas) +
  # A negative vjust lifts the letter just above the top of its bar.
  geom_text(aes(label = groups), vjust = -0.5) +
  # Extra head-room at the top so the letters are not clipped.
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(x = "Variedad", y = "corte", fill = "Grupos") +
  ggtitle("Evaluación de Dos Variedades de Papa") +
  theme_bw()