pacotes <- c("agricolae", "ggplot2", "outliers", "tidyverse", "nortest", "stats", "performance")
if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
instalador <- pacotes[!pacotes %in% installed.packages()]
for(i in 1:length(instalador)) {
install.packages(instalador, dependencies = T)
break()}
sapply(pacotes, require, character = T)
} else {
sapply(pacotes, require, character = T)
}
## Loading required package: agricolae
## Loading required package: ggplot2
## Loading required package: outliers
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: nortest
##
## Loading required package: performance
## agricolae ggplot2 outliers tidyverse nortest stats
## TRUE TRUE TRUE TRUE TRUE TRUE
## performance
## TRUE
data(potato)
?potato
## starting httpd help server ... done
data(potato)
?potato
data<- as.data.frame(potato)
str(data)
## 'data.frame': 18 obs. of 4 variables:
## $ date : int 18 26 31 18 26 31 18 26 31 18 ...
## $ variety: Factor w/ 2 levels "Canchan","Unica": 2 2 2 2 2 2 2 2 2 1 ...
## $ harvest: int 1 1 1 2 2 2 3 3 3 1 ...
## $ cutting: num 2.77 2.35 4.17 2.56 3.56 ...
summary(data)
## date variety harvest cutting
## Min. :18 Canchan:9 Min. :1 Min. :2.350
## 1st Qu.:18 Unica :9 1st Qu.:1 1st Qu.:3.422
## Median :26 Median :2 Median :5.013
## Mean :25 Mean :2 Mean :5.138
## 3rd Qu.:31 3rd Qu.:3 3rd Qu.:6.138
## Max. :31 Max. :3 Max. :9.812
ggplot(data, aes(x = variety, y = cutting)) +
geom_boxplot(fill = "green") +
labs(x = "Variedad", y = "Rto Corte") +
ggtitle("Variedades de Papa") +
theme_bw() +
theme(axis.text.x = element_text(angle = 25, hjust = 1))
data%>%
select_if(is.numeric)%>%
outlier()
## date harvest cutting
## 18.0000 3.0000 9.8125
View(data)
Resumen de base de datos
str(data)
## 'data.frame': 18 obs. of 4 variables:
## $ date : int 18 26 31 18 26 31 18 26 31 18 ...
## $ variety: Factor w/ 2 levels "Canchan","Unica": 2 2 2 2 2 2 2 2 2 1 ...
## $ harvest: int 1 1 1 2 2 2 3 3 3 1 ...
## $ cutting: num 2.77 2.35 4.17 2.56 3.56 ...
data$harvest <- as.factor(data$harvest)
Revisar base de datos
str(data)
## 'data.frame': 18 obs. of 4 variables:
## $ date : int 18 26 31 18 26 31 18 26 31 18 ...
## $ variety: Factor w/ 2 levels "Canchan","Unica": 2 2 2 2 2 2 2 2 2 1 ...
## $ harvest: Factor w/ 3 levels "1","2","3": 1 1 1 2 2 2 3 3 3 1 ...
## $ cutting: num 2.77 2.35 4.17 2.56 3.56 ...
model_DBCA<- aov(cutting ~ variety + Error(harvest), data = data)
model_DBCA2<- aov(cutting ~ variety + harvest, data = data)
summary(model_DBCA)
##
## Error: harvest
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 2 36.24 18.12
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## variety 1 25.09 25.087 14.65 0.00185 **
## Residuals 14 23.98 1.713
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model_DBCA2)
## Df Sum Sq Mean Sq F value Pr(>F)
## variety 1 25.09 25.087 14.65 0.00185 **
## harvest 2 36.24 18.121 10.58 0.00159 **
## Residuals 14 23.98 1.713
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cv.model(model_DBCA2)
## [1] 25.47463
summary(model_DBCA)
##
## Error: harvest
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 2 36.24 18.12
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## variety 1 25.09 25.087 14.65 0.00185 **
## Residuals 14 23.98 1.713
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
my.residuos.DBCA<-residuals(model_DBCA$Within)
shapiro.test(my.residuos.DBCA)
##
## Shapiro-Wilk normality test
##
## data: my.residuos.DBCA
## W = 0.97385, p-value = 0.9105
ks.test(my.residuos.DBCA, "pnorm", mean = mean(my.residuos.DBCA), sd = sd(my.residuos.DBCA))
##
## Exact one-sample Kolmogorov-Smirnov test
##
## data: my.residuos.DBCA
## D = 0.10051, p-value = 0.9942
## alternative hypothesis: two-sided
library(nortest)
ad.test(my.residuos.DBCA)
##
## Anderson-Darling normality test
##
## data: my.residuos.DBCA
## A = 0.16377, p-value = 0.9272
qqnorm(my.residuos.DBCA)
qqline(my.residuos.DBCA)
attach(data)
bartlett_result <- bartlett.test(cutting ~ variety)
bartlett_result
##
## Bartlett test of homogeneity of variances
##
## data: cutting by variety
## Bartlett's K-squared = 1.7689, df = 1, p-value = 0.1835
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
leveneTest(cutting ~ variety)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.8149 0.3801
## 16
residuals(model_DBCA$Within)
## 4 5 6 7 8 9
## -1.087981456 -0.087981456 -0.275481456 -0.711243898 0.007506102 1.132506102
## 10 11 12 13 14 15
## -2.526801781 0.148198219 0.673198219 -1.511592567 1.175907433 -0.355342567
## 16 17 18
## -1.572355009 1.990144991 1.958894991
residual_data <- data.frame(
variety = rep(4:18, each = 1),
Residuos = residuals(model_DBCA$Within)
)
ggplot(residual_data, aes(x = variety, y = Residuos)) +
geom_point() +
labs(x = "Variedad", y = "Rto corte") +
ggtitle("Gráfico de dispersión de residuos por nivel de Variedad") +
theme_minimal()
check_model(model_DBCA2)
model_DBCA<- aov(cutting ~ variety + Error(harvest), data = data)
model_DBCA2<- aov(cutting ~ variety + harvest, data = data)
summary(model_DBCA)
##
## Error: harvest
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 2 36.24 18.12
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## variety 1 25.09 25.087 14.65 0.00185 **
## Residuals 14 23.98 1.713
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model_DBCA2)
## Df Sum Sq Mean Sq F value Pr(>F)
## variety 1 25.09 25.087 14.65 0.00185 **
## harvest 2 36.24 18.121 10.58 0.00159 **
## Residuals 14 23.98 1.713
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(agricolae)
duncan_result <- duncan.test(model_DBCA2, "variety")
print(duncan_result)
## $statistics
## MSerror Df Mean CV
## 1.71285 14 5.1375 25.47463
##
## $parameters
## test name.t ntr alpha
## Duncan variety 2 0.05
##
## $duncan
## Table CriticalRange
## 2 3.033186 1.323237
##
## $means
## cutting std r se Min Max Q25 Q50 Q75
## Canchan 6.318056 2.342248 9 0.436253 2.60 9.81250 5.275 5.8000 7.1875
## Unica 3.956944 1.428844 9 0.436253 2.35 6.59375 2.775 3.5625 4.7500
##
## $comparison
## NULL
##
## $groups
## cutting groups
## Canchan 6.318056 a
## Unica 3.956944 b
##
## attr(,"class")
## [1] "group"
duncan_result_alfa_0.01<-duncan.test(model_DBCA2, "variety", console = T, alpha = 0.01)
##
## Study: model_DBCA2 ~ "variety"
##
## Duncan's new multiple range test
## for cutting
##
## Mean Square Error: 1.71285
##
## variety, means
##
## cutting std r se Min Max Q25 Q50 Q75
## Canchan 6.318056 2.342248 9 0.436253 2.60 9.81250 5.275 5.8000 7.1875
## Unica 3.956944 1.428844 9 0.436253 2.35 6.59375 2.775 3.5625 4.7500
##
## Alpha: 0.01 ; DF Error: 14
##
## Critical Range
## 2
## 1.836578
##
## Means with the same letter are not significantly different.
##
## cutting groups
## Canchan 6.318056 a
## Unica 3.956944 b
duncan_result_alfa_0.01$groups
## cutting groups
## Canchan 6.318056 a
## Unica 3.956944 b
duncan_data_frame <- data.frame(variety = rownames(duncan_result_alfa_0.01$groups),
groups = duncan_result_alfa_0.01$groups$groups)
data_combinado <- merge(data, duncan_data_frame, by = "variety")
ggplot(data_combinado, aes(x = reorder(variety, cutting), y = cutting, fill = groups)) +
geom_boxplot() +
labs(x = "Variedad", y = "Rto Corte", fill = "Grupos") +
ggtitle("Distribución de rendimiento por cada Variedad") +
scale_fill_manual(values = c("a" = "orange", "b" = "green")) +
theme_bw() +
theme(axis.text.x = element_text(angle = 25, hjust = 1))
p <- ggplot(data_combinado, aes(x = reorder(variety, cutting), y = cutting, fill = groups)) +
geom_boxplot() +
labs(x = "Variedad", y = "Rto Corte", fill = "Grupos") +
ggtitle("Distribución de rendimiento por cada Variedad") +
scale_fill_manual(values = c("a" = "orange", "b" = "green")) +
theme_bw() +
theme(axis.text.x = element_text(angle = 25, hjust = 1))
label_data <- data_combinado %>%
group_by(variety) %>%
summarise(y = max(cutting),
groups = first(groups))
p + geom_text(data = label_data, aes(x = variety, y = y, label = groups), vjust = -0.5)