Datos totales
library(readxl)
Datos_Cluster_Lina_27_07_20 <- read_excel("Datos Cluster Lina 27.07.20.xlsx", sheet = "Resumen")
Datos_Cluster_Lina_27_07_20
## # A tibble: 107 × 35
## G R.C R.E R.D PPT.C PPT.E PPT.D NT.C NT.E NT.D PST.C PST.E
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 NA 23.8 -23.8 NA 5.77 -5.77 NA 5.17 -5.17 NA 36.5
## 2 3 NA 34.3 -34.3 NA 5.12 -5.12 NA 6.25 -6.25 NA 38.6
## 3 4 112. 48.7 62.8 11.6 7.75 3.86 10.8 7.67 3.17 79.2 26.2
## 4 5 53.8 28.3 25.5 6.75 5.39 1.37 9 5.4 3.6 49.3 28.3
## 5 6 84.0 37.2 46.8 17.7 4.10 13.6 5.67 8.2 -2.53 51.8 38.6
## 6 7 77.2 26 51.2 14.6 4.33 10.2 5.83 6 -0.167 40.1 23.8
## 7 8 89.9 66.6 23.2 10.6 5.66 4.94 7.83 10.3 -2.5 36.6 33.4
## 8 9 90.1 46.5 43.6 9.75 5.51 4.24 9.83 8.33 1.5 56.5 37.8
## 9 11 58.2 33.1 25.2 8.18 4.32 3.86 10.8 8.25 2.58 35.6 22.8
## 10 13 107. 59 47.6 22.2 13.8 8.42 5.5 5.5 0 63.2 41.7
## # ℹ 97 more rows
## # ℹ 23 more variables: PST.D <dbl>, IC.C <dbl>, IC.E <dbl>, IC.D <dbl>,
## # Rpa.C <dbl>, Rpa.E <dbl>, Rpa.D <dbl>, praiz.C <dbl>, praiz.E <dbl>,
## # praiz.D <dbl>, ppa.C <dbl>, ppa.E <dbl>, ppa.D <dbl>, CE.C <dbl>,
## # CE.E <dbl>, CE.D <dbl>, MDA.C <dbl>, MDA.E <dbl>, MDA.D <dbl>, CRA.C <dbl>,
## # CRA.E <dbl>, CRA.D <dbl>, Contar <dbl>
head(Datos_Cluster_Lina_27_07_20)
## # A tibble: 6 × 35
## G R.C R.E R.D PPT.C PPT.E PPT.D NT.C NT.E NT.D PST.C PST.E PST.D
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 NA 23.8 -23.8 NA 5.77 -5.77 NA 5.17 -5.17 NA 36.5 -36.5
## 2 3 NA 34.3 -34.3 NA 5.12 -5.12 NA 6.25 -6.25 NA 38.6 -38.6
## 3 4 112. 48.7 62.8 11.6 7.75 3.86 10.8 7.67 3.17 79.2 26.2 53.0
## 4 5 53.8 28.3 25.5 6.75 5.39 1.37 9 5.4 3.6 49.3 28.3 21.1
## 5 6 84.0 37.2 46.8 17.7 4.10 13.6 5.67 8.2 -2.53 51.8 38.6 13.2
## 6 7 77.2 26 51.2 14.6 4.33 10.2 5.83 6 -0.167 40.1 23.8 16.3
## # ℹ 22 more variables: IC.C <dbl>, IC.E <dbl>, IC.D <dbl>, Rpa.C <dbl>,
## # Rpa.E <dbl>, Rpa.D <dbl>, praiz.C <dbl>, praiz.E <dbl>, praiz.D <dbl>,
## # ppa.C <dbl>, ppa.E <dbl>, ppa.D <dbl>, CE.C <dbl>, CE.E <dbl>, CE.D <dbl>,
## # MDA.C <dbl>, MDA.E <dbl>, MDA.D <dbl>, CRA.C <dbl>, CRA.E <dbl>,
## # CRA.D <dbl>, Contar <dbl>
#
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df<- Datos_Cluster_Lina_27_07_20 %>%
select(G, ends_with("D"), Contar) %>%
mutate(., G=factor(G)) %>%
subset(., subset = (Contar==33), -Contar); df
## # A tibble: 103 × 12
## G R.D PPT.D NT.D PST.D IC.D Rpa.D praiz.D ppa.D CE.D
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 62.8 3.86 3.17 53.0 -0.0869 -0.154 -0.0912 0.178 207.
## 2 5 25.5 1.37 3.6 21.1 0.0293 -0.0799 -0.0298 0.000513 71.3
## 3 6 46.8 13.6 -2.53 13.2 0.183 -0.0162 -0.0895 -0.0936 58.9
## 4 7 51.2 10.2 -0.167 16.3 0.208 -0.0495 -0.0723 -0.136 107.
## 5 8 23.2 4.94 -2.5 3.28 0.124 -0.0285 -0.0975 -0.0265 563.
## 6 9 43.6 4.24 1.5 18.6 0.0884 -0.0382 -0.0634 -0.0250 241.
## 7 11 25.2 3.86 2.58 12.9 0.0146 -0.261 -0.102 0.0870 537.
## 8 13 47.6 8.42 0 21.5 0.0677 -0.0308 -0.0616 -0.00611 747.
## 9 14 47.7 0.386 5.67 21.2 0.185 -0.0591 -0.0907 -0.0941 670.
## 10 15 45.5 20.0 -5 15.3 0.0908 -0.0253 -0.0745 -0.0163 764.
## # ℹ 93 more rows
## # ℹ 2 more variables: MDA.D <dbl>, CRA.D <dbl>
head(df)
## # A tibble: 6 × 12
## G R.D PPT.D NT.D PST.D IC.D Rpa.D praiz.D ppa.D CE.D MDA.D
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 62.8 3.86 3.17 53.0 -0.0869 -0.154 -0.0912 0.178 207. -1.13
## 2 5 25.5 1.37 3.6 21.1 0.0293 -0.0799 -0.0298 0.000513 71.3 -0.867
## 3 6 46.8 13.6 -2.53 13.2 0.183 -0.0162 -0.0895 -0.0936 58.9 -0.567
## 4 7 51.2 10.2 -0.167 16.3 0.208 -0.0495 -0.0723 -0.136 107. -1.2
## 5 8 23.2 4.94 -2.5 3.28 0.124 -0.0285 -0.0975 -0.0265 563. 2.03
## 6 9 43.6 4.24 1.5 18.6 0.0884 -0.0382 -0.0634 -0.0250 241. 0.133
## # ℹ 1 more variable: CRA.D <dbl>
Correlaciones
library(corrplot)
## corrplot 0.95 loaded
colnames(df) <- c("G","TP","ATW","TN","TDW","HI","SRR","RWP","SWP","SC","MDA","RWC")
A=cor(df[2:12], method= "spearman")
#
corrplot.mixed(A, lower.col= "black", number.cex= 0.8, tl.pos="l")
ACP
efecto <- df[,2:12]
round(var(efecto), digits = 2) # Varianza, vale la pena estandarizar
## TP ATW TN TDW HI SRR RWP SWP SC MDA RWC
## TP 647.68 79.06 19.10 199.95 1.37 -0.04 -0.45 -0.92 449.73 -2.55 60.23
## ATW 79.06 37.60 -8.40 16.16 0.20 0.01 -0.07 -0.13 44.93 -2.28 9.12
## TN 19.10 -8.40 9.01 10.53 0.03 -0.02 -0.01 -0.02 42.40 1.79 -0.92
## TDW 199.95 16.16 10.53 106.67 0.09 -0.27 -0.17 0.08 190.79 5.58 10.55
## HI 1.37 0.20 0.03 0.09 0.01 0.00 0.00 -0.01 1.66 -0.03 0.13
## SRR -0.04 0.01 -0.02 -0.27 0.00 0.00 0.00 0.00 -0.23 -0.04 0.15
## RWP -0.45 -0.07 -0.01 -0.17 0.00 0.00 0.00 0.00 -1.31 0.00 -0.01
## SWP -0.92 -0.13 -0.02 0.08 -0.01 0.00 0.00 0.01 -0.35 0.03 -0.12
## SC 449.73 44.93 42.40 190.79 1.66 -0.23 -1.31 -0.35 43483.61 6.65 360.43
## MDA -2.55 -2.28 1.79 5.58 -0.03 -0.04 0.00 0.03 6.65 24.08 -8.46
## RWC 60.23 9.12 -0.92 10.55 0.13 0.15 -0.01 -0.12 360.43 -8.46 112.16
#
set.seed(2020)
ef.est=scale(efecto) # Estandarizar variables
#
PCA= prcomp(ef.est) # Analisis componentes principales
head(PCA$x) # Estos son los PC... t?ntos como variables originales
## PC1 PC2 PC3 PC4 PC5 PC6
## [1,] 0.44802370 5.66599513 0.25865732 -0.2151886 -1.51411320 -0.70581056
## [2,] 1.23104683 1.33294374 -1.05739546 -0.9915639 -1.14267476 -0.63195162
## [3,] -1.35953826 -0.23498402 0.98389993 -2.8433584 0.07172088 0.36617967
## [4,] -1.73338187 -0.15604615 0.03129812 -1.9843408 -0.31337900 -0.01407585
## [5,] -0.07876364 -0.08018773 0.56865022 0.1290434 1.81821009 0.06786737
## [6,] -0.12824478 0.87265989 -0.50941765 -0.8795538 -0.15021034 -0.36056064
## PC7 PC8 PC9 PC10 PC11
## [1,] 0.6571701 -1.2520063 0.02489586 0.39050553 -7.105558e-07
## [2,] 0.1136637 0.4314264 0.11712211 0.45415407 -5.031463e-06
## [3,] -0.4596243 -0.2793017 0.16931327 0.14761574 -4.910991e-06
## [4,] -0.4835438 0.6255011 -0.23419004 0.34951069 -3.980300e-06
## [5,] -1.6335431 -0.1559153 -0.10408740 -0.17743287 5.126780e-07
## [6,] -0.2223714 -0.2116869 0.03444580 0.01717298 -3.138598e-06
# Variabilidad explicada
summary(PCA) # 4 componentes explican el 74%
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.8034 1.5107 1.2011 1.0693 0.97651 0.92632 0.71091
## Proportion of Variance 0.2957 0.2075 0.1312 0.1039 0.08669 0.07801 0.04594
## Cumulative Proportion 0.2957 0.5032 0.6343 0.7383 0.82495 0.90295 0.94890
## PC8 PC9 PC10 PC11
## Standard deviation 0.56761 0.44586 0.20288 8.74e-06
## Proportion of Variance 0.02929 0.01807 0.00374 0.00e+00
## Cumulative Proportion 0.97819 0.99626 1.00000 1.00e+00
plot(PCA, ylim=c(0,3))
# Biplot
biplot(PCA, scale = 0, cex=0.3, col=c("black","red"),
arrow.len=0.1,var.axes=TRUE,expand=1)
Cluster jerarquico
Datos_Cluster_Lina_27_07_20 <- read_excel("Datos Cluster Lina 27.07.20.xlsx",
sheet = "Resumen")
head(Datos_Cluster_Lina_27_07_20)
## # A tibble: 6 × 35
## G R.C R.E R.D PPT.C PPT.E PPT.D NT.C NT.E NT.D PST.C PST.E PST.D
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 NA 23.8 -23.8 NA 5.77 -5.77 NA 5.17 -5.17 NA 36.5 -36.5
## 2 3 NA 34.3 -34.3 NA 5.12 -5.12 NA 6.25 -6.25 NA 38.6 -38.6
## 3 4 112. 48.7 62.8 11.6 7.75 3.86 10.8 7.67 3.17 79.2 26.2 53.0
## 4 5 53.8 28.3 25.5 6.75 5.39 1.37 9 5.4 3.6 49.3 28.3 21.1
## 5 6 84.0 37.2 46.8 17.7 4.10 13.6 5.67 8.2 -2.53 51.8 38.6 13.2
## 6 7 77.2 26 51.2 14.6 4.33 10.2 5.83 6 -0.167 40.1 23.8 16.3
## # ℹ 22 more variables: IC.C <dbl>, IC.E <dbl>, IC.D <dbl>, Rpa.C <dbl>,
## # Rpa.E <dbl>, Rpa.D <dbl>, praiz.C <dbl>, praiz.E <dbl>, praiz.D <dbl>,
## # ppa.C <dbl>, ppa.E <dbl>, ppa.D <dbl>, CE.C <dbl>, CE.E <dbl>, CE.D <dbl>,
## # MDA.C <dbl>, MDA.E <dbl>, MDA.D <dbl>, CRA.C <dbl>, CRA.E <dbl>,
## # CRA.D <dbl>, Contar <dbl>
df<- Datos_Cluster_Lina_27_07_20 %>%
select(G, ends_with("D"), Contar) %>%
mutate(., G=factor(G)) %>%
subset(., subset = (Contar==33), -Contar); df
## # A tibble: 103 × 12
## G R.D PPT.D NT.D PST.D IC.D Rpa.D praiz.D ppa.D CE.D
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 62.8 3.86 3.17 53.0 -0.0869 -0.154 -0.0912 0.178 207.
## 2 5 25.5 1.37 3.6 21.1 0.0293 -0.0799 -0.0298 0.000513 71.3
## 3 6 46.8 13.6 -2.53 13.2 0.183 -0.0162 -0.0895 -0.0936 58.9
## 4 7 51.2 10.2 -0.167 16.3 0.208 -0.0495 -0.0723 -0.136 107.
## 5 8 23.2 4.94 -2.5 3.28 0.124 -0.0285 -0.0975 -0.0265 563.
## 6 9 43.6 4.24 1.5 18.6 0.0884 -0.0382 -0.0634 -0.0250 241.
## 7 11 25.2 3.86 2.58 12.9 0.0146 -0.261 -0.102 0.0870 537.
## 8 13 47.6 8.42 0 21.5 0.0677 -0.0308 -0.0616 -0.00611 747.
## 9 14 47.7 0.386 5.67 21.2 0.185 -0.0591 -0.0907 -0.0941 670.
## 10 15 45.5 20.0 -5 15.3 0.0908 -0.0253 -0.0745 -0.0163 764.
## # ℹ 93 more rows
## # ℹ 2 more variables: MDA.D <dbl>, CRA.D <dbl>
head(df)
## # A tibble: 6 × 12
## G R.D PPT.D NT.D PST.D IC.D Rpa.D praiz.D ppa.D CE.D MDA.D
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 62.8 3.86 3.17 53.0 -0.0869 -0.154 -0.0912 0.178 207. -1.13
## 2 5 25.5 1.37 3.6 21.1 0.0293 -0.0799 -0.0298 0.000513 71.3 -0.867
## 3 6 46.8 13.6 -2.53 13.2 0.183 -0.0162 -0.0895 -0.0936 58.9 -0.567
## 4 7 51.2 10.2 -0.167 16.3 0.208 -0.0495 -0.0723 -0.136 107. -1.2
## 5 8 23.2 4.94 -2.5 3.28 0.124 -0.0285 -0.0975 -0.0265 563. 2.03
## 6 9 43.6 4.24 1.5 18.6 0.0884 -0.0382 -0.0634 -0.0250 241. 0.133
## # ℹ 1 more variable: CRA.D <dbl>
# Solo variables de crecimiento
# Se reviso previamente cuales tenian mas correlaciones y
# Generaban dos componentes que explicaran mas del 79% de la variabilidad
crec <- data.frame(scale(df[,c(2,3,5,6,7,8,9)]))
rownames(crec) <- df$G
head(crec)
## R.D PPT.D PST.D IC.D Rpa.D praiz.D
## 4 1.0250050 -0.24327237 3.84669434 -2.093177114 -2.20276365 -1.2136344
## 5 -0.4417532 -0.64929163 0.75742486 -0.706591116 -0.97353050 0.6844583
## 6 0.3953933 1.35179707 -0.00462943 1.130161364 0.09199261 -1.1586914
## 7 0.5678915 0.79476157 0.29712229 1.427419100 -0.46578760 -0.6273412
## 8 -0.5316696 -0.06732269 -0.96482618 0.424406201 -0.11411577 -1.4074900
## 9 0.2692614 -0.18061242 0.51936157 -0.000247352 -0.27698683 -0.3534064
## ppa.D
## 4 2.8634114
## 5 0.4938235
## 6 -0.7622287
## 7 -1.3239912
## 8 0.1339176
## 9 0.1529800
#
A=cor(crec, method= "spearman");A
## R.D PPT.D PST.D IC.D Rpa.D praiz.D
## R.D 1.0000000 0.53886790 0.74498089 0.6144621 -0.20616790 -0.48761147
## PPT.D 0.5388679 1.00000000 0.28105917 0.3981132 -0.05478188 -0.34207925
## PST.D 0.7449809 0.28105917 1.00000000 0.1208101 -0.62979396 -0.42615209
## IC.D 0.6144621 0.39811317 0.12081009 1.0000000 0.30759346 -0.42885384
## Rpa.D -0.2061679 -0.05478188 -0.62979396 0.3075935 1.00000000 0.36262795
## praiz.D -0.4876115 -0.34207925 -0.42615209 -0.4288538 0.36262795 1.00000000
## ppa.D -0.4925098 -0.29672056 0.03536441 -0.9201226 -0.47687036 0.08261213
## ppa.D
## R.D -0.49250977
## PPT.D -0.29672056
## PST.D 0.03536441
## IC.D -0.92012257
## Rpa.D -0.47687036
## praiz.D 0.08261213
## ppa.D 1.00000000
corrplot.mixed(A, lower.col= "black", number.cex= 0.9, sig.level= 0.05, insig= "n")
Seleccion del numero de grupos
library(factoextra)
# Grafico de codo
fviz_nbclust(x=crec, FUNcluster=kmeans, method="wss", k.max=15,
diss=get_dist(crec, method="euclidean"), nstart=50)+
labs(title=NULL)+
xlab(label = "Number of clusters (k)")+
ylab(label = "Total within sum of squares")
Selección del numero de grupos
set.seed(1234)
### Sel 4 grupos
km_clusters<- kmeans(x=crec, centers=4, nstart = 50)
#
grafico_cluster <- fviz_cluster(
object = km_clusters,
data = crec,
show.clust.cent = FALSE,
geom = c("text"),
ellipse.type = "t",
ellipse.alpha = 0.05,
labelsize = 10,
star.plot = TRUE,
segment.size = 0.4,
repel = TRUE,
pointsize = 0.5,
outlier.color = "darkred"
)
#
grafico_cluster +
labs(title=NULL)+
xlab("Principal component 1 (45%)")+
ylab("Principal component 2 (31%)")+
theme_bw()+
scale_colour_manual(
values = c("#edae49","#637029", "#00798c","#d1495b"),
labels = c( "Moderately\nsusceptible", "Tolerant", "Moderately\ntolerant", "Susceptible"))+
scale_fill_manual(
values = c("#edae49","#637029", "#00798c","#d1495b"),
labels = c( "Moderately\nsusceptible", "Tolerant", "Moderately\ntolerant", "Susceptible"))+
theme(
legend.position = "top",
legend.title = element_blank(),
legend.text = element_text(size = 10),
axis.text = element_text(size = 10),
axis.title = element_text(size = 12)
)+
guides(
colour = guide_legend(override.aes = list(label = "")),
fill = guide_legend(override.aes = list(label = ""))
)
## Warning: Duplicated `override.aes` is ignored.
df2<- Datos_Cluster_Lina_27_07_20 %>%
select(G, ends_with("C"), ends_with("E"), ends_with("D"), Contar) %>%
mutate(., G=factor(G)) %>%
subset(., subset = (Contar==33), -Contar); df
## # A tibble: 103 × 12
## G R.D PPT.D NT.D PST.D IC.D Rpa.D praiz.D ppa.D CE.D
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 62.8 3.86 3.17 53.0 -0.0869 -0.154 -0.0912 0.178 207.
## 2 5 25.5 1.37 3.6 21.1 0.0293 -0.0799 -0.0298 0.000513 71.3
## 3 6 46.8 13.6 -2.53 13.2 0.183 -0.0162 -0.0895 -0.0936 58.9
## 4 7 51.2 10.2 -0.167 16.3 0.208 -0.0495 -0.0723 -0.136 107.
## 5 8 23.2 4.94 -2.5 3.28 0.124 -0.0285 -0.0975 -0.0265 563.
## 6 9 43.6 4.24 1.5 18.6 0.0884 -0.0382 -0.0634 -0.0250 241.
## 7 11 25.2 3.86 2.58 12.9 0.0146 -0.261 -0.102 0.0870 537.
## 8 13 47.6 8.42 0 21.5 0.0677 -0.0308 -0.0616 -0.00611 747.
## 9 14 47.7 0.386 5.67 21.2 0.185 -0.0591 -0.0907 -0.0941 670.
## 10 15 45.5 20.0 -5 15.3 0.0908 -0.0253 -0.0745 -0.0163 764.
## # ℹ 93 more rows
## # ℹ 2 more variables: MDA.D <dbl>, CRA.D <dbl>
#
Resumen <- data.frame(df2,
kmeans= km_clusters$cluster)
#
tab_gen <- Resumen %>%
select(., ends_with("D"), kmeans, G) %>%
mutate(., Clasificacion= case_when(
kmeans==1 ~ "Moderately susceptible",
kmeans==2 ~ "Tolerant",
kmeans==3 ~ "Moderately tolerant",
kmeans==4 ~ "Susceptible") ) %>%
group_by(., kmeans, Clasificacion) %>%
summarise(.,
R_prom=mean(R.D),
TP_prom=mean(PPT.D),
Genotipos = toString(G)
)
## `summarise()` has grouped output by 'kmeans'. You can override using the
## `.groups` argument.
head(tab_gen)
## # A tibble: 4 × 5
## # Groups: kmeans [4]
## kmeans Clasificacion R_prom TP_prom Genotipos
## <int> <chr> <dbl> <dbl> <chr>
## 1 1 Moderately susceptible 45.9 6.41 4, 9, 11, 13, 17, 20, 23, 37, 43…
## 2 2 Tolerant 10.8 1.10 5, 27, 30, 32, 57, 59, 66, 69, 7…
## 3 3 Moderately tolerant 29.9 4.30 8, 21, 24, 35, 36, 40, 42, 47, 5…
## 4 4 Susceptible 67.0 10.5 6, 7, 14, 15, 16, 19, 31, 33, 38…