dados <- read_excel("dados.xlsx")
dados <- as.data.frame(dados)
rownames(dados) <- dados$CLONES
dados <- dados[-1]
names(dados) <- c("Leaf (E1)","Leaf (E2)", "Pod Lab.", "Pod Field")
dados %>% kable
| Leaf (E1) | Leaf (E2) | Pod Lab. | Pod Field | |
|---|---|---|---|---|
| AMAZON 15.15 | 1.6020600 | 1.1139434 | 0.8571114 | 1.723369 |
| AMAZON 2.1 | 1.0211893 | 1.0000000 | 1.5354184 | 1.900000 |
| APA 5 | 0.9542425 | 1.3222193 | 0.5068532 | 2.918904 |
| CAB 4 | 1.3222193 | 1.3891661 | 0.4960531 | 2.104757 |
| CAB 5003.23 | 1.3802112 | 1.4149733 | 1.3177060 | 1.466288 |
| CAB 5046.1403 | 1.2174839 | 1.4232459 | 0.9791707 | 6.892750 |
| CCN 51 | 1.6532125 | 1.0791812 | 0.2809029 | 5.754129 |
| CEPEC 1008 | 1.3521825 | 1.6483600 | 0.9891409 | 4.869292 |
| CEPEC 38 | 1.3979400 | 1.2174839 | 1.3666029 | 4.202380 |
| CEPEC 40 | 1.5740313 | 1.3222193 | 0.7961165 | 1.445683 |
| CEPEC 42 | 1.0413927 | 1.2041200 | 1.1048756 | 3.495712 |
| CEPEC 44 | 1.7817554 | 1.2900346 | 1.2825478 | 1.539480 |
| CEPEC 75 | 1.4623980 | 1.3710679 | 0.8729614 | 1.624808 |
| CEPEC 82 | 1.6901961 | 1.2787536 | 0.9510319 | 2.424871 |
| CEPEC 84 | 1.5118834 | 1.3617278 | 0.7590695 | 1.876166 |
| CEPEC 89 | 1.3010300 | 1.3222193 | 0.2587726 | 2.391652 |
| CEPEC 92 | 1.3010300 | 1.2671717 | 0.2639505 | 3.224903 |
| CEPEC 93 | 1.4623980 | 1.3617278 | 0.3156066 | 1.288410 |
| CHUAO 120 | 0.9777236 | 1.1461280 | 1.7819526 | 6.527634 |
| CJ 10 | 1.6020600 | 1.2671717 | 1.8245065 | 1.606238 |
| CSUL 4 | 1.2304489 | 1.2671717 | 0.9232208 | 2.922328 |
| CSUL 5 | 1.4313638 | 1.4623980 | 1.1350635 | 5.498182 |
| EET 272 | 1.3324385 | 1.2900346 | 0.6117698 | 1.926136 |
| EET 390 | 1.1760913 | 1.3979400 | 0.5764116 | 2.083267 |
| EET 392 | 1.3117539 | 1.4393327 | 0.5464359 | 4.435087 |
| EET 62 | 1.3117539 | 1.3324385 | 1.1572554 | 3.896152 |
| ICS 100 | 1.5563025 | 1.4623980 | 1.1430697 | 5.324472 |
| ICS 78 | 1.5051500 | 1.3617278 | 1.2239901 | 7.615773 |
| ICS 95 | 1.4771213 | 1.5682017 | 0.6780611 | 4.369211 |
| IMC 2 | 1.6020600 | 1.2430380 | 0.5063949 | 3.018278 |
| IMC 23 | 1.5563025 | 0.8750613 | 1.9398831 | 2.118962 |
| IMC 51 | 1.5682017 | 1.0413927 | 1.9315410 | 1.407125 |
| MA 12 | 1.5250448 | 1.3222193 | 0.7386612 | 2.670206 |
| MOCORONGO 1 | 1.0211893 | 1.2900346 | 1.2753404 | 2.875761 |
| MOCORONGO 2 | 1.1461280 | 1.3222193 | 0.7311415 | 2.600000 |
| MOQ 417 | 1.3891661 | 1.1461280 | 1.0109978 | 1.260952 |
| MOQ 647 | 1.2041200 | 0.7403627 | 0.7115607 | 1.268858 |
| NA 312 | 1.3424227 | 1.2787536 | 1.4783115 | 3.917908 |
| NA 33 | 1.5440680 | 1.5118834 | 1.5895142 | 2.090454 |
| OC 77 | 1.7242759 | 1.4232459 | 1.0502791 | 3.917908 |
| PA 120 | 1.4232459 | 0.9777236 | 0.9652000 | 1.228821 |
| PA 148 | 1.3710679 | 1.1760913 | 1.0276270 | 3.938274 |
| PA 15 | 1.3617278 | 1.4698220 | 1.2356085 | 4.701064 |
| PA 150 | 1.3324385 | 1.4623980 | 1.2646627 | 4.282523 |
| PA 169 | 0.9542425 | 1.4698220 | 0.5385116 | 2.477902 |
| PA 285 | 0.3979400 | 0.3979400 | 0.6545942 | 3.056141 |
| PA 294 | 1.2174839 | 0.9030900 | 0.4485199 | 2.477902 |
| PA 30 | 1.1613680 | 0.9777236 | 0.0000000 | 3.364521 |
| PA 44 | 1.3710679 | 0.9030900 | 0.7755202 | 2.100000 |
| PA 51 | 1.3324385 | 1.2041200 | 0.7822918 | 1.720465 |
| PA 70 | 1.3010300 | 0.9777236 | 1.0344874 | 2.095233 |
| PA 88 | 1.2787536 | 1.0000000 | 0.5565108 | 2.306513 |
| RB 31 | 1.3222193 | 1.1461280 | 0.6663900 | 2.170253 |
| RB 32 | 1.5563025 | 1.4232459 | 0.3983577 | 2.233831 |
| RB 33 | 1.2304489 | 1.4623980 | 0.6417879 | 2.034699 |
| RB 39 | 1.3324385 | 1.3617278 | 0.9754240 | 3.430743 |
| RIM 117 | 1.4471580 | 1.3979400 | 2.1415689 | 7.009280 |
| SCAVINA 6 | 0.7403627 | 0.7781513 | 0.0778750 | 3.424909 |
| SIAL 164 | 1.3010300 | 1.4065402 | 1.2432902 | 4.846648 |
| SIAL 505 | 1.2552725 | 1.4393327 | 1.5233441 | 7.345747 |
| SIAL 542 | 1.4913617 | 1.2900346 | 1.2504389 | 4.316248 |
| SIC 19 | 1.6283889 | 1.4065402 | 1.2911151 | 4.771792 |
| SIC 23 | 1.8195439 | 1.6857417 | 1.4537780 | 4.941660 |
| SIC 842 | 1.2552725 | 1.2671717 | 1.7287604 | 5.336666 |
| SIC 864 | 1.3710679 | 1.1903317 | 1.7068808 | 5.502727 |
| SIC 891 | 1.3710679 | 1.3710679 | 1.2539790 | 5.582114 |
| SPA 12 | 1.4393327 | 1.5250448 | 1.7871112 | 2.760435 |
| SPA 5 | 1.4149733 | 1.3710679 | 1.3093953 | 4.760252 |
| TSA 516 | 1.5440680 | 1.5051500 | 0.1945788 | 3.856164 |
| TSA 644 | 1.6074550 | 1.4393327 | 0.3985943 | 2.092845 |
| TSA 654 | 1.3324385 | 1.2552725 | 0.7623967 | 1.989975 |
| UF 36 | 1.4065402 | 1.2787536 | 1.9960110 | 1.311488 |
| UF 667 | 1.5250448 | 1.3010300 | 2.0585615 | 5.486347 |
df <- scale(dados)
df %>% kable()
| Leaf (E1) | Leaf (E2) | Pod Lab. | Pod Field | |
|---|---|---|---|---|
| AMAZON 15.15 | 1.0339809 | -0.7179530 | -0.2973635 | -0.9692307 |
| AMAZON 2.1 | -1.4723134 | -1.2381215 | 1.0323556 | -0.8629994 |
| APA 5 | -1.7611700 | 0.2328580 | -0.9839921 | -0.2502001 |
| CAB 4 | -0.1734533 | 0.5384801 | -1.0051639 | -0.7398527 |
| CAB 5003.23 | 0.0767657 | 0.6562941 | 0.6055630 | -1.1238469 |
| CAB 5046.1403 | -0.6253570 | 0.6940595 | -0.0580844 | 2.1397897 |
| CCN 51 | 1.2546897 | -0.8766472 | -1.4269337 | 1.4549891 |
| CEPEC 1008 | -0.0441704 | 1.7217393 | -0.0385395 | 0.9228214 |
| CEPEC 38 | 0.1532604 | -0.2452746 | 0.7014181 | 0.5217210 |
| CEPEC 40 | 0.9130448 | 0.2328580 | -0.4169349 | -1.1362391 |
| CEPEC 42 | -1.3851414 | -0.3062831 | 0.1883411 | 0.0967094 |
| CEPEC 44 | 1.8093162 | 0.0859300 | 0.5366408 | -1.0798266 |
| CEPEC 75 | 0.4313785 | 0.4558590 | -0.2662919 | -1.0285083 |
| CEPEC 82 | 1.4142635 | 0.0344305 | -0.1132464 | -0.5473262 |
| CEPEC 84 | 0.6448940 | 0.4132204 | -0.4895599 | -0.8773337 |
| CEPEC 89 | -0.2648792 | 0.2328580 | -1.4703169 | -0.5673051 |
| CEPEC 92 | -0.2648792 | -0.0184424 | -1.4601664 | -0.0661630 |
| CEPEC 93 | 0.4313785 | 0.4132204 | -1.3589023 | -1.2308280 |
| CHUAO 120 | -1.6598557 | -0.5710251 | 1.5156488 | 1.9201979 |
| CJ 10 | 1.0339809 | -0.0184424 | 1.5990695 | -1.0396767 |
| CSUL 4 | -0.5694167 | -0.0184424 | -0.1677659 | -0.2481408 |
| CSUL 5 | 0.2974745 | 0.8727948 | 0.2475199 | 1.3010546 |
| EET 272 | -0.1293605 | 0.0859300 | -0.7783188 | -0.8472804 |
| EET 390 | -0.8039548 | 0.5785344 | -0.8476333 | -0.7527773 |
| EET 392 | -0.2186087 | 0.7674982 | -0.9063960 | 0.6616780 |
| EET 62 | -0.2186087 | 0.2795100 | 0.2910238 | 0.3375461 |
| ICS 100 | 0.8365502 | 0.8727948 | 0.2632149 | 1.1965804 |
| ICS 78 | 0.6158414 | 0.4132204 | 0.4218472 | 2.5746376 |
| ICS 95 | 0.4949053 | 1.3558047 | -0.6483647 | 0.6220577 |
| IMC 2 | 1.0339809 | -0.1286163 | -0.9848905 | -0.1904337 |
| IMC 23 | 0.8365502 | -1.8084856 | 1.8252479 | -0.7313091 |
| IMC 51 | 0.8878920 | -1.0491577 | 1.8088944 | -1.1594293 |
| MA 12 | 0.7016820 | 0.2328580 | -0.5295673 | -0.3997745 |
| MOCORONGO 1 | -1.4723134 | 0.0859300 | 0.5225118 | -0.2761477 |
| MOCORONGO 2 | -0.9332377 | 0.2328580 | -0.5443085 | -0.4419984 |
| MOQ 417 | 0.1154034 | -0.5710251 | 0.0043078 | -1.2473420 |
| MOQ 647 | -0.6830187 | -2.4234048 | -0.5826938 | -1.2425872 |
| NA 312 | -0.0862813 | 0.0344305 | 0.9204060 | 0.3506306 |
| NA 33 | 0.7837619 | 1.0987029 | 1.1384022 | -0.7484543 |
| OC 77 | 1.5613082 | 0.6940595 | 0.0813128 | 0.3506306 |
| PA 120 | 0.2624481 | -1.3398166 | -0.0854719 | -1.2666668 |
| PA 148 | 0.0373146 | -0.4342385 | 0.0369068 | 0.3628794 |
| PA 15 | -0.0029850 | 0.9066865 | 0.4446232 | 0.8216441 |
| PA 150 | -0.1293605 | 0.8727948 | 0.5015796 | 0.5699210 |
| PA 169 | -1.7611700 | 0.9066865 | -0.9219304 | -0.5154316 |
| PA 285 | -4.1614595 | -3.9866158 | -0.6943680 | -0.1676614 |
| PA 294 | -0.6253570 | -1.6805303 | -1.0983457 | -0.5154316 |
| PA 30 | -0.8674815 | -1.3398166 | -1.9776016 | 0.0178072 |
| PA 44 | 0.0373146 | -1.6805303 | -0.4573107 | -0.7427134 |
| PA 51 | -0.1293605 | -0.3062831 | -0.4440361 | -0.9709771 |
| PA 70 | -0.2648792 | -1.3398166 | 0.0503555 | -0.7455806 |
| PA 88 | -0.3609956 | -1.2381215 | -0.8866458 | -0.6185106 |
| RB 31 | -0.1734533 | -0.5710251 | -0.6712442 | -0.7004609 |
| RB 32 | 0.8365502 | 0.6940595 | -1.1966812 | -0.6622236 |
| RB 33 | -0.5694167 | 0.8727948 | -0.7194729 | -0.7819874 |
| RB 39 | -0.1293605 | 0.4132204 | -0.0654294 | 0.0576355 |
| RIM 117 | 0.3656224 | 0.5785344 | 2.2206225 | 2.2098742 |
| SCAVINA 6 | -2.6840015 | -2.2508944 | -1.8249394 | 0.0541264 |
| SIAL 164 | -0.2648792 | 0.6177955 | 0.4596822 | 0.9092031 |
| SIAL 505 | -0.4623099 | 0.7674982 | 1.0086858 | 2.4122358 |
| SIAL 542 | 0.5563488 | 0.0859300 | 0.4736960 | 0.5902047 |
| SIC 19 | 1.1475829 | 0.6177955 | 0.5534357 | 0.8641823 |
| SIC 23 | 1.9723632 | 1.8923925 | 0.8723117 | 0.9663457 |
| SIC 842 | -0.4623099 | -0.0184424 | 1.4113735 | 1.2039141 |
| SIC 864 | 0.0373146 | -0.3692287 | 1.3684819 | 1.3037882 |
| SIC 891 | 0.0373146 | 0.4558590 | 0.4806359 | 1.3515343 |
| SPA 12 | 0.3318582 | 1.1587869 | 1.5257616 | -0.3455082 |
| SPA 5 | 0.2267545 | 0.4558590 | 0.5892712 | 0.8572418 |
| TSA 516 | 0.7837619 | 1.0679640 | -1.5961591 | 0.3134961 |
| TSA 644 | 1.0572590 | 0.7674982 | -1.1962174 | -0.7470167 |
| TSA 654 | -0.1293605 | -0.0727642 | -0.4830375 | -0.8088858 |
| UF 36 | 0.1903677 | 0.0344305 | 1.9352783 | -1.2169483 |
| UF 667 | 0.7016820 | 0.1361256 | 2.0578990 | 1.2939368 |
Número ótimo de clusters
# Silhueta média para kmeans
fviz_nbclust(df, kmeans, method = "silhouette")
# Estatística de lacunas
fviz_nbclust(df, kmeans, method = "gap_stat")
# Método Elbow para kmeans
fviz_nbclust(df, kmeans, method = "wss") + geom_vline(xintercept = 4, linetype = 2)
nbclust_out <- NbClust(
data = df,
distance = "euclidean",
min.nc = 2,
max.nc = 5,
method = "kmeans",
index = "alllong")
*** : The Hubert index is a graphical method of determining the number of clusters.
In the plot of Hubert index, we seek a significant knee that corresponds to a
significant increase of the value of the measure i.e the significant peak in Hubert
index second differences plot.
*** : The D index is a graphical method of determining the number of clusters.
In the plot of D index, we seek a significant knee (the significant peak in Dindex
second differences plot) that corresponds to a significant increase of the value of
the measure.
*******************************************************************
* Among all indices:
* 8 proposed 2 as the best number of clusters
* 4 proposed 3 as the best number of clusters
* 9 proposed 4 as the best number of clusters
* 6 proposed 5 as the best number of clusters
***** Conclusion *****
* According to the majority rule, the best number of clusters is 4
*******************************************************************
nbclust_out$All.index
KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman
2 2.2321 27.1506 16.8421 -1.9194 103.3947 15566957 3238.529 208.3329 2.8096
3 0.1505 24.8614 16.6925 -2.9517 156.0221 17033075 2179.479 168.3890 4.5956
4 39.9813 25.7181 8.2846 -2.9127 199.3300 16730983 1318.126 135.9660 5.3345
5 0.0205 23.3325 13.7369 -3.6410 267.6354 10255958 1063.210 121.3911 10.2345
Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky Ball
2 1.3824 0.2755 1.5232 0.2674 2.0616 -25.2324 -1.1971 0.3388 104.1665
3 1.7103 0.2965 1.5332 0.2378 1.6361 -13.2183 -0.9010 0.3616 56.1297
4 2.1182 0.2938 1.2623 0.2762 0.9983 0.0430 0.0038 0.3603 33.9915
5 2.3725 0.2867 1.3254 0.2491 3.1784 -19.1906 -1.4183 0.3363 24.2782
Ptbiserial Gap Frey McClain Gamma Gplus Tau Dunn Hubert
2 0.3378 -0.3797 0.4487 0.6825 0.4527 179.4901 296.9102 0.0651 0.0051
3 0.3711 -0.9354 -0.2269 1.4188 0.5047 142.9258 291.3295 0.0792 0.0056
4 0.4770 -1.2828 0.4890 1.4381 0.6777 87.3973 367.5171 0.0906 0.0077
5 0.4655 -1.3825 0.4144 1.8516 0.7118 67.6591 334.1613 0.1383 0.0093
SDindex Dindex SDbw
2 1.6888 1.4823 1.6102
3 1.5888 1.3645 2.2758
4 1.3727 1.2396 0.9037
5 1.5641 1.1691 0.7172
nbclust_out$Best.nc
KL CH Hartigan CCC Scott Marriot TrCovW
Number_clusters 4.0000 2.0000 4.0000 2.0000 5.0000 3 3.000
Value_Index 39.9813 27.1506 8.4079 -1.9194 68.3054 -1768210 1059.051
TraceW Friedman Rubin Cindex DB Silhouette Duda
Number_clusters 4.000 5.0 4.0000 2.0000 4.0000 4.0000 2.0000
Value_Index 17.848 4.9 -0.1535 0.2755 1.2623 0.2762 2.0616
PseudoT2 Beale Ratkowsky Ball PtBiserial Gap Frey
Number_clusters 2.0000 2.0000 3.0000 3.0000 4.000 2.0000 1
Value_Index -25.2324 -1.1971 0.3616 48.0368 0.477 -0.3797 NA
McClain Gamma Gplus Tau Dunn Hubert SDindex Dindex
Number_clusters 2.0000 5.0000 5.0000 4.0000 5.0000 0 4.0000 0
Value_Index 0.6825 0.7118 67.6591 367.5171 0.1383 0 1.3727 0
SDbw
Number_clusters 5.0000
Value_Index 0.7172
nbclust_out$All.CriticalValues
CritValue_Duda CritValue_PseudoT2 Fvalue_Beale CritValue_Gap
2 0.4590 57.7529 1 0.5685
3 0.4446 42.4750 1 0.3650
4 0.4195 35.9728 1 0.1197
5 0.1265 193.3684 1 0.3722
# create a dataframe of the optimal number of clusters
nbclust_plot <- data.frame(clusters = nbclust_out$Best.nc[1, ])
nbclust_plot
# select only indices which select between 2 and 5 clusters
nbclust_plot <- subset(nbclust_plot, clusters >= 2 & clusters <= 5)
# create plot
ggplot(nbclust_plot) +
aes(x = clusters) +
geom_histogram(bins = 30L, fill = "#0c4c8a") +
labs(x = "Number of clusters", y = "Frequency among all indices", title = "Optimal number of clusters") +
theme_minimal()
set.seed(123)
km.res=kmeans(df, 4, nstart = 25)
print(km.res)
K-means clustering with 4 clusters of sizes 12, 23, 29, 9
Cluster means:
Leaf (E1) Leaf (E2) Pod Lab. Pod Field
1 0.73138239 -0.2030254 0.8735903 -0.9562971
2 0.19796956 0.5115613 0.7299159 1.1725929
3 -0.07750679 0.2706292 -0.7183433 -0.3679490
4 -1.23135464 -1.9086502 -0.7154660 -0.5359501
Clustering vector:
AMAZON 15.15 AMAZON 2.1 APA 5 CAB 4 CAB 5003.23
1 4 3 3 1
CAB 5046.1403 CCN 51 CEPEC 1008 CEPEC 38 CEPEC 40
2 3 2 2 3
CEPEC 42 CEPEC 44 CEPEC 75 CEPEC 82 CEPEC 84
3 1 3 1 3
CEPEC 89 CEPEC 92 CEPEC 93 CHUAO 120 CJ 10
3 3 3 2 1
CSUL 4 CSUL 5 EET 272 EET 390 EET 392
3 2 3 3 3
EET 62 ICS 100 ICS 78 ICS 95 IMC 2
2 2 2 3 3
IMC 23 IMC 51 MA 12 MOCORONGO 1 MOCORONGO 2
1 1 3 3 3
MOQ 417 MOQ 647 NA 312 NA 33 OC 77
1 4 2 1 2
PA 120 PA 148 PA 15 PA 150 PA 169
1 3 2 2 3
PA 285 PA 294 PA 30 PA 44 PA 51
4 4 4 4 3
PA 70 PA 88 RB 31 RB 32 RB 33
4 4 3 3 3
RB 39 RIM 117 SCAVINA 6 SIAL 164 SIAL 505
3 2 4 2 2
SIAL 542 SIC 19 SIC 23 SIC 842 SIC 864
2 2 2 2 2
SIC 891 SPA 12 SPA 5 TSA 516 TSA 644
2 1 2 3 3
TSA 654 UF 36 UF 667
3 1 2
Within cluster sum of squares by cluster:
[1] 21.94401 38.32819 45.64750 29.42227
(between_SS / total_SS = 53.0 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss"
[6] "betweenss" "size" "iter" "ifault"
#aggregate(dados, by=list(cluster=km.res$cluster), mean)
aggregate(dados, by=list(cluster=nbclust_out$Best.partition), mean)
dd <- cbind(dados, cluster = km.res$cluster)
dd %>% kable("pandoc")
| Leaf (E1) | Leaf (E2) | Pod Lab. | Pod Field | cluster | |
|---|---|---|---|---|---|
| AMAZON 15.15 | 1.6020600 | 1.1139434 | 0.8571114 | 1.723369 | 1 |
| AMAZON 2.1 | 1.0211893 | 1.0000000 | 1.5354184 | 1.900000 | 4 |
| APA 5 | 0.9542425 | 1.3222193 | 0.5068532 | 2.918904 | 3 |
| CAB 4 | 1.3222193 | 1.3891661 | 0.4960531 | 2.104757 | 3 |
| CAB 5003.23 | 1.3802112 | 1.4149733 | 1.3177060 | 1.466288 | 1 |
| CAB 5046.1403 | 1.2174839 | 1.4232459 | 0.9791707 | 6.892750 | 2 |
| CCN 51 | 1.6532125 | 1.0791812 | 0.2809029 | 5.754129 | 3 |
| CEPEC 1008 | 1.3521825 | 1.6483600 | 0.9891409 | 4.869292 | 2 |
| CEPEC 38 | 1.3979400 | 1.2174839 | 1.3666029 | 4.202380 | 2 |
| CEPEC 40 | 1.5740313 | 1.3222193 | 0.7961165 | 1.445683 | 3 |
| CEPEC 42 | 1.0413927 | 1.2041200 | 1.1048756 | 3.495712 | 3 |
| CEPEC 44 | 1.7817554 | 1.2900346 | 1.2825478 | 1.539480 | 1 |
| CEPEC 75 | 1.4623980 | 1.3710679 | 0.8729614 | 1.624808 | 3 |
| CEPEC 82 | 1.6901961 | 1.2787536 | 0.9510319 | 2.424871 | 1 |
| CEPEC 84 | 1.5118834 | 1.3617278 | 0.7590695 | 1.876166 | 3 |
| CEPEC 89 | 1.3010300 | 1.3222193 | 0.2587726 | 2.391652 | 3 |
| CEPEC 92 | 1.3010300 | 1.2671717 | 0.2639505 | 3.224903 | 3 |
| CEPEC 93 | 1.4623980 | 1.3617278 | 0.3156066 | 1.288410 | 3 |
| CHUAO 120 | 0.9777236 | 1.1461280 | 1.7819526 | 6.527634 | 2 |
| CJ 10 | 1.6020600 | 1.2671717 | 1.8245065 | 1.606238 | 1 |
| CSUL 4 | 1.2304489 | 1.2671717 | 0.9232208 | 2.922328 | 3 |
| CSUL 5 | 1.4313638 | 1.4623980 | 1.1350635 | 5.498182 | 2 |
| EET 272 | 1.3324385 | 1.2900346 | 0.6117698 | 1.926136 | 3 |
| EET 390 | 1.1760913 | 1.3979400 | 0.5764116 | 2.083267 | 3 |
| EET 392 | 1.3117539 | 1.4393327 | 0.5464359 | 4.435087 | 3 |
| EET 62 | 1.3117539 | 1.3324385 | 1.1572554 | 3.896152 | 2 |
| ICS 100 | 1.5563025 | 1.4623980 | 1.1430697 | 5.324472 | 2 |
| ICS 78 | 1.5051500 | 1.3617278 | 1.2239901 | 7.615773 | 2 |
| ICS 95 | 1.4771213 | 1.5682017 | 0.6780611 | 4.369211 | 3 |
| IMC 2 | 1.6020600 | 1.2430380 | 0.5063949 | 3.018278 | 3 |
| IMC 23 | 1.5563025 | 0.8750613 | 1.9398831 | 2.118962 | 1 |
| IMC 51 | 1.5682017 | 1.0413927 | 1.9315410 | 1.407125 | 1 |
| MA 12 | 1.5250448 | 1.3222193 | 0.7386612 | 2.670206 | 3 |
| MOCORONGO 1 | 1.0211893 | 1.2900346 | 1.2753404 | 2.875761 | 3 |
| MOCORONGO 2 | 1.1461280 | 1.3222193 | 0.7311415 | 2.600000 | 3 |
| MOQ 417 | 1.3891661 | 1.1461280 | 1.0109978 | 1.260952 | 1 |
| MOQ 647 | 1.2041200 | 0.7403627 | 0.7115607 | 1.268858 | 4 |
| NA 312 | 1.3424227 | 1.2787536 | 1.4783115 | 3.917908 | 2 |
| NA 33 | 1.5440680 | 1.5118834 | 1.5895142 | 2.090454 | 1 |
| OC 77 | 1.7242759 | 1.4232459 | 1.0502791 | 3.917908 | 2 |
| PA 120 | 1.4232459 | 0.9777236 | 0.9652000 | 1.228821 | 1 |
| PA 148 | 1.3710679 | 1.1760913 | 1.0276270 | 3.938274 | 3 |
| PA 15 | 1.3617278 | 1.4698220 | 1.2356085 | 4.701064 | 2 |
| PA 150 | 1.3324385 | 1.4623980 | 1.2646627 | 4.282523 | 2 |
| PA 169 | 0.9542425 | 1.4698220 | 0.5385116 | 2.477902 | 3 |
| PA 285 | 0.3979400 | 0.3979400 | 0.6545942 | 3.056141 | 4 |
| PA 294 | 1.2174839 | 0.9030900 | 0.4485199 | 2.477902 | 4 |
| PA 30 | 1.1613680 | 0.9777236 | 0.0000000 | 3.364521 | 4 |
| PA 44 | 1.3710679 | 0.9030900 | 0.7755202 | 2.100000 | 4 |
| PA 51 | 1.3324385 | 1.2041200 | 0.7822918 | 1.720465 | 3 |
| PA 70 | 1.3010300 | 0.9777236 | 1.0344874 | 2.095233 | 4 |
| PA 88 | 1.2787536 | 1.0000000 | 0.5565108 | 2.306513 | 4 |
| RB 31 | 1.3222193 | 1.1461280 | 0.6663900 | 2.170253 | 3 |
| RB 32 | 1.5563025 | 1.4232459 | 0.3983577 | 2.233831 | 3 |
| RB 33 | 1.2304489 | 1.4623980 | 0.6417879 | 2.034699 | 3 |
| RB 39 | 1.3324385 | 1.3617278 | 0.9754240 | 3.430743 | 3 |
| RIM 117 | 1.4471580 | 1.3979400 | 2.1415689 | 7.009280 | 2 |
| SCAVINA 6 | 0.7403627 | 0.7781513 | 0.0778750 | 3.424909 | 4 |
| SIAL 164 | 1.3010300 | 1.4065402 | 1.2432902 | 4.846648 | 2 |
| SIAL 505 | 1.2552725 | 1.4393327 | 1.5233441 | 7.345747 | 2 |
| SIAL 542 | 1.4913617 | 1.2900346 | 1.2504389 | 4.316248 | 2 |
| SIC 19 | 1.6283889 | 1.4065402 | 1.2911151 | 4.771792 | 2 |
| SIC 23 | 1.8195439 | 1.6857417 | 1.4537780 | 4.941660 | 2 |
| SIC 842 | 1.2552725 | 1.2671717 | 1.7287604 | 5.336666 | 2 |
| SIC 864 | 1.3710679 | 1.1903317 | 1.7068808 | 5.502727 | 2 |
| SIC 891 | 1.3710679 | 1.3710679 | 1.2539790 | 5.582114 | 2 |
| SPA 12 | 1.4393327 | 1.5250448 | 1.7871112 | 2.760435 | 1 |
| SPA 5 | 1.4149733 | 1.3710679 | 1.3093953 | 4.760252 | 2 |
| TSA 516 | 1.5440680 | 1.5051500 | 0.1945788 | 3.856164 | 3 |
| TSA 644 | 1.6074550 | 1.4393327 | 0.3985943 | 2.092845 | 3 |
| TSA 654 | 1.3324385 | 1.2552725 | 0.7623967 | 1.989975 | 3 |
| UF 36 | 1.4065402 | 1.2787536 | 1.9960110 | 1.311488 | 1 |
| UF 667 | 1.5250448 | 1.3010300 | 2.0585615 | 5.486347 | 2 |
km.res$cluster
AMAZON 15.15 AMAZON 2.1 APA 5 CAB 4 CAB 5003.23
1 4 3 3 1
CAB 5046.1403 CCN 51 CEPEC 1008 CEPEC 38 CEPEC 40
2 3 2 2 3
CEPEC 42 CEPEC 44 CEPEC 75 CEPEC 82 CEPEC 84
3 1 3 1 3
CEPEC 89 CEPEC 92 CEPEC 93 CHUAO 120 CJ 10
3 3 3 2 1
CSUL 4 CSUL 5 EET 272 EET 390 EET 392
3 2 3 3 3
EET 62 ICS 100 ICS 78 ICS 95 IMC 2
2 2 2 3 3
IMC 23 IMC 51 MA 12 MOCORONGO 1 MOCORONGO 2
1 1 3 3 3
MOQ 417 MOQ 647 NA 312 NA 33 OC 77
1 4 2 1 2
PA 120 PA 148 PA 15 PA 150 PA 169
1 3 2 2 3
PA 285 PA 294 PA 30 PA 44 PA 51
4 4 4 4 3
PA 70 PA 88 RB 31 RB 32 RB 33
4 4 3 3 3
RB 39 RIM 117 SCAVINA 6 SIAL 164 SIAL 505
3 2 4 2 2
SIAL 542 SIC 19 SIC 23 SIC 842 SIC 864
2 2 2 2 2
SIC 891 SPA 12 SPA 5 TSA 516 TSA 644
2 1 2 3 3
TSA 654 UF 36 UF 667
3 1 2
km.res$size
[1] 12 23 29 9
km.res$centers
Leaf (E1) Leaf (E2) Pod Lab. Pod Field
1 0.73138239 -0.2030254 0.8735903 -0.9562971
2 0.19796956 0.5115613 0.7299159 1.1725929
3 -0.07750679 0.2706292 -0.7183433 -0.3679490
4 -1.23135464 -1.9086502 -0.7154660 -0.5359501
a<-fviz_cluster(km.res, data=df,
axes = c(1, 2),
geom.ind = c("text"),
ellipse.type="euclid",
star.plot=TRUE,
palette = "Dark2",
repel=TRUE,
ggtheme=theme_minimal()
)
b<-fviz_cluster(km.res, data=df,
axes = c(1, 3),
geom.ind = c("text"),
ellipse.type="euclid",
star.plot=TRUE,
palette = "Dark2",
repel=TRUE,
ggtheme=theme_minimal()
)
ggarrange(a,b)
dista=dist(df, method="euclidean")
dista.hc=hclust(d=dista, method="ward.D")
fviz_dend(dista.hc, cex=0.5, k = 4, color_labels_by_k = TRUE, horiz = T)
km.res1 <- hkmeans(df, 4,hc.metric = "euclid" ,hc.method = "ward.D")
fviz_dend(km.res1, cex = 0.6, palette = "Dark2",
rect = TRUE, rect_border = "Dark2", rect_fill = TRUE, horiz = T)
fviz_dist(dista, gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))
km.pca <- PCA(
df,
graph = F,
scale.unit = TRUE)
eig.val <- get_eigenvalue(km.pca)
eig.val
eigenvalue variance.percent cumulative.variance.percent
Dim.1 1.7136448 42.841119 42.84112
Dim.2 1.0728453 26.821132 69.66225
Dim.3 0.8152177 20.380442 90.04269
Dim.4 0.3982923 9.957307 100.00000
fviz_eig(km.pca, addlabels=TRUE)
var <- get_pca_var(km.pca)
var
Principal Component Analysis Results for variables
===================================================
Name Description
1 "$coord" "Coordinates for the variables"
2 "$cor" "Correlations between variables and dimensions"
3 "$cos2" "Cos2 for the variables"
4 "$contrib" "contributions of the variables"
$coord
$cor
$cos2
$contrib
Coordinates for the variables
Correlations between variables and dimensions
Cos2 for the variables
contributions of the variables
# Coordenadas
head(var$coord)
Dim.1 Dim.2 Dim.3 Dim.4
Leaf (E1) 0.6763683 -0.5969645 0.2276115 0.3665409
Leaf (E2) 0.7804613 -0.2834038 -0.4044565 -0.3833762
Pod Lab. 0.5795067 0.4345923 0.6635507 -0.1870882
Pod Field 0.5578734 0.6687978 -0.3994071 0.2862880
# Cos2: qualidade no mapa do fator
head(var$cos2)
Dim.1 Dim.2 Dim.3 Dim.4
Leaf (E1) 0.4574741 0.35636667 0.05180698 0.13435225
Leaf (E2) 0.6091199 0.08031772 0.16358510 0.14697727
Pod Lab. 0.3358280 0.18887043 0.44029958 0.03500198
Pod Field 0.3112228 0.44729045 0.15952600 0.08196079
# Contribuições para os componentes principais
head(var$contrib)
Dim.1 Dim.2 Dim.3 Dim.4
Leaf (E1) 26.69597 33.216968 6.354988 33.732074
Leaf (E2) 35.54528 7.486421 20.066432 36.901862
Pod Lab. 19.59729 17.604629 54.010064 8.788013
Pod Field 18.16145 41.691982 19.568517 20.578051
fviz_cos2(km.pca, choice = "var", axes = 1:3)
df %>% cor(method = "spearman") %>% corrplot(.,
method = "number",
type = "upper",
tl.pos = "td")
summary(km.pca)
Call:
PCA(X = df, scale.unit = TRUE, graph = F)
Eigenvalues
Dim.1 Dim.2 Dim.3 Dim.4
Variance 1.714 1.073 0.815 0.398
% of var. 42.841 26.821 20.380 9.957
Cumulative % of var. 42.841 69.662 90.043 100.000
Individuals (the 10 first)
Dist Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3
AMAZON 15.15 | 1.627 | -0.442 0.156 0.074 | -1.158 1.712 0.506 | 0.798
AMAZON 2.1 | 2.364 | -1.419 1.611 0.361 | 1.071 1.464 0.205 | 1.333
APA 5 | 2.060 | -1.322 1.398 0.412 | 0.380 0.184 0.034 | -1.169
CAB 4 | 1.380 | -0.533 0.227 0.149 | -0.953 1.161 0.477 | -0.701
CAB 5003.23 | 1.447 | 0.222 0.039 0.023 | -0.700 0.626 0.234 | 0.672
CAB 5046.1403 | 2.352 | 0.984 0.773 0.175 | 1.538 3.022 0.428 | -1.468
CCN 51 | 2.566 | 0.115 0.011 0.002 | -0.143 0.026 0.003 | -0.990
CEPEC 1008 | 1.968 | 1.389 1.543 0.499 | 0.135 0.023 0.005 | -1.227
CEPEC 38 | 0.927 | 0.469 0.176 0.256 | 0.614 0.482 0.439 | 0.436
CEPEC 40 | 1.544 | -0.059 0.003 0.001 | -1.509 2.907 0.954 | 0.324
ctr cos2
AMAZON 15.15 1.070 0.240 |
AMAZON 2.1 2.986 0.318 |
APA 5 2.295 0.322 |
CAB 4 0.826 0.258 |
CAB 5003.23 0.759 0.216 |
CAB 5046.1403 3.621 0.390 |
CCN 51 1.647 0.149 |
CEPEC 1008 2.531 0.389 |
CEPEC 38 0.320 0.221 |
CEPEC 40 0.177 0.044 |
Variables
Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr
Leaf (E1) | 0.676 26.696 0.457 | -0.597 33.217 0.356 | 0.228 6.355
Leaf (E2) | 0.780 35.545 0.609 | -0.283 7.486 0.080 | -0.404 20.066
Pod Lab. | 0.580 19.597 0.336 | 0.435 17.605 0.189 | 0.664 54.010
Pod Field | 0.558 18.161 0.311 | 0.669 41.692 0.447 | -0.399 19.569
cos2
Leaf (E1) 0.052 |
Leaf (E2) 0.164 |
Pod Lab. 0.440 |
Pod Field 0.160 |
# Contribuições de variáveis para PC1
fviz_contrib(km.pca, choice = "var", axes = 1)
# Contribuições de variáveis para PC2
fviz_contrib(km.pca, choice = "var", axes = 2)
# Contribuições de variáveis para PC3
fviz_contrib(km.pca, choice = "var", axes = 3)
# contribuição total para PC1 e PC2
fviz_contrib(km.pca, choice = "var", axes = 1:3)
a<-fviz_pca_biplot(
km.pca,
axes = c(1, 2),
geom.ind = "text",
col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
legend.title = "Contribuição",
palette = "Dark2",
repel = F
)
b<-fviz_pca_biplot(
km.pca,
axes = c(1, 3),
geom.ind = "text",
col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
legend.title = "Contribuição",
palette = "Dark2",
repel = F
)
ggarrange(a,b)
a<-fviz_pca_ind(
km.pca,
axes = c(1, 2),
geom = "text",
habillage = as.factor(dd$cluster),
addEllipses = TRUE,
palette = "Dark2"
)
b<-fviz_pca_ind(
km.pca,
axes = c(1, 3),
geom = "text",
habillage = as.factor(dd$cluster),
addEllipses = TRUE,
palette = "Dark2"
)
ggarrange(a,b)
a<-fviz_pca_ind(km.pca,
axes = c(1, 2),
geom.ind = "text",
col.ind = as.factor(dd$cluster),
addEllipses = TRUE,
legend.title = "Grupos",
repel = T,
palette = "Dark2"
)
b<-fviz_pca_ind(km.pca,
axes = c(1, 3),
geom.ind = "text",
col.ind = as.factor(dd$cluster),
addEllipses = TRUE,
legend.title = "Grupos",
repel = T,
palette = "Dark2"
)
ggarrange(a,b)
df %>% pairs.panels(.,
show.points=TRUE,
method = "spearman",
gap=0,
stars=TRUE,
ci=FALSE,
alpha=0.05,
cex.cor=1,
cex=1.0,
breaks="Sturges",
rug=FALSE,
density=F,
hist.col="darkgreen",
factor=5,
digits=2,
ellipses=FALSE,
scale=FALSE,
smooth=TRUE,
lm=T,
cor=T
)
# dd.pca = prcomp(df, scale = T)
#
#
# ggbiplot2(
# dd.pca,
# obs.scale = 1,
# var.scale = 1,
# ellipse = T,
# circle = T,
# varname.abbrev = T,
# grupos = as.factor(dd$cluster)
# ) + theme_minimal() + scale_color_brewer( palette = 'Dark2')
ind <- get_pca_ind(km.pca)
ind
Principal Component Analysis Results for individuals
===================================================
Name Description
1 "$coord" "Coordinates for the individuals"
2 "$cos2" "Cos2 for the individuals"
3 "$contrib" "contributions of the individuals"
$coord
$cos2
$contrib
Coordinates for the individuals
Cos2 for the individuals
contributions of the individuals
# Coordenadas de indivíduos
head(ind$coord)
Dim.1 Dim.2 Dim.3 Dim.4
AMAZON 15.15 -0.4415276 -1.1580371 0.7979677 0.6898845
AMAZON 2.1 -1.4194051 1.0706016 1.3330820 -0.8060493
APA 5 -1.3224511 0.3795163 -1.1687899 -0.9929521
CAB 4 -0.5325107 -0.9533828 -0.7011874 -0.4687149
CAB 5003.23 0.2216024 -0.7002055 0.6721670 -1.0506420
CAB 5046.1403 0.9836323 1.5383653 -1.4678948 0.2044751
# Qualidade dos indivíduos
head(ind$cos2)
Dim.1 Dim.2 Dim.3 Dim.4
AMAZON 15.15 0.07360117 0.50630689 0.2404028 0.179689106
AMAZON 2.1 0.36056034 0.20512616 0.3180379 0.116275558
APA 5 0.41199163 0.03393048 0.3218118 0.232266039
CAB 4 0.14894325 0.47741794 0.2582452 0.115393588
CAB 5003.23 0.02343981 0.23402171 0.2156550 0.526883515
CAB 5046.1403 0.17494089 0.42790238 0.3895970 0.007559737
# Contribuições de indivíduos
head(ind$contrib)
Dim.1 Dim.2 Dim.3 Dim.4
AMAZON 15.15 0.15583755 1.7123201 1.0699764 1.6369223
AMAZON 2.1 1.61053089 1.4635106 2.9861888 2.2345930
APA 5 1.39802705 0.1839081 2.2954954 3.3910313
CAB 4 0.22667991 1.1605788 0.8261743 0.7556014
CAB 5003.23 0.03925593 0.6260242 0.7592031 3.7965121
CAB 5046.1403 0.77343166 3.0217534 3.6207074 0.1437991
a<-fviz_pca_ind(km.pca, col.ind = "cos2",
axes = c(1, 2),
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE)
b<-fviz_pca_ind(km.pca, col.ind = "cos2",
axes = c(1, 3),
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE)
ggarrange(a,b)
fviz_contrib(km.pca, choice = "ind", axes = 1:3)
set.seed(123)
my.cont.var <- rnorm(73)
a<-fviz_pca_ind(km.pca, col.ind = my.cont.var,
axes = c(1, 2),
gradient.cols = c("blue", "yellow", "red"),
legend.title = "Cont.Var")
b<-fviz_pca_ind(km.pca, col.ind = my.cont.var,
axes = c(1, 3),
gradient.cols = c("blue", "yellow", "red"),
legend.title = "Cont.Var")
ggarrange(a,b)
a<-fviz_pca_ind(km.pca,
geom.ind = "point",
axes = c(1, 2),
col.ind = as.factor(dd$cluster),
palette = "Dark2",
addEllipses = TRUE,
legend.title = "Grupos"
)
b<-fviz_pca_ind(km.pca,
geom.ind = "point",
axes = c(1, 3),
col.ind = as.factor(dd$cluster),
palette = "Dark2",
addEllipses = TRUE,
legend.title = "Grupos"
)
ggarrange(a,b)
a<-fviz_pca_biplot(km.pca,
# geom.ind = c("point","text"),
axes = c(1, 2),
fill.ind = as.factor(dd$cluster),
col.ind = as.factor(dd$cluster),
pointshape = 21,
pointsize = 2,
palette = "Dark2",
#labelsize = 3,
addEllipses = TRUE,
col.var = "black",
legend.title = list(fill = "Cluster",
color = "Cluster")
)
b<-fviz_pca_biplot(km.pca,
geom.ind = c("point","text"),
axes = c(1, 3),
fill.ind = as.factor(dd$cluster),
col.ind = as.factor(dd$cluster),
pointshape = 21,
pointsize = 2,
palette = "Dark2",
#labelsize = 3,
addEllipses = TRUE,
col.var = "black",
legend.title = list(fill = "Cluster",
color = "Cluster")
)
ggarrange(a,b)
dados[1:2] %>% kable
| Leaf (E1) | Leaf (E2) | |
|---|---|---|
| AMAZON 15.15 | 1.6020600 | 1.1139434 |
| AMAZON 2.1 | 1.0211893 | 1.0000000 |
| APA 5 | 0.9542425 | 1.3222193 |
| CAB 4 | 1.3222193 | 1.3891661 |
| CAB 5003.23 | 1.3802112 | 1.4149733 |
| CAB 5046.1403 | 1.2174839 | 1.4232459 |
| CCN 51 | 1.6532125 | 1.0791812 |
| CEPEC 1008 | 1.3521825 | 1.6483600 |
| CEPEC 38 | 1.3979400 | 1.2174839 |
| CEPEC 40 | 1.5740313 | 1.3222193 |
| CEPEC 42 | 1.0413927 | 1.2041200 |
| CEPEC 44 | 1.7817554 | 1.2900346 |
| CEPEC 75 | 1.4623980 | 1.3710679 |
| CEPEC 82 | 1.6901961 | 1.2787536 |
| CEPEC 84 | 1.5118834 | 1.3617278 |
| CEPEC 89 | 1.3010300 | 1.3222193 |
| CEPEC 92 | 1.3010300 | 1.2671717 |
| CEPEC 93 | 1.4623980 | 1.3617278 |
| CHUAO 120 | 0.9777236 | 1.1461280 |
| CJ 10 | 1.6020600 | 1.2671717 |
| CSUL 4 | 1.2304489 | 1.2671717 |
| CSUL 5 | 1.4313638 | 1.4623980 |
| EET 272 | 1.3324385 | 1.2900346 |
| EET 390 | 1.1760913 | 1.3979400 |
| EET 392 | 1.3117539 | 1.4393327 |
| EET 62 | 1.3117539 | 1.3324385 |
| ICS 100 | 1.5563025 | 1.4623980 |
| ICS 78 | 1.5051500 | 1.3617278 |
| ICS 95 | 1.4771213 | 1.5682017 |
| IMC 2 | 1.6020600 | 1.2430380 |
| IMC 23 | 1.5563025 | 0.8750613 |
| IMC 51 | 1.5682017 | 1.0413927 |
| MA 12 | 1.5250448 | 1.3222193 |
| MOCORONGO 1 | 1.0211893 | 1.2900346 |
| MOCORONGO 2 | 1.1461280 | 1.3222193 |
| MOQ 417 | 1.3891661 | 1.1461280 |
| MOQ 647 | 1.2041200 | 0.7403627 |
| NA 312 | 1.3424227 | 1.2787536 |
| NA 33 | 1.5440680 | 1.5118834 |
| OC 77 | 1.7242759 | 1.4232459 |
| PA 120 | 1.4232459 | 0.9777236 |
| PA 148 | 1.3710679 | 1.1760913 |
| PA 15 | 1.3617278 | 1.4698220 |
| PA 150 | 1.3324385 | 1.4623980 |
| PA 169 | 0.9542425 | 1.4698220 |
| PA 285 | 0.3979400 | 0.3979400 |
| PA 294 | 1.2174839 | 0.9030900 |
| PA 30 | 1.1613680 | 0.9777236 |
| PA 44 | 1.3710679 | 0.9030900 |
| PA 51 | 1.3324385 | 1.2041200 |
| PA 70 | 1.3010300 | 0.9777236 |
| PA 88 | 1.2787536 | 1.0000000 |
| RB 31 | 1.3222193 | 1.1461280 |
| RB 32 | 1.5563025 | 1.4232459 |
| RB 33 | 1.2304489 | 1.4623980 |
| RB 39 | 1.3324385 | 1.3617278 |
| RIM 117 | 1.4471580 | 1.3979400 |
| SCAVINA 6 | 0.7403627 | 0.7781513 |
| SIAL 164 | 1.3010300 | 1.4065402 |
| SIAL 505 | 1.2552725 | 1.4393327 |
| SIAL 542 | 1.4913617 | 1.2900346 |
| SIC 19 | 1.6283889 | 1.4065402 |
| SIC 23 | 1.8195439 | 1.6857417 |
| SIC 842 | 1.2552725 | 1.2671717 |
| SIC 864 | 1.3710679 | 1.1903317 |
| SIC 891 | 1.3710679 | 1.3710679 |
| SPA 12 | 1.4393327 | 1.5250448 |
| SPA 5 | 1.4149733 | 1.3710679 |
| TSA 516 | 1.5440680 | 1.5051500 |
| TSA 644 | 1.6074550 | 1.4393327 |
| TSA 654 | 1.3324385 | 1.2552725 |
| UF 36 | 1.4065402 | 1.2787536 |
| UF 667 | 1.5250448 | 1.3010300 |
Dimensionamento e padronização
df <- scale(dados[1:2])
df %>% kable()
| Leaf (E1) | Leaf (E2) | |
|---|---|---|
| AMAZON 15.15 | 1.0339809 | -0.7179530 |
| AMAZON 2.1 | -1.4723134 | -1.2381215 |
| APA 5 | -1.7611700 | 0.2328580 |
| CAB 4 | -0.1734533 | 0.5384801 |
| CAB 5003.23 | 0.0767657 | 0.6562941 |
| CAB 5046.1403 | -0.6253570 | 0.6940595 |
| CCN 51 | 1.2546897 | -0.8766472 |
| CEPEC 1008 | -0.0441704 | 1.7217393 |
| CEPEC 38 | 0.1532604 | -0.2452746 |
| CEPEC 40 | 0.9130448 | 0.2328580 |
| CEPEC 42 | -1.3851414 | -0.3062831 |
| CEPEC 44 | 1.8093162 | 0.0859300 |
| CEPEC 75 | 0.4313785 | 0.4558590 |
| CEPEC 82 | 1.4142635 | 0.0344305 |
| CEPEC 84 | 0.6448940 | 0.4132204 |
| CEPEC 89 | -0.2648792 | 0.2328580 |
| CEPEC 92 | -0.2648792 | -0.0184424 |
| CEPEC 93 | 0.4313785 | 0.4132204 |
| CHUAO 120 | -1.6598557 | -0.5710251 |
| CJ 10 | 1.0339809 | -0.0184424 |
| CSUL 4 | -0.5694167 | -0.0184424 |
| CSUL 5 | 0.2974745 | 0.8727948 |
| EET 272 | -0.1293605 | 0.0859300 |
| EET 390 | -0.8039548 | 0.5785344 |
| EET 392 | -0.2186087 | 0.7674982 |
| EET 62 | -0.2186087 | 0.2795100 |
| ICS 100 | 0.8365502 | 0.8727948 |
| ICS 78 | 0.6158414 | 0.4132204 |
| ICS 95 | 0.4949053 | 1.3558047 |
| IMC 2 | 1.0339809 | -0.1286163 |
| IMC 23 | 0.8365502 | -1.8084856 |
| IMC 51 | 0.8878920 | -1.0491577 |
| MA 12 | 0.7016820 | 0.2328580 |
| MOCORONGO 1 | -1.4723134 | 0.0859300 |
| MOCORONGO 2 | -0.9332377 | 0.2328580 |
| MOQ 417 | 0.1154034 | -0.5710251 |
| MOQ 647 | -0.6830187 | -2.4234048 |
| NA 312 | -0.0862813 | 0.0344305 |
| NA 33 | 0.7837619 | 1.0987029 |
| OC 77 | 1.5613082 | 0.6940595 |
| PA 120 | 0.2624481 | -1.3398166 |
| PA 148 | 0.0373146 | -0.4342385 |
| PA 15 | -0.0029850 | 0.9066865 |
| PA 150 | -0.1293605 | 0.8727948 |
| PA 169 | -1.7611700 | 0.9066865 |
| PA 285 | -4.1614595 | -3.9866158 |
| PA 294 | -0.6253570 | -1.6805303 |
| PA 30 | -0.8674815 | -1.3398166 |
| PA 44 | 0.0373146 | -1.6805303 |
| PA 51 | -0.1293605 | -0.3062831 |
| PA 70 | -0.2648792 | -1.3398166 |
| PA 88 | -0.3609956 | -1.2381215 |
| RB 31 | -0.1734533 | -0.5710251 |
| RB 32 | 0.8365502 | 0.6940595 |
| RB 33 | -0.5694167 | 0.8727948 |
| RB 39 | -0.1293605 | 0.4132204 |
| RIM 117 | 0.3656224 | 0.5785344 |
| SCAVINA 6 | -2.6840015 | -2.2508944 |
| SIAL 164 | -0.2648792 | 0.6177955 |
| SIAL 505 | -0.4623099 | 0.7674982 |
| SIAL 542 | 0.5563488 | 0.0859300 |
| SIC 19 | 1.1475829 | 0.6177955 |
| SIC 23 | 1.9723632 | 1.8923925 |
| SIC 842 | -0.4623099 | -0.0184424 |
| SIC 864 | 0.0373146 | -0.3692287 |
| SIC 891 | 0.0373146 | 0.4558590 |
| SPA 12 | 0.3318582 | 1.1587869 |
| SPA 5 | 0.2267545 | 0.4558590 |
| TSA 516 | 0.7837619 | 1.0679640 |
| TSA 644 | 1.0572590 | 0.7674982 |
| TSA 654 | -0.1293605 | -0.0727642 |
| UF 36 | 0.1903677 | 0.0344305 |
| UF 667 | 0.7016820 | 0.1361256 |
Número ótimo de clusters
# Silhueta média para kmeans
fviz_nbclust(df, kmeans, method = "silhouette")
# Estatística de lacunas
fviz_nbclust(df, kmeans, method = "gap_stat")
# Método Elbow para kmeans
fviz_nbclust(df, kmeans, method = "wss") +
geom_vline(xintercept = 4, linetype = 2)
nbclust_out <- NbClust(
data = df,
distance = "euclidean",
min.nc = 2,
max.nc = 6,
method = "kmeans"
)
*** : The Hubert index is a graphical method of determining the number of clusters.
In the plot of Hubert index, we seek a significant knee that corresponds to a
significant increase of the value of the measure i.e the significant peak in Hubert
index second differences plot.
*** : The D index is a graphical method of determining the number of clusters.
In the plot of D index, we seek a significant knee (the significant peak in Dindex
second differences plot) that corresponds to a significant increase of the value of
the measure.
*******************************************************************
* Among all indices:
* 10 proposed 2 as the best number of clusters
* 3 proposed 3 as the best number of clusters
* 6 proposed 4 as the best number of clusters
* 5 proposed 6 as the best number of clusters
***** Conclusion *****
* According to the majority rule, the best number of clusters is 2
*******************************************************************
# create a dataframe of the optimal number of clusters
nbclust_plot <- data.frame(clusters = nbclust_out$Best.nc[1, ])
# select only indices which select between 2 and 5 clusters
nbclust_plot <- subset(nbclust_plot, clusters >= 2 & clusters <= 5)
# create plot
ggplot(nbclust_plot) +
aes(x = clusters) +
geom_histogram(bins = 30L, fill = "#0c4c8a") +
labs(x = "Number of clusters", y = "Frequency among all indices", title = "Optimal number of clusters") +
theme_minimal()
set.seed(123)
km.res=kmeans(df, 2, nstart=25)
print(km.res)
K-means clustering with 2 clusters of sizes 58, 15
Cluster means:
Leaf (E1) Leaf (E2)
1 0.2803737 0.3600806
2 -1.0841116 -1.3923116
Clustering vector:
AMAZON 15.15 AMAZON 2.1 APA 5 CAB 4 CAB 5003.23
1 2 2 1 1
CAB 5046.1403 CCN 51 CEPEC 1008 CEPEC 38 CEPEC 40
1 1 1 1 1
CEPEC 42 CEPEC 44 CEPEC 75 CEPEC 82 CEPEC 84
2 1 1 1 1
CEPEC 89 CEPEC 92 CEPEC 93 CHUAO 120 CJ 10
1 1 1 2 1
CSUL 4 CSUL 5 EET 272 EET 390 EET 392
1 1 1 1 1
EET 62 ICS 100 ICS 78 ICS 95 IMC 2
1 1 1 1 1
IMC 23 IMC 51 MA 12 MOCORONGO 1 MOCORONGO 2
2 1 1 2 1
MOQ 417 MOQ 647 NA 312 NA 33 OC 77
1 2 1 1 1
PA 120 PA 148 PA 15 PA 150 PA 169
2 1 1 1 1
PA 285 PA 294 PA 30 PA 44 PA 51
2 2 2 2 1
PA 70 PA 88 RB 31 RB 32 RB 33
2 2 1 1 1
RB 39 RIM 117 SCAVINA 6 SIAL 164 SIAL 505
1 1 2 1 1
SIAL 542 SIC 19 SIC 23 SIC 842 SIC 864
1 1 1 1 1
SIC 891 SPA 12 SPA 5 TSA 516 TSA 644
1 1 1 1 1
TSA 654 UF 36 UF 667
1 1 1
Within cluster sum of squares by cluster:
[1] 48.02313 37.18992
(between_SS / total_SS = 40.8 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss"
[6] "betweenss" "size" "iter" "ifault"
aggregate(dados[1:2], by=list(cluster=km.res$cluster), mean)
dd <- cbind(dados[1:2], cluster = km.res$cluster)
km.res$cluster
AMAZON 15.15 AMAZON 2.1 APA 5 CAB 4 CAB 5003.23
1 2 2 1 1
CAB 5046.1403 CCN 51 CEPEC 1008 CEPEC 38 CEPEC 40
1 1 1 1 1
CEPEC 42 CEPEC 44 CEPEC 75 CEPEC 82 CEPEC 84
2 1 1 1 1
CEPEC 89 CEPEC 92 CEPEC 93 CHUAO 120 CJ 10
1 1 1 2 1
CSUL 4 CSUL 5 EET 272 EET 390 EET 392
1 1 1 1 1
EET 62 ICS 100 ICS 78 ICS 95 IMC 2
1 1 1 1 1
IMC 23 IMC 51 MA 12 MOCORONGO 1 MOCORONGO 2
2 1 1 2 1
MOQ 417 MOQ 647 NA 312 NA 33 OC 77
1 2 1 1 1
PA 120 PA 148 PA 15 PA 150 PA 169
2 1 1 1 1
PA 285 PA 294 PA 30 PA 44 PA 51
2 2 2 2 1
PA 70 PA 88 RB 31 RB 32 RB 33
2 2 1 1 1
RB 39 RIM 117 SCAVINA 6 SIAL 164 SIAL 505
1 1 2 1 1
SIAL 542 SIC 19 SIC 23 SIC 842 SIC 864
1 1 1 1 1
SIC 891 SPA 12 SPA 5 TSA 516 TSA 644
1 1 1 1 1
TSA 654 UF 36 UF 667
1 1 1
km.res$size
[1] 58 15
km.res$centers
Leaf (E1) Leaf (E2)
1 0.2803737 0.3600806
2 -1.0841116 -1.3923116
fviz_cluster(km.res, data=df,
geom.ind = c("text"),
ellipse.type="euclid",
star.plot=TRUE,
palette = "Dark2",
repel=TRUE,
ggtheme=theme_minimal()
)
dista=dist(df, method="euclidean")
dista.hc=hclust(d=dista, method="ward.D")
fviz_dend(dista.hc, cex=0.5, k = 2, color_labels_by_k = TRUE, horiz = T)
km.res1 <- hkmeans(df, 2,hc.metric = "euclid" ,hc.method = "ward.D")
fviz_dend(km.res1, cex = 0.6, palette = "Dark2",
rect = TRUE, rect_border = "Dark2", rect_fill = TRUE, horiz = T)
fviz_dist(dista, gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))
km.pca <- PCA(
df,
graph = F,
scale.unit = TRUE)
eig.val <- get_eigenvalue(km.pca)
eig.val
eigenvalue variance.percent cumulative.variance.percent
Dim.1 1.4644793 73.22397 73.22397
Dim.2 0.5355207 26.77603 100.00000
fviz_eig(km.pca, addlabels=TRUE)
var <- get_pca_var(km.pca)
var
Principal Component Analysis Results for variables
===================================================
Name Description
1 "$coord" "Coordinates for the variables"
2 "$cor" "Correlations between variables and dimensions"
3 "$cos2" "Cos2 for the variables"
4 "$contrib" "contributions of the variables"
$coord
$cor
$cos2
$contrib
Coordinates for the variables
Correlations between variables and dimensions
Cos2 for the variables
contributions of the variables
# Coordenadas
head(var$coord)
Dim.1 Dim.2
Leaf (E1) 0.85571 -0.5174556
Leaf (E2) 0.85571 0.5174556
# Cos2: qualidade no mapa do fator
head(var$cos2)
Dim.1 Dim.2
Leaf (E1) 0.7322397 0.2677603
Leaf (E2) 0.7322397 0.2677603
# Contribuições para os componentes principais
head(var$contrib)
Dim.1 Dim.2
Leaf (E1) 50 50
Leaf (E2) 50 50
fviz_cos2(km.pca, choice = "var", axes = 1:2)
df %>% cor(method = "spearman") %>% corrplot(.,
method = "number",
type = "upper",
tl.pos = "td")
summary(km.pca)
Call:
PCA(X = df, scale.unit = TRUE, graph = F)
Eigenvalues
Dim.1 Dim.2
Variance 1.464 0.536
% of var. 73.224 26.776
Cumulative % of var. 73.224 100.000
Individuals (the 10 first)
Dist Dim.1 ctr cos2 Dim.2 ctr cos2
AMAZON 15.15 | 1.268 | 0.225 0.047 0.032 | -1.247 3.980 0.968 |
AMAZON 2.1 | 1.937 | -1.930 3.484 0.993 | 0.167 0.071 0.007 |
APA 5 | 1.789 | -1.088 1.108 0.370 | 1.420 5.156 0.630 |
CAB 4 | 0.570 | 0.260 0.063 0.208 | 0.507 0.657 0.792 |
CAB 5003.23 | 0.665 | 0.522 0.255 0.615 | 0.413 0.436 0.385 |
CAB 5046.1403 | 0.941 | 0.049 0.002 0.003 | 0.939 2.257 0.997 |
CCN 51 | 1.541 | 0.269 0.068 0.031 | -1.518 5.891 0.969 |
CEPEC 1008 | 1.734 | 1.194 1.334 0.474 | 1.257 4.044 0.526 |
CEPEC 38 | 0.291 | -0.066 0.004 0.051 | -0.284 0.206 0.949 |
CEPEC 40 | 0.949 | 0.816 0.623 0.739 | -0.484 0.600 0.261 |
Variables
Dim.1 ctr cos2 Dim.2 ctr cos2
Leaf (E1) | 0.856 50.000 0.732 | -0.517 50.000 0.268 |
Leaf (E2) | 0.856 50.000 0.732 | 0.517 50.000 0.268 |
# Contribuições de variáveis para PC1
fviz_contrib(km.pca, choice = "var", axes = 1, top = 10)
# Contribuições de variáveis para PC2
fviz_contrib(km.pca, choice = "var", axes = 2, top = 10)
# contribuição total para PC1 e PC2
fviz_contrib(km.pca, choice = "var", axes = 1:2)
fviz_pca_biplot(
km.pca,
geom.ind = "text",
col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
legend.title = "Contribuição",
palette = "Dark2",
repel = F
)
fviz_pca_ind(
km.pca,
geom = "text",
habillage = as.factor(dd$cluster),
addEllipses = TRUE,
palette = "Dark2"
)
fviz_pca_ind(km.pca,
geom.ind = "text",
col.ind = as.factor(dd$cluster),
addEllipses = TRUE,
legend.title = "Grupos",
repel = T,
palette = "Dark2"
)
df %>% pairs.panels(.,
show.points=TRUE,
method = "spearman",
gap=0,
stars=TRUE,
ci=FALSE,
alpha=0.05,
cex.cor=1,
cex=1.0,
breaks="Sturges",
rug=FALSE,
density=F,
hist.col="darkgreen",
factor=5,
digits=2,
ellipses=FALSE,
scale=FALSE,
smooth=TRUE,
lm=T,
cor=T
)
ind <- get_pca_ind(km.pca)
ind
Principal Component Analysis Results for individuals
===================================================
Name Description
1 "$coord" "Coordinates for the individuals"
2 "$cos2" "Cos2 for the individuals"
3 "$contrib" "contributions of the individuals"
$coord
$cos2
$contrib
Coordinates for the individuals
Cos2 for the individuals
contributions of the individuals
# Coordenadas de indivíduos
head(ind$coord)
Dim.1 Dim.2
AMAZON 15.15 0.22501198 -1.2473775
AMAZON 2.1 -1.92983049 0.1667447
APA 5 -1.08815866 1.4197485
CAB 4 0.25989921 0.5068968
CAB 5003.23 0.52193885 0.4126244
CAB 5046.1403 0.04891619 0.9394249
# Qualidade dos indivíduos
head(ind$cos2)
Dim.1 Dim.2
AMAZON 15.15 0.031514373 0.968485627
AMAZON 2.1 0.992589706 0.007410294
APA 5 0.370053894 0.629946106
CAB 4 0.208164169 0.791835831
CAB 5003.23 0.615389746 0.384610254
CAB 5046.1403 0.002703991 0.997296009
# Contribuições de indivíduos
head(ind$contrib)
Dim.1 Dim.2
AMAZON 15.15 0.047359290 3.98012509
AMAZON 2.1 3.483631537 0.07112216
APA 5 1.107588244 5.15612909
CAB 4 0.063183520 0.65726426
CAB 5003.23 0.254819780 0.43552267
CAB 5046.1403 0.002238201 2.25748603
fviz_pca_ind(km.pca, col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE
)
fviz_contrib(km.pca, choice = "ind", axes = 1:2)
set.seed(123)
my.cont.var <- rnorm(73)
fviz_pca_ind(km.pca, col.ind = my.cont.var,
gradient.cols = c("blue", "yellow", "red"),
legend.title = "Cont.Var")
fviz_pca_ind(km.pca,
geom.ind = "point",
col.ind = as.factor(dd$cluster),
palette = "Dark2",
addEllipses = TRUE,
legend.title = "Grupos"
)
fviz_pca_biplot(km.pca,
geom.ind = c("text","point"),
fill.ind = as.factor(dd$cluster), col.ind = "black",
pointshape = 21,
pointsize = 2,
palette = "Dark2",
addEllipses = TRUE,
#alpha.var ="contrib",
col.var = "black",
legend.title = list(fill = "Cluster",
color = "Cluster")
)
Os procedimentos estatísticos utilizados neste estudo foram realizados no programa R (R Core Team 2020). Pacotes stats (R Core Team 2020). Pacote dyplr, para manipulação de dados, (Wickham et al. 2020). Pacote ggpot2: elementos gráficos (Wickham 2016), Para análise multivariada foram utilizados os pacotes factoextra, (Kassambara and Mundt 2020), FactorMiner, (Lê, Josse, and Husson 2008), vegan (Oksanen et al. 2019), corrplot (Wei and Simko 2017) e NbClust (Charrad et al. 2014)
Charrad, Malika, Nadia Ghazzali, Véronique Boiteau, and Azam Niknafs. 2014. “NbClust: An R Package for Determining the Relevant Number of Clusters in a Data Set.” Journal of Statistical Software 61 (6): 1–36. http://www.jstatsoft.org/v61/i06/.
Kassambara, Alboukadel, and Fabian Mundt. 2020. Factoextra: Extract and Visualize the Results of Multivariate Data Analyses. https://CRAN.R-project.org/package=factoextra.
Lê, Sébastien, Julie Josse, and François Husson. 2008. “FactoMineR: A Package for Multivariate Analysis.” Journal of Statistical Software 25 (1): 1–18. https://doi.org/10.18637/jss.v025.i01.
Oksanen, Jari, F. Guillaume Blanchet, Michael Friendly, Roeland Kindt, Pierre Legendre, Dan McGlinn, Peter R. Minchin, et al. 2019. Vegan: Community Ecology Package. https://CRAN.R-project.org/package=vegan.
R Core Team. 2020. “R: A Language and Environment for Statistical Computing.” https://www.R-project.org/.
Wei, Taiyun, and Viliam Simko. 2017. R Package "Corrplot": Visualization of a Correlation Matrix. https://github.com/taiyun/corrplot.
Wickham, Hadley. 2016. Ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. https://ggplot2.tidyverse.org.
Wickham, Hadley, Romain François, Lionel Henry, and Kirill Müller. 2020. Dplyr: A Grammar of Data Manipulation. https://CRAN.R-project.org/package=dplyr.