library(tidyverse)
library(readr)
library(scales)
library(ggthemes)
library(reshape2)
library(ggplot2)
library(factoextra)
library(psych)
library(cluster)
library(NbClust)
library(readxl)
library(robustX)
library(dendextend)
library(corrplot)
library(readxl)
covidcluster <- read_excel("covidcluster2.xlsx")
dados <- as.data.frame(covidcluster)
head(dados)
# adequando a base de dados
# guardar o nome dos alunos
nomes <- dados$pais
# apagar a variável alunos
dados <- dados[,-1]
# nomear as linhas com o nome dos alunos
row.names(dados) <- c(nomes)
head(dados)
Estatística Descritiva
describe(dados)
boxplot(dados)

# dados padronizados
dados <- scale(dados)
describe(dados)
boxplot(dados)

# Compute dissimilarity matrix
d <- dist(dados, method = "euclidean")
#d <-d^2 # dist quad Euclidiana
# O pacote vegan apresenta muitas opções de distâncias para dados não métricos; inclusive a quadrática euclidiana.
# library(vegan)
# ?designdist
# d <- designdist(dados, method = "A+B-2*J", terms = c("quadratic"))
# d
# Hierarchical clustering using complete method
hc <- hclust(d, method = "complete" )
plot(hc,cex=0.6,labels=rownames(dados), hang = -1) # display dendogram

# ou
# plot(hc,cex=0.6,labels=rownames(dados)) # display dendogram
# outliers
#outliers <- BACON(dados)
# outliers
# há obs discrepante?
#table(outliers$subset)
#rownames(dados)[outliers$subset== F]
k=3
# Cut tree into 4 groups
grupos <- cutree(hc, k)
#grupos
rownames(dados)[grupos == 1]
[1] "USA" "China" "Brazil"
rownames(dados)[grupos == 2]
[1] "Italy" "Spain"
rownames(dados)[grupos == 3]
[1] "S. Korea"
#rownames(dados)[grupos == 4]
#rownames(dados)[grupos == 5]
Usando o pacote factoextra
# ?hcut
# este pacote não tem dist quad Euclidiana
res <- hcut(dados, k , stand = TRUE,
hc_func = "hclust",
hc_method = "ward.D2",
hc_metric = "euclidean")
fviz_dend(res, rect = T, cex = 0.7, horiz = T)

fviz_cluster(res) +theme_bw()

fviz_silhouette(res)+theme_bw()

# It’s possible to compare simultaneously multiple dendrograms.
# A chaining operator %>% (available in dendextend) is used to run multiple function at the same time. It’s useful for simplifying the code:
# Create multiple dendrograms by chaining
dend1 <- dados %>% dist("euclidean") %>% hclust("complete") %>% as.dendrogram
dend2 <- dados %>% dist("euclidean") %>% hclust("single") %>% as.dendrogram
dend3 <- dados %>% dist("euclidean") %>% hclust("average") %>% as.dendrogram
dend4 <- dados %>% dist("euclidean") %>% hclust("centroid") %>% as.dendrogram
dend5 <- dados %>% dist("euclidean") %>% hclust("ward.D2") %>% as.dendrogram
#?hclust
# Compute correlation matrix
dend_list <- dendlist("Complete" = dend1, "Single" = dend2,
"Average" = dend3, "Centroid" = dend4,
"ward"= dend5)
#Computes the cophenetic distances for a hierarchical clustering.
# ?cor.dendlist
cors <- cor.dendlist(dend_list)
# Print correlation matrix
round(cors, 2)
Complete Single Average Centroid ward
Complete 1.00 0.39 0.54 0.52 0.43
Single 0.39 1.00 0.97 0.93 0.72
Average 0.54 0.97 1.00 0.96 0.82
Centroid 0.52 0.93 0.96 1.00 0.77
ward 0.43 0.72 0.82 0.77 1.00
# Visualize the correlation matrix using corrplot package
corrplot(cors, "number", "lower")

corrplot(cors, "pie", "lower")

LS0tDQp0aXRsZTogIkNvdmlkLTE5LSBDbHVzdGVyIg0KYXV0aG9yOiAiUHJvZmVzc29yIERyLiBMZW9uaSwgUi4gQy4gLSBBTUFOIC0gUmVzZW5kZSAtIFJKLiINCmRhdGU6ICdSZWxhdMOzcmlvIGdlcmFkbyBlbTogYHIgZm9ybWF0KFN5cy50aW1lKCksICIlZCBkZSAlQiBkZSAlWSIpYCcNCm91dHB1dDoNCiAgaHRtbF9ub3RlYm9vazoNCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUNCiAgICBmaWdfY2FwdGlvbjogeWVzDQogICAgdGhlbWU6IGpvdXJuYWwNCiAgICB0b2M6IHllcw0KICB3b3JkX2RvY3VtZW50Og0KICAgIHRvYzogeWVzDQplbWFpbDogbGVvbmkucm9iZXJ0b0BhbWFuLmViLm1pbC5icg0KLS0tDQoNCg0KDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHJlYWRyKQ0KbGlicmFyeShzY2FsZXMpDQpsaWJyYXJ5KGdndGhlbWVzKQ0KbGlicmFyeShyZXNoYXBlMikNCg0KbGlicmFyeShnZ3Bsb3QyKSAgDQpsaWJyYXJ5KGZhY3RvZXh0cmEpDQpsaWJyYXJ5KHBzeWNoKQ0KbGlicmFyeShjbHVzdGVyKQ0KbGlicmFyeShOYkNsdXN0KQ0KbGlicmFyeShyZWFkeGwpDQpsaWJyYXJ5KHJvYnVzdFgpDQpsaWJyYXJ5KGRlbmRleHRlbmQpDQpsaWJyYXJ5KGNvcnJwbG90KQ0KYGBgDQoNCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQpsaWJyYXJ5KHJlYWR4bCkNCmNvdmlkY2x1c3RlciA8LSByZWFkX2V4Y2VsKCJjb3ZpZGNsdXN0ZXIyLnhsc3giKSANCmBgYA0KDQpgYGB7cn0NCmRhZG9zIDwtIGFzLmRhdGEuZnJhbWUoY292aWRjbHVzdGVyKQ0KaGVhZChkYWRvcykNCg0KIyBhZGVxdWFuZG8gYSBiYXNlIGRlIGRhZG9zDQoNCiMgZ3VhcmRhciBvIG5vbWUgZG9zIGFsdW5vcw0Kbm9tZXMgPC0gZGFkb3MkcGFpcw0KIyBhcGFnYXIgYSB2YXJpw6F2ZWwgYWx1bm9zDQpkYWRvcyA8LSBkYWRvc1ssLTFdDQojIG5vbWVhciBhcyBsaW5oYXMgY29tIG8gbm9tZSBkb3MgYWx1bm9zDQpyb3cubmFtZXMoZGFkb3MpIDwtIGMobm9tZXMpDQoNCmhlYWQoZGFkb3MpDQpgYGANCg0KDQo+IEVzdGF0w61zdGljYSBEZXNjcml0aXZhDQoNCmBgYHtyfQ0KZGVzY3JpYmUoZGFkb3MpDQpib3hwbG90KGRhZG9zKQ0KIyBkYWRvcyBwYWRyb25pemFkb3MNCmRhZG9zIDwtIHNjYWxlKGRhZG9zKQ0KZGVzY3JpYmUoZGFkb3MpDQpib3hwbG90KGRhZG9zKQ0KYGBgDQoNCg0KDQoNCmBgYHtyLCBmaWcuaGVpZ2h0PTEwLCBmaWcud2lkdGg9MTJ9DQojIENvbXB1dGUgZGlzc2ltaWxhcml0eSBtYXRyaXgNCmQgPC0gZGlzdChkYWRvcywgbWV0aG9kID0gImV1Y2xpZGVhbiIpDQojZCA8LWReMiAjIGRpc3QgcXVhZCBFdWNsaWRpYW5hDQoNCiMgTyBwYWNvdGUgdmVnYW4gYXByZXNlbnRhIG11aXRhcyBvcMOnw7VlcyBkZSBkaXN0w6JuY2lhcyBwYXJhIGRhZG9zIG7Do28gbcOpdHJpY29zOyBpbmNsdXNpdmUgYSBxdWFkcsOhdGljYSBldWNsaWRpYW5hLiANCiMgbGlicmFyeSh2ZWdhbikNCiMgP2Rlc2lnbmRpc3QNCiMgZCA8LSBkZXNpZ25kaXN0KGRhZG9zLCBtZXRob2QgPSAiQStCLTIqSiIsIHRlcm1zID0gYygicXVhZHJhdGljIikpDQojIGQNCg0KIyBIaWVyYXJjaGljYWwgY2x1c3RlcmluZyB1c2luZyBjb21wbGV0ZSBtZXRob2QNCmhjIDwtIGhjbHVzdChkLCBtZXRob2QgPSAiY29tcGxldGUiICkNCg0KcGxvdChoYyxjZXg9MC42LGxhYmVscz1yb3duYW1lcyhkYWRvcyksIGhhbmcgPSAtMSkgIyBkaXNwbGF5IGRlbmRvZ3JhbQ0KIyBvdQ0KIyBwbG90KGhjLGNleD0wLjYsbGFiZWxzPXJvd25hbWVzKGRhZG9zKSkgIyBkaXNwbGF5IGRlbmRvZ3JhbQ0KDQpgYGANCg0KDQoNCg0KYGBge3J9DQojIG91dGxpZXJzDQoNCiNvdXRsaWVycyA8LSBCQUNPTihkYWRvcykNCiMgb3V0bGllcnMNCg0KDQojIGjDoSBvYnMgZGlzY3JlcGFudGU/DQojdGFibGUob3V0bGllcnMkc3Vic2V0KQ0KI3Jvd25hbWVzKGRhZG9zKVtvdXRsaWVycyRzdWJzZXQ9PSBGXQ0KYGBgDQoNCg0KDQpgYGB7cn0NCms9MyANCiMgQ3V0IHRyZWUgaW50byA0IGdyb3Vwcw0KZ3J1cG9zIDwtIGN1dHJlZShoYywgaykNCiNncnVwb3MNCg0Kcm93bmFtZXMoZGFkb3MpW2dydXBvcyA9PSAxXQ0Kcm93bmFtZXMoZGFkb3MpW2dydXBvcyA9PSAyXQ0Kcm93bmFtZXMoZGFkb3MpW2dydXBvcyA9PSAzXQ0KI3Jvd25hbWVzKGRhZG9zKVtncnVwb3MgPT0gNF0NCiNyb3duYW1lcyhkYWRvcylbZ3J1cG9zID09IDVdDQoNCmBgYA0KDQoNCg0KPiBVc2FuZG8gbyBwYWNvdGUgZmFjdG9leHRyYQ0KDQpgYGB7ciwgZmlnLmhlaWdodD0xMiwgZmlnLndpZHRoPTE0fQ0KIyA/aGN1dA0KIyBlc3RlIHBhY290ZSBuw6NvIHRlbSBkaXN0IHF1YWQgRXVjbGlkaWFuYQ0KDQpyZXMgPC0gaGN1dChkYWRvcywgayAsIHN0YW5kID0gVFJVRSwgDQogICAgICAgICAgICBoY19mdW5jID0gImhjbHVzdCIsIA0KICAgICAgICAgICAgaGNfbWV0aG9kID0gIndhcmQuRDIiLCANCiAgICAgICAgICAgIGhjX21ldHJpYyA9ICJldWNsaWRlYW4iKQ0KDQpmdml6X2RlbmQocmVzLCByZWN0ID0gVCwgY2V4ID0gMC43LCBob3JpeiA9IFQpDQpmdml6X2NsdXN0ZXIocmVzKSArdGhlbWVfYncoKQ0KZnZpel9zaWxob3VldHRlKHJlcykrdGhlbWVfYncoKQ0KYGBgDQoNCmBgYHtyfQ0KDQojIEl04oCZcyBwb3NzaWJsZSB0byBjb21wYXJlIHNpbXVsdGFuZW91c2x5IG11bHRpcGxlIGRlbmRyb2dyYW1zLg0KIyBBIGNoYWluaW5nIG9wZXJhdG9yICU+JSAoYXZhaWxhYmxlIGluIGRlbmRleHRlbmQpIGlzIHVzZWQgdG8gcnVuIG11bHRpcGxlIGZ1bmN0aW9uIGF0IHRoZSBzYW1lIHRpbWUuIEl04oCZcyB1c2VmdWwgZm9yIHNpbXBsaWZ5aW5nIHRoZSBjb2RlOg0KDQojIENyZWF0ZSBtdWx0aXBsZSBkZW5kcm9ncmFtcyBieSBjaGFpbmluZw0KZGVuZDEgPC0gZGFkb3MgJT4lIGRpc3QoImV1Y2xpZGVhbiIpICU+JSBoY2x1c3QoImNvbXBsZXRlIikgJT4lIGFzLmRlbmRyb2dyYW0NCmRlbmQyIDwtIGRhZG9zICU+JSBkaXN0KCJldWNsaWRlYW4iKSAlPiUgaGNsdXN0KCJzaW5nbGUiKSAlPiUgYXMuZGVuZHJvZ3JhbQ0KZGVuZDMgPC0gZGFkb3MgJT4lIGRpc3QoImV1Y2xpZGVhbiIpICU+JSBoY2x1c3QoImF2ZXJhZ2UiKSAlPiUgYXMuZGVuZHJvZ3JhbQ0KZGVuZDQgPC0gZGFkb3MgJT4lIGRpc3QoImV1Y2xpZGVhbiIpICU+JSBoY2x1c3QoImNlbnRyb2lkIikgJT4lIGFzLmRlbmRyb2dyYW0NCmRlbmQ1IDwtIGRhZG9zICU+JSBkaXN0KCJldWNsaWRlYW4iKSAlPiUgaGNsdXN0KCJ3YXJkLkQyIikgJT4lIGFzLmRlbmRyb2dyYW0NCg0KIz9oY2x1c3QNCiMgQ29tcHV0ZSBjb3JyZWxhdGlvbiBtYXRyaXgNCmRlbmRfbGlzdCA8LSBkZW5kbGlzdCgiQ29tcGxldGUiID0gZGVuZDEsICJTaW5nbGUiID0gZGVuZDIsDQogICAgICAgICAgICAgICAgICAgICAgIkF2ZXJhZ2UiID0gZGVuZDMsICJDZW50cm9pZCIgPSBkZW5kNCwNCiAgICAgICAgICAgICAgICAgICAgICAid2FyZCI9IGRlbmQ1KQ0KDQojQ29tcHV0ZXMgdGhlIGNvcGhlbmV0aWMgZGlzdGFuY2VzIGZvciBhIGhpZXJhcmNoaWNhbCBjbHVzdGVyaW5nLg0KIyA/Y29yLmRlbmRsaXN0DQoNCmNvcnMgPC0gY29yLmRlbmRsaXN0KGRlbmRfbGlzdCkNCg0KIyBQcmludCBjb3JyZWxhdGlvbiBtYXRyaXgNCnJvdW5kKGNvcnMsIDIpDQoNCiMgVmlzdWFsaXplIHRoZSBjb3JyZWxhdGlvbiBtYXRyaXggdXNpbmcgY29ycnBsb3QgcGFja2FnZQ0KY29ycnBsb3QoY29ycywgIm51bWJlciIsICJsb3dlciIpDQpjb3JycGxvdChjb3JzLCAicGllIiwgImxvd2VyIikNCmBgYA0KDQoNCg0KDQo=