library(rio)
# Read the full workbook; rio::import picks the reader from the extension.
data2 <- import("dataOK_all.xlsx")
## New names:
## • `` -> `...1`
# Keep only the clustering variables, selected by position (columns 44-50).
# NOTE(review): positional selection breaks silently if the sheet layout
# changes -- confirm these positions against the workbook.
dataClus <- data2[, 44:50]
# Use the unnamed first column of the sheet (auto-named `...1` on import)
# as row labels.
row.names(dataClus) <- data2[["...1"]]
str(dataClus)
## 'data.frame': 196 obs. of 7 variables:
## $ departamento : chr "AMAZONAS" "AMAZONAS" "AMAZONAS" "AMAZONAS" ...
## $ provincia : chr "BAGUA" "BONGARA" "CHACHAPOYAS" "CONDORCANQUI" ...
## $ Castillo : num 25629 8374 15671 13154 12606 ...
## $ Keiko : num 10770 5209 10473 1446 7840 ...
## $ ganaCastillo : num 1 1 1 1 1 1 1 1 1 1 ...
## $ covidPositivos : num 8126 389 2174 3481 456 ...
## $ covidFallecidos: num 462 72 281 111 88 60 336 26 31 21 ...
# Encode the two text columns as factors so daisy() treats them as nominal
# variables when building the Gower dissimilarity.
dataClus$departamento <- as.factor(dataClus$departamento)
dataClus$provincia <- as.factor(dataClus$provincia)

# Drop incomplete rows BEFORE computing the dissimilarity matrix. Doing it
# afterwards (as the script originally did) would leave g.dist with more
# observations than dataClus whenever a row is removed, so the cluster labels
# assigned later would not line up with the data frame.
dataClus <- na.omit(dataClus)

library(cluster)
# Pairwise Gower dissimilarities over all seven columns (mixed types).
# NOTE(review): per the str() output below, provincia has as many levels as
# there are rows, so it looks like a pure identifier -- confirm it should be
# part of the distance at all.
g.dist <- daisy(dataClus, metric = "gower")
## Warning in daisy(dataClus, metric = "gower"): binary variable(s) 5 treated as
## interval scaled
str(dataClus)
## 'data.frame': 196 obs. of 7 variables:
## $ departamento : Factor w/ 25 levels "AMAZONAS","ANCASH",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ provincia : Factor w/ 196 levels "ABANCAY","ACOBAMBA",..: 19 24 46 59 116 157 187 4 11 14 ...
## $ Castillo : num 25629 8374 15671 13154 12606 ...
## $ Keiko : num 10770 5209 10473 1446 7840 ...
## $ ganaCastillo : num 1 1 1 1 1 1 1 1 1 1 ...
## $ covidPositivos : num 8126 389 2174 3481 456 ...
## $ covidFallecidos: num 462 72 281 111 88 60 336 26 31 21 ...
# Replace the factors by their integer level codes so that the data frame
# handed to later steps is fully numeric. g.dist above is unaffected.
dataClus$departamento <- as.numeric(dataClus$departamento)
dataClus$provincia <- as.numeric(dataClus$provincia)
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.2
## Cargando paquete requerido: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Suggest a number of clusters (1..10) for PAM using the gap statistic.
# The gap statistic relies on bootstrap reference samples, so fix the RNG
# seed here; otherwise the plot can change from run to run.
# NOTE(review): per the factoextra docs `diss` is used by the silhouette/wss
# methods -- confirm it has any effect with method = "gap_stat".
set.seed(123)
fviz_nbclust(
  dataClus, pam,
  diss = g.dist, method = "gap_stat", k.max = 10, verbose = FALSE
)

# Number of clusters used by every method below.
K <- 3
# Seed kept from the original script so any randomness downstream is fixed.
set.seed(123)
# Partition around medoids on the precomputed Gower dissimilarities;
# cluster.only = FALSE returns the full pam object (needed for silhouettes).
res.pam <- pam(g.dist, k = K, cluster.only = FALSE)
# Store the cluster label of each observation. Use the component's exact
# name (`clustering`) instead of relying on `$` partial matching of `cluster`.
dataClus$pam <- res.pam$clustering
head(dataClus)
## departamento provincia Castillo Keiko ganaCastillo covidPositivos
## 1 1 19 25629 10770 1 8126
## 2 1 24 8374 5209 1 389
## 3 1 46 15671 10473 1 2174
## 4 1 59 13154 1446 1 3481
## 5 1 116 12606 7840 1 456
## 6 1 157 7967 5491 1 110
## covidFallecidos pam
## 1 462 1
## 2 72 2
## 3 281 2
## 4 111 2
## 5 88 2
## 6 60 2
# Agglomerative hierarchical clustering (AGNES) on the Gower dissimilarities,
# cut into K groups; labels are stored next to the data.
res.agnes <- hcut(
  g.dist,
  k = K,
  hc_func = "agnes",
  hc_method = "ward.D"
)
dataClus[["agnes"]] <- res.agnes[["cluster"]]

# Divisive hierarchical clustering (DIANA) on the same dissimilarities,
# also cut into K groups.
res.diana <- hcut(
  g.dist,
  k = K,
  hc_func = "diana"
)
dataClus[["diana"]] <- res.diana[["cluster"]]
# Silhouette plots for the three solutions (PAM, AGNES, DIANA), without the
# per-cluster text summary. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
fviz_silhouette(res.pam, print.summary = FALSE)

fviz_silhouette(res.agnes, print.summary = FALSE)

fviz_silhouette(res.diana, print.summary = FALSE)
