library(rio)
# Read the workbook. The import emits a "New names" message because the
# unnamed first column is renamed to `...1`.
data2 <- import("dataOK_all.xlsx")

# Keep columns 44 to 50 as the clustering data set and label every row with
# the corresponding value of the `...1` column.
dataClus <- data2[, 44:50]
rownames(dataClus) <- data2[["...1"]]
str(dataClus)
## 'data.frame':    196 obs. of  7 variables:
##  $ departamento   : chr  "AMAZONAS" "AMAZONAS" "AMAZONAS" "AMAZONAS" ...
##  $ provincia      : chr  "BAGUA" "BONGARA" "CHACHAPOYAS" "CONDORCANQUI" ...
##  $ Castillo       : num  25629 8374 15671 13154 12606 ...
##  $ Keiko          : num  10770 5209 10473 1446 7840 ...
##  $ ganaCastillo   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ covidPositivos : num  8126 389 2174 3481 456 ...
##  $ covidFallecidos: num  462 72 281 111 88 60 336 26 31 21 ...
# Encode the two character columns as factors so that daisy() treats them as
# nominal variables when building the Gower dissimilarity.
dataClus$departamento <- as.factor(dataClus$departamento)
dataClus$provincia <- as.factor(dataClus$provincia)

# Drop incomplete rows BEFORE computing the dissimilarity matrix. The matrix
# must describe exactly the rows that remain in `dataClus`; otherwise the
# cluster labels assigned back to `dataClus` further down would have a
# different length than the data frame.
dataClus <- na.omit(dataClus)

library(cluster)
# Gower dissimilarity over all seven columns (mixed factor / numeric data).
# NOTE(review): in the recorded run `provincia` has one level per row (196
# levels for 196 observations) and looks like an identifier; confirm whether
# it and `departamento` are meant to take part in the distance.
g.dist <- daisy(dataClus, metric = "gower")
## Warning in daisy(dataClus, metric = "gower"): binary variable(s) 5 treated as
## interval scaled
# Column 5 is `ganaCastillo`; the warning above reports that daisy() handled
# it as an interval-scaled variable.
str(dataClus)
# (output below was recorded from the original run)
## 'data.frame':    196 obs. of  7 variables:
##  $ departamento   : Factor w/ 25 levels "AMAZONAS","ANCASH",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ provincia      : Factor w/ 196 levels "ABANCAY","ACOBAMBA",..: 19 24 46 59 116 157 187 4 11 14 ...
##  $ Castillo       : num  25629 8374 15671 13154 12606 ...
##  $ Keiko          : num  10770 5209 10473 1446 7840 ...
##  $ ganaCastillo   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ covidPositivos : num  8126 389 2174 3481 456 ...
##  $ covidFallecidos: num  462 72 281 111 88 60 336 26 31 21 ...

# Replace the factors by their integer level codes so the data frame is fully
# numeric from here on (presumably required by the later call that receives
# `dataClus` directly -- TODO confirm). The codes carry no numeric meaning.
dataClus$departamento <- as.numeric(dataClus$departamento)
dataClus$provincia <- as.numeric(dataClus$provincia)
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.2
## Cargando paquete requerido: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Plot the gap statistic for PAM with up to `k.max = 10` clusters to help
# choose the number of clusters.
# NOTE(review): `diss` appears to be used by fviz_nbclust() only for the
# "silhouette" and "wss" methods; with "gap_stat" the statistic seems to be
# computed from `dataClus` itself rather than from `g.dist` -- confirm.
fviz_nbclust(
  dataClus, pam,
  diss = g.dist, method = "gap_stat", k.max = 10,
  verbose = FALSE # spelled out: `F` is an ordinary, reassignable variable
)

# Number of clusters requested from every clustering call that uses `K`.
K <- 3
set.seed(123)

# Partition around medoids using the precomputed dissimilarities; the full
# pam object is kept (cluster.only = FALSE).
res.pam <- pam(g.dist, k = K, cluster.only = FALSE)

# pam objects store the assignment vector in `clustering`. The previous
# `res.pam$cluster` only worked through `$` partial matching, so the
# component is now named in full.
dataClus$pam <- res.pam$clustering
head(dataClus)
##   departamento provincia Castillo Keiko ganaCastillo covidPositivos
## 1            1        19    25629 10770            1           8126
## 2            1        24     8374  5209            1            389
## 3            1        46    15671 10473            1           2174
## 4            1        59    13154  1446            1           3481
## 5            1       116    12606  7840            1            456
## 6            1       157     7967  5491            1            110
##   covidFallecidos pam
## 1             462   1
## 2              72   2
## 3             281   2
## 4             111   2
## 5              88   2
## 6              60   2
# Agglomerative hierarchical clustering on the precomputed dissimilarities,
# cut into K groups; the assignment is stored next to the data.
res.agnes <- hcut(
  g.dist,
  k = K,
  hc_func = "agnes",
  hc_method = "ward.D"
)
dataClus$agnes <- res.agnes$cluster

# Divisive hierarchical clustering, cut into the same number of groups.
res.diana <- hcut(
  g.dist,
  k = K,
  hc_func = "diana"
)
dataClus$diana <- res.diana$cluster
# Silhouette plot for each of the three clusterings, without the printed
# summary table. `FALSE` is spelled out because `F` is an ordinary variable
# that can be reassigned.
fviz_silhouette(res.pam, print.summary = FALSE)

fviz_silhouette(res.agnes, print.summary = FALSE)

fviz_silhouette(res.diana, print.summary = FALSE)