#PREGUNTA 2
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(rio)
# Read the workbook through rio (loaded above); the two lines that follow are
# the console messages recorded when the file was imported.
data2 <- import("dataOK_all.xlsx")
## New names:
## • `` -> `...1`
## New indices
# agua1_Red as a fraction of agua10_Total.
data2[["porcaguared"]] <- data2$agua1_Red / data2$agua10_Total
# Keiko divided by Castillo.
data2[["keicas"]] <- data2$Keiko / data2$Castillo
# countFallecidos per 1000 countPositivos.
# NOTE(review): the column name says "x10000" but the multiplier is 1000 --
# confirm which one is intended before reporting this value in original units.
data2[["fallecido_x10000POS"]] <- (data2$countFallecidos / data2$countPositivos) * 1000
## Prepare for analysis
# Identifier column first, followed by the three derived indices.
keptForCluster <- c("key", "porcaguared", "keicas", "fallecido_x10000POS")
data2clus <- data2[, keptForCluster]
# Leave out the row whose key is exactly "LIMA+LIMA".
data2clus <- data2clus[data2clus$key != "LIMA+LIMA", ]
# Work on a copy: every column except the first (the key) is replaced by the
# result of BBmisc::normalize(method = "standardize").
data2clus1 <- data2clus
data2clus1[, -1] <- BBmisc::normalize(data2clus[, -1], method = "standardize")
## Clustering
# Drop the first column (the key) and reuse the key values as row names.
data2Clus2 <- data2clus1[, -1]
rownames(data2Clus2) <- data2clus1$key
# Keep only the rows that have no missing value in any column.
data2Clus2 <- data2Clus2[complete.cases(data2Clus2), ]
# Pairwise dissimilarities between the remaining rows, Gower metric.
g.dist <- cluster::daisy(x = data2Clus2, metric = "gower")
### Agglomerative hierarchical clustering (AGNES)
# Plot the gap statistic for 1..10 clusters to help choose k.
# NOTE(review): with method = "gap_stat", fviz_nbclust seems to cluster the
# data passed as `x` directly rather than using `diss` -- confirm against the
# factoextra documentation.
fviz_nbclust(data2Clus2, hcut, diss = g.dist, method = "gap_stat", k.max = 10, verbose = FALSE, hc_func = "agnes")
# Cut the AGNES tree built on the Gower distances into 5 clusters.
res.agnes1 <- hcut(g.dist, k = 5, hc_func = "agnes", hc_method = "ward.D")
# Store each row's cluster label next to the indicators.
data2Clus2$agnes <- res.agnes1$cluster
# Silhouette table with the row names (the keys) exposed as a column.
silAGNES <- data.frame(res.agnes1$silinfo$widths)
silAGNES$key <- row.names(silAGNES)
# Keys whose silhouette width is negative, in alphabetical order.
poorAGNES <- silAGNES[silAGNES$sil_width < 0, "key"] |>
  sort()
poorAGNES
## [1] "AMAZONAS+LUYA" "ANCASH+OCROS"
## [3] "APURIMAC+CHINCHEROS" "AREQUIPA+CARAVELI"
## [5] "AREQUIPA+LA UNION" "AYACUCHO+SUCRE"
## [7] "CAJAMARCA+CAJABAMBA" "CAJAMARCA+JAEN"
## [9] "HUANCAVELICA+HUAYTARA" "HUANUCO+HUANUCO"
## [11] "HUANUCO+LEONCIO PRADO" "HUANUCO+PACHITEA"
## [13] "ICA+NAZCA" "JUNIN+JAUJA"
## [15] "JUNIN+TARMA" "LA LIBERTAD+OTUZCO"
## [17] "LA LIBERTAD+SANCHEZ CARRION" "LA LIBERTAD+SANTIAGO DE CHUCO"
## [19] "LIMA+CAJATAMBO" "LORETO+LORETO"
## [21] "MADRE DE DIOS+MANU" "PASCO+OXAPAMPA"
## [23] "TACNA+TARATA" "UCAYALI+CORONEL PORTILLO"
### Divisive hierarchical clustering (DIANA)
# Plot the gap statistic for 1..10 clusters to help choose k.
# Only the indicator columns are passed: by this point data2Clus2 also holds
# the `agnes` label column, and labels from a previous clustering must not be
# treated as input variables.
# NOTE(review): with method = "gap_stat", fviz_nbclust seems to cluster the
# data passed as `x` directly rather than using `diss` -- confirm against the
# factoextra documentation.
fviz_nbclust(data2Clus2[, keptForCluster[-1]], hcut, diss = g.dist, method = "gap_stat", k.max = 10, verbose = FALSE, hc_func = "diana")
set.seed(123)
# Cut the DIANA tree built on the Gower distances into 5 clusters.
res.diana1 <- hcut(g.dist, k = 5, hc_func = "diana")
# Store each row's cluster label next to the indicators.
data2Clus2$diana <- res.diana1$cluster
# Silhouette table with the row names (the keys) exposed as a column.
silDIANA <- data.frame(res.diana1$silinfo$widths)
silDIANA$key <- row.names(silDIANA)
# Keys whose silhouette width is negative, in alphabetical order.
poorDIANA <- silDIANA[silDIANA$sil_width < 0, "key"] |>
  sort()
poorDIANA
## [1] "AYACUCHO+LA MAR" "HUANUCO+HUACAYBAMBA" "HUANUCO+LEONCIO PRADO"
## [4] "LA LIBERTAD+ASCOPE" "LAMBAYEQUE+CHICLAYO" "PASCO+PASCO"
## [7] "PIURA+PAITA" "PIURA+SECHURA"
### Partitioning (PAM)
library(cluster)
library(factoextra)
# Cluster on the indicator columns only. By this point data2Clus2 also holds
# the `agnes` and `diana` label columns; recomputing the distances from the
# whole data frame would let earlier cluster assignments drive this one.
pamInput <- data2Clus2[, keptForCluster[-1]]
g.dist <- daisy(pamInput, metric = "gower")
# Plot the gap statistic for 1..10 clusters to help choose k.
# NOTE(review): with method = "gap_stat", fviz_nbclust seems to cluster the
# data passed as `x` directly rather than using `diss` -- confirm against the
# factoextra documentation.
fviz_nbclust(pamInput, pam, diss = g.dist, method = "gap_stat", k.max = 10, verbose = FALSE)
set.seed(123)
# Partition the rows into 5 clusters around medoids using the Gower distances.
res.pam1 <- pam(g.dist, 5, cluster.only = FALSE)
# Store each row's cluster label next to the indicators.
data2Clus2$pam <- res.pam1$cluster
# Silhouette table with the row names (the keys) exposed as a column. The
# column is called `key`, as in the AGNES and DIANA tables, so the selection
# below finds it.
silPAM <- data.frame(res.pam1$silinfo$widths)
silPAM$key <- row.names(silPAM)
# Keys whose silhouette width is negative, in alphabetical order.
poorPAM <- silPAM[silPAM$sil_width < 0, "key"] |>
  sort()
poorPAM
# The output recorded here used to be `## NULL`: the key had been stored in a
# column named `country`, so selecting `key` returned NULL. Re-run this
# section to record the actual list.