Pregunta 2

#PREGUNTA 2

library(factoextra)

## Loading required package: ggplot2

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(rio)

# Load the full data set from the Excel workbook in the working directory.
data2 <- rio::import(file = "dataOK_all.xlsx")

## New names:
## • `` -> `...1`

##Nuevos índices

# Derived ratios used as clustering features.
# NOTE(review): a zero denominator yields Inf/NaN; NaN rows are dropped later
# by complete.cases(), Inf rows are not -- confirm the inputs have no zeros.

# agua1_Red as a fraction of agua10_Total.
data2$porcaguared <- data2$agua1_Red / data2$agua10_Total

# Ratio of the Keiko column to the Castillo column.
data2$keicas <- data2$Keiko / data2$Castillo

# countFallecidos per 10000 countPositivos; the multiplier matches the
# "x10000" in the column name.
data2$fallecido_x10000POS <- 10000 * (data2$countFallecidos / data2$countPositivos)

##Preparar para análisis

# Keep the identifier column plus the three derived ratios.
keptForCluster <- c("key", "porcaguared", "keicas", "fallecido_x10000POS")
data2clus <- data2[, keptForCluster]

# Exclude the LIMA+LIMA row from the analysis.
data2clus <- data2clus[data2clus$key != "LIMA+LIMA", ]

# Standardize every column except the first (the key) on a working copy.
data2clus1 <- data2clus
data2clus1[, -1] <- BBmisc::normalize(data2clus[, -1], method = "standardize")

##Clusterizar

# Feature-only frame: drop the key column and use it as row names instead,
# so observations stay identifiable in the clustering output.
data2Clus2 <- data2clus1[, -1]
rownames(data2Clus2) <- data2clus1$key

# Keep only rows with no missing values.
data2Clus2 <- data2Clus2[complete.cases(data2Clus2), ]

# Pairwise Gower dissimilarities between the remaining observations.
g.dist <- cluster::daisy(data2Clus2, metric = "gower")

###jerarquica aglomerativa

# Agglomerative hierarchical clustering (AGNES).

# Use only the feature columns, so cluster labels stored in data2Clus2 by a
# previous run of this script are never fed back into the gap statistic.
featureCols <- setdiff(names(data2Clus2), c("agnes", "diana", "pam"))

# The gap statistic is bootstrap-based; seed it so the suggested k is
# reproducible.
set.seed(123)
fviz_nbclust(
  data2Clus2[, featureCols, drop = FALSE], hcut,
  diss = g.dist, method = "gap_stat", k.max = 10,
  verbose = FALSE, hc_func = "agnes"
)

# Cut the AGNES tree into 5 clusters and store the labels.
res.agnes1 <- hcut(g.dist, k = 5, hc_func = "agnes", hc_method = "ward.D")

data2Clus2$agnes <- res.agnes1$cluster

# Observations with a negative silhouette width, sorted by key.
silAGNES <- data.frame(res.agnes1$silinfo$widths)
silAGNES$key <- row.names(silAGNES)
poorAGNES <- silAGNES[silAGNES$sil_width < 0, "key"] |>
  sort()
poorAGNES

##  [1] "AMAZONAS+LUYA"                 "ANCASH+OCROS"                 
##  [3] "APURIMAC+CHINCHEROS"           "AREQUIPA+CARAVELI"            
##  [5] "AREQUIPA+LA UNION"             "AYACUCHO+SUCRE"               
##  [7] "CAJAMARCA+CAJABAMBA"           "CAJAMARCA+JAEN"               
##  [9] "HUANCAVELICA+HUAYTARA"         "HUANUCO+HUANUCO"              
## [11] "HUANUCO+LEONCIO PRADO"         "HUANUCO+PACHITEA"             
## [13] "ICA+NAZCA"                     "JUNIN+JAUJA"                  
## [15] "JUNIN+TARMA"                   "LA LIBERTAD+OTUZCO"           
## [17] "LA LIBERTAD+SANCHEZ CARRION"   "LA LIBERTAD+SANTIAGO DE CHUCO"
## [19] "LIMA+CAJATAMBO"                "LORETO+LORETO"                
## [21] "MADRE DE DIOS+MANU"            "PASCO+OXAPAMPA"               
## [23] "TACNA+TARATA"                  "UCAYALI+CORONEL PORTILLO"

###divisiva

# Divisive hierarchical clustering (DIANA).

# data2Clus2 may already hold cluster-label columns (agnes/diana/pam); they
# are results, not features, so exclude them from the gap-statistic input.
featureCols <- setdiff(names(data2Clus2), c("agnes", "diana", "pam"))

# Seed before the bootstrap-based gap statistic so the suggested k is
# reproducible.
set.seed(123)
fviz_nbclust(
  data2Clus2[, featureCols, drop = FALSE], hcut,
  diss = g.dist, method = "gap_stat", k.max = 10,
  verbose = FALSE, hc_func = "diana"
)

# Cut the DIANA tree into 5 clusters and store the labels.
res.diana1 <- hcut(g.dist, k = 5, hc_func = "diana")
data2Clus2$diana <- res.diana1$cluster

# Observations with a negative silhouette width, sorted by key.
silDIANA <- data.frame(res.diana1$silinfo$widths)
silDIANA$key <- row.names(silDIANA)
poorDIANA <- silDIANA[silDIANA$sil_width < 0, "key"] |>
  sort()
poorDIANA

## [1] "AYACUCHO+LA MAR"       "HUANUCO+HUACAYBAMBA"   "HUANUCO+LEONCIO PRADO"
## [4] "LA LIBERTAD+ASCOPE"    "LAMBAYEQUE+CHICLAYO"   "PASCO+PASCO"          
## [7] "PIURA+PAITA"           "PIURA+SECHURA"

###partición

# Partitioning clustering (PAM).
library(cluster)
library(factoextra)

# data2Clus2 may already hold cluster-label columns (agnes/diana/pam). They
# are results, not features: keep them out of both the distance matrix and
# the gap-statistic input, otherwise PAM clusters on earlier assignments.
featureCols <- setdiff(names(data2Clus2), c("agnes", "diana", "pam"))
g.dist <- daisy(data2Clus2[, featureCols, drop = FALSE], metric = "gower")

# Seed before the bootstrap-based gap statistic so the suggested k is
# reproducible.
set.seed(123)
fviz_nbclust(
  data2Clus2[, featureCols, drop = FALSE], pam,
  diss = g.dist, method = "gap_stat", k.max = 10, verbose = FALSE
)

# Partition into 5 clusters around medoids and store the labels.
res.pam1 <- pam(g.dist, 5, cluster.only = FALSE)

data2Clus2$pam <- res.pam1$cluster

# Observations with a negative silhouette width, sorted by key. The label
# column is named `key` so the lookup below finds it.
silPAM <- data.frame(res.pam1$silinfo$widths)
silPAM$key <- row.names(silPAM)
poorPAM <- silPAM[silPAM$sil_width < 0, "key"] |>
  sort()
poorPAM

## NULL

Pregunta 2

Sebastián Pérez

2024-06-26