LIBRARY
library(rio)
library(magrittr)
library(polycor)
library(psych)
##
## Adjuntando el paquete: 'psych'
## The following object is masked from 'package:polycor':
##
## polyserial
library(matrixcalc)
library(GPArotation)
##
## Adjuntando el paquete: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
library(BBmisc)
##
## Adjuntando el paquete: 'BBmisc'
## The following object is masked from 'package:base':
##
## isFALSE
DATA:
# Read the analysis dataset from the Excel workbook (it carries the `provincia`
# column and the raw counts used further down).
# The workspace wipe (`rm(list = ls())`) that used to precede this call was
# removed: it ran after the packages were attached, does not give a clean
# session (attached packages and options survive it) and silently deletes any
# objects the caller already has. Restart R when a clean session is needed.
data <- import("dataOK_all.xlsx")
## New names:
## • `` -> `...1`
PORCENTAJES:
# Derived indicators, one value per row of `data`:
#   pct_AGUARED     - agua1_Red as a share of agua10_Total
#   pct_RAZONKPC    - ratio of the Keiko column to the Castillo column
#   tasa_fallecidos - covidFallecidos per 1000 covidPositivos
data$pct_AGUARED <- with(data, agua1_Red / agua10_Total)
data$pct_RAZONKPC <- with(data, Keiko / Castillo)
data$tasa_fallecidos <- with(data, 1000 * (covidFallecidos / covidPositivos))
# Drop the rows whose provincia is "LIMA"; every other row is kept.
data <- subset(data, !(provincia %in% "LIMA"))
DATA:
# Names of the three indicators that feed the cluster analysis.
select <- c("pct_RAZONKPC", "pct_AGUARED", "tasa_fallecidos")
# Keep only those columns, then preview the first ten rows.
theData <- data[, select]
head(theData, n = 10)
## pct_RAZONKPC pct_AGUARED tasa_fallecidos
## 1 0.4202271 0.4830430 56.85454
## 2 0.6220444 0.6252053 185.08997
## 3 0.6683045 0.7605543 129.25483
## 4 0.1099285 0.1345204 31.88739
## 5 0.6219261 0.5622011 192.98246
## 6 0.6892180 0.5902965 545.45455
## 7 0.5260536 0.4871039 89.62390
## 8 0.6077419 0.7475528 329.11392
## 9 0.1558544 0.8528790 574.07407
## 10 0.2891608 0.7132928 355.93220
NORMALIZAR:
#' Min-max rescale a vector to the [0, 1] interval.
#'
#' The smallest observed value maps to 0 and the largest to 1. Missing values
#' are ignored when the range is computed (by default) and stay `NA` in the
#' result, so a single `NA` no longer turns the whole column into `NA`.
#'
#' @param x A numeric vector.
#' @param na.rm Ignore `NA` values when computing the range? Defaults to
#'   `TRUE`; with `FALSE` any `NA` in `x` makes every result `NA`.
#' @return A numeric vector of the same length as `x`. A vector whose observed
#'   values are all equal is returned as zeros instead of `NaN` (0 / 0); a
#'   vector with no observed values is returned as all `NA`.
normalize <- function(x, na.rm = TRUE) {
  # Nothing to rescale: empty input or only missing values.
  if (!any(!is.na(x))) {
    return(rep(NA_real_, length(x)))
  }
  rng <- range(x, na.rm = na.rm)
  span <- rng[2] - rng[1]
  # A constant vector has zero range; dividing by 1 yields zeros, not NaN.
  if (!is.na(span) && span == 0) {
    span <- 1
  }
  (x - rng[1]) / span
}
# Rescale every indicator column with normalize() and gather the results in a
# new data frame.
theDataa <- theData %>%
  lapply(normalize) %>%
  as.data.frame()
DATA PARA CLUSTER:
# Clustering input: a copy of the rescaled indicators, with each row labelled
# by its province name.
dataClus <- theDataa
rownames(dataClus) <- data$provincia
# Pairwise Gower dissimilarities between the rows.
library(cluster)
g.dist <- daisy(dataClus, metric = "gower")
PAM
library(factoextra)
## Cargando paquete requerido: ggplot2
##
## Adjuntando el paquete: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Gap-statistic plot for choosing the number of PAM clusters (up to 10).
# `verbose` is spelled FALSE: `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
# NOTE(review): with method = "gap_stat", factoextra appears to hand only the
# data (not `diss`) to cluster::clusGap() - confirm whether g.dist is used here.
fviz_nbclust(
  dataClus, pam,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE
)
INTERPRETACION PAM:
# Partition the rows into 3 clusters with PAM on the Gower dissimilarities.
# `cluster.only` is spelled FALSE (`F` is a reassignable variable) so the full
# pam object, including silhouette information, is returned.
set.seed(123)
res.pam <- pam(g.dist, k = 3, cluster.only = FALSE)
# Store each row's PAM cluster label as a new column and preview the result.
dataClus$pam <- res.pam$cluster
head(dataClus)
## pct_RAZONKPC pct_AGUARED tasa_fallecidos pam
## BAGUA 0.13562051 0.5595250 0.023039387 1
## BONGARA 0.20722832 0.7252007 0.097345896 2
## CHACHAPOYAS 0.22364210 0.8829363 0.064992009 2
## CONDORCANQUI 0.02552194 0.1533573 0.008572075 1
## LUYA 0.20718633 0.6517758 0.101919224 2
## RODRÍGUEZ DE MENDOZA 0.23106252 0.6845180 0.306160516 2
# Poorly clustered provinces under PAM: rows with a negative silhouette width,
# listed in alphabetical order.
silPAM <- data.frame(res.pam$silinfo$widths)
silPAM[["provincia"]] <- rownames(silPAM)
poorPAM <- sort(silPAM[silPAM$sil_width < 0, "provincia"])
poorPAM
## [1] "ACOBAMBA" "ANDAHUAYLAS" "ATALAYA" "CANGALLO"
## [5] "CANTA" "CAYLLOMA" "CHANCHAMAYO" "CHINCHA"
## [9] "FERREÑAFE" "HUAROCHIRÍ" "HUAYLAS" "ICA"
## [13] "LAMAS" "LORETO" "MARISCAL CÁCERES" "OTUZCO"
## [17] "OYÓN" "PALPA" "PICOTA" "PISCO"
## [21] "SAN MARCOS" "SAN MARTÍN" "SÁNCHEZ CARRIÓN" "SANTA"
## [25] "TAYACAJA" "VILCAS HUAMÁN"
AGNES:
# Gap-statistic plot for choosing the number of AGNES clusters.
# Only the indicator columns are passed: by this point dataClus can also carry
# cluster-label columns (e.g. `pam`), and the gap statistic would otherwise
# treat those labels as features when it re-clusters and resamples the data.
# `verbose` is spelled FALSE because `F` is a reassignable variable.
fviz_nbclust(
  dataClus[, names(theDataa), drop = FALSE], hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "agnes"
)
# Agglomerative clustering (agnes, Ward linkage) cut into 5 groups.
# NOTE(review): k = 5 was chosen from the earlier plot; recheck it against the
# plot produced from the indicator columns only.
res.agnes <- hcut(g.dist, k = 5, hc_func = "agnes", hc_method = "ward.D")
# Store each row's AGNES cluster label as a new column.
dataClus$agnes <- res.agnes$cluster
# Poorly clustered provinces: negative silhouette width, alphabetical order.
silAGNES <- data.frame(res.agnes$silinfo$widths)
silAGNES$provincia <- row.names(silAGNES)
poorAGNES <- silAGNES[silAGNES$sil_width < 0, "provincia"] %>% sort()
poorAGNES
## [1] "CAJABAMBA" "CAJATAMBO" "CARAVELÍ"
## [4] "CHINCHEROS" "CORONEL PORTILLO" "HUÁNUCO"
## [7] "HUAYTARÁ" "JAÉN" "JAUJA"
## [10] "LA UNIÓN" "LEONCIO PRADO" "LORETO"
## [13] "LUYA" "MANU" "NAZCA"
## [16] "OCROS" "OTUZCO" "OXAPAMPA"
## [19] "PACHITEA" "SÁNCHEZ CARRIÓN" "SANTIAGO DE CHUCO"
## [22] "SUCRE" "TARATA" "TARMA"
DIANA
# Gap-statistic plot for choosing the number of DIANA clusters.
# Only the indicator columns are passed: by this point dataClus can also carry
# cluster-label columns (e.g. `pam`, `agnes`), and the gap statistic would
# otherwise treat those labels as features when it re-clusters and resamples
# the data. `verbose` is spelled FALSE because `F` is a reassignable variable.
fviz_nbclust(
  dataClus[, names(theDataa), drop = FALSE], hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "diana"
)
# Divisive clustering (diana) cut into 4 groups.
# NOTE(review): k = 4 was chosen from the earlier plot; recheck it against the
# plot produced from the indicator columns only.
res.diana <- hcut(g.dist, k = 4, hc_func = "diana")
# Store each row's DIANA cluster label as a new column.
dataClus$diana <- res.diana$cluster
# Poorly clustered provinces: negative silhouette width, alphabetical order.
silDIANA <- data.frame(res.diana$silinfo$widths)
silDIANA$provincia <- row.names(silDIANA)
poorDIANA <- silDIANA[silDIANA$sil_width < 0, "provincia"] %>% sort()
poorDIANA
## [1] "HUACAYBAMBA" "LA MAR" "LEONCIO PRADO" "LORETO"
## [5] "PASCO" "PIURA" "TRUJILLO" "TUMBES"
PAM
# Evaluate: silhouette plot for the PAM solution, without the printed summary.
# FALSE is used instead of `F`, which is a variable that can be reassigned.
fviz_silhouette(res.pam, print.summary = FALSE)
DIANA
# Silhouette plot for the DIANA solution, without the printed summary.
# FALSE is used instead of `F`, which is a variable that can be reassigned.
fviz_silhouette(res.diana, print.summary = FALSE)
AGNES:
# Silhouette plot for the AGNES solution, without the printed summary.
# FALSE is used instead of `F`, which is a variable that can be reassigned.
fviz_silhouette(res.agnes, print.summary = FALSE)