rm(list = ls())
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rio)
library(magrittr)
library(polycor)
library(psych)
##
## Adjuntando el paquete: 'psych'
## The following object is masked from 'package:polycor':
##
## polyserial
library(matrixcalc)
library(GPArotation)
##
## Adjuntando el paquete: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
library(BBmisc)
##
## Adjuntando el paquete: 'BBmisc'
## The following objects are masked from 'package:dplyr':
##
## coalesce, collapse, symdiff
## The following object is masked from 'package:base':
##
## isFALSE
library(ggcorrplot)
## Cargando paquete requerido: ggplot2
##
## Adjuntando el paquete: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(knitr)
library(modelsummary)
##
## Adjuntando el paquete: 'modelsummary'
## The following object is masked from 'package:psych':
##
## SD
library(kableExtra)
##
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(car)
## Cargando paquete requerido: carData
##
## Adjuntando el paquete: 'car'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
library(MASS)
## Warning: package 'MASS' was built under R version 4.4.1
##
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(readxl)
dataok <- read_excel("C:/Users/JHOJAN/Downloads/dataOK_all (2).xlsx")
dataok <- dataok %>%
mutate(Razon_votacion = Keiko / Castillo)
dataok <- dataok %>%
mutate(Porcentaje_agua_red = (agua1_Red / agua10_Total) * 100)
dataok <- dataok %>%
mutate(Tasa_fallecidos_por_1000 = (countFallecidos / countPositivos) * 1000)
boxplot(dataok[,c(47:49)],horizontal = F,las=2,cex.axis = 0.5)

boxplot(normalize(dataok[,c(47:49)],method='range',range=c(0,10)))

boxplot(normalize(dataok[,c(47:49)],method='standardize'))

dataok[,c(47:49)]=normalize(dataok[,c(47:49)],method='standardize')
cor(dataok[,c(47:49)])
## Razon_votacion Porcentaje_agua_red
## Razon_votacion 1.00000000 0.1195803
## Porcentaje_agua_red 0.11958032 1.0000000
## Tasa_fallecidos_por_1000 -0.09694139 0.1035734
## Tasa_fallecidos_por_1000
## Razon_votacion -0.09694139
## Porcentaje_agua_red 0.10357342
## Tasa_fallecidos_por_1000 1.00000000
dataClus= dataok[,c(47:49)]
row.names(dataClus)=dataok$provincia
## Warning: Setting row names on a tibble is deprecated.
library(cluster)
g.dist = daisy(dataClus, metric="gower")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_nbclust(dataClus, pam,diss=g.dist,method = "gap_stat",k.max = 10,verbose = F)

library(kableExtra)
set.seed(123)
res.pam=pam(g.dist,5,cluster.only = F)
#nueva columna
dataClus$pam=res.pam$cluster
# ver
head(dataClus,15)%>%kbl()%>%kable_styling()
|
Razon_votacion
|
Porcentaje_agua_red
|
Tasa_fallecidos_por_1000
|
pam
|
|
-0.3739358
|
-0.3519838
|
-1.1415785
|
1
|
|
-0.0198897
|
0.3924045
|
-0.5365194
|
1
|
|
0.0612639
|
1.1011178
|
-0.7999689
|
2
|
|
-0.9182894
|
-2.1769143
|
-1.2593821
|
3
|
|
-0.0200974
|
0.0625030
|
-0.4992799
|
1
|
|
0.0979522
|
0.2096156
|
1.1638051
|
1
|
|
-0.1882854
|
-0.3307202
|
-0.9869613
|
1
|
|
-0.0449805
|
1.0330394
|
0.1430353
|
2
|
|
-0.8377221
|
1.5845474
|
1.2988419
|
2
|
|
-0.6038640
|
0.8536475
|
0.2695732
|
2
|
|
-0.2000002
|
0.9049263
|
0.4619025
|
2
|
|
-0.3087639
|
0.9450800
|
-0.0165583
|
2
|
|
-0.6504380
|
0.5876363
|
1.4548719
|
2
|
|
1.2963666
|
0.2832239
|
0.3638289
|
4
|
|
0.0669980
|
1.0542571
|
1.0130963
|
2
|
fviz_silhouette(res.pam,print.summary = F)

silPAM=data.frame(res.pam$silinfo$widths)
silPAM$country=row.names(silPAM)
poorPAM=silPAM[silPAM$sil_width<0,'country']%>%sort()
poorPAM
## [1] "ACOBAMBA" "AMBO" "ANDAHUAYLAS"
## [4] "AYMARAES" "CANGALLO" "CARAVELĆ"
## [7] "CASTROVIRREYNA" "CAYLLOMA" "CORONEL PORTILLO"
## [10] "DATEM DEL MARAĆĆN" "HUAMALĆES" "HUANCAVELICA"
## [13] "HUANTA" "JAUJA" "OCROS"
## [16] "OXAPAMPA" "PUERTO INCA" "SAN MIGUEL"
## [19] "SANTA CRUZ" "TARMA" "UCAYALI"
## [22] "VILCAS HUAMĆN" "YAUYOS"
aggregate(.~ pam, data=dataClus,mean)
## pam Razon_votacion Porcentaje_agua_red Tasa_fallecidos_por_1000
## 1 1 -0.06658091 -0.03680525 -0.234018075
## 2 2 -0.40093916 0.91601596 0.004838563
## 3 3 -0.75415394 -0.93152842 0.166831221
## 4 4 1.35969919 0.77707759 0.228045629
## 5 5 1.92230620 -1.91647793 -0.895573982
dataClus2= dataok[,c(47:49)]
row.names(dataClus2)=dataok$provincia
## Warning: Setting row names on a tibble is deprecated.
res.agnes<- hcut(g.dist, k = 5,hc_func='agnes',hc_method = "ward.D")
dataClus$agnes=res.agnes$cluster
# ver
head(dataClus,15)%>%kbl()%>%kable_styling()
|
Razon_votacion
|
Porcentaje_agua_red
|
Tasa_fallecidos_por_1000
|
pam
|
agnes
|
|
-0.3739358
|
-0.3519838
|
-1.1415785
|
1
|
1
|
|
-0.0198897
|
0.3924045
|
-0.5365194
|
1
|
2
|
|
0.0612639
|
1.1011178
|
-0.7999689
|
2
|
2
|
|
-0.9182894
|
-2.1769143
|
-1.2593821
|
3
|
3
|
|
-0.0200974
|
0.0625030
|
-0.4992799
|
1
|
2
|
|
0.0979522
|
0.2096156
|
1.1638051
|
1
|
2
|
|
-0.1882854
|
-0.3307202
|
-0.9869613
|
1
|
1
|
|
-0.0449805
|
1.0330394
|
0.1430353
|
2
|
2
|
|
-0.8377221
|
1.5845474
|
1.2988419
|
2
|
2
|
|
-0.6038640
|
0.8536475
|
0.2695732
|
2
|
2
|
|
-0.2000002
|
0.9049263
|
0.4619025
|
2
|
2
|
|
-0.3087639
|
0.9450800
|
-0.0165583
|
2
|
2
|
|
-0.6504380
|
0.5876363
|
1.4548719
|
2
|
2
|
|
1.2963666
|
0.2832239
|
0.3638289
|
4
|
4
|
|
0.0669980
|
1.0542571
|
1.0130963
|
2
|
2
|
fviz_dend(res.agnes, cex = 0.7, horiz = T,main = "")
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ā¹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

fviz_silhouette(res.agnes,print.summary = F)

silAGNES=data.frame(res.agnes$silinfo$widths)
silAGNES$country=row.names(silAGNES)
poorAGNES=silAGNES[silAGNES$sil_width<0,'country']%>%sort()
poorAGNES
## [1] "CAJABAMBA" "CAJATAMBO" "CARAVELĆ"
## [4] "CHINCHEROS" "CORONEL PORTILLO" "HUĆNUCO"
## [7] "HUAYTARĆ" "JAĆN" "JAUJA"
## [10] "LA UNIĆN" "LEONCIO PRADO" "LORETO"
## [13] "LUYA" "MANU" "NAZCA"
## [16] "OCROS" "OTUZCO" "OXAPAMPA"
## [19] "PACHITEA" "SĆNCHEZ CARRIĆN" "SANTIAGO DE CHUCO"
## [22] "SUCRE" "TARATA" "TARMA"
aggregate(.~ agnes, data=dataClus,mean)
## agnes Razon_votacion Porcentaje_agua_red Tasa_fallecidos_por_1000 pam
## 1 1 -0.67877141 -0.3433202 -0.140302752 2.355932
## 2 2 -0.09602983 0.6193371 0.182623721 1.855263
## 3 3 -0.43942801 -1.6757364 -0.015876912 3.160000
## 4 4 1.47394814 0.8505111 -0.005452738 4.000000
## 5 5 2.82260736 -2.0871298 -1.008207099 5.000000
fviz_nbclust(dataClus2, hcut,diss=g.dist,method = "gap_stat",k.max = 10,verbose = F,hc_func = "agnes")

set.seed(123)
res.diana <- hcut(g.dist, k = 5,hc_func='diana')
dataClus$diana=res.diana$cluster
# veamos
head(dataClus,15)%>%kbl%>%kable_styling()
|
Razon_votacion
|
Porcentaje_agua_red
|
Tasa_fallecidos_por_1000
|
pam
|
agnes
|
diana
|
|
-0.3739358
|
-0.3519838
|
-1.1415785
|
1
|
1
|
1
|
|
-0.0198897
|
0.3924045
|
-0.5365194
|
1
|
2
|
2
|
|
0.0612639
|
1.1011178
|
-0.7999689
|
2
|
2
|
2
|
|
-0.9182894
|
-2.1769143
|
-1.2593821
|
3
|
3
|
1
|
|
-0.0200974
|
0.0625030
|
-0.4992799
|
1
|
2
|
2
|
|
0.0979522
|
0.2096156
|
1.1638051
|
1
|
2
|
2
|
|
-0.1882854
|
-0.3307202
|
-0.9869613
|
1
|
1
|
1
|
|
-0.0449805
|
1.0330394
|
0.1430353
|
2
|
2
|
2
|
|
-0.8377221
|
1.5845474
|
1.2988419
|
2
|
2
|
2
|
|
-0.6038640
|
0.8536475
|
0.2695732
|
2
|
2
|
2
|
|
-0.2000002
|
0.9049263
|
0.4619025
|
2
|
2
|
2
|
|
-0.3087639
|
0.9450800
|
-0.0165583
|
2
|
2
|
2
|
|
-0.6504380
|
0.5876363
|
1.4548719
|
2
|
2
|
2
|
|
1.2963666
|
0.2832239
|
0.3638289
|
4
|
4
|
2
|
|
0.0669980
|
1.0542571
|
1.0130963
|
2
|
2
|
2
|
fviz_dend(res.diana, cex = 0.7, horiz = T, main = "")

fviz_silhouette(res.diana,print.summary = F)

silDIANA=data.frame(res.diana$silinfo$widths)
silDIANA$country=row.names(silDIANA)
poorDIANA=silDIANA[silDIANA$sil_width<0,'country']%>%sort()
poorDIANA
## [1] "ASCOPE" "CHICLAYO" "HUACAYBAMBA" "LA MAR"
## [5] "LEONCIO PRADO" "PAITA" "PASCO" "SECHURA"