library(rio)
# Read the raw spreadsheet. Most header cells are empty, so the reader fills in
# placeholder column names (`...2` to `...13`, see the messages below).
data1 <- import(file = "reporte.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
# Cut the imported sheet down to its data rectangle, purely by position:
#   1. drop the first two columns,
#   2. drop the first four rows,
#   3. drop the last three rows,
#   4. drop every second remaining column (positions 2, 4, 6, 8, 10).
# head(x, -3) is used for step 3 instead of x[1:(nrow(x) - 3), ]: when three
# or fewer rows are left, 1:(n - 3) counts downwards and would select row 1
# (or fail on mixed-sign subscripts) rather than returning no rows.
data1 <- data1[, -c(1, 2)]
data1 <- data1[-seq_len(4), ]
data1 <- head(data1, -3)
data1 <- data1[, -c(2, 4, 6, 8, 10)]
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Replace the placeholder column names with descriptive ones in a single
# rename() call (new_name = old_name).
data1 <- data1 %>%
  rename(
    provincia = ...3,
    sis = ...5,
    essalud = ...7,
    fuerza_armada = ...9,
    privado = ...11,
    otro = ...13
  )

# Discard the first remaining row.
# NOTE(review): presumably a leftover header/total row - confirm against the
# spreadsheet.
data1 <- data1[-1, ]
# The province label becomes a factor; the five coverage columns are converted
# to numeric. Values that cannot be parsed as numbers turn into NA here.
data1$provincia <- as.factor(data1$provincia)

count_cols <- c("sis", "essalud", "fuerza_armada", "privado", "otro")
data1[count_cols] <- lapply(data1[count_cols], as.numeric)

# Keep only rows without any missing value.
data1 <- na.omit(data1)

# Rescale the variables to a common 0-10 range (and then standardize them) to compare their spread

library(BBmisc)
## 
## Attaching package: 'BBmisc'
## The following objects are masked from 'package:dplyr':
## 
##     coalesce, collapse, symdiff
## The following object is masked from 'package:base':
## 
##     isFALSE
# Boxplots of the five numeric columns (2 to 6) after rescaling each one to
# the 0-10 range.
boxplot(
  normalize(data1[2:6], method = "range", range = c(0, 10))
)

# Same columns, this time standardized.
boxplot(
  normalize(data1[2:6], method = "standardize")
)

#Correlacion

# Pairwise correlation matrix of the five numeric columns (2 to 6).
cor(data1[2:6])
##                     sis   essalud fuerza_armada   privado      otro
## sis           1.0000000 0.9796324     0.9741426 0.9627670 0.9746401
## essalud       0.9796324 1.0000000     0.9955146 0.9897139 0.9976426
## fuerza_armada 0.9741426 0.9955146     1.0000000 0.9967653 0.9987024
## privado       0.9627670 0.9897139     0.9967653 1.0000000 0.9965957
## otro          0.9746401 0.9976426     0.9987024 0.9965957 1.0000000
# Clustering input: the five numeric columns only, with the province label
# moved into the row names so every observation stays identifiable.
dataClus <- data1[, 2:6]
rownames(dataClus) <- data1$provincia

#Matriz de distancias

library(cluster)
# Pairwise dissimilarities between rows of dataClus using the Gower metric.
g.dist <- daisy(x = dataClus, metric = "gower")

#Estrategia de particion

## para PAM

library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Plot the gap statistic for PAM with up to 10 clusters to choose how many
# clusters to use.
# verbose is spelled FALSE rather than F: T/F are ordinary variables that can
# be reassigned, whereas TRUE/FALSE are reserved words.
# NOTE(review): the factoextra documentation describes `diss` as used for the
# silhouette / within-sum-of-squares computations, so with
# method = "gap_stat" the Gower distances are presumably ignored and the
# statistic is computed from dataClus directly - confirm this is intended.
fviz_nbclust(
  dataClus,
  pam,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE
)

#sugiere 1 cluster

library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Fit PAM on the Gower dissimilarities with a single cluster and keep the full
# result object (cluster.only = FALSE). FALSE is spelled out because F is an
# ordinary, reassignable variable.
set.seed(123)
res.pam <- pam(g.dist, k = 1, cluster.only = FALSE)

# New column: the cluster assigned to each row.
dataClus$pam <- res.pam$cluster

# ver

# Render the first 15 rows, including the new cluster column, as a styled
# table.
dataClus %>%
  head(n = 15) %>%
  kbl() %>%
  kable_styling()
sis essalud fuerza_armada privado otro pam
Amazonas, provincia: Chachapoyas 32860 12110 985 407 306 1
Amazonas, provincia: Bagua 54088 9471 403 206 236 1
Amazonas, provincia: Bongara 18057 2585 449 131 84 1
Amazonas, provincia: Condorcanqui 33802 2515 190 49 172 1
Amazonas, provincia: Luya 36541 3080 272 95 68 1
Amazonas, provincia: Rodríguez de Mendoza 20815 3030 331 103 115 1
Amazonas, provincia: Utcubamba 80664 10954 883 291 339 1
Áncash, provincia: Huaraz 90834 34932 2096 3723 1117 1
Áncash, provincia: Aija 4542 1190 29 275 11 1
Áncash, provincia: Antonio Raymondi 11048 1599 25 22 30 1
Áncash, provincia: Asunción 5337 1308 24 19 23 1
Áncash, provincia: Bolognesi 15559 3387 171 252 160 1
Áncash, provincia: Carhuaz 36131 3653 202 219 100 1
Áncash, provincia: Carlos Fermín Fitzcarrald 15101 1588 36 25 25 1
Áncash, provincia: Casma 27818 9395 378 485 277 1