library(rio)
# Load the raw spreadsheet export; rio selects the reader from the file
# extension.
data1 <- import(file = "reporte.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
# Strip the spreadsheet layout so that only the data rows and columns remain.
# NOTE(review): these offsets are tied to the layout of "reporte.xlsx"
# (presumably title/header rows on top and footnotes at the bottom) --
# confirm them if the export format changes.
data1 <- data1[, -c(1, 2)]             # drop the first two columns
data1 <- data1[-seq_len(4), ]          # drop the first four rows
# head() with a negative n drops the trailing rows and, unlike
# 1:(nrow(data1) - 3), never counts backwards when few rows are left.
data1 <- head(data1, -3)
data1 <- data1[, -seq(2, 10, by = 2)]  # drop columns 2, 4, 6, 8 and 10
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Give the surviving auto-named columns (`...3`, `...5`, ...) meaningful
# names in a single rename() call.
data1 <- data1 %>%
  rename(
    provincia = ...3,
    sis = ...5,
    essalud = ...7,
    fuerza_armada = ...9,
    privado = ...11,
    otro = ...13
  )
# Drop the first remaining row.
# NOTE(review): presumably the spreadsheet's own header labels -- confirm.
data1 <- data1[-1, ]
# Province labels become a factor.
data1$provincia <- as.factor(data1$provincia)
# Columns 2 to 6 (sis, essalud, fuerza_armada, privado, otro) are converted
# to numeric in one pass; values that cannot be parsed become NA.
data1[2:6] <- lapply(data1[2:6], as.numeric)
# Keep only the rows without any missing value.
data1 <- na.omit(data1)
# Rescale the variables to a common range (0 to 10 in the call below)
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following objects are masked from 'package:dplyr':
##
## coalesce, collapse, symdiff
## The following object is masked from 'package:base':
##
## isFALSE
# Boxplots (one box per column) of columns 2 to 6 under two rescalings:
# first min-max onto [0, 10], then standardization.
boxplot(
  normalize(data1[, 2:6], method = "range", range = c(0, 10))
)
boxplot(
  normalize(data1[, 2:6], method = "standardize")
)
# Pairwise correlation matrix of the same columns
cor(data1[, 2:6])
## sis essalud fuerza_armada privado otro
## sis 1.0000000 0.9796324 0.9741426 0.9627670 0.9746401
## essalud 0.9796324 1.0000000 0.9955146 0.9897139 0.9976426
## fuerza_armada 0.9741426 0.9955146 1.0000000 0.9967653 0.9987024
## privado 0.9627670 0.9897139 0.9967653 1.0000000 0.9965957
## otro 0.9746401 0.9976426 0.9987024 0.9965957 1.0000000
# Clustering input: columns 2 to 6 only, with each row labelled by its
# province so the labels follow the observations through later steps.
dataClus <- data1[, 2:6]
rownames(dataClus) <- data1$provincia
#Matriz de distancias
library(cluster)
# Pairwise dissimilarities between the rows of dataClus, using the Gower
# metric.
g.dist <- daisy(x = dataClus, metric = "gower")
#Estrategia de particion
## para PAM
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Plot the gap statistic for up to 10 clusters, with PAM as the clustering
# function.
# NOTE(review): confirm that `diss` is honoured when method = "gap_stat";
# if it is not, this step works on the raw dataClus values rather than on
# the Gower dissimilarities used for the final PAM fit.
fviz_nbclust(
  dataClus,
  FUNcluster = pam,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE
)
# The resulting plot suggests a single cluster
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Fit PAM with one cluster on the precomputed dissimilarities.
set.seed(123)
res.pam <- pam(g.dist, k = 1, cluster.only = FALSE)
# Store each observation's cluster label as a new column. The component is
# named `clustering`; `res.pam$cluster` only worked through `$` partial
# matching, so the exact name is used here.
dataClus$pam <- res.pam$clustering
# Preview the first 15 rows as a styled table
head(dataClus, 15) %>%
  kbl() %>%
  kable_styling()
| sis | essalud | fuerza_armada | privado | otro | pam | |
|---|---|---|---|---|---|---|
| Amazonas, provincia: Chachapoyas | 32860 | 12110 | 985 | 407 | 306 | 1 |
| Amazonas, provincia: Bagua | 54088 | 9471 | 403 | 206 | 236 | 1 |
| Amazonas, provincia: Bongara | 18057 | 2585 | 449 | 131 | 84 | 1 |
| Amazonas, provincia: Condorcanqui | 33802 | 2515 | 190 | 49 | 172 | 1 |
| Amazonas, provincia: Luya | 36541 | 3080 | 272 | 95 | 68 | 1 |
| Amazonas, provincia: Rodríguez de Mendoza | 20815 | 3030 | 331 | 103 | 115 | 1 |
| Amazonas, provincia: Utcubamba | 80664 | 10954 | 883 | 291 | 339 | 1 |
| Áncash, provincia: Huaraz | 90834 | 34932 | 2096 | 3723 | 1117 | 1 |
| Áncash, provincia: Aija | 4542 | 1190 | 29 | 275 | 11 | 1 |
| Áncash, provincia: Antonio Raymondi | 11048 | 1599 | 25 | 22 | 30 | 1 |
| Áncash, provincia: Asunción | 5337 | 1308 | 24 | 19 | 23 | 1 |
| Áncash, provincia: Bolognesi | 15559 | 3387 | 171 | 252 | 160 | 1 |
| Áncash, provincia: Carhuaz | 36131 | 3653 | 202 | 219 | 100 | 1 |
| Áncash, provincia: Carlos Fermín Fitzcarrald | 15101 | 1588 | 36 | 25 | 25 | 1 |
| Áncash, provincia: Casma | 27818 | 9395 | 378 | 485 | 277 | 1 |