# Preview the first rows of `eco`; the recorded output below shows a country
# key (Pais) followed by the Comida, Desastre, Poblacion and Agua columns.
head(eco)
## Pais Comida Desastre Poblacion Agua
## 1 Burundi 5 5 5 5
## 2 Central African Republic 5 5 5 5
## 3 Republic of the Congo 5 5 5 5
## 4 Kenya 5 5 5 5
## 5 Mozambique 5 5 5 5
## 6 Malawi 5 5 5 5
# Preview the first rows of `paz`; the recorded output below shows a country
# key (Pais) followed by the Seguridad, Militar and Conflictos columns.
head(paz)
## Pais Seguridad Militar Conflictos
## 1 Afghanistan 4.127 2.472 3.650
## 2 Albania 2.120 1.666 1.403
## 3 Algeria 2.302 2.041 2.068
## 4 Angola 2.413 1.706 1.666
## 5 Argentina 2.656 1.611 1.201
## 6 Armenia 1.977 2.041 1.990
# Join both tables on their shared country column, then keep columns 2 to 8 of
# the merged table (the seven indicators) as the clustering input.
data <- merge(paz, eco, by = "Pais")
dataClus <- data[, 2:8]
# Country names become row labels only; they are no longer a regular column.
row.names(dataClus) <- data[["Pais"]]
- Con la data original del Democracy Index, pide 4 clústeres y compara
el resultado con lo propuesto por The Economist. ¿Qué observas?
# DISTANCE MATRIX
library(cluster)
# Pairwise Gower dissimilarities between the rows (countries) of dataClus.
g.dist <- daisy(dataClus, metric = "gower")
set.seed(123)
# Partition around medoids into 9 clusters, keeping the full pam object
# (medoids, silhouette info, ...) rather than only the cluster vector.
res.pam <- pam(g.dist, k = 9, cluster.only = FALSE)
# New column: the cluster id assigned to each country. The component is spelled
# out in full instead of relying on `$` partial matching.
dataClus$pam <- res.pam$clustering
#Siluetas:
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_silhouette(res.pam, print.summary = FALSE)

# Build a table of per-country silhouette widths and list, in alphabetical
# order, the countries whose width is negative.
library(magrittr)
silPAM <- data.frame(res.pam$silinfo$widths)
silPAM$country <- row.names(silPAM)
lowPAM <- sort(silPAM[silPAM$sil_width < 0, "country"])
lowPAM
## [1] "Argentina" "Armenia"
## [3] "Australia" "Austria"
## [5] "Azerbaijan" "Belarus"
## [7] "Egypt" "El Salvador"
## [9] "Ireland" "Moldova"
## [11] "New Zealand" "North Korea"
## [13] "North Macedonia" "Pakistan"
## [15] "Panama" "Qatar"
## [17] "The Gambia" "Trinidad and Tobago"
## [19] "Uganda" "United States of America"
## [21] "Zimbabwe"
#Normalizacion
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following object is masked from 'package:DescTools':
##
## %nin%
## The following object is masked from 'package:base':
##
## isFALSE
# Standardize the indicator columns in place (z-scores).
# Fix: the earlier PAM step appended `pam` as the 8th column of dataClus, so
# assigning into dataClus[, 2:8] wrote every standardized indicator one column
# to the right (Seguridad's z-scores under Militar, ..., Agua's under `pam`)
# and left Seguridad on its raw scale. Selecting the columns by name keeps each
# indicator in its own column and leaves `pam` untouched.
clusVars <- setdiff(names(dataClus), "pam")
dataClus[, clusVars] <- normalize(dataClus[, clusVars], method = "standardize")
summary(dataClus)
# NOTE: the summary recorded before this fix showed the symptom (Seguridad with
# mean 2.413 instead of 0, and `pam` with mean 0 and non-integer values).
# Re-run the script to regenerate the printed output.

# DISTANCE MATRIX
library(cluster)
# Use the indicators only, so the cluster labels stored in `pam` by the
# previous run do not enter the dissimilarities as if they were a variable.
clusData <- dataClus[, clusVars]
g.dist <- daisy(clusData, metric = "gower")
# Seed before the gap statistic as well: it relies on simulated reference
# data, so without a seed the suggested number of clusters may vary by run.
set.seed(123)
fviz_nbclust(clusData, hcut, diss = g.dist, method = "gap_stat",
             k.max = 10, verbose = FALSE, hc_func = "agnes")

set.seed(123)
# Partition around medoids into 6 clusters, keeping the full pam object.
res.pam <- pam(g.dist, 6, cluster.only = FALSE)
# New column: overwrite `pam` with the cluster ids from this run.
dataClus$pam <- res.pam$clustering
#Siluetas:
library(factoextra)
fviz_silhouette(res.pam, print.summary = FALSE)

# Build a table of per-country silhouette widths and list, in alphabetical
# order, the countries whose width is negative.
library(magrittr)
silPAM <- data.frame(res.pam$silinfo$widths)
silPAM$country <- row.names(silPAM)
lowPAM <- sort(silPAM[silPAM$sil_width < 0, "country"])
lowPAM
## [1] "Armenia" "Azerbaijan" "Bhutan"
## [4] "Indonesia" "Kenya" "Kosovo"
## [7] "Lesotho" "Namibia" "Pakistan"
## [10] "Panama" "Qatar" "Republic of the Congo"
## [13] "The Gambia" "Uganda" "Vietnam"
## [16] "Zimbabwe"