C4

# Preview the first six rows of the `eco` table.
head(eco, n = 6L)

##                       Pais Comida Desastre Poblacion Agua
## 1                  Burundi      5        5         5    5
## 2 Central African Republic      5        5         5    5
## 3    Republic of the Congo      5        5         5    5
## 4                    Kenya      5        5         5    5
## 5               Mozambique      5        5         5    5
## 6                   Malawi      5        5         5    5

# Preview the first six rows of the `paz` table.
head(paz, n = 6L)

##          Pais Seguridad Militar Conflictos
## 1 Afghanistan     4.127   2.472      3.650
## 2     Albania     2.120   1.666      1.403
## 3     Algeria     2.302   2.041      2.068
## 4      Angola     2.413   1.706      1.666
## 5   Argentina     2.656   1.611      1.201
## 6     Armenia     1.977   2.041      1.990

# Join both tables on `Pais`, the only column name they share.
data <- merge(x = paz, y = eco, by = "Pais")

# Keep columns 2 through 8 (the numeric indicators) as the clustering input.
dataClus <- data[, 2:8]

# Country names become the row names only; they are no longer a column.
rownames(dataClus) <- data$Pais

Con la data original del Democracy Index pide 4 clusters y compara el resultado con lo propuesto por The Economist. ¿Qué observas?

# Distance matrix: pairwise Gower dissimilarities between the rows of dataClus.
library(cluster)
g.dist <- daisy(x = dataClus, metric = "gower")

# Partition around medoids into 9 clusters. The full pam object (not just the
# label vector) is requested because later steps read its silhouette info.
# `FALSE` is spelled out: the shorthand `F` is an ordinary variable that can
# be reassigned.
set.seed(123)
res.pam <- pam(x = g.dist, k = 9, cluster.only = FALSE)

# New column: the cluster label assigned to each row.
# The component is named `clustering`; `$cluster` only worked through `$`
# partial matching, so the full name is used here.
dataClus$pam <- res.pam$clustering

#Siluetas:
library(factoextra)

## Loading required package: ggplot2

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# Silhouette plot for the PAM solution, without the printed summary table.
# `FALSE` replaces the reassignable shorthand `F`.
fviz_silhouette(res.pam, print.summary = FALSE)

# List, in alphabetical order, the cases with a negative silhouette width.
library(magrittr)
silPAM <- data.frame(res.pam$silinfo$widths)
# The row names carry the case labels; copy them into a regular column.
silPAM$country <- row.names(silPAM)
lowPAM <- sort(silPAM[silPAM$sil_width < 0, "country"])
lowPAM

##  [1] "Argentina"                "Armenia"                 
##  [3] "Australia"                "Austria"                 
##  [5] "Azerbaijan"               "Belarus"                 
##  [7] "Egypt"                    "El Salvador"             
##  [9] "Ireland"                  "Moldova"                 
## [11] "New Zealand"              "North Korea"             
## [13] "North Macedonia"          "Pakistan"                
## [15] "Panama"                   "Qatar"                   
## [17] "The Gambia"               "Trinidad and Tobago"     
## [19] "Uganda"                   "United States of America"
## [21] "Zimbabwe"

#Normalización
library(BBmisc)

## 
## Attaching package: 'BBmisc'

## The following object is masked from 'package:DescTools':
## 
##     %nin%

## The following object is masked from 'package:base':
## 
##     isFALSE

# Standardize the seven numeric indicators held in columns 2:8 of `data`.
#
# The table is rebuilt from `data` instead of assigning into dataClus[, 2:8].
# By this point dataClus carries an eighth column (`pam`), so its positions
# 2:8 no longer line up with data[, 2:8]. The positional assignment left
# Seguridad unscaled, stored each scaled variable under the next column's
# name, and overwrote `pam` with the scaled Agua values.
# Rebuilding also drops the earlier `pam` labels, so they cannot leak into the
# distance matrix computed afterwards.
dataClus <- normalize(data[, 2:8], method = "standardize")
row.names(dataClus) <- data$Pais
# NOTE(review): rendered output captured before this fix is stale; re-knit.
summary(dataClus)

##    Seguridad        Militar             Conflictos          Comida       
##  Min.   :1.236   Min.   :-1.8669093   Min.   :-2.0815   Min.   :-1.2320  
##  1st Qu.:1.953   1st Qu.:-0.7293367   1st Qu.:-0.6326   1st Qu.:-0.6274  
##  Median :2.413   Median : 0.0004867   Median :-0.1839   Median :-0.2869  
##  Mean   :2.413   Mean   : 0.0000000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.:2.803   3rd Qu.: 0.6184567   3rd Qu.: 0.4262   3rd Qu.: 0.4001  
##  Max.   :4.127   Max.   : 2.7198722   Max.   : 4.7486   Max.   : 3.0103  
##     Desastre          Poblacion            Agua              pam         
##  Min.   :-1.27426   Min.   :-1.9979   Min.   :-1.0526   Min.   :-1.6318  
##  1st Qu.:-0.59102   1st Qu.:-0.4902   1st Qu.:-1.0526   1st Qu.:-0.9939  
##  Median : 0.09222   Median : 0.2636   Median : 0.1686   Median : 0.2817  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.77546   3rd Qu.: 1.0175   3rd Qu.: 1.3897   3rd Qu.: 0.9196  
##  Max.   : 1.45870   Max.   : 1.0175   Max.   : 1.3897   Max.   : 0.9196

# Distance matrix: Gower dissimilarities recomputed on the current dataClus.
library(cluster)
g.dist <- daisy(x = dataClus, metric = "gower")

# Gap-statistic plot used to pick the number of clusters (up to 10).
# `FALSE` replaces the reassignable shorthand `F`.
# NOTE(review): k is evaluated here with hierarchical clustering (hcut with
# agnes), while the clustering that follows uses pam -- confirm this mismatch
# is intended.
fviz_nbclust(
  x = dataClus,
  FUNcluster = hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "agnes"
)

# Partition around medoids into 6 clusters, keeping the full pam object
# (medoids, silhouette info) rather than only the label vector.
# `FALSE` replaces the reassignable shorthand `F`.
set.seed(123)
res.pam <- pam(x = g.dist, k = 6, cluster.only = FALSE)

# New column: the cluster label assigned to each row. The component is named
# `clustering`; `$cluster` only worked through `$` partial matching.
dataClus$pam <- res.pam$clustering

# Silhouettes: plot the silhouette widths of the PAM solution without the
# printed summary table. `FALSE` replaces the reassignable shorthand `F`.
library(factoextra)
fviz_silhouette(res.pam, print.summary = FALSE)

# List, in alphabetical order, the cases with a negative silhouette width.
library(magrittr)
silPAM <- data.frame(res.pam$silinfo$widths)
# The row names carry the case labels; copy them into a regular column.
silPAM$country <- row.names(silPAM)
lowPAM <- silPAM$country[silPAM$sil_width < 0] %>%
  sort()
lowPAM

##  [1] "Armenia"               "Azerbaijan"            "Bhutan"               
##  [4] "Indonesia"             "Kenya"                 "Kosovo"               
##  [7] "Lesotho"               "Namibia"               "Pakistan"             
## [10] "Panama"                "Qatar"                 "Republic of the Congo"
## [13] "The Gambia"            "Uganda"                "Vietnam"              
## [16] "Zimbabwe"

C4

Carlos Chávarri

2022-11-25