# Remove every (non-hidden) object from the current environment.
rm(list = ls())
library(htmltab)

# Links: page holding the Democracy Index and XPath of the table to scrape.
WhereDEMO <- list(
  page = "https://en.wikipedia.org/wiki/Democracy_Index",
  xpath = '//*[@id="mw-content-text"]/div[1]/table[6]/tbody'
)
# Scrape the table located by the XPath into `demo`.
demo <- htmltab(
  doc = WhereDEMO$page,
  which = WhereDEMO$xpath,
  encoding = "UTF-8"
)
library(kableExtra)
library(magrittr)
# Show the first 15 scraped rows as a striped, small-font kable table.
head(demo, n = 15) %>%
  kbl() %>%
  kable_styling(bootstrap_options = "striped", font_size = 10)
Rank >> Full democracies >> Flawed democracies >> Hybrid regimes >> Authoritarian regimes .mw-parser-output .tooltip-dotted{border-bottom:1px dotted;cursor:help}Δ Rank >> Full democracies >> Flawed democracies >> Hybrid regimes >> Authoritarian regimes Country >> Full democracies >> Flawed democracies >> Hybrid regimes >> Authoritarian regimes Regime type >> Full democracies >> Flawed democracies >> Hybrid regimes >> Authoritarian regimes Overall score >> Full democracies >> Flawed democracies >> Hybrid regimes >> Authoritarian regimes Δ Score >> Full democracies >> Flawed democracies >> Hybrid regimes >> Authoritarian regimes Elec­toral pro­cessand plura­lism >> Full democracies >> Flawed democracies >> Hybrid regimes >> Authoritarian regimes Func­tioningof govern­ment >> Full democracies >> Flawed democracies >> Hybrid regimes >> Authoritarian regimes Poli­ticalpartici­pation >> Full democracies >> Flawed democracies >> Hybrid regimes >> Authoritarian regimes Poli­ticalcul­ture >> Full democracies >> Flawed democracies >> Hybrid regimes >> Authoritarian regimes Civilliber­ties >> Full democracies >> Flawed democracies >> Hybrid regimes >> Authoritarian regimes
3 1 NA  Norway Full democracy 9.75 0.06 10.00 9.64 10.00 10.00 9.12
4 2 2  New Zealand Full democracy 9.37 0.12 10.00 8.93 9.44 8.75 9.71
5 3 3  Finland Full democracy 9.27 0.07 10.00 9.29 8.89 8.75 9.41
6 4 1  Sweden Full democracy 9.26 NA 9.58 9.29 8.33 10.00 9.12
7 5 3  Iceland Full democracy 9.18 0.19 10.00 8.21 8.89 9.38 9.41
8 6 1  Denmark Full democracy 9.09 0.06 10.00 8.93 8.33 9.38 8.82
9 7 1  Ireland Full democracy 9.00 0.05 10.00 7.86 8.33 9.38 9.41
10 8 3  Taiwan Full democracy 8.99 0.05 10.00 9.64 7.78 8.13 9.41
11 9 NA  Australia Full democracy 8.90 0.06 10.00 8.57 7.78 8.75 9.41
12 9 2  Switzerland Full democracy 8.90 0.07 9.58 8.93 7.78 9.38 8.82
13 11 1  Netherlands Full democracy 8.88 0.08 9.58 8.93 8.33 8.75 8.82
14 12 7  Canada Full democracy 8.87 0.37 10.00 8.21 8.89 8.13 9.12
15 13 2  Uruguay Full democracy 8.85 0.24 10.00 8.57 7.22 8.75 9.71
16 14 1  Luxembourg Full democracy 8.68 NA 10.00 8.57 6.67 8.75 9.41
17 15 1  Germany Full democracy 8.67 NA 9.58 8.21 8.33 8.13 9.12
# Location of the HDI statistical annex workbook.
WhereIDH <- "https://github.com/Estadistica-AnalisisPolitico/DataFiles-estadistica/raw/main/HDR21-22_Statistical_Annex_HDI_Table.xlsx"

# Load: skip the first 4 rows of the sheet and leave column names unrepaired.
idh <- rio::import(WhereIDH, skip = 4, .name_repair = "minimal")
# Show the first 15 imported rows as a striped, small-font kable table.
head(idh, n = 15) %>%
  kbl() %>%
  kable_styling(bootstrap_options = "striped", font_size = 10)
Human Development Index (HDI) Life expectancy at birth Expected years of schooling Mean years of schooling Gross national income (GNI) per capita GNI per capita rank minus HDI rank HDI rank
HDI rank Country Value NA (years) NA (years) NA (years) NA (2017 PPP $) NA NA NA NA
NA NA 2021 NA 2021 NA 2021 a 2021 a 2021 NA 2021 b 2020
NA VERY HIGH HUMAN DEVELOPMENT NA NA NA NA NA NA NA NA NA NA NA NA NA
1 Switzerland 0.96199999999999997 NA 83.987200000000001 NA 16.50029945 NA 13.85966015 NA 66933.004539999994 NA 5 NA 3
2 Norway 0.96099999999999997 NA 83.233900000000006 NA 18.185199740000002 c 13.00362968 NA 64660.106220000001 NA 6 NA 1
3 Iceland 0.95899999999999996 NA 82.678200000000004 NA 19.163059230000002 c 13.76716995 NA 55782.049809999997 NA 11 NA 2
4 Hong Kong, China (SAR) 0.95199999999999996 NA 85.473399999999998 d 17.278169630000001 NA 12.22620964 NA 62606.845399999998 NA 6 NA 4
5 Australia 0.95099999999999996 NA 84.526499999999999 NA 21.054590229999999 c 12.726819989999999 NA 49238.433349999999 NA 18 NA 5
6 Denmark 0.94799999999999995 NA 81.375299999999996 NA 18.714799880000001 c 12.96049023 NA 60364.785949999998 NA 6 NA 5
7 Sweden 0.94699999999999995 NA 82.9833 NA 19.418529509999999 c 12.609720230000001 NA 54489.37401 NA 9 NA 9
8 Ireland 0.94499999999999995 NA 81.997600000000006 NA 18.94522095 c 11.58222303 e 76168.984429999997 f -3 NA 8
9 Germany 0.94199999999999995 NA 80.630099999999999 NA 17.010139469999999 NA 14.090966910000001 e 54534.216820000001 NA 6 NA 7
10 Netherlands 0.94099999999999995 NA 81.687299999999993 NA 18.693165220000001 c,e 12.581629749999999 NA 55979.411 NA 3 NA 10
11 Finland 0.94 NA 82.0381 NA 19.051929470000001 c 12.87362003 NA 49452.166720000001 NA 11 NA 12
12 Singapore 0.93899999999999995 NA 82.754499999999993 NA 16.524320599999999 NA 11.924880030000001 NA 90918.644709999993 f -10 NA 10
# Keep only columns 2, 3, 5, 7, 9 and 11 of the HDI sheet (six columns).
idh <- idh[, c(2, 3, 5, 7, 9, 11)]
# Drop columns 1, 2 and 6 of the scraped Democracy Index table.
demo <- demo[, -c(1, 2, 6)]

# Renaming columns
newDemo <- c(
  "Pais", "RegimeType", "Score", "Electoral",
  "Functioning", "participation", "culture", "Civilliberties"
)
newIDH <- c(
  "Pais", "puntuacion", "EsperanzaVida",
  "EscolaridadDuracion", "EscolaridadPromedio", "PBI"
)
names(demo) <- newDemo
names(idh) <- newIDH

# Selecting rows: keep the first 202 rows, then drop rows without a country.
idh <- idh[1:202, ]
idh <- idh[!is.na(idh$Pais), ]

# Data types
str(demo)
## 'data.frame':    167 obs. of  8 variables:
##  $ Pais          : chr  " Norway" " New Zealand" " Finland" " Sweden" ...
##  $ RegimeType    : chr  "Full democracy" "Full democracy" "Full democracy" "Full democracy" ...
##  $ Score         : chr  "9.75" "9.37" "9.27" "9.26" ...
##  $ Electoral     : chr  "10.00" "10.00" "10.00" "9.58" ...
##  $ Functioning   : chr  "9.64" "8.93" "9.29" "9.29" ...
##  $ participation : chr  "10.00" "9.44" "8.89" "8.33" ...
##  $ culture       : chr  "10.00" "8.75" "8.75" "10.00" ...
##  $ Civilliberties: chr  "9.12" "9.71" "9.41" "9.12" ...
# Print the structure (dimensions, column names and types) of the HDI table.
str(idh)
## 'data.frame':    201 obs. of  6 variables:
##  $ Pais               : chr  "Country" "VERY HIGH HUMAN DEVELOPMENT" "Switzerland" "Norway" ...
##  $ puntuacion         : chr  "Value" NA "0.96199999999999997" "0.96099999999999997" ...
##  $ EsperanzaVida      : chr  "(years)" NA "83.987200000000001" "83.233900000000006" ...
##  $ EscolaridadDuracion: chr  "(years)" NA "16.50029945" "18.185199740000002" ...
##  $ EscolaridadPromedio: chr  "(years)" NA "13.85966015" "13.00362968" ...
##  $ PBI                : chr  "(2017 PPP $)" NA "66933.004539999994" "64660.106220000001" ...
# Regime labels in the order that defines the ranking of the ordered factor.
OrdinalVector <- c(
  "Authoritarian",
  "Hybrid regime",
  "Flawed democracy",
  "Full democracy"
)
demo$RegimeType <- factor(
  demo$RegimeType,
  levels = OrdinalVector,
  ordered = TRUE
)

# Formatting: text to number (every column except the first one).
idh[, -1] <- lapply(idh[, -1], as.numeric)
## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion
# Convert columns 3 to 8 of the Democracy Index table from text to numbers.
demo[, 3:8] <- lapply(demo[, 3:8], as.numeric)
# List the HDI rows that have a missing value in any non-country column.
idh[!complete.cases(idh[, -1]), ] %>%
  kbl() %>%
  kable_styling(bootstrap_options = "striped", font_size = 10)
Pais puntuacion EsperanzaVida EscolaridadDuracion EscolaridadPromedio PBI
1 Country NA NA NA NA NA
3 VERY HIGH HUMAN DEVELOPMENT NA NA NA NA NA
70 HIGH HUMAN DEVELOPMENT NA NA NA NA NA
120 MEDIUM HUMAN DEVELOPMENT NA NA NA NA NA
165 LOW HUMAN DEVELOPMENT NA NA NA NA NA
198 OTHER COUNTRIES OR TERRITORIES NA NA NA NA NA
199 Korea (Democratic People’s Rep. of) NA 73.2845 10.78317 NA NA
200 Monaco NA 85.9463 NA NA NA
201 Nauru NA 63.6170 11.69042 NA 17729.741
202 Somalia NA 55.2803 NA NA 1017.968
# Keep only rows with a value in every non-country column.
idh <- idh[complete.cases(idh[, -1]), ]
rownames(idh) <- NULL # reset row numbering

## Merge

# Strip leading/trailing horizontal and vertical whitespace from the keys.
idh$Pais <- trimws(idh$Pais, whitespace = "[\\h\\v]")
demo$Pais <- trimws(demo$Pais, whitespace = "[\\h\\v]")

# Rewrite HDI country names to the spellings on the right-hand side so the
# two tables can be joined on `Pais`.
idh$Pais[idh$Pais == "Bolivia (Plurinational State of)"] <- "Bolivia"
idh$Pais[idh$Pais == "Cabo Verde"] <- "Cape Verde"
idh$Pais[idh$Pais == "Czechia"] <- "Czech Republic"
idh$Pais[idh$Pais == "Congo (Democratic Republic of the)"] <- "Democratic Republic of the Congo"
idh$Pais[idh$Pais == "Timor-Leste"] <- "East Timor"
idh$Pais[idh$Pais == "Eswatini (Kingdom of)"] <- "Eswatini"
idh$Pais[idh$Pais == "Hong Kong, China (SAR)"] <- "Hong Kong"
idh$Pais[idh$Pais == "Iran (Islamic Republic of)"] <- "Iran"
idh$Pais[idh$Pais == "Côte d'Ivoire"] <- "Ivory Coast"
idh$Pais[idh$Pais == "Lao People's Democratic Republic"] <- "Laos"
idh$Pais[idh$Pais == "Moldova (Republic of)"] <- "Moldova"
idh$Pais[idh$Pais == "Palestine, State of"] <- "Palestine"
idh$Pais[idh$Pais == "Congo"] <- "Republic of the Congo"
idh$Pais[idh$Pais == "Russian Federation"] <- "Russia"
idh$Pais[idh$Pais == "Korea (Republic of)"] <- "South Korea"
idh$Pais[idh$Pais == "Syrian Arab Republic"] <- "Syria"
idh$Pais[idh$Pais == "Tanzania (United Republic of)"] <- "Tanzania"
idh$Pais[idh$Pais == "Türkiye"] <- "Turkey"
idh$Pais[idh$Pais == "Venezuela (Bolivarian Republic of)"] <- "Venezuela"
idh$Pais[idh$Pais == "Viet Nam"] <- "Vietnam"

# Join the two tables on their shared column(s) and summarise the result.
idhdemo <- merge(idh, demo)
summary(idhdemo)
##      Pais             puntuacion     EsperanzaVida   EscolaridadDuracion
##  Length:165         Min.   :0.3940   Min.   :52.53   Min.   : 6.957     
##  Class :character   1st Qu.:0.5860   1st Qu.:65.27   1st Qu.:11.468     
##  Mode  :character   Median :0.7310   Median :71.91   Median :13.644     
##                     Mean   :0.7204   Mean   :71.22   Mean   :13.607     
##                     3rd Qu.:0.8480   3rd Qu.:76.94   3rd Qu.:15.765     
##                     Max.   :0.9620   Max.   :85.47   Max.   :21.055     
##  EscolaridadPromedio      PBI                     RegimeType     Score      
##  Min.   : 2.115      Min.   :  731.8   Authoritarian   :58   Min.   :0.320  
##  1st Qu.: 5.916      1st Qu.: 4566.3   Hybrid regime   :34   1st Qu.:3.220  
##  Median : 9.424      Median :12578.2   Flawed democracy:53   Median :5.610  
##  Mean   : 8.926      Mean   :20108.4   Full democracy  :20   Mean   :5.284  
##  3rd Qu.:11.654      3rd Qu.:30690.5                         3rd Qu.:7.060  
##  Max.   :14.091      Max.   :90918.6                         Max.   :9.750  
##    Electoral       Functioning    participation       culture      
##  Min.   : 0.000   Min.   :0.000   Min.   : 0.000   Min.   : 1.250  
##  1st Qu.: 1.500   1st Qu.:2.710   1st Qu.: 3.890   1st Qu.: 3.750  
##  Median : 7.000   Median :5.000   Median : 5.560   Median : 5.000  
##  Mean   : 5.636   Mean   :4.623   Mean   : 5.401   Mean   : 5.389  
##  3rd Qu.: 9.170   3rd Qu.:6.430   3rd Qu.: 6.670   3rd Qu.: 6.250  
##  Max.   :10.000   Max.   :9.640   Max.   :10.000   Max.   :10.000  
##  Civilliberties 
##  Min.   :0.000  
##  1st Qu.:3.240  
##  Median :5.590  
##  Mean   :5.378  
##  3rd Qu.:7.650  
##  Max.   :9.710
library(BBmisc)
## 
## Attaching package: 'BBmisc'
## The following object is masked from 'package:base':
## 
##     isFALSE
# Boxplots of columns 3 to 6 rescaled to the 0-10 range.
boxplot(normalize(idhdemo[, 3:6], method = "range", range = c(0, 10)))

# Boxplots of the same columns after standardization.
boxplot(normalize(idhdemo[, 3:6], method = "standardize"))

# Overwrite the columns with their standardized values and correlate them.
idhdemo[, 3:6] <- normalize(idhdemo[, 3:6], method = "standardize")
cor(idhdemo[, 3:6])
##                     EsperanzaVida EscolaridadDuracion EscolaridadPromedio
## EsperanzaVida           1.0000000           0.8057425           0.7659252
## EscolaridadDuracion     0.8057425           1.0000000           0.8159101
## EscolaridadPromedio     0.7659252           0.8159101           1.0000000
## PBI                     0.7838335           0.7311884           0.7139462
##                           PBI
## EsperanzaVida       0.7838335
## EscolaridadDuracion 0.7311884
## EscolaridadPromedio 0.7139462
## PBI                 1.0000000
# Clustering input: columns 3 to 6 of the merged table, rows named by country.
dataClus <- idhdemo[, 3:6]
rownames(dataClus) <- idhdemo$Pais
library(cluster)
# Pairwise dissimilarities between rows using the Gower metric.
g.dist <- daisy(dataClus, metric = "gower")
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# The gap statistic is computed from randomly generated reference data, so
# the seed is fixed right before it to make the suggested number of clusters
# reproducible regardless of what ran earlier in the session.
set.seed(123)
fviz_nbclust(
  dataClus, pam,
  diss = g.dist, method = "gap_stat", k.max = 10, verbose = FALSE
)

# Reset the seed so the following steps start from a known RNG state.
set.seed(123)
# PAM on the dissimilarity matrix, asking for 3 clusters and the full result.
res.pam <- pam(g.dist, 3, cluster.only = FALSE)

# New column: cluster label assigned to each country.
dataClus$pam <- res.pam$cluster

# Inspect the first 15 rows.

head(dataClus, 15) %>%
  kbl() %>%
  kable_styling()
EsperanzaVida EscolaridadDuracion EscolaridadPromedio PBI pam
Afghanistan -1.1720163 -1.1223433 -1.7899045 -0.9036118 1
Albania 0.6655042 0.2825071 0.7113515 -0.2954005 2
Algeria 0.6546035 0.3425723 -0.2580009 -0.4600137 2
Angola -1.2150350 -0.4816372 -1.0570320 -0.7236515 1
Argentina 0.5293798 1.4330952 0.6694140 0.0403686 2
Armenia 0.1046748 -0.1644630 0.7245624 -0.3434922 2
Australia 1.6888036 2.5007014 1.1453419 1.4396130 3
Austria 1.3148582 0.8062709 1.0036890 1.6560856 3
Azerbaijan -0.2350715 -0.0368326 0.4873350 -0.2891916 2
Bahrain 0.9571050 0.9035056 0.6390787 0.9582010 3
Bangladesh 0.1475666 -0.3910236 -0.4659696 -0.7233309 2
Belarus 0.1547871 0.5249118 0.9696084 -0.0622427 2
Belgium 1.3528009 2.0137324 1.0395414 1.5905903 3
Benin -1.4462954 -0.9535138 -1.3922662 -0.8252918 1
Bhutan 0.0757291 -0.1281194 -1.1314933 -0.5273580 1
# Silhouette plot of the PAM solution (summary table not printed).
fviz_silhouette(res.pam, print.summary = FALSE)

# Silhouette widths as a data frame, with the row names copied to a column.
silPAM <- data.frame(res.pam$silinfo$widths)
silPAM$country <- row.names(silPAM)
# Countries with a negative silhouette width, sorted alphabetically.
poorPAM <- sort(silPAM[silPAM$sil_width < 0, "country"])
poorPAM
## [1] "Chile"  "Latvia"

## Mean of each cluster

# Average every column of `dataClus` within each PAM cluster.
aggregate(. ~ pam, data = dataClus, FUN = mean)
##   pam EsperanzaVida EscolaridadDuracion EscolaridadPromedio        PBI
## 1   1    -1.0811743          -1.0681695          -1.1822831 -0.8111138
## 2   2     0.1340138           0.1819924           0.3393855 -0.2301917
## 3   3     1.2520905           1.1502463           1.0317146  1.5181170
# Flag the countries listed in `poorPAM`.
idhdemo$pamIDHpoor <- idhdemo$Pais %in% poorPAM
# Store the PAM labels as an ordered factor in the merged table.
idhdemo$pamIDH <- as.ordered(dataClus$pam)
# Remove the label column so `dataClus` holds only the clustering variables.
dataClus$pam <- NULL

## Hierarchical clustering

# The gap statistic is computed from randomly generated reference data, so
# the seed is fixed right before it instead of relying on RNG state left
# over from earlier code.
set.seed(123)
fviz_nbclust(
  dataClus, hcut,
  diss = g.dist, method = "gap_stat", k.max = 10, verbose = FALSE,
  hc_func = "agnes"
)

### Via AGNES

# Reset the seed so the following steps start from a known RNG state.
set.seed(123)
library(factoextra)

# Hierarchical clustering with AGNES on the dissimilarity matrix, cut at k = 3.
res.agnes <- hcut(g.dist, k = 3, hc_func = "agnes", hc_method = "ward.D")

# New column: cluster label assigned to each country.
dataClus$agnes <- res.agnes$cluster

# Inspect the first 15 rows.

head(dataClus, 15) %>%
  kbl() %>%
  kable_styling()
EsperanzaVida EscolaridadDuracion EscolaridadPromedio PBI agnes
Afghanistan -1.1720163 -1.1223433 -1.7899045 -0.9036118 1
Albania 0.6655042 0.2825071 0.7113515 -0.2954005 2
Algeria 0.6546035 0.3425723 -0.2580009 -0.4600137 2
Angola -1.2150350 -0.4816372 -1.0570320 -0.7236515 1
Argentina 0.5293798 1.4330952 0.6694140 0.0403686 2
Armenia 0.1046748 -0.1644630 0.7245624 -0.3434922 2
Australia 1.6888036 2.5007014 1.1453419 1.4396130 3
Austria 1.3148582 0.8062709 1.0036890 1.6560856 3
Azerbaijan -0.2350715 -0.0368326 0.4873350 -0.2891916 2
Bahrain 0.9571050 0.9035056 0.6390787 0.9582010 2
Bangladesh 0.1475666 -0.3910236 -0.4659696 -0.7233309 2
Belarus 0.1547871 0.5249118 0.9696084 -0.0622427 2
Belgium 1.3528009 2.0137324 1.0395414 1.5905903 3
Benin -1.4462954 -0.9535138 -1.3922662 -0.8252918 1
Bhutan 0.0757291 -0.1281194 -1.1314933 -0.5273580 2
# Silhouette plot of the AGNES solution (summary table not printed).
fviz_silhouette(res.agnes, print.summary = FALSE)

# Silhouette widths as a data frame, with the row names copied to a column.
silAGNES <- data.frame(res.agnes$silinfo$widths)
silAGNES$country <- row.names(silAGNES)
# Countries with a negative silhouette width, sorted alphabetically.
poorAGNES <- sort(silAGNES[silAGNES$sil_width < 0, "country"])
poorAGNES
##  [1] "Bahrain"        "Bhutan"         "Cape Verde"     "Czech Republic"
##  [5] "Estonia"        "Greece"         "Kuwait"         "Lithuania"     
##  [9] "Poland"         "Portugal"       "Saudi Arabia"   "Spain"
# Average every column of `dataClus` within each AGNES cluster.
aggregate(. ~ agnes, data = dataClus, FUN = mean)
##   agnes EsperanzaVida EscolaridadDuracion EscolaridadPromedio         PBI
## 1     1    -1.1025984          -1.0855778          -1.1832237 -0.81636850
## 2     2     0.2356679           0.2867675           0.3801487 -0.08496801
## 3     3     1.3867428           1.2105608           1.1283409  1.76038883
# Flag the countries listed in `poorAGNES`.
idhdemo$agnesIDHpoor <- idhdemo$Pais %in% poorAGNES
# Store the AGNES labels as an ordered factor in the merged table.
idhdemo$agnesIDH <- as.ordered(dataClus$agnes)
# Remove the label column so `dataClus` holds only the clustering variables.
dataClus$agnes <- NULL

## Divisive strategy

# The gap statistic is computed from randomly generated reference data, so
# the seed is fixed right before it instead of relying on RNG state left
# over from earlier code.
set.seed(123)
fviz_nbclust(
  dataClus, hcut,
  diss = g.dist, method = "gap_stat", k.max = 10, verbose = FALSE,
  hc_func = "diana"
)

# Reset the seed so the following steps start from a known RNG state.
set.seed(123)
# Hierarchical clustering with DIANA on the dissimilarity matrix, cut at k = 4.
res.diana <- hcut(g.dist, k = 4, hc_func = "diana")
# New column: cluster label assigned to each country.
dataClus$diana <- res.diana$cluster
# Inspect the first 15 rows.
head(dataClus, 15) %>%
  kbl() %>%
  kable_styling()
EsperanzaVida EscolaridadDuracion EscolaridadPromedio PBI diana
Afghanistan -1.1720163 -1.1223433 -1.7899045 -0.9036118 1
Albania 0.6655042 0.2825071 0.7113515 -0.2954005 2
Algeria 0.6546035 0.3425723 -0.2580009 -0.4600137 2
Angola -1.2150350 -0.4816372 -1.0570320 -0.7236515 1
Argentina 0.5293798 1.4330952 0.6694140 0.0403686 2
Armenia 0.1046748 -0.1644630 0.7245624 -0.3434922 2
Australia 1.6888036 2.5007014 1.1453419 1.4396130 3
Austria 1.3148582 0.8062709 1.0036890 1.6560856 3
Azerbaijan -0.2350715 -0.0368326 0.4873350 -0.2891916 2
Bahrain 0.9571050 0.9035056 0.6390787 0.9582010 3
Bangladesh 0.1475666 -0.3910236 -0.4659696 -0.7233309 4
Belarus 0.1547871 0.5249118 0.9696084 -0.0622427 2
Belgium 1.3528009 2.0137324 1.0395414 1.5905903 3
Benin -1.4462954 -0.9535138 -1.3922662 -0.8252918 1
Bhutan 0.0757291 -0.1281194 -1.1314933 -0.5273580 4
# Silhouette plot of the DIANA solution (summary table not printed).
fviz_silhouette(res.diana, print.summary = FALSE)

# Silhouette widths as a data frame, with the row names copied to a column.
silDIANA <- data.frame(res.diana$silinfo$widths)
silDIANA$country <- row.names(silDIANA)
# Countries with a negative silhouette width, sorted alphabetically.
poorDIANA <- sort(silDIANA[silDIANA$sil_width < 0, "country"])
poorDIANA
## [1] "Azerbaijan"   "Ecuador"      "Mongolia"     "Turkmenistan"