library(htmltab)
# Scrape the table addressed by the XPath below from the Wikipedia
# "Democracy Index" page, discard positional columns 1, 2 and 6, and give the
# remaining eight columns the short names used by the rest of the script.
WhereDEMO <- list(
  page = "https://en.wikipedia.org/wiki/Democracy_Index#Components",
  xpath = '//*[@id="mw-content-text"]/div[1]/table[6]/tbody'
)
demo <- htmltab(
  doc = WhereDEMO$page,
  which = WhereDEMO$xpath,
  encoding = "UTF-8"
)
demo <- demo[, -c(1, 2, 6)]
newDemo <- c(
  "Pais", "RegimeType", "Score", "Electoral",
  "Functioning", "participation", "culture", "Civilliberties"
)
names(demo) <- newDemo
# Inspect the column types of the freshly scraped table.
str(demo)
## 'data.frame':    167 obs. of  8 variables:
##  $ Pais          : chr  " Norway" " New Zealand" " Finland" " Sweden" ...
##  $ RegimeType    : chr  "Full democracy" "Full democracy" "Full democracy" "Full democracy" ...
##  $ Score         : chr  "9.75" "9.37" "9.27" "9.26" ...
##  $ Electoral     : chr  "10.00" "10.00" "10.00" "9.58" ...
##  $ Functioning   : chr  "9.64" "8.93" "9.29" "9.29" ...
##  $ participation : chr  "10.00" "9.44" "8.89" "8.33" ...
##  $ culture       : chr  "10.00" "8.75" "8.75" "10.00" ...
##  $ Civilliberties: chr  "9.12" "9.71" "9.41" "9.12" ...
# Regime categories, listed in the order used to rank the factor levels.
OrdinalVector <- c(
  "Authoritarian", "Hybrid regime", "Flawed democracy", "Full democracy"
)
# Turn the regime label into an ordered factor with exactly those levels.
demo$RegimeType <- factor(
  demo$RegimeType,
  levels = OrdinalVector,
  ordered = TRUE
)
# Columns 3 to 8 are character after scraping; convert each one to numeric.
demo[3:8] <- lapply(demo[3:8], as.numeric)
summary(demo)
##      Pais                      RegimeType     Score         Electoral     
##  Length:167         Authoritarian   :59   Min.   :0.320   Min.   : 0.000  
##  Class :character   Hybrid regime   :34   1st Qu.:3.210   1st Qu.: 1.460  
##  Mode  :character   Flawed democracy:53   Median :5.610   Median : 7.000  
##                     Full democracy  :21   Mean   :5.281   Mean   : 5.628  
##                                           3rd Qu.:7.095   3rd Qu.: 9.170  
##                                           Max.   :9.750   Max.   :10.000  
##   Functioning    participation       culture      Civilliberties
##  Min.   :0.000   Min.   : 0.000   Min.   : 1.25   Min.   :0.00  
##  1st Qu.:2.605   1st Qu.: 3.890   1st Qu.: 3.75   1st Qu.:3.24  
##  Median :5.000   Median : 5.560   Median : 5.00   Median :5.59  
##  Mean   :4.640   Mean   : 5.393   Mean   : 5.38   Mean   :5.37  
##  3rd Qu.:6.430   3rd Qu.: 6.670   3rd Qu.: 6.25   3rd Qu.:7.65  
##  Max.   :9.640   Max.   :10.000   Max.   :10.00   Max.   :9.71
library(kableExtra)
library(magrittr)
# Render, as a striped table, every row of demo that has a missing value in
# any column other than the first.
kable_styling(
  kbl(demo[!complete.cases(demo[, -1]), ]),
  bootstrap_options = "striped",
  font_size = 10
)
Pais RegimeType Score Electoral Functioning participation culture Civilliberties
# Keep only fully observed rows and reset the row names to 1..n.
demo <- demo[complete.cases(demo), ]
row.names(demo) <- NULL
# Clustering input: columns 4 to 8 of demo, each row labelled with its Pais.
dataClus <- demo[4:8]
row.names(dataClus) <- demo$Pais
library(BBmisc)
## 
## Attaching package: 'BBmisc'
## The following object is masked from 'package:base':
## 
##     isFALSE
# Vertical boxplots of columns 4 to 8, with rotated, shrunken axis labels.
boxplot(demo[4:8], horizontal = FALSE, las = 2, cex.axis = 0.5)

# Standardize those columns in place, then print their correlation matrix.
demo[4:8] <- normalize(demo[4:8], method = "standardize")
cor(demo[4:8])
##                Electoral Functioning participation   culture Civilliberties
## Electoral      1.0000000   0.8104563     0.7960529 0.5166325      0.9122255
## Functioning    0.8104563   1.0000000     0.7143889 0.6411997      0.8427678
## participation  0.7960529   0.7143889     1.0000000 0.5640728      0.7937461
## culture        0.5166325   0.6411997     0.5640728 1.0000000      0.6329489
## Civilliberties 0.9122255   0.8427678     0.7937461 0.6329489      1.0000000
library(cluster)
# Pairwise Gower dissimilarities between the rows of dataClus.
g.dist <- daisy(x = dataClus, metric = "gower")
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Gap-statistic plot for PAM, exploring up to k.max = 10 clusters.
fviz_nbclust(
  dataClus,
  pam,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE
)

# Fix the RNG state, partition the Gower dissimilarities into 7 clusters with
# PAM, store each row's cluster label in dataClus, and show the first 15 rows.
set.seed(123)
res.pam <- pam(g.dist, k = 7, cluster.only = FALSE)
# The pam object stores the labels under `clustering`; spell the name out in
# full rather than relying on `$` partial matching of `cluster`.
dataClus$pam <- res.pam$clustering
kable_styling(kbl(head(dataClus, n = 15)))
Electoral Functioning participation culture Civilliberties pam
 Norway 10.00 9.64 10.00 10.00 9.12 1
 New Zealand 10.00 8.93 9.44 8.75 9.71 1
 Finland 10.00 9.29 8.89 8.75 9.41 1
 Sweden 9.58 9.29 8.33 10.00 9.12 1
 Iceland 10.00 8.21 8.89 9.38 9.41 1
 Denmark 10.00 8.93 8.33 9.38 8.82 1
 Ireland 10.00 7.86 8.33 9.38 9.41 1
 Taiwan 10.00 9.64 7.78 8.13 9.41 1
 Australia 10.00 8.57 7.78 8.75 9.41 1
 Switzerland 9.58 8.93 7.78 9.38 8.82 1
 Netherlands 9.58 8.93 8.33 8.75 8.82 1
 Canada 10.00 8.21 8.89 8.13 9.12 1
 Uruguay 10.00 8.57 7.22 8.75 9.71 1
 Luxembourg 10.00 8.57 6.67 8.75 9.41 1
 Germany 9.58 8.21 8.33 8.13 9.12 1
# Silhouette plot for the PAM solution, without the printed summary.
fviz_silhouette(res.pam, print.summary = FALSE)

# Collect the per-row silhouette widths, keep the row names as a `country`
# column, and list in sorted order the ones whose width is negative.
silPAM <- data.frame(res.pam$silinfo$widths)
silPAM$country <- row.names(silPAM)
poorPAM <- sort(silPAM$country[silPAM$sil_width < 0])
poorPAM
##  [1] " Albania"             " Angola"              " Eswatini"           
##  [4] " Ghana"               " Guyana"              " Hong Kong"          
##  [7] " Kenya"               " Laos"                " Mexico"             
## [10] " Namibia"             " Qatar"               " Slovakia"           
## [13] " Trinidad and Tobago" " Tunisia"             " Turkey"             
## [16] " Ukraine"
# Mean of every other column of dataClus within each value of `pam`.
aggregate(. ~ pam, data = dataClus, FUN = mean)
##   pam Electoral Functioning participation  culture Civilliberties
## 1   1  9.888000    8.785333      8.332667 8.960667       9.254667
## 2   2  9.442963    7.014074      6.893704 7.016296       8.214815
## 3   3  8.734333    5.828333      6.240667 4.377667       6.745333
## 4   4  6.463103    4.810345      5.480000 5.217586       5.517241
## 5   5  2.919600    2.446000      4.666000 4.902400       3.988800
## 6   6  0.233871    2.322581      3.458065 4.478387       2.086452
## 7   7  0.692000    0.622000      1.945000 3.065000       0.939000
# Gap-statistic plot for agglomerative (agnes) hierarchical clustering.
# By this point dataClus also holds the `pam` label column added after the PAM
# fit. The gap statistic is computed from the data passed as first argument,
# so that column is dropped here; otherwise the earlier cluster labels would
# be treated as an extra feature.
fviz_nbclust(
  dataClus[, setdiff(names(dataClus), "pam"), drop = FALSE],
  hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "agnes"
)

Último ejercicio

library(rio)
# Import the HDI statistical-annex workbook from its URL, skipping the first
# 4 rows and leaving column names unrepaired, then preview the first 15 rows.
WhereIDH <- "https://github.com/Estadistica-AnalisisPolitico/DataFiles-estadistica/raw/main/HDR21-22_Statistical_Annex_HDI_Table.xlsx"
idh <- rio::import(WhereIDH, skip = 4, .name_repair = "minimal")
kable_styling(
  kbl(head(idh, n = 15)),
  bootstrap_options = "striped",
  font_size = 10
)
Human Development Index (HDI) Life expectancy at birth Expected years of schooling Mean years of schooling Gross national income (GNI) per capita GNI per capita rank minus HDI rank HDI rank
HDI rank Country Value NA (years) NA (years) NA (years) NA (2017 PPP $) NA NA NA NA
NA NA 2021 NA 2021 NA 2021 a 2021 a 2021 NA 2021 b 2020
NA VERY HIGH HUMAN DEVELOPMENT NA NA NA NA NA NA NA NA NA NA NA NA NA
1 Switzerland 0.96199999999999997 NA 83.987200000000001 NA 16.50029945 NA 13.85966015 NA 66933.004539999994 NA 5 NA 3
2 Norway 0.96099999999999997 NA 83.233900000000006 NA 18.185199740000002 c 13.00362968 NA 64660.106220000001 NA 6 NA 1
3 Iceland 0.95899999999999996 NA 82.678200000000004 NA 19.163059230000002 c 13.76716995 NA 55782.049809999997 NA 11 NA 2
4 Hong Kong, China (SAR) 0.95199999999999996 NA 85.473399999999998 d 17.278169630000001 NA 12.22620964 NA 62606.845399999998 NA 6 NA 4
5 Australia 0.95099999999999996 NA 84.526499999999999 NA 21.054590229999999 c 12.726819989999999 NA 49238.433349999999 NA 18 NA 5
6 Denmark 0.94799999999999995 NA 81.375299999999996 NA 18.714799880000001 c 12.96049023 NA 60364.785949999998 NA 6 NA 5
7 Sweden 0.94699999999999995 NA 82.9833 NA 19.418529509999999 c 12.609720230000001 NA 54489.37401 NA 9 NA 9
8 Ireland 0.94499999999999995 NA 81.997600000000006 NA 18.94522095 c 11.58222303 e 76168.984429999997 f -3 NA 8
9 Germany 0.94199999999999995 NA 80.630099999999999 NA 17.010139469999999 NA 14.090966910000001 e 54534.216820000001 NA 6 NA 7
10 Netherlands 0.94099999999999995 NA 81.687299999999993 NA 18.693165220000001 c,e 12.581629749999999 NA 55979.411 NA 3 NA 10
11 Finland 0.94 NA 82.0381 NA 19.051929470000001 c 12.87362003 NA 49452.166720000001 NA 11 NA 12
12 Singapore 0.93899999999999995 NA 82.754499999999993 NA 16.524320599999999 NA 11.924880030000001 NA 90918.644709999993 f -10 NA 10
# Keep six columns (selected by position) and give them short names.
idh <- idh[, c(2, 3, 5, 7, 9, 11)]
newIDH <- c(
  "Pais", "puntuacion", "EsperanzaVida",
  "EscolaridadDuracion", "EscolaridadPromedio", "PBI"
)
names(idh) <- newIDH
# Restrict to the first 202 rows, then drop rows that have no Pais value.
idh <- idh[seq_len(202), ]
idh <- idh[!is.na(idh$Pais), ]
# Every column except Pais becomes numeric; text that is not a number
# turns into NA (with a coercion warning).
idh[, -1] <- lapply(idh[, -1], as.numeric)
## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion
# Render, as a striped table, every row of idh that has a missing value in
# any column other than Pais.
kable_styling(
  kbl(idh[!complete.cases(idh[, -1]), ]),
  bootstrap_options = "striped",
  font_size = 10
)
Pais puntuacion EsperanzaVida EscolaridadDuracion EscolaridadPromedio PBI
1 Country NA NA NA NA NA
3 VERY HIGH HUMAN DEVELOPMENT NA NA NA NA NA
70 HIGH HUMAN DEVELOPMENT NA NA NA NA NA
120 MEDIUM HUMAN DEVELOPMENT NA NA NA NA NA
165 LOW HUMAN DEVELOPMENT NA NA NA NA NA
198 OTHER COUNTRIES OR TERRITORIES NA NA NA NA NA
199 Korea (Democratic People’s Rep. of) NA 73.2845 10.78317 NA NA
200 Monaco NA 85.9463 NA NA NA
201 Nauru NA 63.6170 11.69042 NA 17729.741
202 Somalia NA 55.2803 NA NA 1017.968
# Drop rows with a missing value outside Pais and reset the row names.
idh <- idh[complete.cases(idh[, -1]), ]
row.names(idh) <- NULL
# Strip leading and trailing horizontal/vertical whitespace from the country
# labels of both tables before comparing them.
idh$Pais <- trimws(idh$Pais, which = "both", whitespace = "[\\h\\v]")
demo$Pais <- trimws(demo$Pais, which = "both", whitespace = "[\\h\\v]")
# Labels present in idh but absent from demo, sorted.
idh$Pais %>%
  setdiff(demo$Pais) %>%
  sort()
##  [1] "Andorra"                            "Antigua and Barbuda"               
##  [3] "Bahamas"                            "Barbados"                          
##  [5] "Belize"                             "Bolivia (Plurinational State of)"  
##  [7] "Brunei Darussalam"                  "Cabo Verde"                        
##  [9] "Congo"                              "Congo (Democratic Republic of the)"
## [11] "Côte d'Ivoire"                      "Czechia"                           
## [13] "Dominica"                           "Eswatini (Kingdom of)"             
## [15] "Grenada"                            "Hong Kong, China (SAR)"            
## [17] "Iran (Islamic Republic of)"         "Kiribati"                          
## [19] "Korea (Republic of)"                "Lao People's Democratic Republic"  
## [21] "Liechtenstein"                      "Maldives"                          
## [23] "Marshall Islands"                   "Micronesia (Federated States of)"  
## [25] "Moldova (Republic of)"              "Palau"                             
## [27] "Palestine, State of"                "Russian Federation"                
## [29] "Saint Kitts and Nevis"              "Saint Lucia"                       
## [31] "Saint Vincent and the Grenadines"   "Samoa"                             
## [33] "San Marino"                         "Sao Tome and Principe"             
## [35] "Seychelles"                         "Solomon Islands"                   
## [37] "South Sudan"                        "Syrian Arab Republic"              
## [39] "Tanzania (United Republic of)"      "Timor-Leste"                       
## [41] "Tonga"                              "Türkiye"                           
## [43] "Tuvalu"                             "Vanuatu"                           
## [45] "Venezuela (Bolivarian Republic of)" "Viet Nam"
# Labels present in demo but absent from idh, sorted.
demo$Pais %>%
  setdiff(idh$Pais) %>%
  sort()
##  [1] "Bolivia"                          "Cape Verde"                      
##  [3] "Czech Republic"                   "Democratic Republic of the Congo"
##  [5] "East Timor"                       "Eswatini"                        
##  [7] "Hong Kong"                        "Iran"                            
##  [9] "Ivory Coast"                      "Laos"                            
## [11] "Moldova"                          "North Korea"                     
## [13] "Palestine"                        "Republic of the Congo"           
## [15] "Russia"                           "South Korea"                     
## [17] "Syria"                            "Taiwan"                          
## [19] "Tanzania"                         "Turkey"                          
## [21] "Venezuela"                        "Vietnam"
# Join the HDI table with the Democracy Index table on the country label.
# The two sources spell a number of countries differently (the setdiff
# listings printed just before this step enumerate them), and a plain merge
# silently drops every such country. The lookup below maps the idh spelling
# (name) onto the demo spelling (value) so those rows survive the join.
# Non-ASCII letters are written as \u escapes so the literals do not depend on
# the encoding of this file. Labels that exist in only one source are left
# untouched and still fall out of the inner join.
idh_to_demo <- c(
  "Bolivia (Plurinational State of)" = "Bolivia",
  "Cabo Verde" = "Cape Verde",
  "Congo" = "Republic of the Congo",
  "Congo (Democratic Republic of the)" = "Democratic Republic of the Congo",
  "C\u00f4te d'Ivoire" = "Ivory Coast",
  "Czechia" = "Czech Republic",
  "Eswatini (Kingdom of)" = "Eswatini",
  "Hong Kong, China (SAR)" = "Hong Kong",
  "Iran (Islamic Republic of)" = "Iran",
  "Korea (Republic of)" = "South Korea",
  "Lao People's Democratic Republic" = "Laos",
  "Moldova (Republic of)" = "Moldova",
  "Palestine, State of" = "Palestine",
  "Russian Federation" = "Russia",
  "Syrian Arab Republic" = "Syria",
  "Tanzania (United Republic of)" = "Tanzania",
  "Timor-Leste" = "East Timor",
  "T\u00fcrkiye" = "Turkey",
  "Venezuela (Bolivarian Republic of)" = "Venezuela",
  "Viet Nam" = "Vietnam"
)
# Recode on a copy so that idh itself keeps its original labels.
idh_harmonized <- idh
needs_recode <- idh_harmonized$Pais %in% names(idh_to_demo)
idh_harmonized$Pais[needs_recode] <-
  unname(idh_to_demo[idh_harmonized$Pais[needs_recode]])
# Pais is the join key; name it explicitly instead of relying on merge()
# picking whatever column names the two tables happen to share.
idhdemo <- merge(idh_harmonized, demo, by = "Pais")
# Vertical boxplots of columns 3-6 and 9-13 of the merged table.
boxplot(
  idhdemo[c(3:6, 9:13)],
  horizontal = FALSE,
  las = 2,
  cex.axis = 0.5
)

# Standardize those columns in place and copy them out as the clustering
# input, with each row labelled by its Pais.
idhdemo[c(3:6, 9:13)] <- normalize(
  idhdemo[c(3:6, 9:13)],
  method = "standardize"
)
dataClus2 <- idhdemo[c(3:6, 9:13)]
row.names(dataClus2) <- idhdemo$Pais
# Gower dissimilarities for the merged data (overwrites the earlier g.dist).
g.dist <- daisy(x = dataClus2, metric = "gower")
# Gap-statistic plot for agnes hierarchical clustering, up to 10 clusters.
fviz_nbclust(
  dataClus2,
  hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "agnes"
)

# Fix the RNG state and cut an agnes hierarchy (hc_method "ward.D") built on
# the Gower dissimilarities into 3 clusters.
set.seed(123)
res.agnes <- hcut(
  g.dist,
  k = 3,
  hc_func = "agnes",
  hc_method = "ward.D"
)

# Store each row's cluster label and draw the silhouette plot, without the
# printed summary.
dataClus2$agnes <- res.agnes$cluster
fviz_silhouette(res.agnes, print.summary = FALSE)

# Gap-statistic plot for divisive (diana) hierarchical clustering.
# dataClus2 already carries the `agnes` label column by this point (and
# `diana` too if this section is re-run). The gap statistic is computed from
# the data passed as first argument, so label columns are removed here to keep
# earlier cluster assignments from acting as features.
fviz_nbclust(
  dataClus2[, setdiff(names(dataClus2), c("agnes", "diana")), drop = FALSE],
  hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "diana"
)

# Fix the RNG state, cut a diana hierarchy built on the Gower dissimilarities
# into 3 clusters, store the labels and draw the silhouette plot.
set.seed(123)
res.diana <- hcut(g.dist, k = 3, hc_func = "diana")
dataClus2$diana <- res.diana$cluster
fviz_silhouette(res.diana, print.summary = FALSE)