library(htmltab)
# Where the Democracy Index table lives: the Wikipedia page URL and the XPath
# of the table body to extract from it.
WhereDEMO <- list(
  page = "https://en.wikipedia.org/wiki/Democracy_Index#Components",
  xpath = '//*[@id="mw-content-text"]/div[1]/table[6]/tbody'
)

# Scrape that table into a data frame.
demo <- htmltab(
  doc = WhereDEMO$page,
  which = WhereDEMO$xpath,
  encoding = "UTF-8"
)

# Drop the 1st, 2nd and 6th scraped columns, then give the remaining eight
# columns short names.
demo <- demo[, -c(1, 2, 6)]
newDemo <- c(
  "Pais", "RegimeType", "Score", "Electoral", "Functioning",
  "participation", "culture", "Civilliberties"
)
names(demo) <- newDemo

# Inspect the structure of the result.
str(demo)
## 'data.frame': 167 obs. of 8 variables:
## $ Pais : chr " Norway" " New Zealand" " Finland" " Sweden" ...
## $ RegimeType : chr "Full democracy" "Full democracy" "Full democracy" "Full democracy" ...
## $ Score : chr "9.75" "9.37" "9.27" "9.26" ...
## $ Electoral : chr "10.00" "10.00" "10.00" "9.58" ...
## $ Functioning : chr "9.64" "8.93" "9.29" "9.29" ...
## $ participation : chr "10.00" "9.44" "8.89" "8.33" ...
## $ culture : chr "10.00" "8.75" "8.75" "10.00" ...
## $ Civilliberties: chr "9.12" "9.71" "9.41" "9.12" ...
# Regime categories, listed in the order used for the ordered factor's levels.
OrdinalVector <- c(
  "Authoritarian", "Hybrid regime", "Flawed democracy", "Full democracy"
)
demo$RegimeType <- factor(
  demo$RegimeType,
  levels = OrdinalVector,
  ordered = TRUE # TRUE is a reserved word; T is a variable that can be rebound
)

# Columns 3 to 8 (Score plus the five component scores) were scraped as
# character strings; convert them to numeric before summarising.
demo[, 3:8] <- lapply(demo[, 3:8], as.numeric)
summary(demo)
## Pais RegimeType Score Electoral
## Length:167 Authoritarian :59 Min. :0.320 Min. : 0.000
## Class :character Hybrid regime :34 1st Qu.:3.210 1st Qu.: 1.460
## Mode :character Flawed democracy:53 Median :5.610 Median : 7.000
## Full democracy :21 Mean :5.281 Mean : 5.628
## 3rd Qu.:7.095 3rd Qu.: 9.170
## Max. :9.750 Max. :10.000
## Functioning participation culture Civilliberties
## Min. :0.000 Min. : 0.000 Min. : 1.25 Min. :0.00
## 1st Qu.:2.605 1st Qu.: 3.890 1st Qu.: 3.75 1st Qu.:3.24
## Median :5.000 Median : 5.560 Median : 5.00 Median :5.59
## Mean :4.640 Mean : 5.393 Mean : 5.38 Mean :5.37
## 3rd Qu.:6.430 3rd Qu.: 6.670 3rd Qu.: 6.25 3rd Qu.:7.65
## Max. :9.640 Max. :10.000 Max. :10.00 Max. :9.71
library(kableExtra)
library(magrittr)
# Render, as a striped small-font table, the rows of demo that have a missing
# value in any column other than the first one (Pais).
demo[!complete.cases(demo[, -1]), ] %>%
  kbl() %>%
  kable_styling(bootstrap_options = "striped", font_size = 10)
|
Pais
|
RegimeType
|
Score
|
Electoral
|
Functioning
|
participation
|
culture
|
Civilliberties
|
# Keep only fully observed rows and reset the row names to 1..n.
demo <- demo[complete.cases(demo), ]
row.names(demo) <- NULL

# Clustering input: columns 4 to 8 of demo, with each row labelled by its
# country name so later output identifies countries directly.
dataClus <- demo[, 4:8]
row.names(dataClus) <- demo$Pais
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following object is masked from 'package:base':
##
## isFALSE
# Vertical boxplots of columns 4 to 8 of demo; las = 2 draws the axis labels
# perpendicular to the axis and cex.axis shrinks them.
boxplot(demo[, 4:8], horizontal = FALSE, las = 2, cex.axis = 0.5)

# Standardize those columns in place and inspect their pairwise correlations.
# dataClus was copied from demo before this step, so it is not affected.
demo[, 4:8] <- normalize(demo[, 4:8], method = "standardize")
cor(demo[, 4:8])
## Electoral Functioning participation culture Civilliberties
## Electoral 1.0000000 0.8104563 0.7960529 0.5166325 0.9122255
## Functioning 0.8104563 1.0000000 0.7143889 0.6411997 0.8427678
## participation 0.7960529 0.7143889 1.0000000 0.5640728 0.7937461
## culture 0.5166325 0.6411997 0.5640728 1.0000000 0.6329489
## Civilliberties 0.9122255 0.8427678 0.7937461 0.6329489 1.0000000
library(cluster)
# Pairwise Gower dissimilarities between the rows of dataClus.
g.dist <- daisy(x = dataClus, metric = "gower")
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Gap-statistic plot to suggest a number of clusters (up to 10) for PAM.
fviz_nbclust(
  dataClus,
  pam,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE # TRUE/FALSE are reserved words; T/F can be reassigned
)

# Partition the countries into 7 clusters around medoids, working from the
# precomputed dissimilarities. The seed is fixed for reproducibility.
set.seed(123)
res.pam <- pam(g.dist, k = 7, cluster.only = FALSE)

# Store each country's cluster label next to its scores and preview the first
# 15 rows as a table.
dataClus$pam <- res.pam$cluster
head(dataClus, 15) %>%
  kbl() %>%
  kable_styling()
|
|
Electoral
|
Functioning
|
participation
|
culture
|
Civilliberties
|
pam
|
|
Norway
|
10.00
|
9.64
|
10.00
|
10.00
|
9.12
|
1
|
|
New Zealand
|
10.00
|
8.93
|
9.44
|
8.75
|
9.71
|
1
|
|
Finland
|
10.00
|
9.29
|
8.89
|
8.75
|
9.41
|
1
|
|
Sweden
|
9.58
|
9.29
|
8.33
|
10.00
|
9.12
|
1
|
|
Iceland
|
10.00
|
8.21
|
8.89
|
9.38
|
9.41
|
1
|
|
Denmark
|
10.00
|
8.93
|
8.33
|
9.38
|
8.82
|
1
|
|
Ireland
|
10.00
|
7.86
|
8.33
|
9.38
|
9.41
|
1
|
|
Taiwan
|
10.00
|
9.64
|
7.78
|
8.13
|
9.41
|
1
|
|
Australia
|
10.00
|
8.57
|
7.78
|
8.75
|
9.41
|
1
|
|
Switzerland
|
9.58
|
8.93
|
7.78
|
9.38
|
8.82
|
1
|
|
Netherlands
|
9.58
|
8.93
|
8.33
|
8.75
|
8.82
|
1
|
|
Canada
|
10.00
|
8.21
|
8.89
|
8.13
|
9.12
|
1
|
|
Uruguay
|
10.00
|
8.57
|
7.22
|
8.75
|
9.71
|
1
|
|
Luxembourg
|
10.00
|
8.57
|
6.67
|
8.75
|
9.41
|
1
|
|
Germany
|
9.58
|
8.21
|
8.33
|
8.13
|
9.12
|
1
|
# Silhouette plot of the PAM solution, without the printed summary.
fviz_silhouette(res.pam, print.summary = FALSE)

# Per-observation silhouette information; the row names carry the country
# names, so copy them into a regular column.
silPAM <- data.frame(res.pam$silinfo$widths)
silPAM$country <- row.names(silPAM)

# Countries with a negative silhouette width, in alphabetical order.
poorPAM <- sort(silPAM[silPAM$sil_width < 0, "country"])
poorPAM
## [1] " Albania" " Angola" " Eswatini"
## [4] " Ghana" " Guyana" " Hong Kong"
## [7] " Kenya" " Laos" " Mexico"
## [10] " Namibia" " Qatar" " Slovakia"
## [13] " Trinidad and Tobago" " Tunisia" " Turkey"
## [16] " Ukraine"
# Mean of every other column of dataClus within each PAM cluster.
aggregate(. ~ pam, data = dataClus, FUN = mean)
## pam Electoral Functioning participation culture Civilliberties
## 1 1 9.888000 8.785333 8.332667 8.960667 9.254667
## 2 2 9.442963 7.014074 6.893704 7.016296 8.214815
## 3 3 8.734333 5.828333 6.240667 4.377667 6.745333
## 4 4 6.463103 4.810345 5.480000 5.217586 5.517241
## 5 5 2.919600 2.446000 4.666000 4.902400 3.988800
## 6 6 0.233871 2.322581 3.458065 4.478387 2.086452
## 7 7 0.692000 0.622000 1.945000 3.065000 0.939000
# Gap-statistic plot to suggest a number of clusters (up to 10) for
# agglomerative hierarchical clustering (hcut with hc_func = "agnes").
# By this point dataClus also holds the `pam` cluster labels. The gap statistic
# is computed on the data frame passed as first argument, so that label column
# is excluded to keep a previous clustering result from acting as a feature.
fviz_nbclust(
  dataClus[, setdiff(names(dataClus), "pam"), drop = FALSE],
  hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "agnes"
)

Último ejercicio
library(rio)
# URL of the Excel workbook with the HDI statistical annex table.
WhereIDH <- "https://github.com/Estadistica-AnalisisPolitico/DataFiles-estadistica/raw/main/HDR21-22_Statistical_Annex_HDI_Table.xlsx"

# Read it, skipping the first 4 rows and leaving the column names unrepaired.
idh <- rio::import(WhereIDH, skip = 4, .name_repair = "minimal")

# Preview the first 15 rows as a striped small-font table.
head(idh, 15) %>%
  kbl() %>%
  kable_styling(bootstrap_options = "striped", font_size = 10)
|
|
|
Human Development Index (HDI)
|
|
Life expectancy at birth
|
|
Expected years of schooling
|
|
Mean years of schooling
|
|
Gross national income (GNI) per capita
|
|
GNI per capita rank minus HDI rank
|
|
HDI rank
|
|
HDI rank
|
Country
|
Value
|
NA
|
(years)
|
NA
|
(years)
|
NA
|
(years)
|
NA
|
(2017 PPP $)
|
NA
|
NA
|
NA
|
NA
|
|
NA
|
NA
|
2021
|
NA
|
2021
|
NA
|
2021
|
a
|
2021
|
a
|
2021
|
NA
|
2021
|
b
|
2020
|
|
NA
|
VERY HIGH HUMAN DEVELOPMENT
|
NA
|
NA
|
NA
|
NA
|
NA
|
NA
|
NA
|
NA
|
NA
|
NA
|
NA
|
NA
|
NA
|
|
1
|
Switzerland
|
0.96199999999999997
|
NA
|
83.987200000000001
|
NA
|
16.50029945
|
NA
|
13.85966015
|
NA
|
66933.004539999994
|
NA
|
5
|
NA
|
3
|
|
2
|
Norway
|
0.96099999999999997
|
NA
|
83.233900000000006
|
NA
|
18.185199740000002
|
c
|
13.00362968
|
NA
|
64660.106220000001
|
NA
|
6
|
NA
|
1
|
|
3
|
Iceland
|
0.95899999999999996
|
NA
|
82.678200000000004
|
NA
|
19.163059230000002
|
c
|
13.76716995
|
NA
|
55782.049809999997
|
NA
|
11
|
NA
|
2
|
|
4
|
Hong Kong, China (SAR)
|
0.95199999999999996
|
NA
|
85.473399999999998
|
d
|
17.278169630000001
|
NA
|
12.22620964
|
NA
|
62606.845399999998
|
NA
|
6
|
NA
|
4
|
|
5
|
Australia
|
0.95099999999999996
|
NA
|
84.526499999999999
|
NA
|
21.054590229999999
|
c
|
12.726819989999999
|
NA
|
49238.433349999999
|
NA
|
18
|
NA
|
5
|
|
6
|
Denmark
|
0.94799999999999995
|
NA
|
81.375299999999996
|
NA
|
18.714799880000001
|
c
|
12.96049023
|
NA
|
60364.785949999998
|
NA
|
6
|
NA
|
5
|
|
7
|
Sweden
|
0.94699999999999995
|
NA
|
82.9833
|
NA
|
19.418529509999999
|
c
|
12.609720230000001
|
NA
|
54489.37401
|
NA
|
9
|
NA
|
9
|
|
8
|
Ireland
|
0.94499999999999995
|
NA
|
81.997600000000006
|
NA
|
18.94522095
|
c
|
11.58222303
|
e
|
76168.984429999997
|
f
|
-3
|
NA
|
8
|
|
9
|
Germany
|
0.94199999999999995
|
NA
|
80.630099999999999
|
NA
|
17.010139469999999
|
NA
|
14.090966910000001
|
e
|
54534.216820000001
|
NA
|
6
|
NA
|
7
|
|
10
|
Netherlands
|
0.94099999999999995
|
NA
|
81.687299999999993
|
NA
|
18.693165220000001
|
c,e
|
12.581629749999999
|
NA
|
55979.411
|
NA
|
3
|
NA
|
10
|
|
11
|
Finland
|
0.94
|
NA
|
82.0381
|
NA
|
19.051929470000001
|
c
|
12.87362003
|
NA
|
49452.166720000001
|
NA
|
11
|
NA
|
12
|
|
12
|
Singapore
|
0.93899999999999995
|
NA
|
82.754499999999993
|
NA
|
16.524320599999999
|
NA
|
11.924880030000001
|
NA
|
90918.644709999993
|
f
|
-10
|
NA
|
10
|
# Keep the country column and the five value columns (positions 2, 3, 5, 7, 9
# and 11), then give them short names.
idh <- idh[, c(2, 3, 5, 7, 9, 11)]
newIDH <- c(
  "Pais", "puntuacion", "EsperanzaVida",
  "EscolaridadDuracion", "EscolaridadPromedio", "PBI"
)
names(idh) <- newIDH

# Restrict to the first 202 rows and drop rows with no country name.
idh <- idh[1:202, ]
idh <- idh[!is.na(idh$Pais), ]

# Convert every column except Pais to numeric. Cells that are not numbers
# become NA, and as.numeric() warns about the coercion.
idh[, -1] <- lapply(idh[, -1], as.numeric)
## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion
## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion
## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion
## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion
## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion
# Render, as a striped small-font table, the rows of idh that have a missing
# value in any column other than the first one (Pais).
idh[!complete.cases(idh[, -1]), ] %>%
  kbl() %>%
  kable_styling(bootstrap_options = "striped", font_size = 10)
|
|
Pais
|
puntuacion
|
EsperanzaVida
|
EscolaridadDuracion
|
EscolaridadPromedio
|
PBI
|
|
1
|
Country
|
NA
|
NA
|
NA
|
NA
|
NA
|
|
3
|
VERY HIGH HUMAN DEVELOPMENT
|
NA
|
NA
|
NA
|
NA
|
NA
|
|
70
|
HIGH HUMAN DEVELOPMENT
|
NA
|
NA
|
NA
|
NA
|
NA
|
|
120
|
MEDIUM HUMAN DEVELOPMENT
|
NA
|
NA
|
NA
|
NA
|
NA
|
|
165
|
LOW HUMAN DEVELOPMENT
|
NA
|
NA
|
NA
|
NA
|
NA
|
|
198
|
OTHER COUNTRIES OR TERRITORIES
|
NA
|
NA
|
NA
|
NA
|
NA
|
|
199
|
Korea (Democratic People’s Rep. of)
|
NA
|
73.2845
|
10.78317
|
NA
|
NA
|
|
200
|
Monaco
|
NA
|
85.9463
|
NA
|
NA
|
NA
|
|
201
|
Nauru
|
NA
|
63.6170
|
11.69042
|
NA
|
17729.741
|
|
202
|
Somalia
|
NA
|
55.2803
|
NA
|
NA
|
1017.968
|
# Keep the rows of idh that are complete in every column except Pais, and
# reset the row names to 1..n.
idh <- idh[complete.cases(idh[, -1]), ]
row.names(idh) <- NULL

# Strip leading and trailing horizontal/vertical whitespace from the country
# names in both tables so they can be compared.
idh$Pais <- trimws(idh$Pais, whitespace = "[\\h\\v]")
demo$Pais <- trimws(demo$Pais, whitespace = "[\\h\\v]")

# Country names that appear in idh but not in demo, alphabetically.
setdiff(idh$Pais, demo$Pais) %>%
  sort()
## [1] "Andorra" "Antigua and Barbuda"
## [3] "Bahamas" "Barbados"
## [5] "Belize" "Bolivia (Plurinational State of)"
## [7] "Brunei Darussalam" "Cabo Verde"
## [9] "Congo" "Congo (Democratic Republic of the)"
## [11] "Côte d'Ivoire" "Czechia"
## [13] "Dominica" "Eswatini (Kingdom of)"
## [15] "Grenada" "Hong Kong, China (SAR)"
## [17] "Iran (Islamic Republic of)" "Kiribati"
## [19] "Korea (Republic of)" "Lao People's Democratic Republic"
## [21] "Liechtenstein" "Maldives"
## [23] "Marshall Islands" "Micronesia (Federated States of)"
## [25] "Moldova (Republic of)" "Palau"
## [27] "Palestine, State of" "Russian Federation"
## [29] "Saint Kitts and Nevis" "Saint Lucia"
## [31] "Saint Vincent and the Grenadines" "Samoa"
## [33] "San Marino" "Sao Tome and Principe"
## [35] "Seychelles" "Solomon Islands"
## [37] "South Sudan" "Syrian Arab Republic"
## [39] "Tanzania (United Republic of)" "Timor-Leste"
## [41] "Tonga" "Türkiye"
## [43] "Tuvalu" "Vanuatu"
## [45] "Venezuela (Bolivarian Republic of)" "Viet Nam"
# Country names that appear in demo but not in idh, alphabetically.
setdiff(demo$Pais, idh$Pais) %>%
  sort()
## [1] "Bolivia" "Cape Verde"
## [3] "Czech Republic" "Democratic Republic of the Congo"
## [5] "East Timor" "Eswatini"
## [7] "Hong Kong" "Iran"
## [9] "Ivory Coast" "Laos"
## [11] "Moldova" "North Korea"
## [13] "Palestine" "Republic of the Congo"
## [15] "Russia" "South Korea"
## [17] "Syria" "Taiwan"
## [19] "Tanzania" "Turkey"
## [21] "Venezuela" "Vietnam"
# Join the two tables on the country name. The key is named explicitly so the
# join does not silently change if the tables ever share another column name.
# merge() keeps only the countries whose Pais value is identical in both
# tables, so any name spelled differently in the two sources is dropped here.
# NOTE(review): confirm that dropping those countries is intended.
idhdemo <- merge(idh, demo, by = "Pais")

# Vertical boxplots of the clustering variables: columns 3:6 come from idh
# (EsperanzaVida to PBI) and columns 9:13 from demo (Electoral to
# Civilliberties).
boxplot(
  idhdemo[, c(3:6, 9:13)],
  horizontal = FALSE,
  las = 2,
  cex.axis = 0.5
)

# Standardize the nine clustering variables (columns 3:6 and 9:13) in place.
idhdemo[, c(3:6, 9:13)] <- normalize(
  idhdemo[, c(3:6, 9:13)],
  method = "standardize"
)

# Clustering input: those nine columns, with rows labelled by country name.
dataClus2 <- idhdemo[, c(3:6, 9:13)]
row.names(dataClus2) <- idhdemo$Pais

# Pairwise Gower dissimilarities. This overwrites the g.dist computed earlier.
g.dist <- daisy(dataClus2, metric = "gower")

# Gap-statistic plot to suggest a number of clusters (up to 10) for
# agglomerative hierarchical clustering.
fviz_nbclust(
  dataClus2,
  hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE, # TRUE/FALSE are reserved words; T/F can be reassigned
  hc_func = "agnes"
)

# Agglomerative hierarchical clustering (agnes, Ward linkage) cut into 3
# clusters, working from the precomputed dissimilarities.
set.seed(123)
res.agnes <- hcut(g.dist, k = 3, hc_func = "agnes", hc_method = "ward.D")

# Store each country's cluster label and draw the silhouette plot without the
# printed summary.
dataClus2$agnes <- res.agnes$cluster
fviz_silhouette(res.agnes, print.summary = FALSE)

# Gap-statistic plot to suggest a number of clusters (up to 10) for divisive
# hierarchical clustering (hcut with hc_func = "diana").
# By this point dataClus2 also holds cluster-label columns. The gap statistic
# is computed on the data frame passed as first argument, so those label
# columns are excluded to keep earlier clustering results from acting as
# features.
fviz_nbclust(
  dataClus2[, setdiff(names(dataClus2), c("agnes", "diana")), drop = FALSE],
  hcut,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE,
  hc_func = "diana"
)

# Divisive hierarchical clustering (diana) cut into 3 clusters, working from
# the precomputed dissimilarities.
set.seed(123)
res.diana <- hcut(g.dist, k = 3, hc_func = "diana")

# Store each country's cluster label and draw the silhouette plot without the
# printed summary.
dataClus2$diana <- res.diana$cluster
fviz_silhouette(res.diana, print.summary = FALSE)
