Práctica 4

library(htmltab)
# Where the Democracy Index "Components" table lives on Wikipedia:
# the page URL plus the XPath of the table body to extract.
WhereDEMO <- list(
  page  = "https://en.wikipedia.org/wiki/Democracy_Index#Components",
  xpath = '//*[@id="mw-content-text"]/div[1]/table[6]/tbody'
)

# Download that table into a data frame.
demo <- htmltab(
  doc      = WhereDEMO$page,
  which    = WhereDEMO$xpath,
  encoding = "UTF-8"
)

# Drop columns 1, 2 and 6, then rename the eight columns that remain.
demo <- demo[, -c(1, 2, 6)]
newDemo <- c(
  "Pais", "RegimeType", "Score", "Electoral",
  "Functioning", "participation", "culture", "Civilliberties"
)
names(demo) <- newDemo

# Inspect the resulting structure.
str(demo)

## 'data.frame':    167 obs. of  8 variables:
##  $ Pais          : chr  " Norway" " New Zealand" " Finland" " Sweden" ...
##  $ RegimeType    : chr  "Full democracy" "Full democracy" "Full democracy" "Full democracy" ...
##  $ Score         : chr  "9.75" "9.37" "9.27" "9.26" ...
##  $ Electoral     : chr  "10.00" "10.00" "10.00" "9.58" ...
##  $ Functioning   : chr  "9.64" "8.93" "9.29" "9.29" ...
##  $ participation : chr  "10.00" "9.44" "8.89" "8.33" ...
##  $ culture       : chr  "10.00" "8.75" "8.75" "10.00" ...
##  $ Civilliberties: chr  "9.12" "9.71" "9.41" "9.12" ...

# Regime categories in the order used as the levels of the ordered factor
# (first element is the lowest level).
OrdinalVector <- c(
  "Authoritarian", "Hybrid regime",
  "Flawed democracy", "Full democracy"
)
demo$RegimeType <- factor(
  demo$RegimeType,
  levels  = OrdinalVector,
  ordered = TRUE
)

# Columns 3 to 8 were read as character; convert them to numeric.
demo[, 3:8] <- lapply(demo[, 3:8], as.numeric)

summary(demo)

##      Pais                      RegimeType     Score         Electoral     
##  Length:167         Authoritarian   :59   Min.   :0.320   Min.   : 0.000  
##  Class :character   Hybrid regime   :34   1st Qu.:3.210   1st Qu.: 1.460  
##  Mode  :character   Flawed democracy:53   Median :5.610   Median : 7.000  
##                     Full democracy  :21   Mean   :5.281   Mean   : 5.628  
##                                           3rd Qu.:7.095   3rd Qu.: 9.170  
##                                           Max.   :9.750   Max.   :10.000  
##   Functioning    participation       culture      Civilliberties
##  Min.   :0.000   Min.   : 0.000   Min.   : 1.25   Min.   :0.00  
##  1st Qu.:2.605   1st Qu.: 3.890   1st Qu.: 3.75   1st Qu.:3.24  
##  Median :5.000   Median : 5.560   Median : 5.00   Median :5.59  
##  Mean   :4.640   Mean   : 5.393   Mean   : 5.38   Mean   :5.37  
##  3rd Qu.:6.430   3rd Qu.: 6.670   3rd Qu.: 6.25   3rd Qu.:7.65  
##  Max.   :9.640   Max.   :10.000   Max.   :10.00   Max.   :9.71

library(kableExtra)
library(magrittr)
# Show every row with a missing value in any column other than the first
# (Pais), rendered as a striped table.
demo[!complete.cases(demo[, -1]), ] %>%
  kbl() %>%
  kable_styling(bootstrap_options = "striped", font_size = 10)

Pais	RegimeType	Score	Electoral	Functioning	participation	culture	Civilliberties

# Keep only fully observed rows and reset the row names.
demo <- demo[complete.cases(demo), ]
row.names(demo) <- NULL

# Clustering input: columns 4 to 8, with the country name as row name.
dataClus <- demo[, 4:8]
row.names(dataClus) <- demo$Pais

library(BBmisc)

## 
## Attaching package: 'BBmisc'

## The following object is masked from 'package:base':
## 
##     isFALSE

# Compare the spread of the five component scores (columns 4 to 8).
boxplot(demo[, 4:8], horizontal = FALSE, las = 2, cex.axis = 0.5)

# Standardize those columns in place. dataClus was copied from demo before
# this step, so it is not modified here.
demo[, 4:8] <- normalize(demo[, 4:8], method = "standardize")

# Pairwise correlations between the components.
cor(demo[, 4:8])

##                Electoral Functioning participation   culture Civilliberties
## Electoral      1.0000000   0.8104563     0.7960529 0.5166325      0.9122255
## Functioning    0.8104563   1.0000000     0.7143889 0.6411997      0.8427678
## participation  0.7960529   0.7143889     1.0000000 0.5640728      0.7937461
## culture        0.5166325   0.6411997     0.5640728 1.0000000      0.6329489
## Civilliberties 0.9122255   0.8427678     0.7937461 0.6329489      1.0000000

library(cluster)
# Pairwise Gower dissimilarities between the rows of dataClus.
g.dist <- daisy(x = dataClus, metric = "gower")

library(factoextra)

## Loading required package: ggplot2

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# Suggest a number of clusters for PAM with the gap statistic. The gap
# statistic compares the data against randomly generated reference data, so
# the seed is fixed first; otherwise the suggested k can change between runs.
set.seed(123)
fviz_nbclust(dataClus, pam, diss = g.dist, method = "gap_stat",
             k.max = 10, verbose = FALSE)

# Partition the countries into 7 clusters from the Gower dissimilarities.
# The seed is re-set here, as in the original script, so that later chunks
# start from the same random-number state as before.
set.seed(123)
res.pam <- pam(g.dist, 7, cluster.only = FALSE)

# Store each country's cluster label next to its scores and preview the
# first 15 rows.
dataClus$pam <- res.pam$cluster
head(dataClus, 15) %>%
  kbl() %>%
  kable_styling()

	Electoral	Functioning	participation	culture	Civilliberties	pam
Norway	10.00	9.64	10.00	10.00	9.12	1
New Zealand	10.00	8.93	9.44	8.75	9.71	1
Finland	10.00	9.29	8.89	8.75	9.41	1
Sweden	9.58	9.29	8.33	10.00	9.12	1
Iceland	10.00	8.21	8.89	9.38	9.41	1
Denmark	10.00	8.93	8.33	9.38	8.82	1
Ireland	10.00	7.86	8.33	9.38	9.41	1
Taiwan	10.00	9.64	7.78	8.13	9.41	1
Australia	10.00	8.57	7.78	8.75	9.41	1
Switzerland	9.58	8.93	7.78	9.38	8.82	1
Netherlands	9.58	8.93	8.33	8.75	8.82	1
Canada	10.00	8.21	8.89	8.13	9.12	1
Uruguay	10.00	8.57	7.22	8.75	9.71	1
Luxembourg	10.00	8.57	6.67	8.75	9.41	1
Germany	9.58	8.21	8.33	8.13	9.12	1

# Silhouette plot for the PAM solution.
fviz_silhouette(res.pam, print.summary = FALSE)

# Per-country silhouette information, with the country name (taken from the
# row names) as an explicit column.
silPAM <- data.frame(res.pam$silinfo$widths)
silPAM$country <- row.names(silPAM)

# Countries with a negative silhouette width, in alphabetical order.
poorPAM <- sort(silPAM[silPAM$sil_width < 0, "country"])
poorPAM

##  [1] " Albania"             " Angola"              " Eswatini"           
##  [4] " Ghana"               " Guyana"              " Hong Kong"          
##  [7] " Kenya"               " Laos"                " Mexico"             
## [10] " Namibia"             " Qatar"               " Slovakia"           
## [13] " Trinidad and Tobago" " Tunisia"             " Turkey"             
## [16] " Ukraine"

# Mean of every score within each PAM cluster.
aggregate(. ~ pam, data = dataClus, FUN = mean)

##   pam Electoral Functioning participation  culture Civilliberties
## 1   1  9.888000    8.785333      8.332667 8.960667       9.254667
## 2   2  9.442963    7.014074      6.893704 7.016296       8.214815
## 3   3  8.734333    5.828333      6.240667 4.377667       6.745333
## 4   4  6.463103    4.810345      5.480000 5.217586       5.517241
## 5   5  2.919600    2.446000      4.666000 4.902400       3.988800
## 6   6  0.233871    2.322581      3.458065 4.478387       2.086452
## 7   7  0.692000    0.622000      1.945000 3.065000       0.939000

# Suggest a number of clusters for agglomerative clustering (agnes) with the
# gap statistic.
# dataClus already carries the `pam` label column at this point; it is
# dropped here so that cluster labels are not treated as an input variable.
# The seed is fixed because the gap statistic relies on randomly generated
# reference data.
set.seed(123)
fviz_nbclust(dataClus[, setdiff(names(dataClus), "pam"), drop = FALSE],
             hcut, diss = g.dist, method = "gap_stat", k.max = 10,
             verbose = FALSE, hc_func = "agnes")

Último ejercicio

library(rio)
# Location of the HDI statistical annex spreadsheet.
WhereIDH <- "https://github.com/Estadistica-AnalisisPolitico/DataFiles-estadistica/raw/main/HDR21-22_Statistical_Annex_HDI_Table.xlsx"

# Read the sheet, skipping the first 4 rows and leaving the column names
# unrepaired.
idh <- rio::import(WhereIDH, skip = 4, .name_repair = "minimal")

# Preview the first 15 rows as a striped table.
head(idh, 15) %>%
  kbl() %>%
  kable_styling(bootstrap_options = "striped", font_size = 10)

		Human Development Index (HDI)		Life expectancy at birth		Expected years of schooling		Mean years of schooling		Gross national income (GNI) per capita		GNI per capita rank minus HDI rank		HDI rank
HDI rank	Country	Value	NA	(years)	NA	(years)	NA	(years)	NA	(2017 PPP $)	NA	NA	NA	NA
NA	NA	2021	NA	2021	NA	2021	a	2021	a	2021	NA	2021	b	2020
NA	VERY HIGH HUMAN DEVELOPMENT	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA
1	Switzerland	0.96199999999999997	NA	83.987200000000001	NA	16.50029945	NA	13.85966015	NA	66933.004539999994	NA	5	NA	3
2	Norway	0.96099999999999997	NA	83.233900000000006	NA	18.185199740000002	c	13.00362968	NA	64660.106220000001	NA	6	NA	1
3	Iceland	0.95899999999999996	NA	82.678200000000004	NA	19.163059230000002	c	13.76716995	NA	55782.049809999997	NA	11	NA	2
4	Hong Kong, China (SAR)	0.95199999999999996	NA	85.473399999999998	d	17.278169630000001	NA	12.22620964	NA	62606.845399999998	NA	6	NA	4
5	Australia	0.95099999999999996	NA	84.526499999999999	NA	21.054590229999999	c	12.726819989999999	NA	49238.433349999999	NA	18	NA	5
6	Denmark	0.94799999999999995	NA	81.375299999999996	NA	18.714799880000001	c	12.96049023	NA	60364.785949999998	NA	6	NA	5
7	Sweden	0.94699999999999995	NA	82.9833	NA	19.418529509999999	c	12.609720230000001	NA	54489.37401	NA	9	NA	9
8	Ireland	0.94499999999999995	NA	81.997600000000006	NA	18.94522095	c	11.58222303	e	76168.984429999997	f	-3	NA	8
9	Germany	0.94199999999999995	NA	80.630099999999999	NA	17.010139469999999	NA	14.090966910000001	e	54534.216820000001	NA	6	NA	7
10	Netherlands	0.94099999999999995	NA	81.687299999999993	NA	18.693165220000001	c,e	12.581629749999999	NA	55979.411	NA	3	NA	10
11	Finland	0.94	NA	82.0381	NA	19.051929470000001	c	12.87362003	NA	49452.166720000001	NA	11	NA	12
12	Singapore	0.93899999999999995	NA	82.754499999999993	NA	16.524320599999999	NA	11.924880030000001	NA	90918.644709999993	f	-10	NA	10

# Keep the country column and the five value columns, and give them short
# names.
idh <- idh[, c(2, 3, 5, 7, 9, 11)]
newIDH <- c(
  "Pais", "puntuacion", "EsperanzaVida",
  "EscolaridadDuracion", "EscolaridadPromedio", "PBI"
)
names(idh) <- newIDH

# Keep the first 202 rows, then drop rows that have no country name.
idh <- idh[1:202, ]
idh <- idh[!is.na(idh$Pais), ]

# Convert every column except Pais to numeric. Cells holding text become NA,
# which is what triggers the coercion warnings.
idh[, -1] <- lapply(idh[, -1], as.numeric)

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

# Show the rows that have a missing value in any column other than Pais.
idh[!complete.cases(idh[, -1]), ] %>%
  kbl() %>%
  kable_styling(bootstrap_options = "striped", font_size = 10)

	Pais	puntuacion	EsperanzaVida	EscolaridadDuracion	EscolaridadPromedio	PBI
1	Country	NA	NA	NA	NA	NA
3	VERY HIGH HUMAN DEVELOPMENT	NA	NA	NA	NA	NA
70	HIGH HUMAN DEVELOPMENT	NA	NA	NA	NA	NA
120	MEDIUM HUMAN DEVELOPMENT	NA	NA	NA	NA	NA
165	LOW HUMAN DEVELOPMENT	NA	NA	NA	NA	NA
198	OTHER COUNTRIES OR TERRITORIES	NA	NA	NA	NA	NA
199	Korea (Democratic People’s Rep. of)	NA	73.2845	10.78317	NA	NA
200	Monaco	NA	85.9463	NA	NA	NA
201	Nauru	NA	63.6170	11.69042	NA	17729.741
202	Somalia	NA	55.2803	NA	NA	1017.968

# Keep only rows whose value columns are all observed; reset the row names.
idh <- idh[complete.cases(idh[, -1]), ]
row.names(idh) <- NULL

# Strip leading and trailing horizontal/vertical whitespace from the
# country names in both tables so they can be compared.
idh$Pais <- trimws(idh$Pais, whitespace = "[\\h\\v]")
demo$Pais <- trimws(demo$Pais, whitespace = "[\\h\\v]")

# Country names present in idh but missing from demo.
sort(setdiff(idh$Pais, demo$Pais))

##  [1] "Andorra"                            "Antigua and Barbuda"               
##  [3] "Bahamas"                            "Barbados"                          
##  [5] "Belize"                             "Bolivia (Plurinational State of)"  
##  [7] "Brunei Darussalam"                  "Cabo Verde"                        
##  [9] "Congo"                              "Congo (Democratic Republic of the)"
## [11] "Côte d'Ivoire"                      "Czechia"                           
## [13] "Dominica"                           "Eswatini (Kingdom of)"             
## [15] "Grenada"                            "Hong Kong, China (SAR)"            
## [17] "Iran (Islamic Republic of)"         "Kiribati"                          
## [19] "Korea (Republic of)"                "Lao People's Democratic Republic"  
## [21] "Liechtenstein"                      "Maldives"                          
## [23] "Marshall Islands"                   "Micronesia (Federated States of)"  
## [25] "Moldova (Republic of)"              "Palau"                             
## [27] "Palestine, State of"                "Russian Federation"                
## [29] "Saint Kitts and Nevis"              "Saint Lucia"                       
## [31] "Saint Vincent and the Grenadines"   "Samoa"                             
## [33] "San Marino"                         "Sao Tome and Principe"             
## [35] "Seychelles"                         "Solomon Islands"                   
## [37] "South Sudan"                        "Syrian Arab Republic"              
## [39] "Tanzania (United Republic of)"      "Timor-Leste"                       
## [41] "Tonga"                              "Türkiye"                           
## [43] "Tuvalu"                             "Vanuatu"                           
## [45] "Venezuela (Bolivarian Republic of)" "Viet Nam"

# Country names present in demo but missing from idh.
sort(setdiff(demo$Pais, idh$Pais))

##  [1] "Bolivia"                          "Cape Verde"                      
##  [3] "Czech Republic"                   "Democratic Republic of the Congo"
##  [5] "East Timor"                       "Eswatini"                        
##  [7] "Hong Kong"                        "Iran"                            
##  [9] "Ivory Coast"                      "Laos"                            
## [11] "Moldova"                          "North Korea"                     
## [13] "Palestine"                        "Republic of the Congo"           
## [15] "Russia"                           "South Korea"                     
## [17] "Syria"                            "Taiwan"                          
## [19] "Tanzania"                         "Turkey"                          
## [21] "Venezuela"                        "Vietnam"

# Inner join of the two tables on the country name. The key is named
# explicitly so the join cannot silently pick up another shared column.
# NOTE(review): countries spelled differently in the two sources (see the
# setdiff() listings above, e.g. "Viet Nam" vs "Vietnam") are dropped by this
# join; harmonize the names first if they should be kept.
idhdemo <- merge(idh, demo, by = "Pais")

# Columns 3:6 are the HDI components and 9:13 the democracy components.
boxplot(idhdemo[, c(3:6, 9:13)], horizontal = FALSE, las = 2, cex.axis = 0.5)

# Standardize those columns in place.
idhdemo[, c(3:6, 9:13)] <- normalize(idhdemo[, c(3:6, 9:13)],
                                     method = "standardize")

# Clustering input, with the country name as row name.
dataClus2 <- idhdemo[, c(3:6, 9:13)]
row.names(dataClus2) <- idhdemo$Pais

# Pairwise Gower dissimilarities between countries.
g.dist <- daisy(dataClus2, metric = "gower")

# Suggest a number of clusters for agnes with the gap statistic. The seed is
# fixed right here so the result does not depend on whatever random-number
# state earlier chunks left behind.
set.seed(123)
fviz_nbclust(dataClus2, hcut, diss = g.dist, method = "gap_stat",
             k.max = 10, verbose = FALSE, hc_func = "agnes")

# Agglomerative clustering (agnes) into 3 groups on the Gower
# dissimilarities.
set.seed(123)
res.agnes <- hcut(g.dist, k = 3, hc_func = "agnes", hc_method = "ward.D")

# Store the agnes labels and inspect the silhouette plot.
dataClus2$agnes <- res.agnes$cluster
fviz_silhouette(res.agnes, print.summary = FALSE)

# Suggest a number of clusters for divisive clustering (diana) with the gap
# statistic.
# dataClus2 now carries the `agnes` label column (and `diana` if this chunk
# is re-run); label columns are dropped so they are not treated as input
# variables. The seed is fixed because the gap statistic relies on randomly
# generated reference data.
set.seed(123)
fviz_nbclust(
  dataClus2[, setdiff(names(dataClus2), c("agnes", "diana")), drop = FALSE],
  hcut, diss = g.dist, method = "gap_stat", k.max = 10,
  verbose = FALSE, hc_func = "diana"
)

# Divisive clustering (diana) into 3 groups; store the labels and inspect
# the silhouette plot.
set.seed(123)
res.diana <- hcut(g.dist, k = 3, hc_func = "diana")
dataClus2$diana <- res.diana$cluster
fviz_silhouette(res.diana, print.summary = FALSE)

Práctica 4

Luis Dávila

2022-11-11

Último ejercicio