library(rvest)
# Scrape the HDI ranking table from Wikipedia.
# The absolute XPath addresses the second table inside the article body; the
# matched node set is parsed into a list of tables and flattened into one
# data frame.
fullxpath <- "/html/body/div[2]/div/div[3]/main/div[3]/div[3]/div[1]/table[2]"
idh <- "https://en.wikipedia.org/wiki/List_of_countries_by_Human_Development_Index" %>%
  read_html() %>%
  html_nodes(xpath = fullxpath) %>%
  html_table() %>%
  data.frame()
library(rvest)
# Scrape the Democracy Index table from Wikipedia.
# The absolute XPath addresses the sixth table inside the article body; the
# matched node set is parsed into a list of tables and flattened into one
# data frame.
fullxpath <- "/html/body/div[2]/div/div[3]/main/div[3]/div[3]/div[1]/table[6]"
demo <- "https://en.wikipedia.org/wiki/The_Economist_Democracy_Index" %>%
  read_html() %>%
  html_nodes(xpath = fullxpath) %>%
  html_table() %>%
  data.frame()
# Inspect the raw HDI table as scraped.
# The recorded output shows that every column was read as character and that
# the first data row is a second header line ("Nation", "2021 data ...").
str(idh)
## 'data.frame': 192 obs. of 5 variables:
## $ Rank : chr "2021 data (2022 report)[2]" "1" "2" "3" ...
## $ Rank.1: chr "Change since 2015[20]" "" "" "" ...
## $ Nation: chr "Nation" "Switzerland" "Norway" "Iceland" ...
## $ HDI : chr "2021 data (2022 report)[2]" "0.962" "0.961" "0.959" ...
## $ HDI.1 : chr "Average annual growth (2010–2021)[20]" "0.19%" "0.19%" "0.56%" ...
# Column names of the HDI table (duplicated headers received a ".1" suffix).
names(idh)
## [1] "Rank" "Rank.1" "Nation" "HDI" "HDI.1"
# Inspect the raw Democracy Index table as scraped.
# The recorded output shows 11 character columns, a blank first row and a
# section-title row ("Full democracies") repeated across every column.
str(demo)
## 'data.frame': 172 obs. of 11 variables:
## $ Rank : chr "" "Full democracies" "1" "2" ...
## $ .mw.parser.output..tooltip.dotted.border.bottom.1px.dotted.cursor.help.Δ.Rank: chr "" "Full democracies" "" "" ...
## $ Country : chr "" "Full democracies" "Norway" "New Zealand" ...
## $ Regime.type : chr "" "Full democracies" "Full democracy" "Full democracy" ...
## $ Overall.score : chr "" "Full democracies" "9.81" "9.61" ...
## $ Δ.Score : chr "" "Full democracies" "0.06" "0.14" ...
## $ Elec.toral.pro.cessand.plura.lism : chr "" "Full democracies" "10.00" "10.00" ...
## $ Func.tioningof.govern.ment : chr "" "Full democracies" "9.64" "9.29" ...
## $ Poli.ticalpartici.pation : chr "" "Full democracies" "10.00" "10.00" ...
## $ Poli.ticalcul.ture : chr "" "Full democracies" "10.00" "8.75" ...
## $ Civilliber.ties : chr "" "Full democracies" "9.41" "10.00" ...
# Column names of the Democracy Index table. Position 6 is the score-change
# column ("Δ.Score"); the five category scores occupy positions 7 to 11.
names(demo)
## [1] "Rank"
## [2] ".mw.parser.output..tooltip.dotted.border.bottom.1px.dotted.cursor.help.Δ.Rank"
## [3] "Country"
## [4] "Regime.type"
## [5] "Overall.score"
## [6] "Δ.Score"
## [7] "Elec.toral.pro.cessand.plura.lism"
## [8] "Func.tioningof.govern.ment"
## [9] "Poli.ticalpartici.pation"
## [10] "Poli.ticalcul.ture"
## [11] "Civilliber.ties"
# Select columns ----
# idh: keep Nation (3), HDI (4) and average annual HDI growth (5).
idh <- idh[, c(3, 4, 5)]
# demo: drop Rank (1), the rank-change column (2) and the score-change column
# (6). Dropping only columns 1 and 2 leaves nine columns for the eight names
# assigned below, which shifts every label from "Electoral" onward by one
# position and leaves the last column named NA.
demo <- demo[, -c(1, 2, 6)]

# Rename columns ----
newDemo <- c(
  "Pais", "RegimeType", "Score", "Electoral", "Functioning",
  "participation", "culture", "Civilliberties"
)
# NOTE: "PBI" receives the "Average annual growth" HDI column, not a GDP figure.
newIDH <- c("Pais", "puntuacion", "PBI")
# Fail loudly instead of silently mislabelling columns if the scraped tables
# change shape.
stopifnot(
  ncol(demo) == length(newDemo),
  ncol(idh) == length(newIDH)
)
names(demo) <- newDemo
names(idh) <- newIDH

# Select rows ----
# Drop rows without a country name and the second header line that the scrape
# keeps as a data row ("Nation"). Indexing rows 1:202 of a shorter table only
# padded it with all-NA rows that were removed again immediately afterwards.
idh <- idh[
  !is.na(idh$Pais) & trimws(idh$Pais, whitespace = "[\\h\\v]") != "Nation",
]
# NOTE: `##` output recorded elsewhere in this file was captured before these
# corrections; re-knit the document to refresh it.
# Data types
# Structure of the Democracy Index table after column selection and renaming.
# The output recorded below lists 9 columns with the last one unnamed (NA), and
# the values under "Electoral" ("0.06", "0.14") are those of the raw "Δ.Score"
# column: when this output was captured, eight names had been applied to nine
# columns, so every label from "Electoral" onward sat one column too early.
str(demo)
## 'data.frame': 172 obs. of 9 variables:
## $ Pais : chr "" "Full democracies" "Norway" "New Zealand" ...
## $ RegimeType : chr "" "Full democracies" "Full democracy" "Full democracy" ...
## $ Score : chr "" "Full democracies" "9.81" "9.61" ...
## $ Electoral : chr "" "Full democracies" "0.06" "0.14" ...
## $ Functioning : chr "" "Full democracies" "10.00" "10.00" ...
## $ participation : chr "" "Full democracies" "9.64" "9.29" ...
## $ culture : chr "" "Full democracies" "10.00" "10.00" ...
## $ Civilliberties: chr "" "Full democracies" "10.00" "8.75" ...
## $ NA : chr "" "Full democracies" "9.41" "10.00" ...
# Structure of the HDI table after column selection and renaming.
# The output recorded below still contains the header line as its first row
# and stores the growth column ("PBI") as text with a trailing "%".
str(idh)
## 'data.frame': 192 obs. of 3 variables:
## $ Pais : chr "Nation" "Switzerland" "Norway" "Iceland" ...
## $ puntuacion: chr "2021 data (2022 report)[2]" "0.962" "0.961" "0.959" ...
## $ PBI : chr "Average annual growth (2010–2021)[20]" "0.19%" "0.19%" "0.56%" ...
# Format: text to ordinal ----
# Regime types ordered from least to most democratic. Values outside these
# four labels (blank rows, section titles) become NA.
OrdinalVector <- c(
  "Authoritarian", "Hybrid regime", "Flawed democracy", "Full democracy"
)
demo$RegimeType <- factor(
  demo$RegimeType,
  levels = OrdinalVector,
  ordered = TRUE
)

# Format: text to number ----
# Convert scraped text to numeric.
#   x: character vector of table cells.
# Returns a numeric vector of the same length; cells that still are not
# numbers after clean-up become NA (with the usual coercion warning).
# Clean-up: footnote markers such as "[2]", percent signs, thousands
# separators and whitespace are removed, and the typographic minus sign
# (U+2212) is replaced by "-". Without this, values like "0.19%" are coerced
# to NA, which turned the whole PBI column into NA.
to_number <- function(x) {
  x <- gsub("\\[[^]]*\\]", "", x)
  x <- gsub("\u2212", "-", x, fixed = TRUE)
  x <- gsub("[%,[:space:]]", "", x)
  as.numeric(x)
}

idh[, -1] <- lapply(idh[, -1], to_number)

# Convert every demo column except the country key and the regime factor.
# Selecting by exclusion rather than by the fixed range 3:8 guarantees that no
# trailing score column is left as character.
score_cols <- which(!names(demo) %in% c("Pais", "RegimeType"))
demo[score_cols] <- lapply(demo[score_cols], to_number)
# "NAs introduced by coercion" warnings are still expected here for header and
# section-title rows (e.g. "Full democracies"), which carry no numeric value.
library(kableExtra)
# Render, as a striped HTML table, the idh rows that have a missing value in
# any column other than the country key.
kable_styling(
  kbl(idh[!complete.cases(idh[, -1]), ]),
  bootstrap_options = "striped",
  font_size = 10
)
|
Pais
|
puntuacion
|
PBI
|
|
Nation
|
NA
|
NA
|
|
Switzerland
|
0.962
|
NA
|
|
Norway
|
0.961
|
NA
|
|
Iceland
|
0.959
|
NA
|
|
Hong Kong
|
0.952
|
NA
|
|
Australia
|
0.951
|
NA
|
|
Denmark
|
0.948
|
NA
|
|
Sweden
|
0.947
|
NA
|
|
Ireland
|
0.945
|
NA
|
|
Germany
|
0.942
|
NA
|
|
Netherlands
|
0.941
|
NA
|
|
Finland
|
0.940
|
NA
|
|
Singapore
|
0.939
|
NA
|
|
Belgium
|
0.937
|
NA
|
|
New Zealand
|
0.937
|
NA
|
|
Canada
|
0.936
|
NA
|
|
Liechtenstein
|
0.935
|
NA
|
|
Luxembourg
|
0.930
|
NA
|
|
United Kingdom
|
0.929
|
NA
|
|
Japan
|
0.925
|
NA
|
|
South Korea
|
0.925
|
NA
|
|
United States
|
0.921
|
NA
|
|
Israel
|
0.919
|
NA
|
|
Malta
|
0.918
|
NA
|
|
Slovenia
|
0.918
|
NA
|
|
Austria
|
0.916
|
NA
|
|
United Arab Emirates
|
0.911
|
NA
|
|
Spain
|
0.905
|
NA
|
|
France
|
0.903
|
NA
|
|
Cyprus
|
0.896
|
NA
|
|
Italy
|
0.895
|
NA
|
|
Estonia
|
0.890
|
NA
|
|
Czech Republic
|
0.889
|
NA
|
|
Greece
|
0.887
|
NA
|
|
Poland
|
0.876
|
NA
|
|
Bahrain
|
0.875
|
NA
|
|
Lithuania
|
0.875
|
NA
|
|
Saudi Arabia
|
0.875
|
NA
|
|
Portugal
|
0.866
|
NA
|
|
Latvia
|
0.863
|
NA
|
|
Andorra
|
0.858
|
NA
|
|
Croatia
|
0.858
|
NA
|
|
Chile
|
0.855
|
NA
|
|
Qatar
|
0.855
|
NA
|
|
San Marino
|
0.853
|
NA
|
|
Slovakia
|
0.848
|
NA
|
|
Hungary
|
0.846
|
NA
|
|
Argentina
|
0.842
|
NA
|
|
Turkey
|
0.838
|
NA
|
|
Montenegro
|
0.832
|
NA
|
|
Kuwait
|
0.831
|
NA
|
|
Brunei
|
0.829
|
NA
|
|
Russia
|
0.822
|
NA
|
|
Romania
|
0.821
|
NA
|
|
Oman
|
0.816
|
NA
|
|
Bahamas
|
0.812
|
NA
|
|
Kazakhstan
|
0.811
|
NA
|
|
Trinidad and Tobago
|
0.810
|
NA
|
|
Costa Rica
|
0.809
|
NA
|
|
Uruguay
|
0.809
|
NA
|
|
Belarus
|
0.808
|
NA
|
|
Panama
|
0.805
|
NA
|
|
Malaysia
|
0.803
|
NA
|
|
Georgia
|
0.802
|
NA
|
|
Mauritius
|
0.802
|
NA
|
|
Serbia
|
0.802
|
NA
|
|
Thailand
|
0.800
|
NA
|
|
Albania
|
0.796
|
NA
|
|
Bulgaria
|
0.795
|
NA
|
|
Grenada
|
0.795
|
NA
|
|
Barbados
|
0.790
|
NA
|
|
Antigua and Barbuda
|
0.788
|
NA
|
|
Seychelles
|
0.785
|
NA
|
|
Sri Lanka
|
0.782
|
NA
|
|
Bosnia and Herzegovina
|
0.780
|
NA
|
|
Saint Kitts and Nevis
|
0.777
|
NA
|
|
Iran
|
0.774
|
NA
|
|
Ukraine
|
0.773
|
NA
|
|
North Macedonia
|
0.770
|
NA
|
|
China
|
0.768
|
NA
|
|
Dominican Republic
|
0.767
|
NA
|
|
Moldova
|
0.767
|
NA
|
|
Palau
|
0.767
|
NA
|
|
Cuba
|
0.764
|
NA
|
|
Peru
|
0.762
|
NA
|
|
Armenia
|
0.759
|
NA
|
|
Mexico
|
0.758
|
NA
|
|
Brazil
|
0.754
|
NA
|
|
Colombia
|
0.752
|
NA
|
|
Saint Vincent and the Grenadines
|
0.751
|
NA
|
|
Maldives
|
0.747
|
NA
|
|
Algeria
|
0.745
|
NA
|
|
Azerbaijan
|
0.745
|
NA
|
|
Tonga
|
0.745
|
NA
|
|
Turkmenistan
|
0.745
|
NA
|
|
Ecuador
|
0.740
|
NA
|
|
Mongolia
|
0.739
|
NA
|
|
Egypt
|
0.731
|
NA
|
|
Tunisia
|
0.731
|
NA
|
|
Fiji
|
0.730
|
NA
|
|
Suriname
|
0.730
|
NA
|
|
Uzbekistan
|
0.727
|
NA
|
|
Dominica
|
0.720
|
NA
|
|
Jordan
|
0.720
|
NA
|
|
Libya
|
0.718
|
NA
|
|
Paraguay
|
0.717
|
NA
|
|
Palestine
|
0.715
|
NA
|
|
Saint Lucia
|
0.715
|
NA
|
|
Guyana
|
0.714
|
NA
|
|
South Africa
|
0.713
|
NA
|
|
Jamaica
|
0.709
|
NA
|
|
Samoa
|
0.707
|
NA
|
|
Gabon
|
0.706
|
NA
|
|
Lebanon
|
0.706
|
NA
|
|
Indonesia
|
0.705
|
NA
|
|
Vietnam
|
0.703
|
NA
|
|
Philippines
|
0.699
|
NA
|
|
Botswana
|
0.693
|
NA
|
|
Bolivia
|
0.692
|
NA
|
|
Kyrgyzstan
|
0.692
|
NA
|
|
Venezuela
|
0.691
|
NA
|
|
Iraq
|
0.686
|
NA
|
|
Tajikistan
|
0.685
|
NA
|
|
Belize
|
0.683
|
NA
|
|
Morocco
|
0.683
|
NA
|
|
El Salvador
|
0.675
|
NA
|
|
Nicaragua
|
0.667
|
NA
|
|
Bhutan
|
0.666
|
NA
|
|
Cape Verde
|
0.662
|
NA
|
|
Bangladesh
|
0.661
|
NA
|
|
Tuvalu
|
0.641
|
NA
|
|
Marshall Islands
|
0.639
|
NA
|
|
India
|
0.633
|
NA
|
|
Ghana
|
0.632
|
NA
|
|
Micronesia
|
0.628
|
NA
|
|
Guatemala
|
0.627
|
NA
|
|
Kiribati
|
0.624
|
NA
|
|
Honduras
|
0.621
|
NA
|
|
Sao Tome and Principe
|
0.618
|
NA
|
|
Namibia
|
0.615
|
NA
|
|
Laos
|
0.607
|
NA
|
|
East Timor
|
0.607
|
NA
|
|
Vanuatu
|
0.607
|
NA
|
|
Nepal
|
0.602
|
NA
|
|
Eswatini
|
0.597
|
NA
|
|
Equatorial Guinea
|
0.596
|
NA
|
|
Cambodia
|
0.593
|
NA
|
|
Zimbabwe
|
0.593
|
NA
|
|
Angola
|
0.586
|
NA
|
|
Myanmar
|
0.585
|
NA
|
|
Syria
|
0.577
|
NA
|
|
Cameroon
|
0.576
|
NA
|
|
Kenya
|
0.575
|
NA
|
|
Republic of the Congo
|
0.571
|
NA
|
|
Zambia
|
0.565
|
NA
|
|
Solomon Islands
|
0.564
|
NA
|
|
Comoros
|
0.558
|
NA
|
|
Papua New Guinea
|
0.558
|
NA
|
|
Mauritania
|
0.556
|
NA
|
|
Ivory Coast
|
0.550
|
NA
|
|
Tanzania
|
0.549
|
NA
|
|
Pakistan
|
0.544
|
NA
|
|
Togo
|
0.539
|
NA
|
|
Haiti
|
0.535
|
NA
|
|
Nigeria
|
0.535
|
NA
|
|
Rwanda
|
0.534
|
NA
|
|
Benin
|
0.525
|
NA
|
|
Uganda
|
0.525
|
NA
|
|
Lesotho
|
0.514
|
NA
|
|
Malawi
|
0.512
|
NA
|
|
Senegal
|
0.511
|
NA
|
|
Djibouti
|
0.509
|
NA
|
|
Sudan
|
0.508
|
NA
|
|
Madagascar
|
0.501
|
NA
|
|
Gambia
|
0.500
|
NA
|
|
Ethiopia
|
0.498
|
NA
|
|
Eritrea
|
0.492
|
NA
|
|
Guinea-Bissau
|
0.483
|
NA
|
|
Liberia
|
0.481
|
NA
|
|
Democratic Republic of the Congo
|
0.479
|
NA
|
|
Afghanistan
|
0.478
|
NA
|
|
Sierra Leone
|
0.477
|
NA
|
|
Guinea
|
0.465
|
NA
|
|
Yemen
|
0.455
|
NA
|
|
Burkina Faso
|
0.449
|
NA
|
|
Mozambique
|
0.446
|
NA
|
|
Mali
|
0.428
|
NA
|
|
Burundi
|
0.426
|
NA
|
|
Central African Republic
|
0.404
|
NA
|
|
Niger
|
0.400
|
NA
|
|
Chad
|
0.394
|
NA
|
|
South Sudan
|
0.385
|
NA
|
# Strip leading and trailing horizontal/vertical whitespace (regex classes \h
# and \v) from the country names so the join keys of both tables compare equal.
idh[["Pais"]] <- trimws(idh[["Pais"]], which = "both", whitespace = "[\\h\\v]")
demo[["Pais"]] <- trimws(demo[["Pais"]], which = "both", whitespace = "[\\h\\v]")
# Keys present in idh but missing from demo.
sort(setdiff(x = idh[["Pais"]], y = demo[["Pais"]]))
## [1] "Andorra" "Antigua and Barbuda"
## [3] "Bahamas" "Barbados"
## [5] "Belize" "Brunei"
## [7] "Dominica" "Grenada"
## [9] "Kiribati" "Liechtenstein"
## [11] "Maldives" "Marshall Islands"
## [13] "Micronesia" "Nation"
## [15] "Palau" "Saint Kitts and Nevis"
## [17] "Saint Lucia" "Saint Vincent and the Grenadines"
## [19] "Samoa" "San Marino"
## [21] "Sao Tome and Principe" "Seychelles"
## [23] "Solomon Islands" "South Sudan"
## [25] "Tonga" "Tuvalu"
## [27] "Vanuatu"
# Keys present in demo but missing from idh.
sort(setdiff(x = demo[["Pais"]], y = idh[["Pais"]]))
## [1] "" "Authoritarian regimes" "Flawed democracies"
## [4] "Full democracies" "Hybrid regimes" "North Korea"
## [7] "Taiwan"
# Fix the RNG state, then inner-join both tables on the country name, which is
# the only column name they share.
set.seed(123)
idhdemo <- merge(x = idh, y = demo, by = "Pais")
# Per-column overview of the merged table.
summary(idhdemo)
## Pais puntuacion PBI RegimeType
## Length:165 Min. :0.3940 Min. : NA Authoritarian :58
## Class :character 1st Qu.:0.5860 1st Qu.: NA Hybrid regime :36
## Mode :character Median :0.7310 Median : NA Flawed democracy:48
## Mean :0.7204 Mean :NaN Full democracy :23
## 3rd Qu.:0.8480 3rd Qu.: NA
## Max. :0.9620 Max. : NA
## NA's :165
## Score Electoral Functioning participation
## Min. :0.320 Min. :0.0100 Min. : 0.000 Min. :0.000
## 1st Qu.:3.120 1st Qu.:0.0700 1st Qu.: 1.170 1st Qu.:2.710
## Median :5.540 Median :0.1300 Median : 7.000 Median :5.000
## Mean :5.296 Mean :0.1772 Mean : 5.595 Mean :4.682
## 3rd Qu.:7.070 3rd Qu.:0.2175 3rd Qu.: 9.170 3rd Qu.:6.790
## Max. :9.810 Max. :0.9600 Max. :10.000 Max. :9.640
## NA's :47
## culture Civilliberties NA
## Min. : 0.000 Min. : 1.250 Length:165
## 1st Qu.: 3.890 1st Qu.: 3.750 Class :character
## Median : 5.560 Median : 5.630 Mode :character
## Mean : 5.444 Mean : 5.328
## 3rd Qu.: 6.670 3rd Qu.: 6.250
## Max. :10.000 Max. :10.000
##
# Candidate columns for the correlation matrix, by position in idhdemo;
# positions beyond the last column are discarded.
selected_columns <- c(2, 4, 5, 6, 7, 8, 9)
selected_columns <- selected_columns[selected_columns <= ncol(idhdemo)]
# Class label of each candidate column. Multi-class columns such as the
# ordered factor are collapsed into one string ("ordered/factor") so the
# result is always a plain character vector; sapply(..., class) returned a
# ragged list here.
column_types <- vapply(
  idhdemo[selected_columns],
  function(col) paste(class(col), collapse = "/"),
  character(1)
)
# Keep only the numeric columns
is_numeric_column <- vapply(idhdemo[selected_columns], is.numeric, logical(1))
numeric_columns <- selected_columns[is_numeric_column]
# Compute the correlation on the numeric columns only.
# use = "pairwise.complete.obs" keeps a column with some missing values from
# turning its entire row and column of the matrix into NA; drop = FALSE keeps
# a data frame even when a single column is selected.
correlation <- cor(
  idhdemo[, numeric_columns, drop = FALSE],
  use = "pairwise.complete.obs"
)
print(correlation)
# NOTE: the matrix recorded below was produced before pairwise deletion was
# enabled, which is why it shows NA for "Electoral"; re-knit to refresh it.
## puntuacion Score Electoral Functioning participation
## puntuacion 1.0000000 0.6509108 NA 0.5468508 0.6997162
## Score 0.6509108 1.0000000 NA 0.9396533 0.9266066
## Electoral NA NA 1 NA NA
## Functioning 0.5468508 0.9396533 NA 1.0000000 0.8294423
## participation 0.6997162 0.9266066 NA 0.8294423 1.0000000
## culture 0.5669529 0.8580811 NA 0.7874725 0.7183730
## culture
## puntuacion 0.5669529
## Score 0.8580811
## Electoral NA
## Functioning 0.7874725
## participation 0.7183730
## culture 1.0000000
# Binary outcome: 1 for full or flawed democracies, 0 for hybrid regimes and
# authoritarian regimes.
idhdemo$Democracy <- as.numeric(
  idhdemo$RegimeType %in% c("Full democracy", "Flawed democracy")
)
# Run the logistic regression.
# The response is the binary Democracy indicator created above and the
# predictor is the HDI score (puntuacion) from the other data source.
# The earlier specification, RegimeType ~ Score, had two problems:
#   * a factor response in a binomial glm treats the first level as failure
#     and all remaining levels as success, so the four-level RegimeType was
#     silently collapsed to "Authoritarian vs. everything else" and the
#     Democracy indicator was never used;
#   * Score separated the two classes perfectly, which is what triggered the
#     recorded warnings "glm.fit: algorithm did not converge" and "fitted
#     probabilities numerically 0 or 1 occurred", a residual deviance of
#     about 0 and standard errors in the order of 1e5.
# NOTE(review): confirm that HDI is the intended explanatory variable.
model <- glm(Democracy ~ puntuacion, data = idhdemo, family = binomial)
# Get the model results.
# NOTE: the summary recorded below belongs to the earlier specification;
# re-knit to refresh it.
summary(model)
##
## Call:
## glm(formula = RegimeType ~ Score, family = binomial, data = idhdemo)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1795.2 127342.8 -0.014 0.989
## Score 449.4 31877.9 0.014 0.989
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2.1397e+02 on 164 degrees of freedom
## Residual deviance: 5.9114e-07 on 163 degrees of freedom
## AIC: 4
##
## Number of Fisher Scoring iterations: 25
# Box plots for columns 3 to 6 of the merged table, restricted to the columns
# that can actually be drawn: numeric and with at least one non-missing value.
# Passing the raw positional slice also handed boxplot() the ordered factor
# RegimeType (whose integer codes would be plotted as if they were
# measurements) and, in the recorded run, an all-NA PBI column.
box_cols <- Filter(
  function(j) is.numeric(idhdemo[[j]]) && !all(is.na(idhdemo[[j]])),
  intersect(3:6, seq_len(ncol(idhdemo)))
)
if (length(box_cols) > 0) {
  boxplot(
    idhdemo[, box_cols, drop = FALSE],
    horizontal = FALSE,
    las = 2,
    cex.axis = 0.5
  )
} else {
  warning("No plottable numeric columns among positions 3:6 of idhdemo",
          call. = FALSE)
}
