library(rvest)

idh = "https://en.wikipedia.org/wiki/List_of_countries_by_Human_Development_Index"
fullxpath = '/html/body/div[2]/div/div[3]/main/div[3]/div[3]/div[1]/table[2]'

idh = read_html(idh)
idh = html_nodes(idh, xpath= fullxpath)
idh = html_table (idh)
idh = data.frame(idh)
library(rvest)

demo = "https://en.wikipedia.org/wiki/The_Economist_Democracy_Index"
fullxpath = '/html/body/div[2]/div/div[3]/main/div[3]/div[3]/div[1]/table[6]'

demo = read_html(demo)
demo = html_nodes(demo, xpath= fullxpath)
demo = html_table (demo)
demo = data.frame(demo)
str(idh)
## 'data.frame':    192 obs. of  5 variables:
##  $ Rank  : chr  "2021 data (2022 report)​[2]" "1" "2" "3" ...
##  $ Rank.1: chr  "Change since 2015​[20]" "" "" "" ...
##  $ Nation: chr  "Nation" "Switzerland" "Norway" "Iceland" ...
##  $ HDI   : chr  "2021 data (2022 report)​[2]" "0.962" "0.961" "0.959" ...
##  $ HDI.1 : chr  "Average annual growth (2010–2021)​[20]" "0.19%" "0.19%" "0.56%" ...
names(idh)
## [1] "Rank"   "Rank.1" "Nation" "HDI"    "HDI.1"
str(demo)
## 'data.frame':    172 obs. of  11 variables:
##  $ Rank                                                                         : chr  "" "Full democracies" "1" "2" ...
##  $ .mw.parser.output..tooltip.dotted.border.bottom.1px.dotted.cursor.help.Δ.Rank: chr  "" "Full democracies" "" "" ...
##  $ Country                                                                      : chr  "" "Full democracies" "Norway" "New Zealand" ...
##  $ Regime.type                                                                  : chr  "" "Full democracies" "Full democracy" "Full democracy" ...
##  $ Overall.score                                                                : chr  "" "Full democracies" "9.81" "9.61" ...
##  $ Δ.Score                                                                      : chr  "" "Full democracies" "0.06" "0.14" ...
##  $ Elec.toral.pro.cessand.plura.lism                                            : chr  "" "Full democracies" "10.00" "10.00" ...
##  $ Func.tioningof.govern.ment                                                   : chr  "" "Full democracies" "9.64" "9.29" ...
##  $ Poli.ticalpartici.pation                                                     : chr  "" "Full democracies" "10.00" "10.00" ...
##  $ Poli.ticalcul.ture                                                           : chr  "" "Full democracies" "10.00" "8.75" ...
##  $ Civilliber.ties                                                              : chr  "" "Full democracies" "9.41" "10.00" ...
names(demo)
##  [1] "Rank"                                                                         
##  [2] ".mw.parser.output..tooltip.dotted.border.bottom.1px.dotted.cursor.help.Δ.Rank"
##  [3] "Country"                                                                      
##  [4] "Regime.type"                                                                  
##  [5] "Overall.score"                                                                
##  [6] "Δ.Score"                                                                      
##  [7] "Elec.toral.pro.cessand.plura.lism"                                            
##  [8] "Func.tioningof.govern.ment"                                                   
##  [9] "Poli.ticalpartici.pation"                                                     
## [10] "Poli.ticalcul.ture"                                                           
## [11] "Civilliber.ties"
#seleccionando columns
idh=idh[,c(3,4,5)]
demo=demo[,-c(1,2)]

# recombrando columns
newDemo=c("Pais","RegimeType","Score","Electoral","Functioning","participation","culture",'Civilliberties')
newIDH=c('Pais','puntuacion','PBI')
names(demo)=newDemo
names(idh)=newIDH

#seleccionando filas
idh=idh[c(1:202),]
idh=idh[!is.na(idh$Pais),]

# tipo de datos
str(demo)
## 'data.frame':    172 obs. of  9 variables:
##  $ Pais          : chr  "" "Full democracies" "Norway" "New Zealand" ...
##  $ RegimeType    : chr  "" "Full democracies" "Full democracy" "Full democracy" ...
##  $ Score         : chr  "" "Full democracies" "9.81" "9.61" ...
##  $ Electoral     : chr  "" "Full democracies" "0.06" "0.14" ...
##  $ Functioning   : chr  "" "Full democracies" "10.00" "10.00" ...
##  $ participation : chr  "" "Full democracies" "9.64" "9.29" ...
##  $ culture       : chr  "" "Full democracies" "10.00" "10.00" ...
##  $ Civilliberties: chr  "" "Full democracies" "10.00" "8.75" ...
##  $ NA            : chr  "" "Full democracies" "9.41" "10.00" ...
str(idh)
## 'data.frame':    192 obs. of  3 variables:
##  $ Pais      : chr  "Nation" "Switzerland" "Norway" "Iceland" ...
##  $ puntuacion: chr  "2021 data (2022 report)​[2]" "0.962" "0.961" "0.959" ...
##  $ PBI       : chr  "Average annual growth (2010–2021)​[20]" "0.19%" "0.19%" "0.56%" ...
# formateo: texto a ordinal
OrdinalVector=c('Authoritarian','Hybrid regime','Flawed democracy','Full democracy')
demo$RegimeType=factor(demo$RegimeType,
                          levels = OrdinalVector,
                          ordered = T)

# formateo: texto a numero
idh[,-1]=lapply(idh[,-1], as.numeric)
## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion

## Warning in lapply(idh[, -1], as.numeric): NAs introduced by coercion
demo[,3:8]=lapply(demo[,3:8],as.numeric)
## Warning in lapply(demo[, 3:8], as.numeric): NAs introduced by coercion

## Warning in lapply(demo[, 3:8], as.numeric): NAs introduced by coercion

## Warning in lapply(demo[, 3:8], as.numeric): NAs introduced by coercion

## Warning in lapply(demo[, 3:8], as.numeric): NAs introduced by coercion

## Warning in lapply(demo[, 3:8], as.numeric): NAs introduced by coercion

## Warning in lapply(demo[, 3:8], as.numeric): NAs introduced by coercion
library(kableExtra)
idh[!complete.cases(idh[,-1]),]%>%kbl()%>%
  kable_styling(bootstrap_options = "striped", font_size = 10)
Pais puntuacion PBI
Nation NA NA
Switzerland 0.962 NA
Norway 0.961 NA
Iceland 0.959 NA
Hong Kong 0.952 NA
Australia 0.951 NA
Denmark 0.948 NA
Sweden 0.947 NA
Ireland 0.945 NA
Germany 0.942 NA
Netherlands 0.941 NA
Finland 0.940 NA
Singapore 0.939 NA
Belgium 0.937 NA
New Zealand 0.937 NA
Canada 0.936 NA
Liechtenstein 0.935 NA
Luxembourg 0.930 NA
United Kingdom 0.929 NA
Japan 0.925 NA
South Korea 0.925 NA
United States 0.921 NA
Israel 0.919 NA
Malta 0.918 NA
Slovenia 0.918 NA
Austria 0.916 NA
United Arab Emirates 0.911 NA
Spain 0.905 NA
France 0.903 NA
Cyprus 0.896 NA
Italy 0.895 NA
Estonia 0.890 NA
Czech Republic 0.889 NA
Greece 0.887 NA
Poland 0.876 NA
Bahrain 0.875 NA
Lithuania 0.875 NA
Saudi Arabia 0.875 NA
Portugal 0.866 NA
Latvia 0.863 NA
Andorra 0.858 NA
Croatia 0.858 NA
Chile 0.855 NA
Qatar 0.855 NA
San Marino 0.853 NA
Slovakia 0.848 NA
Hungary 0.846 NA
Argentina 0.842 NA
Turkey 0.838 NA
Montenegro 0.832 NA
Kuwait 0.831 NA
Brunei 0.829 NA
Russia 0.822 NA
Romania 0.821 NA
Oman 0.816 NA
Bahamas 0.812 NA
Kazakhstan 0.811 NA
Trinidad and Tobago 0.810 NA
Costa Rica 0.809 NA
Uruguay 0.809 NA
Belarus 0.808 NA
Panama 0.805 NA
Malaysia 0.803 NA
Georgia 0.802 NA
Mauritius 0.802 NA
Serbia 0.802 NA
Thailand 0.800 NA
Albania 0.796 NA
Bulgaria 0.795 NA
Grenada 0.795 NA
Barbados 0.790 NA
Antigua and Barbuda 0.788 NA
Seychelles 0.785 NA
Sri Lanka 0.782 NA
Bosnia and Herzegovina 0.780 NA
Saint Kitts and Nevis 0.777 NA
Iran 0.774 NA
Ukraine 0.773 NA
North Macedonia 0.770 NA
China 0.768 NA
Dominican Republic 0.767 NA
Moldova 0.767 NA
Palau 0.767 NA
Cuba 0.764 NA
Peru 0.762 NA
Armenia 0.759 NA
Mexico 0.758 NA
Brazil 0.754 NA
Colombia 0.752 NA
Saint Vincent and the Grenadines 0.751 NA
Maldives 0.747 NA
Algeria 0.745 NA
Azerbaijan 0.745 NA
Tonga 0.745 NA
Turkmenistan 0.745 NA
Ecuador 0.740 NA
Mongolia 0.739 NA
Egypt 0.731 NA
Tunisia 0.731 NA
Fiji 0.730 NA
Suriname 0.730 NA
Uzbekistan 0.727 NA
Dominica 0.720 NA
Jordan 0.720 NA
Libya 0.718 NA
Paraguay 0.717 NA
Palestine 0.715 NA
Saint Lucia 0.715 NA
Guyana 0.714 NA
South Africa 0.713 NA
Jamaica 0.709 NA
Samoa 0.707 NA
Gabon 0.706 NA
Lebanon 0.706 NA
Indonesia 0.705 NA
Vietnam 0.703 NA
Philippines 0.699 NA
Botswana 0.693 NA
Bolivia 0.692 NA
Kyrgyzstan 0.692 NA
Venezuela 0.691 NA
Iraq 0.686 NA
Tajikistan 0.685 NA
Belize 0.683 NA
Morocco 0.683 NA
El Salvador 0.675 NA
Nicaragua 0.667 NA
Bhutan 0.666 NA
Cape Verde 0.662 NA
Bangladesh 0.661 NA
Tuvalu 0.641 NA
Marshall Islands 0.639 NA
India 0.633 NA
Ghana 0.632 NA
Micronesia 0.628 NA
Guatemala 0.627 NA
Kiribati 0.624 NA
Honduras 0.621 NA
Sao Tome and Principe 0.618 NA
Namibia 0.615 NA
Laos 0.607 NA
East Timor 0.607 NA
Vanuatu 0.607 NA
Nepal 0.602 NA
Eswatini 0.597 NA
Equatorial Guinea 0.596 NA
Cambodia 0.593 NA
Zimbabwe 0.593 NA
Angola 0.586 NA
Myanmar 0.585 NA
Syria 0.577 NA
Cameroon 0.576 NA
Kenya 0.575 NA
Republic of the Congo 0.571 NA
Zambia 0.565 NA
Solomon Islands 0.564 NA
Comoros 0.558 NA
Papua New Guinea 0.558 NA
Mauritania 0.556 NA
Ivory Coast 0.550 NA
Tanzania 0.549 NA
Pakistan 0.544 NA
Togo 0.539 NA
Haiti 0.535 NA
Nigeria 0.535 NA
Rwanda 0.534 NA
Benin 0.525 NA
Uganda 0.525 NA
Lesotho 0.514 NA
Malawi 0.512 NA
Senegal 0.511 NA
Djibouti 0.509 NA
Sudan 0.508 NA
Madagascar 0.501 NA
Gambia 0.500 NA
Ethiopia 0.498 NA
Eritrea 0.492 NA
Guinea-Bissau 0.483 NA
Liberia 0.481 NA
Democratic Republic of the Congo 0.479 NA
Afghanistan 0.478 NA
Sierra Leone 0.477 NA
Guinea 0.465 NA
Yemen 0.455 NA
Burkina Faso 0.449 NA
Mozambique 0.446 NA
Mali 0.428 NA
Burundi 0.426 NA
Central African Republic 0.404 NA
Niger 0.400 NA
Chad 0.394 NA
South Sudan 0.385 NA
idh$Pais= trimws(idh$Pais,whitespace = "[\\h\\v]")
demo$Pais= trimws(demo$Pais,whitespace = "[\\h\\v]") 
sort(setdiff(idh$Pais,demo$Pais))
##  [1] "Andorra"                          "Antigua and Barbuda"             
##  [3] "Bahamas"                          "Barbados"                        
##  [5] "Belize"                           "Brunei"                          
##  [7] "Dominica"                         "Grenada"                         
##  [9] "Kiribati"                         "Liechtenstein"                   
## [11] "Maldives"                         "Marshall Islands"                
## [13] "Micronesia"                       "Nation"                          
## [15] "Palau"                            "Saint Kitts and Nevis"           
## [17] "Saint Lucia"                      "Saint Vincent and the Grenadines"
## [19] "Samoa"                            "San Marino"                      
## [21] "Sao Tome and Principe"            "Seychelles"                      
## [23] "Solomon Islands"                  "South Sudan"                     
## [25] "Tonga"                            "Tuvalu"                          
## [27] "Vanuatu"
sort(setdiff(demo$Pais,idh$Pais))
## [1] ""                      "Authoritarian regimes" "Flawed democracies"   
## [4] "Full democracies"      "Hybrid regimes"        "North Korea"          
## [7] "Taiwan"
set.seed(123)
idhdemo=merge(idh,demo)
summary(idhdemo)
##      Pais             puntuacion          PBI                 RegimeType
##  Length:165         Min.   :0.3940   Min.   : NA   Authoritarian   :58  
##  Class :character   1st Qu.:0.5860   1st Qu.: NA   Hybrid regime   :36  
##  Mode  :character   Median :0.7310   Median : NA   Flawed democracy:48  
##                     Mean   :0.7204   Mean   :NaN   Full democracy  :23  
##                     3rd Qu.:0.8480   3rd Qu.: NA                        
##                     Max.   :0.9620   Max.   : NA                        
##                                      NA's   :165                        
##      Score         Electoral       Functioning     participation  
##  Min.   :0.320   Min.   :0.0100   Min.   : 0.000   Min.   :0.000  
##  1st Qu.:3.120   1st Qu.:0.0700   1st Qu.: 1.170   1st Qu.:2.710  
##  Median :5.540   Median :0.1300   Median : 7.000   Median :5.000  
##  Mean   :5.296   Mean   :0.1772   Mean   : 5.595   Mean   :4.682  
##  3rd Qu.:7.070   3rd Qu.:0.2175   3rd Qu.: 9.170   3rd Qu.:6.790  
##  Max.   :9.810   Max.   :0.9600   Max.   :10.000   Max.   :9.640  
##                  NA's   :47                                       
##     culture       Civilliberties        NA           
##  Min.   : 0.000   Min.   : 1.250   Length:165        
##  1st Qu.: 3.890   1st Qu.: 3.750   Class :character  
##  Median : 5.560   Median : 5.630   Mode  :character  
##  Mean   : 5.444   Mean   : 5.328                     
##  3rd Qu.: 6.670   3rd Qu.: 6.250                     
##  Max.   :10.000   Max.   :10.000                     
## 
selected_columns <- c(2, 4, 5, 6, 7, 8, 9)
selected_columns <- selected_columns[selected_columns <= ncol(idhdemo)]
column_types <- sapply(idhdemo[, selected_columns], class)

# Filtrar las columnas numéricas
numeric_columns <- selected_columns[column_types == "numeric"]

# Calcular la correlación solo en las columnas numéricas
correlation <- cor(idhdemo[, numeric_columns])

print(correlation)
##               puntuacion     Score Electoral Functioning participation
## puntuacion     1.0000000 0.6509108        NA   0.5468508     0.6997162
## Score          0.6509108 1.0000000        NA   0.9396533     0.9266066
## Electoral             NA        NA         1          NA            NA
## Functioning    0.5468508 0.9396533        NA   1.0000000     0.8294423
## participation  0.6997162 0.9266066        NA   0.8294423     1.0000000
## culture        0.5669529 0.8580811        NA   0.7874725     0.7183730
##                 culture
## puntuacion    0.5669529
## Score         0.8580811
## Electoral            NA
## Functioning   0.7874725
## participation 0.7183730
## culture       1.0000000
idhdemo$Democracy <- ifelse(idhdemo$RegimeType %in% c("Full democracy", "Flawed democracy"), 1, 0)

# Realizar el análisis de regresión logística
model <- glm(RegimeType ~ Score, data = idhdemo, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Obtener los resultados del modelo
summary(model)
## 
## Call:
## glm(formula = RegimeType ~ Score, family = binomial, data = idhdemo)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -1795.2   127342.8  -0.014    0.989
## Score          449.4    31877.9   0.014    0.989
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2.1397e+02  on 164  degrees of freedom
## Residual deviance: 5.9114e-07  on 163  degrees of freedom
## AIC: 4
## 
## Number of Fisher Scoring iterations: 25
boxplot(idhdemo[,c(3:6)],horizontal = F,las=2,cex.axis = 0.5)