library(htmltab)
# Source page and XPath of the Democracy Index table to scrape.
links <- list(
  web = "https://en.wikipedia.org/wiki/Democracy_Index",
  xpath = '//*[@id="mw-content-text"]/div/table[2]/tbody'
)
# Download the table selected by the XPath into a data frame.
indice <- htmltab(doc = links$web, which = links$xpath)
# Inspect the raw column headers returned by the scraper.
names(indice)
##  [1] "Rank >> Rank"                                                          
##  [2] "Country >> Country"                                                    
##  [3] "Score >> Score"                                                        
##  [4] "Elec­toral pro­cessand plura­lism >> Elec­toral pro­cessand plura­lism"
##  [5] "Functio­ning ofgovern­ment >> Functio­ning ofgovern­ment"              
##  [6] "Poli­ticalpartici­pation >> Poli­ticalpartici­pation"                  
##  [7] "Poli­ticalculture >> Poli­ticalculture"                                
##  [8] "Civilliber­ties >> Civilliber­ties"                                    
##  [9] "Regimetype >> Regimetype"                                              
## [10] "Conti­nent >> Conti­nent"
# Short column names, listed in the same order as the ten scraped headers.
# NOTE(review): "conutry" looks like a typo for "country"; it is kept as-is
# because the printed outputs of this document use that spelling.
newNames <- c(
  "rank", "conutry", "score", "electoral", "functioning",
  "Participation", "PoliticalCulture", "civiliber", "regimetype", "continent"
)
names(indice) <- newNames
# Check the structure; every column is still character at this point.
str(indice)
## 'data.frame':    167 obs. of  10 variables:
##  $ rank            : chr  "1" "2" "3" "4" ...
##  $ conutry         : chr  " Norway" " Iceland" " Sweden" " New Zealand" ...
##  $ score           : chr  "9.87" "9.58" "9.39" "9.26" ...
##  $ electoral       : chr  "10.00" "10.00" "9.58" "10.00" ...
##  $ functioning     : chr  "9.64" "9.29" "9.64" "9.29" ...
##  $ Participation   : chr  "10.00" "8.89" "8.33" "8.89" ...
##  $ PoliticalCulture: chr  "10.00" "10.00" "10.00" "8.13" ...
##  $ civiliber       : chr  "9.71" "9.71" "9.41" "10.00" ...
##  $ regimetype      : chr  "Full democracy" "Full democracy" "Full democracy" "Full democracy" ...
##  $ continent       : chr  "Europe" "Europe" "Europe" "Oceania" ...

QUITAR ESPACIOS EN BLANCO

# Strip leading and trailing horizontal/vertical whitespace in every column.
indice[] <- lapply(indice, trimws, whitespace = "[\\h\\v]")

CONVERTIR EN NOMINAL

# Store the continent as a nominal (unordered) factor. This line previously
# repeated the whitespace-trimming call from the preceding step, so no
# conversion took place and summary() reported continent as plain character.
# After this change summary() lists the number of countries per continent.
indice$continent <- as.factor(indice$continent)

PARA ORDINAL

# Frequency of each regime type (levels still in alphabetical order here).
table(indice[["regimetype"]])
## 
##    Authoritarian Flawed democracy   Full democracy    Hybrid regime 
##               53               55               20               39

AJUSTAR NIVEL

# Intended ascending order of the regime categories.
ordenOK <- c(
  "Authoritarian", "Hybrid regime", "Flawed democracy", "Full democracy"
)

# Recode the column as an ordered factor that follows that ranking.
indice$regimetype <- factor(
  indice$regimetype,
  levels = ordenOK,
  ordered = TRUE
)

CONVERTIR EN VARIABLE NUMÉRICA

# Coerce every column except 2, 9 and 10 (country name, regime type and
# continent) to numeric. Entries that are not plain numbers become NA, which
# is what raises the coercion warning.
indice[, -c(2, 9, 10)] <- lapply(indice[, -c(2, 9, 10)], as.numeric)
## Warning in lapply(indice[, -c(2, 9, 10)], as.numeric): NAs introduced by
## coercion

VALORES PERDIDOS

# List the rows that hold at least one missing value after the conversion.
indice[!complete.cases(indice), ]
##     rank      conutry score electoral functioning Participation
## 24    NA        Chile  7.97      9.58        8.57          4.44
## 25    NA      Estonia  7.97      9.58        8.21          6.67
## 37    NA     Slovenia  7.50      9.58        6.79          6.67
## 38    NA    Lithuania  7.50      9.58        6.43          6.11
## 48    NA    Argentina  7.02      9.17        5.36          6.11
## 49    NA      Jamaica  7.02      8.75        7.14          4.44
## 55    NA       Poland  6.67      9.17        6.07          6.11
## 56    NA       Guyana  6.67      9.17        5.71          6.11
## 58    NA        Ghana  6.63      8.33        5.71          6.67
## 59    NA      Hungary  6.63      8.75        6.07          5.00
## 64    NA       Serbia  6.41      8.25        5.36          6.11
## 65    NA      Tunisia  6.41      6.42        5.71          7.78
## 67    NA    Singapore  6.38      4.33        7.86          6.11
## 68    NA      Romania  6.38      9.17        5.71          5.00
## 72    NA    Sri Lanka  6.19      7.83        5.71          5.00
## 73    NA       Mexico  6.19      8.33        6.07          7.22
## 74    NA    Hong Kong  6.15      3.08        6.07          5.56
## 75    NA      Senegal  6.15      7.50        6.07          4.44
## 80    NA      Moldova  5.85      7.08        4.64          6.11
## 81    NA         Fiji  5.85      6.58        5.36          6.11
## 82    NA   Montenegro  5.74      6.08        5.36          6.11
## 83    NA        Benin  5.74      6.50        5.71          5.00
## 92    NA     Tanzania  5.41      7.00        5.00          5.00
## 93    NA         Mali  5.41      7.42        3.93          3.89
## 99    NA        Kenya  5.11      3.50        5.36          6.67
## 100   NA   Kyrgyzstan  5.11      6.58        2.93          6.67
## 107   NA      Lebanon  4.63      3.92        2.21          6.67
## 108   NA     Thailand  4.63      3.00        4.29          5.00
## 117   NA   Mozambique  3.85      3.58        2.14          5.00
## 118   NA       Kuwait  3.85      3.17        4.29          3.89
## 129   NA     Ethiopia  3.35      0.00        3.57          5.56
## 130   NA       Rwanda  3.35      1.67        5.00          2.78
## 135   NA     Zimbabwe  3.16      0.50        2.00          4.44
## 136   NA    Venezuela  3.16      1.67        1.79          4.44
## 145   NA   Kazakhstan  2.94      0.50        2.14          4.44
## 146   NA       Russia  2.94      2.17        1.79          5.00
## 152   NA      Eritrea  2.37      0.00        2.14          1.67
## 153   NA         Laos  2.37      0.83        2.86          1.67
## 160   NA Saudi Arabia  1.93      0.00        2.86          2.22
## 161   NA   Tajikistan  1.93      0.08        0.79          1.67
##     PoliticalCulture civiliber       regimetype     continent
## 24              8.13      9.12 Flawed democracy South America
## 25              6.88      8.53 Flawed democracy        Europe
## 37              6.25      8.24 Flawed democracy        Europe
## 38              6.25      9.12 Flawed democracy        Europe
## 48              6.25      8.24 Flawed democracy South America
## 49              6.25      8.53 Flawed democracy North America
## 55              4.38      7.65 Flawed democracy        Europe
## 56              5.00      7.35 Flawed democracy South America
## 58              6.25      6.18 Flawed democracy        Africa
## 59              6.25      7.06 Flawed democracy        Europe
## 64              5.00      7.35 Flawed democracy        Europe
## 65              6.25      5.88 Flawed democracy        Africa
## 67              6.25      7.35 Flawed democracy          Asia
## 68              4.38      7.65 Flawed democracy        Europe
## 72              6.25      6.18 Flawed democracy          Asia
## 73              3.13      6.18 Flawed democracy North America
## 74              7.50      8.53 Flawed democracy          Asia
## 75              6.25      6.47 Flawed democracy        Africa
## 80              4.38      7.06    Hybrid regime        Europe
## 81              5.63      5.59    Hybrid regime       Oceania
## 82              4.38      6.76    Hybrid regime        Europe
## 83              5.63      5.88    Hybrid regime        Africa
## 92              5.63      4.41    Hybrid regime        Africa
## 93              5.63      6.18    Hybrid regime        Africa
## 99              5.63      4.41    Hybrid regime        Africa
## 100             4.38      5.00    Hybrid regime          Asia
## 107             5.63      4.71    Hybrid regime          Asia
## 108             5.00      5.88    Hybrid regime          Asia
## 117             5.00      3.53    Authoritarian        Africa
## 118             4.38      3.53    Authoritarian          Asia
## 129             5.00      2.65    Authoritarian        Africa
## 130             4.38      2.94    Authoritarian        Africa
## 135             5.63      3.24    Authoritarian        Africa
## 136             4.38      3.53    Authoritarian South America
## 145             4.38      3.24    Authoritarian        Europe
## 146             2.50      3.24    Authoritarian        Europe
## 152             6.88      1.18    Authoritarian        Africa
## 153             5.00      1.47    Authoritarian          Asia
## 160             3.13      1.47    Authoritarian          Asia
## 161             6.25      0.88    Authoritarian          Asia

DESECHANDO COLUMNAS

# Drop the rank column, the one that showed missing values in the listing.
indice[["rank"]] <- NULL

RESUMEN ESTADÍSTICO

# Descriptive summary of every remaining column of the data frame.
summary(indice)
##    conutry              score         electoral       functioning   
##  Length:167         Min.   :1.080   Min.   : 0.000   Min.   :0.000  
##  Class :character   1st Qu.:3.545   1st Qu.: 3.000   1st Qu.:2.860  
##  Mode  :character   Median :5.690   Median : 6.580   Median :5.000  
##                     Mean   :5.479   Mean   : 5.903   Mean   :4.885  
##                     3rd Qu.:7.175   3rd Qu.: 9.170   3rd Qu.:6.790  
##                     Max.   :9.870   Max.   :10.000   Max.   :9.640  
##  Participation   PoliticalCulture   civiliber                 regimetype
##  Min.   : 1.11   Min.   : 1.250   Min.   : 0.000   Authoritarian   :53  
##  1st Qu.: 3.89   1st Qu.: 4.380   1st Qu.: 3.530   Hybrid regime   :39  
##  Median : 5.56   Median : 5.630   Median : 5.880   Flawed democracy:55  
##  Mean   : 5.25   Mean   : 5.594   Mean   : 5.768   Full democracy  :20  
##  3rd Qu.: 6.67   3rd Qu.: 6.250   3rd Qu.: 8.240                        
##  Max.   :10.00   Max.   :10.000   Max.   :10.000                        
##   continent        
##  Length:167        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

RELACIÓN ENTRE SCORE Y CONTINENTE (RELACIÓN NUMÉRICA-CATEGÓRICA)

library(ggpubr)
## Loading required package: ggplot2
## Loading required package: magrittr
# Normal Q-Q plot of the score, drawn in one panel per continent.
ggqqplot(data = indice, x = "score") +
  facet_grid(. ~ continent)

# Model formula reused by the tests below: score explained by continent.
f1 <- score ~ continent
# Run the Shapiro-Wilk normality test on a numeric vector.
#
# x: numeric vector of observations.
# Returns a length-2 numeric vector: the W statistic (named "W") followed by
# the p-value (unnamed).
normalidadTest <- function(x) {
  sw <- shapiro.test(x)
  c(sw$statistic, sw$p.value)
}

# Apply the normality test to the score within each continent.
resultado <- aggregate(f1, data = indice, FUN = normalidadTest)



library(knitr)

# The second column of `resultado` holds the statistic and the p-value for
# each group; turn it into a data frame with readable headers.
shapiroTest <- setNames(
  as.data.frame(resultado[, 2]),
  c("SW_Statistic", "Probabilidad")
)
# Show the continent labels next to their test results as a table.
kable(cbind(resultado[1], shapiroTest))
continent SW_Statistic Probabilidad
Africa 0.9653422 0.1487353
Asia 0.9486915 0.0579857
Europe 0.9370922 0.0168389
North America 0.9740888 0.9260364
Oceania 0.7752667 0.0647579
South America 0.8433322 0.0304021
# Kruskal-Wallis rank sum test of score across continents.
kruskal.test(f1, data = indice)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  score by continent
## Kruskal-Wallis chi-squared = 52.932, df = 5, p-value = 3.473e-10

PROBABILIDAD MENOR A 0.05: SE RECHAZA QUE EL SCORE SE DISTRIBUYA IGUAL EN TODOS LOS CONTINENTES

# Notched boxplots of the score for each continent.
ggplot(data = indice, aes(x = continent, y = score)) +
  geom_boxplot(notch = TRUE)
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.

RELACIÓN ENTRE CONTINENTE Y TIPO DE RÉGIMEN (RELACIÓN CATEGÓRICA-CATEGÓRICA)

TABLA DE CONTINGENCIA

# Column variable (continent) and row variable (regime type) of the table.
columna <- indice$continent
fila <- indice$regimetype

# Build the contingency table; the outer parentheses also print it.
# The object is named `t`, the same name as the base transpose function;
# calls such as t(x) still resolve to the function.
(t <- table(fila, columna))
##                   columna
## fila               Africa Asia Europe North America Oceania South America
##   Authoritarian        26   20      4             2       0             1
##   Hybrid regime        15    9      9             4       1             1
##   Flawed democracy      8   13     18             6       1             9
##   Full democracy        1    0     14             2       2             1

EN PORCENTAJES:

# Proportions computed within each column (margin 2), so every continent
# column sums to 1.
prop_t <- prop.table(t, margin = 2)
# Print them rounded to two decimals.
round(prop_t, digits = 2)
##                   columna
## fila               Africa Asia Europe North America Oceania South America
##   Authoritarian      0.52 0.48   0.09          0.14    0.00          0.08
##   Hybrid regime      0.30 0.21   0.20          0.29    0.25          0.08
##   Flawed democracy   0.16 0.31   0.40          0.43    0.25          0.75
##   Full democracy     0.02 0.00   0.31          0.14    0.50          0.08
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# Balloon plot of the transposed proportion table, with cell labels and
# without the marginal totals.
balloonplot(
  t(prop_t),
  main = "tabla",
  label = TRUE,
  show.margins = FALSE
)

DETERMINAR INDEPENDENCIA

# Chi-squared test of independence with a simulated (Monte Carlo) p-value.
# No seed is set, so the p-value can differ slightly between runs.
chisq.test(t, simulate.p.value = TRUE)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  t
## X-squared = 64.445, df = NA, p-value = 0.0004998

Si no hay independencia, hallar la intensidad de la asociación.