library(htmltab)

# Source page and the XPath of the table to scrape (second table inside the
# article's content div).
links <- list(
  web = "https://en.wikipedia.org/wiki/Democracy_Index",
  xpath = '//*[@id="mw-content-text"]/div/table[2]/tbody'
)

# Download the page and parse the selected table into a data frame.
indice <- htmltab(doc = links$web, which = links$xpath)

# Inspect the raw column headers produced by the scrape.
names(indice)
## [1] "Rank >> Rank"
## [2] "Country >> Country"
## [3] "Score >> Score"
## [4] "Electoral processand pluralism >> Electoral processand pluralism"
## [5] "Functioning ofgovernment >> Functioning ofgovernment"
## [6] "Politicalparticipation >> Politicalparticipation"
## [7] "Politicalculture >> Politicalculture"
## [8] "Civilliberties >> Civilliberties"
## [9] "Regimetype >> Regimetype"
## [10] "Continent >> Continent"
# Replace the scraped "X >> X" headers with short names, in column order.
# NOTE(review): "conutry" looks like a typo for "country"; it is kept as-is
# because the column name is visible to any code that uses this data frame.
newNames <- c(
  "rank", "conutry", "score", "electoral", "functioning",
  "Participation", "PoliticalCulture", "civiliber", "regimetype", "continent"
)
indice <- setNames(indice, newNames)

# Check the structure: every column is still character at this point.
str(indice)
## 'data.frame': 167 obs. of 10 variables:
## $ rank : chr "1" "2" "3" "4" ...
## $ conutry : chr " Norway" " Iceland" " Sweden" " New Zealand" ...
## $ score : chr "9.87" "9.58" "9.39" "9.26" ...
## $ electoral : chr "10.00" "10.00" "9.58" "10.00" ...
## $ functioning : chr "9.64" "9.29" "9.64" "9.29" ...
## $ Participation : chr "10.00" "8.89" "8.33" "8.89" ...
## $ PoliticalCulture: chr "10.00" "10.00" "10.00" "8.13" ...
## $ civiliber : chr "9.71" "9.71" "9.41" "10.00" ...
## $ regimetype : chr "Full democracy" "Full democracy" "Full democracy" "Full democracy" ...
## $ continent : chr "Europe" "Europe" "Europe" "Oceania" ...
QUITAR ESPACIOS EN BLANCO
# Strip leading/trailing horizontal and vertical whitespace from every column;
# assigning into indice[] keeps the object a data frame.
indice[] <- lapply(indice, trimws, whitespace = "[\\h\\v]")
CONVERTIR EN NOMINAL
# Convert the nominal variable to a factor. The previous statement here was a
# verbatim repeat of the whitespace trim above, so no nominal conversion ever
# took place. Levels are the sorted unique continent names, which is the same
# grouping the later tests and plots derived from the character column, so
# their results do not change; summary() now reports per-continent counts.
indice$continent <- as.factor(indice$continent)
PARA ORDINAL
# Frequency of each regime type (still a character column here, so the
# categories print in alphabetical order).
table(indice[["regimetype"]])
##
## Authoritarian Flawed democracy Full democracy Hybrid regime
## 53 55 20 39
AJUSTAR NIVEL
# Level order for the ordinal variable: first entry is the lowest level,
# last entry the highest.
ordenOK <- c(
  "Authoritarian", "Hybrid regime", "Flawed democracy", "Full democracy"
)

# Recode regimetype as an ordered factor using that level order.
indice$regimetype <- factor(indice$regimetype, levels = ordenOK, ordered = TRUE)
CONVERTIR EN VARIABLE NUMÉRICA
# Coerce every column except 2, 9 and 10 (conutry, regimetype, continent) to
# numeric. Values that are not plain numbers become NA with a coercion
# warning; in the recorded run this affects the rank column.
indice[-c(2, 9, 10)] <- lapply(indice[-c(2, 9, 10)], as.numeric)
## Warning in lapply(indice[, -c(2, 9, 10)], as.numeric): NAs introduced by
## coercion
VALORES PERDIDOS
# Show every row that has at least one missing value in any column.
indice[rowSums(is.na(indice)) > 0, ]
## rank conutry score electoral functioning Participation
## 24 NA Chile 7.97 9.58 8.57 4.44
## 25 NA Estonia 7.97 9.58 8.21 6.67
## 37 NA Slovenia 7.50 9.58 6.79 6.67
## 38 NA Lithuania 7.50 9.58 6.43 6.11
## 48 NA Argentina 7.02 9.17 5.36 6.11
## 49 NA Jamaica 7.02 8.75 7.14 4.44
## 55 NA Poland 6.67 9.17 6.07 6.11
## 56 NA Guyana 6.67 9.17 5.71 6.11
## 58 NA Ghana 6.63 8.33 5.71 6.67
## 59 NA Hungary 6.63 8.75 6.07 5.00
## 64 NA Serbia 6.41 8.25 5.36 6.11
## 65 NA Tunisia 6.41 6.42 5.71 7.78
## 67 NA Singapore 6.38 4.33 7.86 6.11
## 68 NA Romania 6.38 9.17 5.71 5.00
## 72 NA Sri Lanka 6.19 7.83 5.71 5.00
## 73 NA Mexico 6.19 8.33 6.07 7.22
## 74 NA Hong Kong 6.15 3.08 6.07 5.56
## 75 NA Senegal 6.15 7.50 6.07 4.44
## 80 NA Moldova 5.85 7.08 4.64 6.11
## 81 NA Fiji 5.85 6.58 5.36 6.11
## 82 NA Montenegro 5.74 6.08 5.36 6.11
## 83 NA Benin 5.74 6.50 5.71 5.00
## 92 NA Tanzania 5.41 7.00 5.00 5.00
## 93 NA Mali 5.41 7.42 3.93 3.89
## 99 NA Kenya 5.11 3.50 5.36 6.67
## 100 NA Kyrgyzstan 5.11 6.58 2.93 6.67
## 107 NA Lebanon 4.63 3.92 2.21 6.67
## 108 NA Thailand 4.63 3.00 4.29 5.00
## 117 NA Mozambique 3.85 3.58 2.14 5.00
## 118 NA Kuwait 3.85 3.17 4.29 3.89
## 129 NA Ethiopia 3.35 0.00 3.57 5.56
## 130 NA Rwanda 3.35 1.67 5.00 2.78
## 135 NA Zimbabwe 3.16 0.50 2.00 4.44
## 136 NA Venezuela 3.16 1.67 1.79 4.44
## 145 NA Kazakhstan 2.94 0.50 2.14 4.44
## 146 NA Russia 2.94 2.17 1.79 5.00
## 152 NA Eritrea 2.37 0.00 2.14 1.67
## 153 NA Laos 2.37 0.83 2.86 1.67
## 160 NA Saudi Arabia 1.93 0.00 2.86 2.22
## 161 NA Tajikistan 1.93 0.08 0.79 1.67
## PoliticalCulture civiliber regimetype continent
## 24 8.13 9.12 Flawed democracy South America
## 25 6.88 8.53 Flawed democracy Europe
## 37 6.25 8.24 Flawed democracy Europe
## 38 6.25 9.12 Flawed democracy Europe
## 48 6.25 8.24 Flawed democracy South America
## 49 6.25 8.53 Flawed democracy North America
## 55 4.38 7.65 Flawed democracy Europe
## 56 5.00 7.35 Flawed democracy South America
## 58 6.25 6.18 Flawed democracy Africa
## 59 6.25 7.06 Flawed democracy Europe
## 64 5.00 7.35 Flawed democracy Europe
## 65 6.25 5.88 Flawed democracy Africa
## 67 6.25 7.35 Flawed democracy Asia
## 68 4.38 7.65 Flawed democracy Europe
## 72 6.25 6.18 Flawed democracy Asia
## 73 3.13 6.18 Flawed democracy North America
## 74 7.50 8.53 Flawed democracy Asia
## 75 6.25 6.47 Flawed democracy Africa
## 80 4.38 7.06 Hybrid regime Europe
## 81 5.63 5.59 Hybrid regime Oceania
## 82 4.38 6.76 Hybrid regime Europe
## 83 5.63 5.88 Hybrid regime Africa
## 92 5.63 4.41 Hybrid regime Africa
## 93 5.63 6.18 Hybrid regime Africa
## 99 5.63 4.41 Hybrid regime Africa
## 100 4.38 5.00 Hybrid regime Asia
## 107 5.63 4.71 Hybrid regime Asia
## 108 5.00 5.88 Hybrid regime Asia
## 117 5.00 3.53 Authoritarian Africa
## 118 4.38 3.53 Authoritarian Asia
## 129 5.00 2.65 Authoritarian Africa
## 130 4.38 2.94 Authoritarian Africa
## 135 5.63 3.24 Authoritarian Africa
## 136 4.38 3.53 Authoritarian South America
## 145 4.38 3.24 Authoritarian Europe
## 146 2.50 3.24 Authoritarian Europe
## 152 6.88 1.18 Authoritarian Africa
## 153 5.00 1.47 Authoritarian Asia
## 160 3.13 1.47 Authoritarian Asia
## 161 6.25 0.88 Authoritarian Asia
DESECHANDO COLUMNAS
# Remove the rank column (the only column with missing values after coercion).
indice[["rank"]] <- NULL
RESUMEN ESTADÍSTICO
# Per-column summary: quantiles and mean for numeric columns, level counts for
# the ordered factor, length/class/mode for character columns.
summary(object = indice)
## conutry score electoral functioning
## Length:167 Min. :1.080 Min. : 0.000 Min. :0.000
## Class :character 1st Qu.:3.545 1st Qu.: 3.000 1st Qu.:2.860
## Mode :character Median :5.690 Median : 6.580 Median :5.000
## Mean :5.479 Mean : 5.903 Mean :4.885
## 3rd Qu.:7.175 3rd Qu.: 9.170 3rd Qu.:6.790
## Max. :9.870 Max. :10.000 Max. :9.640
## Participation PoliticalCulture civiliber regimetype
## Min. : 1.11 Min. : 1.250 Min. : 0.000 Authoritarian :53
## 1st Qu.: 3.89 1st Qu.: 4.380 1st Qu.: 3.530 Hybrid regime :39
## Median : 5.56 Median : 5.630 Median : 5.880 Flawed democracy:55
## Mean : 5.25 Mean : 5.594 Mean : 5.768 Full democracy :20
## 3rd Qu.: 6.67 3rd Qu.: 6.250 3rd Qu.: 8.240
## Max. :10.00 Max. :10.000 Max. :10.000
## continent
## Length:167
## Class :character
## Mode :character
##
##
##
RELACIÓN ENTRE SCORE Y CONTINENTE (RELACIÓN NUMÉRICA-CATEGÓRICA)
library(ggpubr)
## Loading required package: ggplot2
## Loading required package: magrittr
# Normal Q-Q plot of score, one panel per continent laid out in columns.
ggqqplot(indice, x = "score") +
  facet_grid(. ~ continent)
# Model formula reused by the tests below: score grouped by continent.
f1 <- score ~ continent
# Run a Shapiro-Wilk normality test on a numeric vector.
#
# x: numeric vector of sample values, passed straight to shapiro.test().
# Returns a length-2 numeric vector: the W statistic (named "W") followed by
# the p-value (unnamed).
normalidadTest <- function(x) {
  sw <- shapiro.test(x)
  c(sw[["statistic"]], sw[["p.value"]])
}
# Apply the normality test to score within each continent. Because the test
# returns two values, the second column of the result is a two-column matrix.
resultado <- aggregate(f1, data = indice, FUN = normalidadTest)
library(knitr)
# Unpack the matrix column (statistic, p-value) into its own data frame with
# readable column names.
shapiroTest <- setNames(
  as.data.frame(resultado[[2]]),
  c("SW_Statistic", "Probabilidad")
)

# Render the group column next to the test results as a table.
kable(cbind(resultado[1], shapiroTest))
continent | SW_Statistic | Probabilidad |
---|---|---|
Africa | 0.9653422 | 0.1487353 |
Asia | 0.9486915 | 0.0579857 |
Europe | 0.9370922 | 0.0168389 |
North America | 0.9740888 | 0.9260364 |
Oceania | 0.7752667 | 0.0647579 |
South America | 0.8433322 | 0.0304021 |
# Non-parametric comparison of score across continents (Kruskal-Wallis rank
# sum test), using the formula defined earlier.
kruskal.test(f1, data = indice)
##
## Kruskal-Wallis rank sum test
##
## data: score by continent
## Kruskal-Wallis chi-squared = 52.932, df = 5, p-value = 3.473e-10
PROBABILIDAD MENOR A 0.05: SE RECHAZA QUE EL SCORE SE DISTRIBUYA IGUAL EN TODOS LOS CONTINENTES
# Notched boxplot of score for each continent. In the recorded run ggplot2
# reports notches going outside the hinges for some groups.
ggplot(indice, aes(x = continent, y = score)) +
  geom_boxplot(notch = TRUE)
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.
RELACIÓN ENTRE CONTINENTE Y TIPO DE RÉGIMEN (RELACIÓN CATEGÓRICA-CATEGÓRICA)
TABLA DE CONTINGENCIA
# Cross-tabulate regime type (rows) against continent (columns). The variable
# names fila/columna become the dimension labels of the printed table.
columna <- indice$continent
fila <- indice$regimetype
t <- table(fila, columna)
print(t)
## columna
## fila Africa Asia Europe North America Oceania South America
## Authoritarian 26 20 4 2 0 1
## Hybrid regime 15 9 9 4 1 1
## Flawed democracy 8 13 18 6 1 9
## Full democracy 1 0 14 2 2 1
EN PORCENTAJES:
# Column-wise proportions: each continent's column sums to 1.
prop_t <- prop.table(t, margin = 2)

# Display the proportions rounded to two decimals.
round(prop_t, digits = 2)
## columna
## fila Africa Asia Europe North America Oceania South America
## Authoritarian 0.52 0.48 0.09 0.14 0.00 0.08
## Hybrid regime 0.30 0.21 0.20 0.29 0.25 0.08
## Flawed democracy 0.16 0.31 0.40 0.43 0.25 0.75
## Full democracy 0.02 0.00 0.31 0.14 0.50 0.08
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Balloon plot of the transposed proportion table, with cell labels and
# without marginal totals.
balloonplot(
  t(prop_t),
  main = "tabla",
  label = TRUE,
  show.margins = FALSE
)
DETERMINAR INDEPENDENCIA
# Chi-squared test of independence on the contingency table, with the p-value
# obtained by Monte Carlo simulation. No seed is set here, so the simulated
# p-value can vary slightly between runs.
chisq.test(t, simulate.p.value = TRUE)
##
## Pearson's Chi-squared test with simulated p-value (based on 2000
## replicates)
##
## data: t
## X-squared = 64.445, df = NA, p-value = 0.0004998
Si no hay independencia, hallar la intensidad de la asociación.