https://en.wikipedia.org/wiki/Democracy_Index
library(httr)
library(XML)
## Warning: package 'XML' was built under R version 3.5.2
# Download the Wikipedia "Democracy Index" article.
r <- GET("https://en.wikipedia.org/wiki/Democracy_Index")
# Fail loudly on an HTTP error (4xx/5xx) instead of silently parsing an
# error page further down; returns invisibly when the request succeeded.
stop_for_status(r)
# Show the response summary (URL, date, status, content type, size).
r
## Response [https://en.wikipedia.org/wiki/Democracy_Index]
## Date: 2019-11-10 08:28
## Status: 200
## Content-Type: text/html; charset=UTF-8
## Size: 244 kB
## <!DOCTYPE html>
## <html class="client-nojs" lang="en" dir="ltr">
## <head>
## <meta charset="UTF-8"/>
## <title>Democracy Index - Wikipedia</title>
## <script>document.documentElement.className="client-js";RLCONF={"wgBreakF...
## "Articles with specifically marked weasel-worded phrases from November 2...
## "wgNoticeProject":"wikipedia","wgWikibaseItemId":"Q326174","wgCentralAut...
## "ext.gadget.watchlist-notice","ext.gadget.DRN-wizard","ext.gadget.charin...
## <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.implement("user.t...
## ...
# Extract the response body as text and parse every HTML table on the page;
# the result is a list holding one data frame per table.
doc <- readHTMLTable(content(r, as = "text"))
# Compact overview of the parsed tables.
str(object = doc)
## List of 7
## $ NULL :'data.frame': 1 obs. of 4 variables:
## ..$ V1: Factor w/ 1 level "Full democracies 9.01–10 8.01–9": 1
## ..$ V2: Factor w/ 1 level "Flawed democracies 7.01–8 6.01–7": 1
## ..$ V3: Factor w/ 1 level "Hybrid regimes 5.01–6 4.01–5": 1
## ..$ V4: Factor w/ 1 level "Authoritarian regimes 3.01–4 2.01–3 0–2": 1
## $ NULL :'data.frame': 9 obs. of 1 variable:
## ..$ V1: Factor w/ 9 levels "Anarchism\nKleroterion\nDemocratic capitalism\nDemocratic centralism\nDemocratic confederalism\nDemocratic repu"| __truncated__,..: 5 3 4 8 2 7 1 6 9
## $ Democracy Index 2018
## :'data.frame': 169 obs. of 10 variables:
## ..$ V1 : Factor w/ 168 levels "1","10","100",..: 160 1 73 83 93 103 112 148 129 138 ...
## ..$ V2 : Factor w/ 168 levels "1.93","2.37",..: 53 123 83 148 117 59 88 21 69 28 ...
## ..$ V3 : Factor w/ 158 levels "0.08","0.83",..: 158 157 156 155 154 153 151 156 150 149 ...
## ..$ V4 : Factor w/ 67 levels "0.00","0.08",..: 67 14 14 65 14 14 65 66 14 14 ...
## ..$ V5 : Factor w/ 46 levels "0.00","0.07",..: 46 45 44 45 44 44 40 39 43 43 ...
## ..$ V6 : Factor w/ 24 levels "1.11","1.67",..: 24 3 23 21 23 21 21 22 21 20 ...
## ..$ V7 : Factor w/ 30 levels "0.88","1.25",..: 30 5 5 5 25 29 5 5 27 27 ...
## ..$ V8 : Factor w/ 38 levels "0.00","0.59",..: 35 33 33 32 6 31 6 37 33 6 ...
## ..$ V9 : Factor w/ 11 levels "Africa","Asia",..: 10 6 6 6 6 6 6 8 6 6 ...
## ..$ V10: Factor w/ 7 levels "Africa","Asia",..: 3 4 4 4 6 4 4 NA 4 6 ...
## $ NULL :'data.frame': 5 obs. of 5 variables:
## ..$ V1: Factor w/ 5 levels "Authoritarian regimes",..: 5 3 2 4 1
## ..$ V2: Factor w/ 5 levels "4 < s = 6","6 < s = 8",..: 5 3 2 1 4
## ..$ V3: Factor w/ 5 levels "20","39","53",..: 5 1 4 2 3
## ..$ V4: Factor w/ 5 levels "12.0","23.4",..: 5 1 4 2 3
## ..$ V5: Factor w/ 5 levels "16.7","35.6",..: 5 3 4 1 2
## $ NULL :'data.frame': 9 obs. of 14 variables:
## ..$ V1 : Factor w/ 9 levels "","1","2","3",..: 9 2 3 4 5 6 7 8 1
## ..$ V2 : Factor w/ 9 levels "Asia and Australasia",..: 6 5 8 3 1 2 7 4 9
## ..$ V3 : Factor w/ 8 levels "167","2","20",..: 8 2 4 5 6 6 7 3 1
## ..$ V4 : Factor w/ 9 levels "2006[4]","3.54",..: 1 9 8 7 4 6 3 2 5
## ..$ V5 : Factor w/ 9 levels "2008[13]","3.48",..: 1 9 8 7 5 6 3 2 4
## ..$ V6 : Factor w/ 9 levels "2010[3]","3.52",..: 1 9 8 7 5 6 3 2 4
## ..$ V7 : Factor w/ 9 levels "2011[14]","3.62",..: 1 9 8 7 6 5 3 2 4
## ..$ V8 : Factor w/ 9 levels "2012[2]","3.73",..: 1 9 8 7 6 4 3 2 5
## ..$ V9 : Factor w/ 8 levels "2013[15]","3.68",..: 1 8 7 6 5 4 3 2 4
## ..$ V10: Factor w/ 9 levels "2014[16]","3.65",..: 1 9 8 7 6 5 3 2 4
## ..$ V11: Factor w/ 8 levels "2015[6]","3.58",..: 1 8 7 6 5 4 3 2 4
## ..$ V12: Factor w/ 9 levels "2016[17]","3.56",..: 1 9 8 7 6 4 3 2 5
## ..$ V13: Factor w/ 9 levels "2017[18]","3.54",..: 1 9 8 7 6 4 3 2 5
## ..$ V14: Factor w/ 9 levels "2018[1]","3.54",..: 1 9 8 7 6 4 3 2 5
## $ NULL :'data.frame': 1 obs. of 2 variables:
## ..$ V1: Factor w/ 1 level "": 1
## ..$ V2: Factor w/ 1 level "Wikimedia Commons has media related to Democracy Index.": 1
## $ NULL :'data.frame': 8 obs. of 2 variables:
## ..$ V1: Factor w/ 8 levels "Competitiveness",..: 8 3 2 1 4 7 6 5
## ..$ V2: Factor w/ 6 levels "Bribes\nCorruption barometer\nCorruption perceptions",..: NA 5 1 2 4 6 3 NA
Сохраним третью таблицу
# Keep the third parsed table (the one listed as "Democracy Index 2018").
DI2018 <- doc[[3L]]
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 3.5.3
# Render the first 20 rows of the raw table as a styled table.
kable_styling(kable(DI2018[1:20, ]))
| V1 | V2 | V3 | V4 | V5 | V6 | V7 | V8 | V9 | V10 |
|---|---|---|---|---|---|---|---|---|---|
| Rank | Country | Score | Electoral processand pluralism | Functioning ofgovernment | Politicalparticipation | Politicalculture | Civilliberties | Regimetype | Continent |
| 1 | Norway | 9.87 | 10.00 | 9.64 | 10.00 | 10.00 | 9.71 | Full democracy | Europe |
| 2 | Iceland | 9.58 | 10.00 | 9.29 | 8.89 | 10.00 | 9.71 | Full democracy | Europe |
| 3 | Sweden | 9.39 | 9.58 | 9.64 | 8.33 | 10.00 | 9.41 | Full democracy | Europe |
| 4 | New Zealand | 9.26 | 10.00 | 9.29 | 8.89 | 8.13 | 10.00 | Full democracy | Oceania |
| 5 | Denmark | 9.22 | 10.00 | 9.29 | 8.33 | 9.38 | 9.12 | Full democracy | Europe |
| 6 | Ireland | 9.15 | 9.58 | 7.86 | 8.33 | 10.00 | 10.00 | Full democracy | Europe |
| Canada | 9.15 | 9.58 | 9.64 | 7.78 | 8.75 | 10.00 | Full democracy | North America | NA |
| 8 | Finland | 9.14 | 10.00 | 8.93 | 8.33 | 8.75 | 9.71 | Full democracy | Europe |
| 9 | Australia | 9.09 | 10.00 | 8.93 | 7.78 | 8.75 | 10.00 | Full democracy | Oceania |
| 10 | Switzerland | 9.03 | 9.58 | 9.29 | 7.78 | 9.38 | 9.12 | Full democracy | Europe |
| 11 | Netherlands | 8.89 | 9.58 | 9.29 | 8.33 | 8.13 | 9.12 | Full democracy | Europe |
| 12 | Luxembourg | 8.81 | 10.00 | 8.93 | 6.67 | 8.75 | 9.71 | Full democracy | Europe |
| 13 | Germany | 8.68 | 9.58 | 8.57 | 8.33 | 7.50 | 9.41 | Full democracy | Europe |
| 14 | United Kingdom | 8.53 | 9.58 | 7.50 | 8.33 | 8.13 | 9.12 | Full democracy | Europe |
| 15 | Uruguay | 8.38 | 10.00 | 8.57 | 6.11 | 7.50 | 9.71 | Full democracy | South America |
| 16 | Austria | 8.29 | 9.58 | 7.86 | 8.33 | 6.88 | 8.82 | Full democracy | Europe |
| 17 | Mauritius | 8.22 | 9.17 | 8.21 | 5.56 | 8.75 | 9.41 | Full democracy | Africa |
| 18 | Malta | 8.21 | 9.17 | 8.21 | 6.11 | 8.75 | 8.82 | Full democracy | Europe |
| 19 | Spain | 8.08 | 9.17 | 7.14 | 7.78 | 7.50 | 8.82 | Full democracy | Europe |
# The first row of the scraped table holds the header text, not data: drop it.
DI2018 <- DI2018[-1, ]
# Replace the placeholder names V1..V10 with short descriptive ones.
names(DI2018) <- c(
  "rank", "country", "score", "EPP", "FofG", "PP", "PC", "CL",
  "regime", "continent"
)
# Convert all ten factor columns to plain character vectors.
DI2018[1:10] <- lapply(DI2018[1:10], as.character)
Убираем последнюю пустую строчку
# Remove the row at position 168 (the trailing empty row); all columns kept.
DI2018 <- DI2018[-168L, , drop = FALSE]
Сдвигаем строчки, в которых нет значения для континента
# Some rows were parsed one column short: their values sit one column too far
# to the left and `continent` is NA (presumably because the rank cell is
# shared with the tied row above -- verify against the page markup).
# Move the values of all such rows one column to the right in a single step.
shifted <- is.na(DI2018$continent)
if (any(shifted)) {
  DI2018[shifted, 2:10] <- DI2018[shifted, 1:9]
  # After the shift the first cell still holds the country name, not a rank;
  # blank it so the `rank` column never carries a stale country name.
  DI2018$rank[shifted] <- NA_character_
}
# Print the first ten rows to check the result of the realignment.
DI2018[seq_len(10), ]
## rank country score EPP FofG PP PC CL regime
## 2 1 Norway 9.87 10.00 9.64 10.00 10.00 9.71 Full democracy
## 3 2 Iceland 9.58 10.00 9.29 8.89 10.00 9.71 Full democracy
## 4 3 Sweden 9.39 9.58 9.64 8.33 10.00 9.41 Full democracy
## 5 4 New Zealand 9.26 10.00 9.29 8.89 8.13 10.00 Full democracy
## 6 5 Denmark 9.22 10.00 9.29 8.33 9.38 9.12 Full democracy
## 7 6 Ireland 9.15 9.58 7.86 8.33 10.00 10.00 Full democracy
## 8 Canada Canada 9.15 9.58 9.64 7.78 8.75 10.00 Full democracy
## 9 8 Finland 9.14 10.00 8.93 8.33 8.75 9.71 Full democracy
## 10 9 Australia 9.09 10.00 8.93 7.78 8.75 10.00 Full democracy
## 11 10 Switzerland 9.03 9.58 9.29 7.78 9.38 9.12 Full democracy
## continent
## 2 Europe
## 3 Europe
## 4 Europe
## 5 Oceania
## 6 Europe
## 7 Europe
## 8 North America
## 9 Europe
## 10 Oceania
## 11 Europe
Исправим переменную rank
# Recompute the rank from the score: negate so the highest score gets rank 1;
# tied scores all receive the smallest rank of their group.
DI2018$rank <- rank(-as.numeric(DI2018$score), ties.method = "min")
Различается ли демократичность стран на разных континентах?
# Shrink the axis annotation so the continent names fit under the boxes.
par(cex.axis = 0.8)
# Score distribution per continent (numeric ~ factor draws one box per level).
# The formula is kept verbatim because its text is used for the axis labels.
plot(as.numeric(DI2018$score) ~ as.factor(DI2018$continent))
library(car)
## Warning: package 'car' was built under R version 3.5.2
# Levene's test for equality of the score variances across continents
# (response and grouping factor passed explicitly).
leveneTest(
  y = as.numeric(DI2018$score),
  group = as.factor(DI2018$continent)
)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 1.4191 0.22
## 161
# One-way analysis of means assuming equal variances across continents.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
oneway.test(
  as.numeric(DI2018$score) ~ as.factor(DI2018$continent),
  var.equal = TRUE
)
##
## One-way analysis of means
##
## data: as.numeric(DI2018$score) and as.factor(DI2018$continent)
## F = 15.654, num df = 5, denom df = 161, p-value = 1.522e-12
# Fit the one-way ANOVA of score on continent. The formula is kept verbatim
# because its text becomes the term name in the printed tables.
aov.out <- aov(as.numeric(DI2018$score) ~ as.factor(DI2018$continent))
# ANOVA table: degrees of freedom, sums of squares, F statistic and p-value.
summary(aov.out)
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(DI2018$continent) 5 262.7 52.54 15.65 1.52e-12 ***
## Residuals 161 540.3 3.36
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey's honest significant differences for every pair of continents,
# at the 95% family-wise confidence level.
Tukey <- TukeyHSD(x = aov.out, conf.level = 0.95)
# Show the pairwise differences with their intervals and adjusted p-values.
print(Tukey)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = as.numeric(DI2018$score) ~ as.factor(DI2018$continent))
##
## $`as.factor(DI2018$continent)`
## diff lwr upr p adj
## Asia-Africa 0.1707619 -0.9352135 1.276737 0.9977574
## Europe-Africa 2.7363333 1.6505786 3.822088 0.0000000
## North America-Africa 1.9874286 0.3897031 3.585154 0.0058070
## Oceania-Africa 3.2685000 0.5228632 6.014137 0.0096976
## South America-Africa 2.2876667 0.5891046 3.986229 0.0020404
## Europe-Asia 2.5655714 1.4318929 3.699250 0.0000000
## North America-Asia 1.8166667 0.1859949 3.447338 0.0194084
## Oceania-Asia 3.0977381 0.3327996 5.862677 0.0183606
## South America-Asia 2.1169048 0.3873162 3.846493 0.0070508
## North America-Europe -0.7489048 -2.3659305 0.868121 0.7646367
## Oceania-Europe 0.5321667 -2.2247459 3.289079 0.9935668
## South America-Europe -0.4486667 -2.1653957 1.268062 0.9746264
## Oceania-North America 1.2810714 -1.7146639 4.276807 0.8198176
## South America-North America 0.3002381 -1.7784687 2.378945 0.9983709
## South America-Oceania -0.9808333 -4.0315409 2.069874 0.9389445
# Margins in lines (bottom, left, top, right): the wide left margin leaves
# room for the long "Continent-Continent" labels.
par(mar = c(1, 20, 4, 4))
# Confidence intervals of the pairwise differences; las = 2 draws the axis
# labels perpendicular to the axes.
plot(x = Tukey, las = 2)