Данные из Википедии

https://en.wikipedia.org/wiki/Democracy_Index

library(httr)
library(XML)
## Warning: package 'XML' was built under R version 3.5.2
# Download the Wikipedia "Democracy Index" page; `r` holds the httr response.
r <- GET("https://en.wikipedia.org/wiki/Democracy_Index")
# Fail fast on an HTTP error status instead of silently parsing an error page
# in the steps that follow. Returns invisibly on success, so nothing is printed.
stop_for_status(r)
# Print the response summary (date, status, content type, size, body preview).
r
## Response [https://en.wikipedia.org/wiki/Democracy_Index]
##   Date: 2019-11-10 08:28
##   Status: 200
##   Content-Type: text/html; charset=UTF-8
##   Size: 244 kB
## <!DOCTYPE html>
## <html class="client-nojs" lang="en" dir="ltr">
## <head>
## <meta charset="UTF-8"/>
## <title>Democracy Index - Wikipedia</title>
## <script>document.documentElement.className="client-js";RLCONF={"wgBreakF...
## "Articles with specifically marked weasel-worded phrases from November 2...
## "wgNoticeProject":"wikipedia","wgWikibaseItemId":"Q326174","wgCentralAut...
## "ext.gadget.watchlist-notice","ext.gadget.DRN-wizard","ext.gadget.charin...
## <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.implement("user.t...
## ...
# Extract the response body as text and parse the HTML tables it contains
# into a list of data frames, then inspect the structure of that list.
doc <- readHTMLTable(doc = content(r, as = "text"))
str(object = doc)
## List of 7
##  $ NULL                  :'data.frame':  1 obs. of  4 variables:
##   ..$ V1: Factor w/ 1 level "Full democracies   9.01–10   8.01–9": 1
##   ..$ V2: Factor w/ 1 level "Flawed democracies   7.01–8   6.01–7": 1
##   ..$ V3: Factor w/ 1 level "Hybrid regimes   5.01–6   4.01–5": 1
##   ..$ V4: Factor w/ 1 level "Authoritarian regimes   3.01–4    2.01–3    0–2": 1
##  $ NULL                  :'data.frame':  9 obs. of  1 variable:
##   ..$ V1: Factor w/ 9 levels "Anarchism\nKleroterion\nDemocratic capitalism\nDemocratic centralism\nDemocratic confederalism\nDemocratic repu"| __truncated__,..: 5 3 4 8 2 7 1 6 9
##  $ Democracy Index 2018
## :'data.frame':   169 obs. of  10 variables:
##   ..$ V1 : Factor w/ 168 levels "1","10","100",..: 160 1 73 83 93 103 112 148 129 138 ...
##   ..$ V2 : Factor w/ 168 levels "1.93","2.37",..: 53 123 83 148 117 59 88 21 69 28 ...
##   ..$ V3 : Factor w/ 158 levels "0.08","0.83",..: 158 157 156 155 154 153 151 156 150 149 ...
##   ..$ V4 : Factor w/ 67 levels "0.00","0.08",..: 67 14 14 65 14 14 65 66 14 14 ...
##   ..$ V5 : Factor w/ 46 levels "0.00","0.07",..: 46 45 44 45 44 44 40 39 43 43 ...
##   ..$ V6 : Factor w/ 24 levels "1.11","1.67",..: 24 3 23 21 23 21 21 22 21 20 ...
##   ..$ V7 : Factor w/ 30 levels "0.88","1.25",..: 30 5 5 5 25 29 5 5 27 27 ...
##   ..$ V8 : Factor w/ 38 levels "0.00","0.59",..: 35 33 33 32 6 31 6 37 33 6 ...
##   ..$ V9 : Factor w/ 11 levels "Africa","Asia",..: 10 6 6 6 6 6 6 8 6 6 ...
##   ..$ V10: Factor w/ 7 levels "Africa","Asia",..: 3 4 4 4 6 4 4 NA 4 6 ...
##  $ NULL                  :'data.frame':  5 obs. of  5 variables:
##   ..$ V1: Factor w/ 5 levels "Authoritarian regimes",..: 5 3 2 4 1
##   ..$ V2: Factor w/ 5 levels "4 < s = 6","6 < s = 8",..: 5 3 2 1 4
##   ..$ V3: Factor w/ 5 levels "20","39","53",..: 5 1 4 2 3
##   ..$ V4: Factor w/ 5 levels "12.0","23.4",..: 5 1 4 2 3
##   ..$ V5: Factor w/ 5 levels "16.7","35.6",..: 5 3 4 1 2
##  $ NULL                  :'data.frame':  9 obs. of  14 variables:
##   ..$ V1 : Factor w/ 9 levels "","1","2","3",..: 9 2 3 4 5 6 7 8 1
##   ..$ V2 : Factor w/ 9 levels "Asia and Australasia",..: 6 5 8 3 1 2 7 4 9
##   ..$ V3 : Factor w/ 8 levels "167","2","20",..: 8 2 4 5 6 6 7 3 1
##   ..$ V4 : Factor w/ 9 levels "2006[4]","3.54",..: 1 9 8 7 4 6 3 2 5
##   ..$ V5 : Factor w/ 9 levels "2008[13]","3.48",..: 1 9 8 7 5 6 3 2 4
##   ..$ V6 : Factor w/ 9 levels "2010[3]","3.52",..: 1 9 8 7 5 6 3 2 4
##   ..$ V7 : Factor w/ 9 levels "2011[14]","3.62",..: 1 9 8 7 6 5 3 2 4
##   ..$ V8 : Factor w/ 9 levels "2012[2]","3.73",..: 1 9 8 7 6 4 3 2 5
##   ..$ V9 : Factor w/ 8 levels "2013[15]","3.68",..: 1 8 7 6 5 4 3 2 4
##   ..$ V10: Factor w/ 9 levels "2014[16]","3.65",..: 1 9 8 7 6 5 3 2 4
##   ..$ V11: Factor w/ 8 levels "2015[6]","3.58",..: 1 8 7 6 5 4 3 2 4
##   ..$ V12: Factor w/ 9 levels "2016[17]","3.56",..: 1 9 8 7 6 4 3 2 5
##   ..$ V13: Factor w/ 9 levels "2017[18]","3.54",..: 1 9 8 7 6 4 3 2 5
##   ..$ V14: Factor w/ 9 levels "2018[1]","3.54",..: 1 9 8 7 6 4 3 2 5
##  $ NULL                  :'data.frame':  1 obs. of  2 variables:
##   ..$ V1: Factor w/ 1 level "": 1
##   ..$ V2: Factor w/ 1 level "Wikimedia Commons has media related to Democracy Index.": 1
##  $ NULL                  :'data.frame':  8 obs. of  2 variables:
##   ..$ V1: Factor w/ 8 levels "Competitiveness",..: 8 3 2 1 4 7 6 5
##   ..$ V2: Factor w/ 6 levels "Bribes\nCorruption barometer\nCorruption perceptions",..: NA 5 1 2 4 6 3 NA

Сохраним третью таблицу

# Keep the third parsed table (the element `str(doc)` labels
# "Democracy Index 2018") as the working data frame.
DI2018 <- doc[[3L]]

Посмотрим на первые 20 строк

Что не так?

library(kableExtra)
## Warning: package 'kableExtra' was built under R version 3.5.3
# Render the first 20 rows of the raw table as a styled table so the
# problems with the scraped data can be inspected visually.
kable_styling(kable(DI2018[seq_len(20), ]))
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
Rank Country Score Electoral processand pluralism Functioning ofgovernment Politicalparticipation Politicalculture Civilliberties Regimetype Continent
1 Norway 9.87 10.00 9.64 10.00 10.00 9.71 Full democracy Europe
2 Iceland 9.58 10.00 9.29 8.89 10.00 9.71 Full democracy Europe
3 Sweden 9.39 9.58 9.64 8.33 10.00 9.41 Full democracy Europe
4 New Zealand 9.26 10.00 9.29 8.89 8.13 10.00 Full democracy Oceania
5 Denmark 9.22 10.00 9.29 8.33 9.38 9.12 Full democracy Europe
6 Ireland 9.15 9.58 7.86 8.33 10.00 10.00 Full democracy Europe
Canada 9.15 9.58 9.64 7.78 8.75 10.00 Full democracy North America NA
8 Finland 9.14 10.00 8.93 8.33 8.75 9.71 Full democracy Europe
9 Australia 9.09 10.00 8.93 7.78 8.75 10.00 Full democracy Oceania
10 Switzerland 9.03 9.58 9.29 7.78 9.38 9.12 Full democracy Europe
11 Netherlands 8.89 9.58 9.29 8.33 8.13 9.12 Full democracy Europe
12 Luxembourg 8.81 10.00 8.93 6.67 8.75 9.71 Full democracy Europe
13 Germany 8.68 9.58 8.57 8.33 7.50 9.41 Full democracy Europe
14 United Kingdom 8.53 9.58 7.50 8.33 8.13 9.12 Full democracy Europe
15 Uruguay 8.38 10.00 8.57 6.11 7.50 9.71 Full democracy South America
16 Austria 8.29 9.58 7.86 8.33 6.88 8.82 Full democracy Europe
17 Mauritius 8.22 9.17 8.21 5.56 8.75 9.41 Full democracy Africa
18 Malta 8.21 9.17 8.21 6.11 8.75 8.82 Full democracy Europe
19 Spain 8.08 9.17 7.14 7.78 7.50 8.82 Full democracy Europe
# Give the columns short names. The scraped table arrived with generic
# V1..V10 names and its real header stored as the first data row:
#   EPP  = Electoral process and pluralism, FofG = Functioning of government,
#   PP   = Political participation,         PC   = Political culture,
#   CL   = Civil liberties.
names(DI2018) <- c(
  "rank", "country", "score", "EPP", "FofG", "PP", "PC", "CL",
  "regime", "continent"
)
# Drop that header row now that the names are set.
DI2018 <- DI2018[-1, ]
# Convert every column from factor to character in one step instead of
# looping over a hard-coded 1:10 column range. Assigning into `DI2018[]`
# keeps the object a data frame with its row names intact.
DI2018[] <- lapply(DI2018, as.character)

Убираем последнюю пустую строчку

# Drop the trailing empty row by its position relative to the end of the table
# rather than by the hard-coded index 168, so the step still targets the last
# row if the scraped table changes length.
# NOTE: unlike a fixed index, re-running this line removes one more row.
DI2018 <- DI2018[-nrow(DI2018), ]

Сдвигаем на одну колонку вправо строчки, в которых нет значения для континента (значения в них съехали на одну колонку влево)

# Rows with no continent value have every cell one column too far to the left
# (the country name sits in `rank`, the score in `country`, and so on).
# Move columns 1-9 into columns 2-10 for all such rows at once. The right-hand
# side is fully evaluated before the assignment, so the overlapping column
# ranges are safe.
# A logical mask replaces the `1:nrow()` loop, which would iterate over
# c(1, 0) on an empty data frame. The `rank` cell of the shifted rows is left
# holding the country name, exactly as before.
needs_shift <- is.na(DI2018$continent)
DI2018[needs_shift, 2:10] <- DI2018[needs_shift, 1:9]

# Show the first ten rows to check the result of the shift.
DI2018[seq_len(10), ]
##      rank     country score   EPP FofG    PP    PC    CL         regime
## 2       1      Norway  9.87 10.00 9.64 10.00 10.00  9.71 Full democracy
## 3       2     Iceland  9.58 10.00 9.29  8.89 10.00  9.71 Full democracy
## 4       3      Sweden  9.39  9.58 9.64  8.33 10.00  9.41 Full democracy
## 5       4 New Zealand  9.26 10.00 9.29  8.89  8.13 10.00 Full democracy
## 6       5     Denmark  9.22 10.00 9.29  8.33  9.38  9.12 Full democracy
## 7       6     Ireland  9.15  9.58 7.86  8.33 10.00 10.00 Full democracy
## 8  Canada      Canada  9.15  9.58 9.64  7.78  8.75 10.00 Full democracy
## 9       8     Finland  9.14 10.00 8.93  8.33  8.75  9.71 Full democracy
## 10      9   Australia  9.09 10.00 8.93  7.78  8.75 10.00 Full democracy
## 11     10 Switzerland  9.03  9.58 9.29  7.78  9.38  9.12 Full democracy
##        continent
## 2         Europe
## 3         Europe
## 4         Europe
## 5        Oceania
## 6         Europe
## 7         Europe
## 8  North America
## 9         Europe
## 10       Oceania
## 11        Europe

Исправим переменную rank

# Recompute the rank from the score. Negating the score makes the highest
# score rank 1, and ties.method = "min" gives tied rows the same, smallest rank.
DI2018[["rank"]] <- rank(-as.numeric(DI2018$score), ties.method = "min")

ANOVA: сравнение индекса по континентам

Различается ли демократичность стран на разных континентах?

# Scale axis annotation to 0.8 of its default size for the plot below
# (presumably so that all continent labels fit on the axis).
par(cex.axis = 0.8)
# Plot the numeric score against continent treated as a factor.
plot(as.numeric(DI2018$score) ~ as.factor(DI2018$continent))

library(car)
## Warning: package 'car' was built under R version 3.5.2
# Levene's test for homogeneity of variance of the score across continents.
# In the output shown below the p-value is 0.22.
leveneTest(as.numeric(DI2018$score) ~ as.factor(DI2018$continent))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   5  1.4191   0.22
##       161
# One-way analysis of means of the score by continent, assuming equal group
# variances. `TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned.
oneway.test(
  as.numeric(DI2018$score) ~ as.factor(DI2018$continent),
  var.equal = TRUE
)
## 
##  One-way analysis of means
## 
## data:  as.numeric(DI2018$score) and as.factor(DI2018$continent)
## F = 15.654, num df = 5, denom df = 161, p-value = 1.522e-12
# Fit the one-way ANOVA model of score by continent; the fitted object is
# reused by TukeyHSD() further down.
aov.out <- aov(as.numeric(DI2018$score) ~ as.factor(DI2018$continent)) 
# Print the ANOVA table (degrees of freedom, sums of squares, F, p-value).
summary(aov.out)
##                              Df Sum Sq Mean Sq F value   Pr(>F)    
## as.factor(DI2018$continent)   5  262.7   52.54   15.65 1.52e-12 ***
## Residuals                   161  540.3    3.36                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey multiple comparisons of means for every pair of continents, based on
# the fitted ANOVA model; print the table of differences and adjusted p-values.
Tukey <- TukeyHSD(x = aov.out)
print(Tukey)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = as.numeric(DI2018$score) ~ as.factor(DI2018$continent))
## 
## $`as.factor(DI2018$continent)`
##                                   diff        lwr      upr     p adj
## Asia-Africa                  0.1707619 -0.9352135 1.276737 0.9977574
## Europe-Africa                2.7363333  1.6505786 3.822088 0.0000000
## North America-Africa         1.9874286  0.3897031 3.585154 0.0058070
## Oceania-Africa               3.2685000  0.5228632 6.014137 0.0096976
## South America-Africa         2.2876667  0.5891046 3.986229 0.0020404
## Europe-Asia                  2.5655714  1.4318929 3.699250 0.0000000
## North America-Asia           1.8166667  0.1859949 3.447338 0.0194084
## Oceania-Asia                 3.0977381  0.3327996 5.862677 0.0183606
## South America-Asia           2.1169048  0.3873162 3.846493 0.0070508
## North America-Europe        -0.7489048 -2.3659305 0.868121 0.7646367
## Oceania-Europe               0.5321667 -2.2247459 3.289079 0.9935668
## South America-Europe        -0.4486667 -2.1653957 1.268062 0.9746264
## Oceania-North America        1.2810714 -1.7146639 4.276807 0.8198176
## South America-North America  0.3002381 -1.7784687 2.378945 0.9983709
## South America-Oceania       -0.9808333 -4.0315409 2.069874 0.9389445
# Set the plot margins (in lines; order is bottom, left, top, right). The wide
# left margin presumably leaves room for the long continent-pair labels.
# NOTE(review): a bottom margin of 1 line may clip the x-axis annotation —
# confirm on the rendered figure.
par(mar = c(1, 20, 4, 4))
# Plot the Tukey confidence intervals; las = 2 draws axis labels perpendicular
# to their axis.
plot(Tukey, las = 2)