library(htmltab)
# Page that publishes the corruption index and the XPath of the table
# inside that page that holds the scores.
linkPage <- "https://www.nationsonline.org/oneworld/corruption.htm"
linkTabla <- "/html/body/table[3]/tbody"
# Scrape that table into a data frame.
corruption <- htmltab(doc = linkPage, which = linkTabla)
## Neither <thead> nor <th> information found. Taking first table row for the header. If incorrect, specifiy header argument.
## Warning: Columns [ ] seem to have no data and are removed. Use
## rm_nodata_cols = F to suppress this behavior
# List the column names of the scraped table.
colnames(corruption)
## [1] "Rank" "Country" "2016 Score" "2015 Score" "2014 Score"
## [6] "2013 Score" "2012 Score" "Region"
Quédate con los scores para el 2016 y, claro, Country y Region:
# Keep only columns 2, 3 and 8 (country, 2016 score and region),
# then confirm which columns are left.
corruption <- corruption[c(2, 3, 8)]
colnames(corruption)
## [1] "Country" "2016 Score" "Region"
Cambio el nombre a “score2016” para evitar espacios en blanco:
# Rename the second column so that its name contains no blanks.
colnames(corruption)[2] <- "score2016"
Identificando el tipo de variable:
# Inspect the structure (column types) of the data frame.
utils::str(corruption)
## 'data.frame': 177 obs. of 3 variables:
## $ Country : chr "Denmark" "New Zealand" "Finland" "Sweden" ...
## $ score2016: chr "90" "90" "89" "88" ...
## $ Region : chr "Europe" "Asia Pacific" "Europe" "Europe" ...
# The score was scraped as text; convert it to numeric.
# Entries that are not numbers become NA (with a coercion warning).
corruption[["score2016"]] <- as.numeric(corruption[["score2016"]])
## Warning: NAs introduced by coercion
# Show the rows whose score could not be converted to a number.
corruption[is.na(corruption$score2016), ]
## Country score2016
## 178 To get in-depth information visit:Transparency International NA
## Region
## 178 To get in-depth information visit:Transparency International
# Drop the rows without a numeric score, then print the cleaned table.
corruption <- corruption[!is.na(corruption$score2016), ]
corruption
## Country score2016
## 2 Denmark 90
## 3 New Zealand 90
## 4 Finland 89
## 5 Sweden 88
## 6 Switzerland 86
## 7 Norway 85
## 8 Singapore 84
## 9 Netherlands 83
## 10 Canada 82
## 11 Germany 81
## 12 Luxembourg 81
## 13 United Kingdom 81
## 14 Australia 79
## 15 Iceland 78
## 16 Belgium 77
## 17 Hong Kong 77
## 18 Austria 75
## 19 United States 74
## 20 Ireland 73
## 21 Japan 72
## 22 Uruguay 71
## 23 Estonia 70
## 24 France 69
## 25 Bahamas 66
## 26 Chile 66
## 27 United Arab Emirates 66
## 28 Bhutan 65
## 29 Israel 64
## 30 Poland 62
## 31 Portugal 62
## 32 Barbados 61
## 33 Qatar 61
## 34 Slovenia 61
## 35 Taiwan 61
## 36 Botswana 60
## 37 Saint Lucia 60
## 38 Saint Vincent and the Grenadines 60
## 39 Cape Verde 59
## 40 Dominica 59
## 41 Lithuania 59
## 42 Brunei 58
## 43 Costa Rica 58
## 44 Spain 58
## 45 Georgia 57
## 46 Latvia 57
## 47 Grenada 56
## 48 Cyprus 55
## 49 Czech Republic 55
## 50 Malta 55
## 51 Mauritius 54
## 52 Rwanda 54
## 53 Korea (South) 53
## 54 Namibia 52
## 55 Slovakia 51
## 56 Croatia 49
## 57 Malaysia 49
## 58 Hungary 48
## 59 Jordan 48
## 60 Romania 48
## 61 Cuba 47
## 62 Italy 47
## 63 Sao Tome and Principe 46
## 64 Saudi Arabia 46
## 65 Montenegro 45
## 66 Oman 45
## 67 Senegal 45
## 68 South Africa 45
## 69 Suriname 45
## 70 Greece 44
## 71 Bahrain 43
## 72 Ghana 43
## 73 Burkina Faso 42
## 74 Serbia 42
## 75 Solomon Islands 42
## 76 Bulgaria 41
## 77 Kuwait 41
## 78 Tunisia 41
## 79 Turkey 41
## 80 Belarus 40
## 81 Brazil 40
## 82 China 40
## 83 India 40
## 84 Albania 39
## 85 Bosnia and Herzegovina 39
## 86 Jamaica 39
## 87 Lesotho 39
## 88 Mongolia 38
## 89 Panama 38
## 90 Zambia 38
## 91 Colombia 37
## 92 Indonesia 37
## 93 Liberia 37
## 94 Morocco 37
## 95 Macedonia (Rep. of) 37
## 96 Argentina 36
## 97 Benin 36
## 98 El Salvador 36
## 99 Kosovo 36
## 100 Maldives 36
## 101 Sri Lanka 36
## 102 Gabon 35
## 103 Niger 35
## 104 Peru 35
## 105 Philippines 35
## 106 Thailand 35
## 107 Timor-Leste 35
## 108 Trinidad and Tobago 35
## 109 Algeria 34
## 110 Côte d´Ivoire 34
## 111 Egypt 34
## 112 Ethiopia 34
## 113 Guyana 34
## 114 Armenia 33
## 115 Bolivia 33
## 116 Vietnam 33
## 117 Mali 32
## 118 Pakistan 32
## 119 Tanzania 32
## 120 Togo 32
## 121 Dominican Republic 31
## 122 Ecuador 31
## 123 Malawi 31
## 124 Azerbaijan 30
## 125 Djibouti 30
## 126 Honduras 30
## 127 Laos 30
## 128 Mexico 30
## 129 Moldova 30
## 130 Paraguay 30
## 131 Sierra Leone 30
## 132 Iran 29
## 133 Kazakhstan 29
## 134 Nepal 29
## 135 Russia 29
## 136 Ukraine 29
## 137 Guatemala 28
## 138 Kyrgyzstan 28
## 139 Lebanon 28
## 140 Myanmar 28
## 141 Nigeria 28
## 142 Papua New Guinea 28
## 143 Guinea 27
## 144 Mauritania 27
## 145 Mozambique 27
## 146 Bangladesh 26
## 147 Cameroon 26
## 148 Gambia 26
## 149 Kenya 26
## 150 Madagascar 26
## 151 Nicaragua 26
## 152 Tajikistan 25
## 153 Uganda 25
## 154 Comoros 24
## 155 Turkmenistan 22
## 156 Zimbabwe 22
## 157 Cambodia 21
## 158 Democratic Republic of Congo 21
## 159 Uzbekistan 21
## 160 Burundi 20
## 161 Central African Republic 20
## 162 Chad 20
## 163 Haiti 20
## 164 Republic of Congo 20
## 165 Angola 18
## 166 Eritrea 18
## 167 Iraq 17
## 168 Venezuela 17
## 169 Guinea-Bissau 16
## 170 Afghanistan 15
## 171 Libya 14
## 172 Sudan 14
## 173 Yemen 14
## 174 Syria 13
## 175 Korea (North) 12
## 176 South Sudan 11
## 177 Somalia 10
## Region
## 2 Europe
## 3 Asia Pacific
## 4 Europe
## 5 Europe
## 6 Europe
## 7 Europe
## 8 Maritime Southeast Asia
## 9 Europe
## 10 Americas
## 11 Europe
## 12 Europe
## 13 Europe
## 14 Asia Pacific
## 15 Europe
## 16 Europe
## 17 Asia
## 18 Europe
## 19 Americas
## 20 Europe
## 21 East Asia
## 22 South America
## 23 Europe
## 24 Europe
## 25 Americas
## 26 South America
## 27 Middle East
## 28 Asia Pacific
## 29 Middle East
## 30 Europe
## 31 Europe
## 32 Caribbean
## 33 Middle East
## 34 Europe
## 35 Maritime Southeast Asia
## 36 Sub Saharan Africa
## 37 Caribbean
## 38 Caribbean
## 39 Sub Saharan Africa
## 40 Caribbean
## 41 Europe and Central Asia
## 42 Maritime Southeast Asia
## 43 Americas
## 44 Europe
## 45 Europe
## 46 Europe
## 47 Americas
## 48 Europe and Western Asia
## 49 Europe
## 50 Europe
## 51 Sub Saharan Africa
## 52 Sub Saharan Africa
## 53 East Asia
## 54 Sub Saharan Africa
## 55 Europe
## 56 Europe
## 57 Maritime Southeast Asia
## 58 Europe
## 59 Middle East
## 60 Europe
## 61 Caribbean
## 62 Europe
## 63 Sub Saharan Africa
## 64 Middle East
## 65 Europe
## 66 Middle East
## 67 Sub Saharan Africa
## 68 Sub Saharan Africa
## 69 South America
## 70 Europe
## 71 Middle East
## 72 Sub Saharan Africa
## 73 Sub Saharan Africa
## 74 Europe
## 75 Asia Pacific
## 76 Europe
## 77 Middle East
## 78 North Africa
## 79 Europe and Western Asia
## 80 Europe
## 81 South America
## 82 East Asia
## 83 South Asia
## 84 Europe
## 85 Europe
## 86 Americas
## 87 Sub Saharan Africa
## 88 Asia Pacific
## 89 Americas
## 90 Sub Saharan Africa
## 91 Americas
## 92 Asia Pacific
## 93 Sub Saharan Africa
## 94 North Africa
## 95 Europe
## 96 Americas
## 97 Sub Saharan Africa
## 98 Americas
## 99 Europe
## 100 South Asia
## 101 South Asia
## 102 Sub Saharan Africa
## 103 Sub Saharan Africa
## 104 Americas
## 105 Maritime South East Asia
## 106 South East Asia
## 107 Maritime South East Asia
## 108 Caribbean
## 109 North Africa
## 110 Sub Saharan Africa
## 111 Middle East and North Africa
## 112 Sub Saharan Africa
## 113 South America
## 114 Europe and Western Asia
## 115 South America
## 116 South East Asia
## 117 Sub Saharan Africa
## 118 South Asia and Central Asia
## 119 Sub Saharan Africa
## 120 Sub Saharan Africa
## 121 Caribbean
## 122 South America
## 123 Sub Saharan Africa
## 124 Central Asia
## 125 Sub Saharan Africa
## 126 Central America
## 127 South East Asia
## 128 North America
## 129 Europe
## 130 South America
## 131 Sub Saharan Africa
## 132 Middle East
## 133 Central Asia
## 134 South Asia
## 135 Europe and Northern Asia
## 136 Europe
## 137 Central America
## 138 Central Asia
## 139 Middle East
## 140 South East Asia
## 141 Sub Saharan Africa
## 142 Maritime South East Asia
## 143 Sub Saharan Africa
## 144 North Africa
## 145 Sub Saharan Africa
## 146 South Asia
## 147 Sub Saharan Africa
## 148 Sub Saharan Africa
## 149 Sub Saharan Africa
## 150 Sub Saharan Africa
## 151 Central America
## 152 Central Asia
## 153 Sub Saharan Africa
## 154 Sub Saharan Africa
## 155 Central Asia
## 156 Sub Saharan Africa
## 157 South East Asia
## 158 Sub Saharan Africa
## 159 Central Asia
## 160 Sub Saharan Africa
## 161 Sub Saharan Africa
## 162 Sub Saharan Africa
## 163 Caribbean
## 164 Sub Saharan Africa
## 165 Sub Saharan Africa
## 166 Sub Saharan Africa
## 167 Middle East
## 168 South America
## 169 Sub Saharan Africa
## 170 Central Asia
## 171 North Africa
## 172 North Africa
## 173 Middle East
## 174 Middle East
## 175 Asia Pacific
## 176 Sub Saharan Africa
## 177 Sub Saharan Africa
Hasta aquí tenemos:
# Show the first 6 rows of the data.
head(corruption, n = 6L)
## Country score2016 Region
## 2 Denmark 90 Europe
## 3 New Zealand 90 Asia Pacific
## 4 Finland 89 Europe
## 5 Sweden 88 Europe
## 6 Switzerland 86 Europe
## 7 Norway 85 Europe
No tenemos ordinales, pero esa numérica (el score) la convertiremos en ordinal. Para ello la organizamos en 10 grupos.
Hacer 10 intervalos con el score 2016:
# Turn the numeric 2016 score into an ordinal variable: cut() splits the
# score range into 10 equal-width intervals, labels them 1..10 and returns
# an ordered factor (higher level = higher score).
corruption$nivel <- cut(
  corruption$score2016,
  breaks = 10,
  labels = 1:10,
  # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
  ordered_result = TRUE
)
head(corruption)
## Country score2016 Region nivel
## 2 Denmark 90 Europe 10
## 3 New Zealand 90 Asia Pacific 10
## 4 Finland 89 Europe 10
## 5 Sweden 88 Europe 10
## 6 Switzerland 86 Europe 10
## 7 Norway 85 Europe 10
Se nota que a mayor número del grupo, menos corrupción. Exploremos la variable corruption$nivel, que es nuestra ordinal.
# Exploración ordinal 1: crear la tabla de frecuencias para las gráficas
library(questionr)
library(magrittr)
# Frequency table of the ordinal level: counts, percentages and, because
# cum = TRUE, cumulative percentages. Converted to a plain data frame.
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
freqCorrup <- freq(corruption$nivel, cum = TRUE) %>%
  data.frame()
# Move the level labels from the row names into a regular column `nivel`
# and reset the row names.
freqCorrup <- data.frame(
  nivel = row.names(freqCorrup),
  freqCorrup,
  row.names = NULL
)
freqCorrup
## nivel n X. val. X.cum val.cum
## 1 1 13 7.4 7.4 7.4 7.4
## 2 2 19 10.8 10.8 18.2 18.2
## 3 3 37 21.0 21.0 39.2 39.2
## 4 4 36 20.5 20.5 59.7 59.7
## 5 5 17 9.7 9.7 69.3 69.3
## 6 6 14 8.0 8.0 77.3 77.3
## 7 7 17 9.7 9.7 86.9 86.9
## 8 8 6 3.4 3.4 90.3 90.3
## 9 9 9 5.1 5.1 95.5 95.5
## 10 10 8 4.5 4.5 100.0 100.0
PUEDES RENOMBRAR ASÍ ESTA TABLA:
# Give the five frequency columns (2 to 6) descriptive names and print.
colnames(freqCorrup)[2:6] <- c(
  "absoluta",
  "relativa",
  "relativaVAL",
  "relativaCum",
  "relativaCumVal"
)
freqCorrup
## nivel absoluta relativa relativaVAL relativaCum relativaCumVal
## 1 1 13 7.4 7.4 7.4 7.4
## 2 2 19 10.8 10.8 18.2 18.2
## 3 3 37 21.0 21.0 39.2 39.2
## 4 4 36 20.5 20.5 59.7 59.7
## 5 5 17 9.7 9.7 69.3 69.3
## 6 6 14 8.0 8.0 77.3 77.3
## 7 7 17 9.7 9.7 86.9 86.9
## 8 8 6 3.4 3.4 90.3 90.3
## 9 9 9 5.1 5.1 95.5 95.5
## 10 10 8 4.5 4.5 100.0 100.0
Gráfico:
library(ggplot2)
# Bar chart of the absolute frequency of each corruption level.
base <- ggplot(data = freqCorrup, mapping = aes(x = nivel, y = absoluta))
# Pin the x axis to the order in which the levels appear in the table.
base1 <- base + scale_x_discrete(limits = freqCorrup$nivel)
# stat = "identity": bar heights are the counts already stored in `absoluta`.
bar1 <- base1 + geom_bar(stat = "identity")
bar1
# Same chart with axis labels, title, subtitle and source caption.
bar1 +
  labs(
    x = "nivel",
    y = "cantidad",
    title = "¿la mayoria de paises estna libres de corrupcion?",
    subtitle = "(por arriba del nivle 8)",
    caption = "fuente=transparency international"
  )
El gráfico de Pareto muestra dos medidas: los conteos y los porcentajes acumulados. Por tradición, se trata de detectar qué valores representan el 80%.
library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
# Pareto chart of the level counts: the printed analysis lists the levels
# sorted by decreasing frequency with cumulative frequency and percentage.
# cumperc supplies the percentage values 0, 50, 80 and 100 (presumably the
# marks drawn on the cumulative-percentage axis -- confirm in the qcc docs).
# NOTE: the table is built inline because the printed heading echoes the
# argument expression as written.
pareto.chart(table(corruption$nivel),cumperc = c(0,50,80,100))
##
## Pareto chart analysis for table(corruption$nivel)
## Frequency Cum.Freq. Percentage Cum.Percent.
## 3 37.000000 37.000000 21.022727 21.022727
## 4 36.000000 73.000000 20.454545 41.477273
## 2 19.000000 92.000000 10.795455 52.272727
## 5 17.000000 109.000000 9.659091 61.931818
## 7 17.000000 126.000000 9.659091 71.590909
## 6 14.000000 140.000000 7.954545 79.545455
## 1 13.000000 153.000000 7.386364 86.931818
## 9 9.000000 162.000000 5.113636 92.045455
## 10 8.000000 170.000000 4.545455 96.590909
## 8 6.000000 176.000000 3.409091 100.000000
Se nota que el 80% de los países están en los niveles del 1 al 7 de la escala ordinal. Ya podemos pedir el boxplot:
# Boxplot of the ordinal level; the ordered factor is converted to its
# numeric codes so it can be placed on the y axis.
box <- ggplot(data = corruption, mapping = aes(y = as.numeric(nivel))) +
  geom_boxplot()
box
Al convertir el ordinal en numérico, aparecen números decimales en la vertical. Podemos ver las etiquetas así:
# Relabel the y axis with the level labels stored in the frequency table
# instead of the default numeric breaks.
box + scale_y_discrete(limits = freqCorrup[["nivel"]])
Estadísticos
Centrales
La moda
library(DescTools)
# Mode: the most frequent corruption level.
Mode(corruption[["nivel"]])
## [1] "3"
EL NIVEL TRES ES EL QUE MÁS SE REPITE.
LA MEDIANA
# Median of the ordered factor (DescTools::Median accepts ordinal data).
Median(corruption[["nivel"]])
## [1] 4
## Levels: 1 < 2 < 3 < 4 < 5 < 6 < 7 < 8 < 9 < 10
Dispersión
Variación modal
# Counts per level, reused by the concentration measures further down.
dataTable <- table(corruption$nivel)
# Variation ratio: share of cases that do NOT fall in the modal level.
1 - max(dataTable / sum(dataTable))
## [1] 0.7897727
La moda no representa al 79% de los países.
Concentración: Herfindahl-Hirschman
# Herfindahl-Hirschman concentration index of the level counts.
DescTools::Herfindahl(dataTable)
## [1] 0.1339747
Laakso-Taagepera
# Effective number of groups: inverse of the sum of squared shares.
1 / sum(prop.table(dataTable)^2)
## [1] 7.464096
Hay 7 grupos representativos.
DISTANCIA INTERCUARTIL
# Interquartile range of the ordinal level.
IQR(corruption[["nivel"]])
## [1] 3
Desviación absoluta de la mediana
# Median absolute deviation, computed on the numeric codes of the levels.
mad(as.numeric(corruption[["nivel"]]))
## [1] 1.4826