http://rpubs.com/Brayan_Milla/533322
library(htmltab)
# Download the Wikipedia page and extract the table selected by the XPath
# below into a data frame, then show its structure.
wiki_url <- "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(PPP)_per_capita"
table_xpath <- '//*[@id="mw-content-text"]/div/table[1]/tbody/tr[2]/td[1]/table'
data_pbi <- htmltab(
  doc = wiki_url,
  which = table_xpath,
  encoding = "UTF-8"
)
str(data_pbi)
## 'data.frame': 193 obs. of 3 variables:
## $ Rank : chr "1" "—" "2" "3" ...
## $ Country/Territory: chr " Qatar" " Macau" " Luxembourg" " Singapore" ...
## $ Int$ : chr "130,475" "116,808" "106,705" "100,345" ...
# Show the full scraped table.
print(data_pbi)
## Rank Country/Territory Int$
## 2 1 Qatar 130,475
## 3 — Macau 116,808
## 4 2 Luxembourg 106,705
## 5 3 Singapore 100,345
## 6 4 Brunei 79,530
## 7 5 Ireland 78,785
## 8 6 Norway 74,356
## 9 7 United Arab Emirates 69,382
## 10 8 Kuwait 67,000
## 11 9 Switzerland 64,649
## 12 — Hong Kong 64,216
## 13 10 United States 62,606
## 14 11 San Marino 60,313
## 15 12 Netherlands 56,383
## 16 13 Saudi Arabia 55,944
## 17 14 Iceland 55,917
## 18 — Taiwan 53,023
## 19 15 Sweden 52,984
## 20 16 Germany 52,559
## 21 17 Australia 52,373
## 22 18 Austria 52,137
## 23 19 Denmark 52,121
## 24 20 Bahrain 50,057
## 25 21 Canada 49,651
## 26 22 Belgium 48,245
## 27 23 Oman 46,584
## 28 24 Finland 46,430
## 29 25 France 45,775
## 30 26 United Kingdom 45,705
## 31 27 Malta 45,606
## 32 28 Japan 44,227
## 33 29 Korea, South 41,351
## 34 30 Spain 40,139
## 35 31 New Zealand 40,135
## 36 32 Cyprus 39,973
## 37 — Puerto Rico 39,737
## 38 33 Italy 39,637
## 39 34 Israel 37,972
## 40 35 Czech Republic 37,371
## 41 36 Slovenia 36,746
## 42 37 Slovakia 35,130
## 43 38 Lithuania 34,826
## 44 39 Estonia 34,096
## 45 40 Bahamas, The 33,494
## 46 41 Trinidad and Tobago 32,254
## 47 42 Portugal 32,006
## 48 43 Poland 31,939
## 49 44 Hungary 31,903
## 50 45 Malaysia 30,860
## 51 46 Seychelles 30,505
## 52 47 Latvia 29,901
## 53 48 Saint Kitts and Nevis 29,820
## 54 49 Russia 29,267
## 55 50 Greece 29,123
## 56 51 Antigua and Barbuda 27,981
## 57 52 Turkey 27,956
## 58 53 Kazakhstan 27,550
## 59 54 Romania 26,447
## 60 55 Croatia 26,221
## 61 56 Chile 25,978
## 62 57 Panama 25,674
## 63 58 Mauritius 23,699
## 64 59 Uruguay 23,274
## 65 60 Bulgaria 23,156
## 66 61 Equatorial Guinea 22,710
## 67 62 Maldives 21,760
## 68 63 Mexico 20,602
## 69 64 Argentina 20,537
## 70 65 Belarus 20,003
## 71 66 Iran 19,557
## 72 67 Turkmenistan 19,527
## 73 68 Thailand 19,476
## 74 69 Montenegro 19,043
## 75 70 Barbados 18,534
## 76 71 Gabon 18,496
## 77 72 Dominican Republic 18,424
## 78 73 China 18,110
## 79 74 Azerbaijan 18,076
## 80 75 Botswana 17,965
## 81 76 Iraq 17,659
## 82 77 Costa Rica 17,559
## 83 78 Serbia 17,555
## 84 — World 16,779
## 85 79 Grenada 16,167
## 86 80 Brazil 16,154
## 87 81 North Macedonia 15,709
## 88 82 Algeria 15,440
## 89 83 Suriname 15,105
## 90 84 Palau 14,952
## 91 85 Colombia 14,943
## 92 86 Lebanon 14,684
## 93 87 Saint Lucia 14,355
## 94 88 Peru 14,224
## 95 89 South Africa 13,675
## 96 90 Bosnia and Herzegovina 13,491
## 97 91 Mongolia 13,447
## 98 92 Sri Lanka 13,397
## 99 93 Paraguay 13,395
## 100 94 Egypt 13,366
## 101 95 Albania 13,345
## 102 96 Indonesia 13,230
## 103 97 Tunisia 12,372
## 104 98 Nauru 12,326
## 105 99 Saint Vincent and the Grenadines 11,956
## 106 100 Ecuador 11,718
## 107 — Kosovo 11,552
## 108 101 Georgia 11,485
## 109 102 Libya 11,469
## 110 103 Namibia 11,229
## 111 104 Eswatini 11,020
## 112 105 Fiji 10,234
## 113 106 Armenia 10,176
## 114 107 Dominica 9,886
## 115 108 Bhutan 9,540
## 116 109 Jamaica 9,447
## 117 110 Jordan 9,433
## 118 111 Ukraine 9,283
## 119 112 Philippines 8,936
## 120 113 Morocco 8,933
## 121 114 Guyana 8,519
## 122 115 Belize 8,501
## 123 116 Guatemala 8,436
## 124 117 El Salvador 8,041
## 125 118 Laos 7,925
## 126 119 India 7,874
## 127 120 Uzbekistan 7,665
## 128 121 Vietnam 7,510
## 129 122 Bolivia 7,477
## 130 123 Cape Verde 7,316
## 131 124 Moldova 7,305
## 132 125 Angola 6,814
## 133 126 Congo, Republic of the 6,799
## 134 127 Myanmar 6,511
## 135 128 Ghana 6,452
## 136 129 Tonga 6,111
## 137 130 Nigeria 6,027
## 138 131 Samoa 5,890
## 139 132 Nicaragua 5,683
## 140 133 Pakistan 5,680
## 141 134 Timor-Leste 5,242
## 142 135 Honduras 5,212
## 143 136 Bangladesh 4,620
## 144 137 Cambodia 4,335
## 145 138 Sudan 4,232
## 146 139 Côte d'Ivoire 4,178
## 147 140 Zambia 4,104
## 148 141 Tuvalu 4,052
## 149 142 Mauritania 3,990
## 150 143 Kyrgyzstan 3,844
## 151 144 Cameroon 3,828
## 152 145 Djibouti 3,786
## 153 146 Marshall Islands 3,697
## 154 147 Kenya 3,691
## 155 148 Papua New Guinea 3,662
## 156 149 Senegal 3,651
## 157 150 Micronesia, Federated States of 3,482
## 158 151 Tanzania 3,444
## 159 152 Tajikistan 3,416
## 160 153 Lesotho 3,494
## 161 154 São Tomé and Príncipe 3,324
## 162 155 Nepal 2,905
## 163 156 Vanuatu 2,862
## 164 157 Gambia, The 2,792
## 165 158 Zimbabwe 2,788
## 166 159 Uganda 2,498
## 167 160 Benin 2,426
## 168 161 Chad 2,415
## 169 162 Mali 2,384
## 170 163 Yemen 2,377
## 171 164 Ethiopia 2,332
## 172 165 Guinea 2,310
## 173 166 Rwanda 2,280
## 174 167 Solomon Islands 2,242
## 175 168 Kiribati 2,086
## 176 169 Afghanistan 2,017
## 177 170 Burkina Faso 1,996
## 178 171 Guinea-Bissau 1,937
## 179 172 Haiti 1,864
## 180 173 Togo 1,746
## 181 174 Eritrea 1,657
## 182 175 Comoros 1,632
## 183 176 Madagascar 1,630
## 184 177 Sierra Leone 1,620
## 185 178 South Sudan 1,502
## 186 179 Liberia 1,418
## 187 180 Mozambique 1,291
## 188 181 Niger 1,217
## 189 182 Malawi 1,199
## 190 183 Congo, Democratic Republic of the 767
## 191 184 Burundi 732
## 192 185 Central African Republic 712
## 193 — Syria n/a
## 194 — Venezuela n/a
names(data_pbi)
## [1] "Rank" "Country/Territory" "Int$"
# Strip the thousands separator before converting to numeric. The comma must
# be removed, not replaced by a space: as.numeric("130 475") is NA, which
# previously turned every value >= 1,000 into NA (190 of 193 rows).
# NOTE: the console output recorded further down in this file (NA counts,
# summary values) was produced before this correction.
data_pbi$`Int$` <- gsub(",", "", data_pbi$`Int$`, fixed = TRUE)
# Entries that are literally "n/a" still (correctly) become NA here, so the
# coercion warning below is expected.
data_pbi$`Int$` <- as.numeric(data_pbi$`Int$`)
## Warning: NAs introduced by coercion
# Rename the third column ("Int$") to "PBI".
names(data_pbi)[3] <- "PBI"
Parte 1. Exploración Gráfica. NOTA: Las variables numéricas no requieren tablas de frecuencia para ser exploradas. El gráfico inicial a usar es el histograma:
library(ggplot2)
# Base plot object mapping PBI to the x axis.
base1 <- ggplot(data = data_pbi, mapping = aes(x = PBI))
# Histogram of PBI using seven bins.
histNum <- base1 + geom_histogram(bins = 7)
print(histNum)
## Warning: Removed 190 rows containing non-finite values (stat_bin).
Los atípicos se ven claramente en un boxplot:
# Boxplot of PBI, flipped so the value axis runs horizontally.
base2 <- ggplot(data = data_pbi, mapping = aes(y = PBI))
box <- base2 +
  geom_boxplot() +
  coord_flip()
print(box)
## Warning: Removed 190 rows containing non-finite values (stat_boxplot).
No hay atípicos.
Para ser más preciso en nuestra exploración, debemos calcular diversos indicadores estadísticos.
Parte 2. Exploración con Estadígrafos. Los estadígrafos aparecen rápidamente así:
# Five-number summary plus mean (and NA count, if any) of the PBI column.
summary(data_pbi[["PBI"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 712.0 722.0 732.0 737.0 749.5 767.0 190
Podríamos graficarlos en el boxplot:
library(ggplot2)
# Use the summary statistics as axis breaks on the boxplot.
# When the column contains NAs, summary() appends a 7th element with the NA
# count; keep only the six actual statistics (min, Q1, median, mean, Q3, max)
# so that the count is not drawn as a bogus break.
estadigrafos <- round(as.vector(summary(data_pbi$PBI)[1:6]), 2)
box + scale_y_continuous(breaks = estadigrafos)
## Warning: Removed 190 rows containing non-finite values (stat_boxplot).
No hay atípicos
library(DescTools)
# Skewness of PBI with a bootstrap confidence interval.
# R uses "." as the decimal mark: the previous `conf.level = 0,05` passed
# conf.level = 0 and a stray positional argument 05. Missing values are now
# dropped explicitly via na.rm instead of relying on that stray argument.
# NOTE(review): 0.95 is assumed to be the intended confidence level (the
# original digits read "0,05") - confirm.
Skew(data_pbi$PBI, na.rm = TRUE, conf.level = 0.95)
## skew lwr.ci upr.ci
## 0.1738116 -0.3849002 -0.3849002
Es importante conocer la distancia intercuartílica:
# Interquartile range of PBI, ignoring missing values.
IQR(data_pbi[["PBI"]], na.rm = TRUE)
## [1] 27.5
# Upper outlier fence: third quartile plus 1.5 times the interquartile range.
q3 <- quantile(data_pbi$PBI, probs = 0.75, na.rm = TRUE, names = FALSE)
umbral <- q3 + 1.5 * IQR(data_pbi$PBI, na.rm = TRUE)
print(umbral)
## [1] 790.75
# Rows whose PBI exceeds the upper fence.
# Comparing NA with a number yields NA, and an NA in a logical row index
# returns an all-NA row; which() keeps only the positions that are TRUE, so
# rows with missing PBI are left out instead of showing up as phantom rows.
data_pbi[which(data_pbi$PBI > umbral), ]
## Rank Country/Territory PBI
## NA <NA> <NA> NA
## NA.1 <NA> <NA> NA
## NA.2 <NA> <NA> NA
## NA.3 <NA> <NA> NA
## NA.4 <NA> <NA> NA
## NA.5 <NA> <NA> NA
## NA.6 <NA> <NA> NA
## NA.7 <NA> <NA> NA
## NA.8 <NA> <NA> NA
## NA.9 <NA> <NA> NA
## NA.10 <NA> <NA> NA
## NA.11 <NA> <NA> NA
## NA.12 <NA> <NA> NA
## NA.13 <NA> <NA> NA
## NA.14 <NA> <NA> NA
## NA.15 <NA> <NA> NA
## NA.16 <NA> <NA> NA
## NA.17 <NA> <NA> NA
## NA.18 <NA> <NA> NA
## NA.19 <NA> <NA> NA
## NA.20 <NA> <NA> NA
## NA.21 <NA> <NA> NA
## NA.22 <NA> <NA> NA
## NA.23 <NA> <NA> NA
## NA.24 <NA> <NA> NA
## NA.25 <NA> <NA> NA
## NA.26 <NA> <NA> NA
## NA.27 <NA> <NA> NA
## NA.28 <NA> <NA> NA
## NA.29 <NA> <NA> NA
## NA.30 <NA> <NA> NA
## NA.31 <NA> <NA> NA
## NA.32 <NA> <NA> NA
## NA.33 <NA> <NA> NA
## NA.34 <NA> <NA> NA
## NA.35 <NA> <NA> NA
## NA.36 <NA> <NA> NA
## NA.37 <NA> <NA> NA
## NA.38 <NA> <NA> NA
## NA.39 <NA> <NA> NA
## NA.40 <NA> <NA> NA
## NA.41 <NA> <NA> NA
## NA.42 <NA> <NA> NA
## NA.43 <NA> <NA> NA
## NA.44 <NA> <NA> NA
## NA.45 <NA> <NA> NA
## NA.46 <NA> <NA> NA
## NA.47 <NA> <NA> NA
## NA.48 <NA> <NA> NA
## NA.49 <NA> <NA> NA
## NA.50 <NA> <NA> NA
## NA.51 <NA> <NA> NA
## NA.52 <NA> <NA> NA
## NA.53 <NA> <NA> NA
## NA.54 <NA> <NA> NA
## NA.55 <NA> <NA> NA
## NA.56 <NA> <NA> NA
## NA.57 <NA> <NA> NA
## NA.58 <NA> <NA> NA
## NA.59 <NA> <NA> NA
## NA.60 <NA> <NA> NA
## NA.61 <NA> <NA> NA
## NA.62 <NA> <NA> NA
## NA.63 <NA> <NA> NA
## NA.64 <NA> <NA> NA
## NA.65 <NA> <NA> NA
## NA.66 <NA> <NA> NA
## NA.67 <NA> <NA> NA
## NA.68 <NA> <NA> NA
## NA.69 <NA> <NA> NA
## NA.70 <NA> <NA> NA
## NA.71 <NA> <NA> NA
## NA.72 <NA> <NA> NA
## NA.73 <NA> <NA> NA
## NA.74 <NA> <NA> NA
## NA.75 <NA> <NA> NA
## NA.76 <NA> <NA> NA
## NA.77 <NA> <NA> NA
## NA.78 <NA> <NA> NA
## NA.79 <NA> <NA> NA
## NA.80 <NA> <NA> NA
## NA.81 <NA> <NA> NA
## NA.82 <NA> <NA> NA
## NA.83 <NA> <NA> NA
## NA.84 <NA> <NA> NA
## NA.85 <NA> <NA> NA
## NA.86 <NA> <NA> NA
## NA.87 <NA> <NA> NA
## NA.88 <NA> <NA> NA
## NA.89 <NA> <NA> NA
## NA.90 <NA> <NA> NA
## NA.91 <NA> <NA> NA
## NA.92 <NA> <NA> NA
## NA.93 <NA> <NA> NA
## NA.94 <NA> <NA> NA
## NA.95 <NA> <NA> NA
## NA.96 <NA> <NA> NA
## NA.97 <NA> <NA> NA
## NA.98 <NA> <NA> NA
## NA.99 <NA> <NA> NA
## NA.100 <NA> <NA> NA
## NA.101 <NA> <NA> NA
## NA.102 <NA> <NA> NA
## NA.103 <NA> <NA> NA
## NA.104 <NA> <NA> NA
## NA.105 <NA> <NA> NA
## NA.106 <NA> <NA> NA
## NA.107 <NA> <NA> NA
## NA.108 <NA> <NA> NA
## NA.109 <NA> <NA> NA
## NA.110 <NA> <NA> NA
## NA.111 <NA> <NA> NA
## NA.112 <NA> <NA> NA
## NA.113 <NA> <NA> NA
## NA.114 <NA> <NA> NA
## NA.115 <NA> <NA> NA
## NA.116 <NA> <NA> NA
## NA.117 <NA> <NA> NA
## NA.118 <NA> <NA> NA
## NA.119 <NA> <NA> NA
## NA.120 <NA> <NA> NA
## NA.121 <NA> <NA> NA
## NA.122 <NA> <NA> NA
## NA.123 <NA> <NA> NA
## NA.124 <NA> <NA> NA
## NA.125 <NA> <NA> NA
## NA.126 <NA> <NA> NA
## NA.127 <NA> <NA> NA
## NA.128 <NA> <NA> NA
## NA.129 <NA> <NA> NA
## NA.130 <NA> <NA> NA
## NA.131 <NA> <NA> NA
## NA.132 <NA> <NA> NA
## NA.133 <NA> <NA> NA
## NA.134 <NA> <NA> NA
## NA.135 <NA> <NA> NA
## NA.136 <NA> <NA> NA
## NA.137 <NA> <NA> NA
## NA.138 <NA> <NA> NA
## NA.139 <NA> <NA> NA
## NA.140 <NA> <NA> NA
## NA.141 <NA> <NA> NA
## NA.142 <NA> <NA> NA
## NA.143 <NA> <NA> NA
## NA.144 <NA> <NA> NA
## NA.145 <NA> <NA> NA
## NA.146 <NA> <NA> NA
## NA.147 <NA> <NA> NA
## NA.148 <NA> <NA> NA
## NA.149 <NA> <NA> NA
## NA.150 <NA> <NA> NA
## NA.151 <NA> <NA> NA
## NA.152 <NA> <NA> NA
## NA.153 <NA> <NA> NA
## NA.154 <NA> <NA> NA
## NA.155 <NA> <NA> NA
## NA.156 <NA> <NA> NA
## NA.157 <NA> <NA> NA
## NA.158 <NA> <NA> NA
## NA.159 <NA> <NA> NA
## NA.160 <NA> <NA> NA
## NA.161 <NA> <NA> NA
## NA.162 <NA> <NA> NA
## NA.163 <NA> <NA> NA
## NA.164 <NA> <NA> NA
## NA.165 <NA> <NA> NA
## NA.166 <NA> <NA> NA
## NA.167 <NA> <NA> NA
## NA.168 <NA> <NA> NA
## NA.169 <NA> <NA> NA
## NA.170 <NA> <NA> NA
## NA.171 <NA> <NA> NA
## NA.172 <NA> <NA> NA
## NA.173 <NA> <NA> NA
## NA.174 <NA> <NA> NA
## NA.175 <NA> <NA> NA
## NA.176 <NA> <NA> NA
## NA.177 <NA> <NA> NA
## NA.178 <NA> <NA> NA
## NA.179 <NA> <NA> NA
## NA.180 <NA> <NA> NA
## NA.181 <NA> <NA> NA
## NA.182 <NA> <NA> NA
## NA.183 <NA> <NA> NA
## NA.184 <NA> <NA> NA
## NA.185 <NA> <NA> NA
## NA.186 <NA> <NA> NA
## NA.187 <NA> <NA> NA
## NA.188 <NA> <NA> NA
## NA.189 <NA> <NA> NA
La desigualdad del PBI per cápita entre países puede ser representada con el Gini:
# Gini coefficient of PBI with a 95% bootstrap confidence interval.
# R uses "." as the decimal mark: the previous `conf.level = 0,95` passed
# conf.level = 0 and a stray positional argument 95. na.rm = TRUE is required
# because PBI contains missing values; without it the result is NA.
Gini(data_pbi$PBI, conf.level = 0.95, na.rm = TRUE)
## [1] NA
Si el Gini es 0, todos los países tienen el mismo PBI per cápita; si fuera 1, un solo país concentraría todo el PBI.
Normalmente al Gini le acompaña la curva de Lorenz:
library(gglorenz)
## Registered S3 methods overwritten by 'ineq':
## method from
## plot.Lc DescTools
## lines.Lc DescTools
# Lorenz curve of PBI (red) together with a dashed reference line, drawn on
# fixed-ratio axes.
base1 +
  gglorenz::stat_lorenz(color = "red") +
  geom_abline(linetype = "dashed") +
  coord_fixed() +
  labs(
    title = "Relación país / PBI",
    x = "% Países",
    y = "% PBI",
    caption = "Fuente: Wikipedia"
  )
## Warning: Removed 190 rows containing non-finite values (stat_lorenz).