library(htmltab)
# Scrape the corruption table from the web page.
# linkPage is the page URL; linkTabla is the XPath of the table to extract.
linkPage <- "https://www.nationsonline.org/oneworld/corruption.htm"
linkTabla <- "/html/body/table[3]/tbody"
corruption <- htmltab(
  doc = linkPage,
  which = linkTabla
)
## Neither <thead> nor <th> information found. Taking first table row for the header. If incorrect, specifiy header argument.
## Warning: Columns [ ] seem to have no data and are removed. Use
## rm_nodata_cols = F to suppress this behavior
# Inspect the column names delivered by the scraper.
names(corruption)
## [1] "Rank" "Country" "2016 Score" "2015 Score" "2014 Score"
## [6] "2013 Score" "2012 Score" "Region"

# Keep only columns 2, 3 and 8 (Country, 2016 Score, Region).
corruption <- corruption[, c(2, 3, 8)]
names(corruption)
## [1] "Country" "2016 Score" "Region"

# Rename the score column (second column) to Score2016.
names(corruption)[2] <- "Score2016"
str(corruption)
## 'data.frame': 177 obs. of 3 variables:
## $ Country : chr "Denmark" "New Zealand" "Finland" "Sweden" ...
## $ Score2016: chr "90" "90" "89" "88" ...
## $ Region : chr "Europe" "Asia Pacific" "Europe" "Europe" ...

# The score was read as text, so it has to be converted to numeric.
# Values that are not numbers become NA (hence the coercion warning).
corruption$Score2016 <- as.numeric(corruption$Score2016)
## Warning: NAs introduced by coercion

# Show the rows whose score could not be converted.
corruption[is.na(corruption$Score2016), ]
## Country Score2016
## 178 To get in-depth information visit:Transparency International NA
## Region
## 178 To get in-depth information visit:Transparency International

# Keep only the rows that have a score.
corruption <- corruption[!is.na(corruption$Score2016), ]

# Look at the first 6 rows.
head(corruption, n = 6)
## Country Score2016 Region
## 2 Denmark 90 Europe
## 3 New Zealand 90 Asia Pacific
## 4 Finland 89 Europe
## 5 Sweden 88 Europe
## 6 Switzerland 86 Europe
## 7 Norway 85 Europe
# Organise the 2016 score into 10 groups and store the result as the
# ordinal column `nivel`. A single number for `breaks` asks cut() to split
# the range of the data into that many intervals.
corruption$nivel <- cut(
  corruption$Score2016,
  breaks = 10,           # how many groups
  labels = 1:10,         # names of the groups
  ordered_result = TRUE  # result is ordinal; TRUE rather than the reassignable T
)

# `nivel` now appears; the higher the group, the less corruption.
head(corruption)
## Country Score2016 Region nivel
## 2 Denmark 90 Europe 10
## 3 New Zealand 90 Asia Pacific 10
## 4 Finland 89 Europe 10
## 5 Sweden 88 Europe 10
## 6 Switzerland 86 Europe 10
## 7 Norway 85 Europe 10
library(questionr)
library(magrittr)
# Frequency table of the ordinal variable `nivel`, including cumulative
# percentages (cum = TRUE; spelled out because T can be reassigned).
freqCorrup <- corruption$nivel %>%
  freq(cum = TRUE) %>%
  data.frame()

# Move the row names (the level labels) into a regular column called `nivel`
# and drop the row names, producing the data frame `freqCorrup`.
freqCorrup <- data.frame(
  nivel = row.names(freqCorrup),
  freqCorrup,
  row.names = NULL
)
freqCorrup
## nivel n X. val. X.cum val.cum
## 1 1 13 7.4 7.4 7.4 7.4
## 2 2 19 10.8 10.8 18.2 18.2
## 3 3 37 21.0 21.0 39.2 39.2
## 4 4 36 20.5 20.5 59.7 59.7
## 5 5 17 9.7 9.7 69.3 69.3
## 6 6 14 8.0 8.0 77.3 77.3
## 7 7 17 9.7 9.7 86.9 86.9
## 8 8 6 3.4 3.4 90.3 90.3
## 9 9 9 5.1 5.1 95.5 95.5
## 10 10 8 4.5 4.5 100.0 100.0

# Rename columns 2 to 6 of the table just created.
names(freqCorrup)[2:6] <- c(
  "absoluta",
  "relativa",
  "relativaVAl",
  "relativaCum",
  "relativaCumVAL"
)
freqCorrup
## nivel absoluta relativa relativaVAl relativaCum relativaCumVAL
## 1 1 13 7.4 7.4 7.4 7.4
## 2 2 19 10.8 10.8 18.2 18.2
## 3 3 37 21.0 21.0 39.2 39.2
## 4 4 36 20.5 20.5 59.7 59.7
## 5 5 17 9.7 9.7 69.3 69.3
## 6 6 14 8.0 8.0 77.3 77.3
## 7 7 17 9.7 9.7 86.9 86.9
## 8 8 6 3.4 3.4 90.3 90.3
## 9 9 9 5.1 5.1 95.5 95.5
## 10 10 8 4.5 4.5 100.0 100.0
library(ggplot2) #descargar la libreria
# Bar chart built from the frequency table `freqCorrup`:
# level on the x axis, absolute count on the y axis.
base <- ggplot(data = freqCorrup, mapping = aes(x = nivel, y = absoluta))

# Use the levels, in the order they appear in the table, as the x-axis limits.
base1 <- base + scale_x_discrete(limits = freqCorrup$nivel)

# Bars whose height is the value stored in `absoluta`.
bar1 <- base1 + geom_bar(stat = "identity")
bar1

# Same chart with axis labels, title, subtitle and source caption.
bar1 +
  labs(
    x = "Nivel",
    y = "Cantidad",
    title = "¿La mayoria de paises están libres de corrupcion?",
    subtitle = "(por arriba de nivel 8)",
    caption = "Fuente: Transparency International"
  )
library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
# Pareto chart of the number of countries in each level of `nivel`.
# The counts come from table(); the call is kept inline because its text is
# echoed in the header of the printed analysis below.
# cumperc supplies the cumulative percentages 0, 50, 80 and 100 to the chart
# (presumably the tick marks of the cumulative-percentage axis; see qcc docs).
pareto.chart(table(corruption$nivel),cumperc = c(0,50,80,100))
##
## Pareto chart analysis for table(corruption$nivel)
## Frequency Cum.Freq. Percentage Cum.Percent.
## 3 37.000000 37.000000 21.022727 21.022727
## 4 36.000000 73.000000 20.454545 41.477273
## 2 19.000000 92.000000 10.795455 52.272727
## 5 17.000000 109.000000 9.659091 61.931818
## 7 17.000000 126.000000 9.659091 71.590909
## 6 14.000000 140.000000 7.954545 79.545455
## 1 13.000000 153.000000 7.386364 86.931818
## 9 9.000000 162.000000 5.113636 92.045455
## 10 8.000000 170.000000 4.545455 96.590909
## 8 6.000000 176.000000 3.409091 100.000000
# Box plot of the levels. The plot data is the `corruption` data frame, not
# the frequency table, and the ordinal `nivel` is converted to numeric so
# that a box plot can be drawn from it.
box <- ggplot(corruption, aes(y = as.numeric(nivel))) +
  geom_boxplot()
box

# Same plot with the level labels from `freqCorrup` as the y-axis limits.
box + scale_y_discrete(limits = freqCorrup$nivel)