# SESIÓN 4 - ORDINALES

library(htmltab)

# Address of the page to scrape and XPath of the table to pull out of it.
linkPage <- "https://www.nationsonline.org/oneworld/corruption.htm"
linkTabla <- "/html/body/table[3]/tbody"

# Download the page and read the selected table into `corruption`.
corruption <- htmltab(linkPage, which = linkTabla)
## Neither <thead> nor <th> information found. Taking first table row for the header. If incorrect, specifiy header argument.
## Warning: Columns [ ] seem to have no data and are removed. Use
## rm_nodata_cols = F to suppress this behavior
# Inspect the column names delivered by the scrape.
colnames(corruption)
## [1] "Rank"        "Country"     "2016  Score" "2015  Score" "2014  Score"
## [6] "2013  Score" "2012  Score" "Region"
# Keep only columns 2, 3 and 8: country, 2016 score and region.
corruption <- corruption[, c(2, 3, 8)]
colnames(corruption)
## [1] "Country"     "2016  Score" "Region"
# Give the score column a name without spaces.
colnames(corruption)[2] <- "Score2016"
# Check the structure of the reduced table.
str(corruption)
## 'data.frame':    177 obs. of  3 variables:
##  $ Country  : chr  "Denmark" "New Zealand" "Finland" "Sweden" ...
##  $ Score2016: chr  "90" "90" "89" "88" ...
##  $ Region   : chr  "Europe" "Asia Pacific" "Europe" "Europe" ...
# The score arrives as text, so it must be converted to numeric. Values
# that are not numbers turn into NA, which triggers the warning below.
corruption[["Score2016"]] <- as.numeric(corruption[["Score2016"]])
## Warning: NAs introduced by coercion
# Identify the rows whose score could not be converted.
corruption[is.na(corruption$Score2016), ]
##                                                          Country Score2016
## 178 To get in-depth information visit:Transparency International        NA
##                                                           Region
## 178 To get in-depth information visit:Transparency International
# Keep only the rows that have a score.
corruption <- corruption[!is.na(corruption$Score2016), ]
# Show the first six rows.
head(corruption)
##       Country Score2016       Region
## 2     Denmark        90       Europe
## 3 New Zealand        90 Asia Pacific
## 4     Finland        89       Europe
## 5      Sweden        88       Europe
## 6 Switzerland        86       Europe
## 7      Norway        85       Europe
# Split the 2016 score into 10 groups and store the group of each country
# in `nivel` as an ordered factor labelled 1 to 10.
corruption$nivel <- cut(
  corruption$Score2016,
  breaks = 10,           # how many groups
  labels = 1:10,         # names of the groups
  ordered_result = TRUE  # ordinal result; TRUE rather than T, which is an
                         # ordinary variable that can be reassigned
)

head(corruption) # nivel now appears; the higher the group, the less corruption
##       Country Score2016       Region nivel
## 2     Denmark        90       Europe    10
## 3 New Zealand        90 Asia Pacific    10
## 4     Finland        89       Europe    10
## 5      Sweden        88       Europe    10
## 6 Switzerland        86       Europe    10
## 7      Norway        85       Europe    10

# EXPLORACIÓN DE VARIABLES ORDINALES

library(questionr)
library(magrittr) 
# Frequency table of the ordinal variable, with cumulative columns
# requested through `cum`, converted to a plain data frame.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
freqCorrup <- freq(corruption$nivel, cum = TRUE) %>%
  data.frame()
# Rebuild the data frame so the levels, which were the row names, become a
# regular `nivel` column and the row names are reset.
freqCorrup <- data.frame(
  nivel = row.names(freqCorrup),
  freqCorrup,
  row.names = NULL
)
freqCorrup
##    nivel  n   X. val. X.cum val.cum
## 1      1 13  7.4  7.4   7.4     7.4
## 2      2 19 10.8 10.8  18.2    18.2
## 3      3 37 21.0 21.0  39.2    39.2
## 4      4 36 20.5 20.5  59.7    59.7
## 5      5 17  9.7  9.7  69.3    69.3
## 6      6 14  8.0  8.0  77.3    77.3
## 7      7 17  9.7  9.7  86.9    86.9
## 8      8  6  3.4  3.4  90.3    90.3
## 9      9  9  5.1  5.1  95.5    95.5
## 10    10  8  4.5  4.5 100.0   100.0
# Rename columns 2 to 6 of the frequency table with descriptive names.

colnames(freqCorrup)[2:6] <- c(
  "absoluta", "relativa", "relativaVAl", "relativaCum", "relativaCumVAL"
)

freqCorrup
##    nivel absoluta relativa relativaVAl relativaCum relativaCumVAL
## 1      1       13      7.4         7.4         7.4            7.4
## 2      2       19     10.8        10.8        18.2           18.2
## 3      3       37     21.0        21.0        39.2           39.2
## 4      4       36     20.5        20.5        59.7           59.7
## 5      5       17      9.7         9.7        69.3           69.3
## 6      6       14      8.0         8.0        77.3           77.3
## 7      7       17      9.7         9.7        86.9           86.9
## 8      8        6      3.4         3.4        90.3           90.3
## 9      9        9      5.1         5.1        95.5           95.5
## 10    10        8      4.5         4.5       100.0          100.0
library(ggplot2) #descargar la libreria
# Bar chart drawn from the frequency table built above: `nivel` on the
# x axis and the absolute frequency as the bar height.
base <- ggplot(freqCorrup, aes(x = nivel, y = absoluta))
# Set the x-axis limits to the levels in the order they have in the table.
base1 <- base + scale_x_discrete(limits = freqCorrup$nivel)
bar1 <- base1 + geom_bar(stat = "identity")
bar1

# The same chart with axis labels, title, subtitle and source caption.
bar1 +
  labs(
    title = "¿La mayoria de paises están libres de corrupcion?",
    subtitle = "(por arriba de nivel 8)",
    x = "Nivel",
    y = "Cantidad",
    caption = "Fuente: Transparency International"
  )

library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
# Pareto chart of the counts per level. The table() call is kept inline
# because the printed header echoes the argument expression
# ("Pareto chart analysis for table(corruption$nivel)").
# NOTE(review): cumperc presumably sets the marks shown on the
# cumulative-percentage axis (0, 50, 80, 100) - confirm in the qcc docs.
pareto.chart(table(corruption$nivel),cumperc = c(0,50,80,100))

##     
## Pareto chart analysis for table(corruption$nivel)
##       Frequency  Cum.Freq. Percentage Cum.Percent.
##   3   37.000000  37.000000  21.022727    21.022727
##   4   36.000000  73.000000  20.454545    41.477273
##   2   19.000000  92.000000  10.795455    52.272727
##   5   17.000000 109.000000   9.659091    61.931818
##   7   17.000000 126.000000   9.659091    71.590909
##   6   14.000000 140.000000   7.954545    79.545455
##   1   13.000000 153.000000   7.386364    86.931818
##   9    9.000000 162.000000   5.113636    92.045455
##   10   8.000000 170.000000   4.545455    96.590909
##   8    6.000000 176.000000   3.409091   100.000000
# The frequency table is not used here; the boxplot is drawn from the
# data itself, with the ordinal variable converted to numeric.

box <- ggplot(corruption, aes(y = as.numeric(nivel))) +
  geom_boxplot()
box

# Same boxplot, with the y scale limited to the levels listed in the
# frequency table.
box +
  scale_y_discrete(limits = freqCorrup$nivel)