#Practica calificada

#PARTE 1

library(htmltab)
# Location of the table to scrape: the first table inside the first <div> of
# the element with id "mw-content-text" on the Wikipedia HDI list page.
Xpath <- '//*[@id="mw-content-text"]/div[1]/table[1]'
Link <- "https://en.wikipedia.org/wiki/List_of_countries_by_Human_Development_Index"

# Download the page and read that table into the data frame IDH.
IDH <- htmltab(doc = Link, which = Xpath)
## Warning: Columns [Rank >> Change from previous year​ >> Very high human
## development >> High human development >> Medium human development >> Low human
## development] seem to have no data and are removed. Use rm_nodata_cols = F to
## suppress this behavior
# htmltab() is deliberately left at its default of dropping header columns
# that carry no data (see the warning above). `rm_nodata_cols` is an argument
# of htmltab(); assigning it as a free-standing variable has no effect on the
# call, and passing it would add empty columns and shift the positions that
# the renaming step further down relies on.
# Inspect the structure of the scraped table.
str(IDH)
## 'data.frame':    190 obs. of  4 variables:
##  $ Rank >> 2018 data (2019 report)​ >> Very high human development >> High human development >> Medium human development >> Low human development             : chr  "1" "2" "3" "4" ...
##  $ Country or Territory >> Very high human development >> High human development >> Medium human development >> Low human development                        : chr  " Norway" "  Switzerland" " Ireland" " Germany" ...
##  $ HDI >> 2018 data (2019 report)​ >> Very high human development >> High human development >> Medium human development >> Low human development              : chr  "0.954" "0.946" "0.942" "0.939" ...
##  $ HDI >> Average annual HDI growth (2010–2018)​ >> Very high human development >> High human development >> Medium human development >> Low human development: chr  "0.16%" "0.18%" "0.71%" "0.25%" ...

# NOMBRES

# Give the 2nd and 3rd columns (country and HDI value) short names; the other
# columns keep the long headers produced by the scrape.
nuevosNombres <- c("pais", "idh")
posicionesCambiar <- c(2, 3)
colnames(IDH)[posicionesCambiar] <- nuevosNombres

# VARIABLES

# Convert column 3 (HDI value) and column 4 (average annual HDI growth) from
# text to numeric. The growth figures are scraped with a trailing percent sign
# (e.g. "0.16%"), which a bare as.numeric() turns into NA for every row, so the
# sign is stripped first; the resulting numbers are in percentage points.
# A typographic minus sign (U+2212), if present, is mapped to the ASCII hyphen
# so that negative values parse as well. Cells that are still not numeric after
# cleaning become NA (with the usual coercion warning).
IDH[, 3:4] <- lapply(IDH[, 3:4], function(columna) {
  limpio <- gsub("%", "", columna, fixed = TRUE)
  limpio <- gsub("\u2212", "-", limpio, fixed = TRUE)
  as.numeric(trimws(limpio))
})
str(IDH)
## 'data.frame':    190 obs. of  4 variables:
##  $ Rank >> 2018 data (2019 report)​ >> Very high human development >> High human development >> Medium human development >> Low human development             : chr  "1" "2" "3" "4" ...
##  $ pais                                                                                                                                                      : chr  " Norway" "  Switzerland" " Ireland" " Germany" ...
##  $ idh                                                                                                                                                       : num  0.954 0.946 0.942 0.939 0.939 0.938 0.938 0.937 0.935 0.933 ...
##  $ HDI >> Average annual HDI growth (2010–2018)​ >> Very high human development >> High human development >> Medium human development >> Low human development: num  NA NA NA NA NA NA NA NA NA NA ...

# PAIS E IDH

# Summary statistics restricted to the country name and HDI value columns.
summary(IDH[c("pais", "idh")])
##      pais                idh        
##  Length:190         Min.   :0.3770  
##  Class :character   1st Qu.:0.5962  
##  Mode  :character   Median :0.7280  
##                     Mean   :0.7135  
##                     3rd Qu.:0.8285  
##                     Max.   :0.9540

#PARTE 2

# CURTOSIS EN EL HISTOGRAMA

library(ggplot2)
# Histogram of the HDI values drawn with 7 bins.
base1 <- ggplot(data = IDH, mapping = aes(x = idh))
histNum <- base1 +
  geom_histogram(bins = 7)
histNum

# Respuesta: en el histograma se observa una curtosis leptocurtica

# ASIMETRIA EN EL BOXPLOT

# Box plot of the HDI values; coord_flip() lays the box out horizontally.
base2 <- ggplot(data = IDH, mapping = aes(y = idh))
box <- base2 +
  geom_boxplot() +
  coord_flip()

box

# Respuesta: en el boxplot se observa una asimetria negativa

# DISTANCIA INTERCUARTILICA

library(DescTools)
# Skewness of the HDI values with a 95% confidence interval. `conf.level` is
# the coverage of the interval, not the significance level, so the former
# value of 0.05 asked for a 5% interval. Missing values are dropped so that a
# single NA does not turn the whole result into NA.
# NOTE(review): per the DescTools documentation the interval is bootstrap-based
# by default, so its bounds vary between runs - confirm if exact reproducibility
# is needed.
Skew(IDH$idh, na.rm = TRUE, conf.level = 0.95)
## point estimate from the recorded run: skew = -0.3471514
# Interquartile range of the HDI values.
rango_iq <- IQR(IDH$idh, na.rm = TRUE)
rango_iq
## [1] 0.23225
# First and third quartiles, computed with quantile() instead of being picked
# by position out of the display-oriented summary() result.
cuartiles <- quantile(IDH$idh, probs = c(0.25, 0.75), na.rm = TRUE,
                      names = FALSE)
q1 <- cuartiles[1]
q3 <- cuartiles[2]

# Tukey fences: values more than 1.5 IQR below Q1 or above Q3 count as
# outliers. Both sides are checked; testing only the upper fence would miss
# low outliers, which is the side a negatively skewed variable would have them.
umbral_inferior <- q1 - 1.5 * rango_iq
umbral <- q3 + 1.5 * rango_iq
umbral
## [1] 1.176875
# Rows lying outside either fence. which() discards NA comparisons instead of
# returning all-NA rows.
IDH[which(IDH$idh < umbral_inferior | IDH$idh > umbral), ]
## [1] Rank >> 2018 data (2019 report)​ >> Very high human development >> High human development >> Medium human development >> Low human development             
## [2] pais                                                                                                                                                      
## [3] idh                                                                                                                                                       
## [4] HDI >> Average annual HDI growth (2010–2018)​ >> Very high human development >> High human development >> Medium human development >> Low human development
## <0 rows> (or 0-length row.names)

# Respuesta: no hay atipicos

# Split the HDI into three equal-width intervals and store them as an ordered
# factor with levels bajo < medio < alto. TRUE is spelled out because the
# shorthand T is an ordinary variable that can be reassigned.
IDH$grupo <- cut(IDH$idh, 3,
                 labels = c("bajo", "medio", "alto"),
                 include.lowest = TRUE, ordered_result = TRUE)

# Long format keyed by idh: every other column is stacked into the pair
# variable / value.
iDHORD <- reshape::melt(IDH, id.vars = "idh")
library(ggplot2)

# Histogram of idh, one panel per melted variable. The bin count is set
# explicitly to 30, the value stat_bin() was falling back to, which keeps the
# same plot and silences its "Pick better value" message.
hist <- ggplot(data = iDHORD, aes(x = idh)) + geom_histogram(bins = 30)
# NOTE(review): each melted variable appears to repeat the same idh values, so
# the panels would show the same distribution; faceting IDH by `grupo` may have
# been the intent - confirm.
hist + facet_wrap(. ~ variable)