# Práctica calificada
#PARTE 1
library(htmltab)
# Download the first table of the Wikipedia "List of countries by Human
# Development Index" article into the data frame IDH.
Link <- "https://en.wikipedia.org/wiki/List_of_countries_by_Human_Development_Index"
Xpath <- '//*[@id="mw-content-text"]/div[1]/table[1]'
# rm_nodata_cols is deliberately left at its default, so htmltab drops the
# columns that carry no data (see the warning below). The column positions
# used later in this script (2 = country, 3 = HDI, 4 = HDI growth) rely on
# that. Note that rm_nodata_cols only has an effect when it is passed as an
# argument to htmltab(); assigning it as a separate statement does nothing.
IDH <- htmltab(doc = Link, which = Xpath)
## Warning: Columns [Rank >> Change from previous year >> Very high human
## development >> High human development >> Medium human development >> Low human
## development] seem to have no data and are removed. Use rm_nodata_cols = F to
## suppress this behavior
# Inspect the structure of the scraped table.
str(IDH)
## 'data.frame': 190 obs. of 4 variables:
## $ Rank >> 2018 data (2019 report) >> Very high human development >> High human development >> Medium human development >> Low human development : chr "1" "2" "3" "4" ...
## $ Country or Territory >> Very high human development >> High human development >> Medium human development >> Low human development : chr " Norway" " Switzerland" " Ireland" " Germany" ...
## $ HDI >> 2018 data (2019 report) >> Very high human development >> High human development >> Medium human development >> Low human development : chr "0.954" "0.946" "0.942" "0.939" ...
## $ HDI >> Average annual HDI growth (2010–2018) >> Very high human development >> High human development >> Medium human development >> Low human development: chr "0.16%" "0.18%" "0.71%" "0.25%" ...
# NOMBRES
# Replace the long scraped headers of columns 2 and 3 with short names.
posicionesCambiar <- c(2, 3)
nuevosNombres <- c("pais", "idh")
names(IDH) <- replace(names(IDH), posicionesCambiar, nuevosNombres)
# VARIABLES
# Convert columns 3 (HDI) and 4 (average annual HDI growth) to numeric.
# The growth column is scraped as text such as "0.16%"; as.numeric() cannot
# parse the trailing percent sign and would turn every value into NA, so the
# sign is stripped first. A typographic minus (U+2212), if present, is also
# replaced by an ASCII hyphen so negative values are parsed instead of lost.
IDH[, 3:4] <- lapply(IDH[, 3:4], function(columna) {
  limpio <- gsub("%", "", columna, fixed = TRUE)
  limpio <- gsub("\u2212", "-", limpio, fixed = TRUE)
  as.numeric(trimws(limpio))
})
str(IDH)
## 'data.frame': 190 obs. of 4 variables:
## $ Rank >> 2018 data (2019 report) >> Very high human development >> High human development >> Medium human development >> Low human development : chr "1" "2" "3" "4" ...
## $ pais : chr " Norway" " Switzerland" " Ireland" " Germany" ...
## $ idh : num 0.954 0.946 0.942 0.939 0.939 0.938 0.938 0.937 0.935 0.933 ...
## $ HDI >> Average annual HDI growth (2010–2018) >> Very high human development >> High human development >> Medium human development >> Low human development: num 0.16 0.18 0.71 0.25 ...
# PAÍS E IDH
# Descriptive summary of the country and HDI columns only.
summary(IDH[c("pais", "idh")])
## pais idh
## Length:190 Min. :0.3770
## Class :character 1st Qu.:0.5962
## Mode :character Median :0.7280
## Mean :0.7135
## 3rd Qu.:0.8285
## Max. :0.9540
#PARTE 2
# CURTOSIS EN EL HISTOGRAMA
library(ggplot2)
# Histogram of the HDI using 7 bins; the bare object name on the last line
# prints the plot.
base1 <- ggplot(data = IDH, mapping = aes(x = idh))
histNum <- base1 + geom_histogram(bins = 7)
histNum
# Respuesta: en el histograma se observa una curtosis leptocúrtica.
# ASIMETRÍA EN EL BOXPLOT
# Boxplot of the HDI, flipped so the value axis runs horizontally.
base2 <- ggplot(data = IDH, mapping = aes(y = idh))
box <- base2 + geom_boxplot() + coord_flip()
box
# Respuesta: en el boxplot se observa una asimetría negativa.
# DISTANCIA INTERCUARTÍLICA
library(DescTools)
# Sample skewness of the HDI together with a confidence interval.
# conf.level is the confidence level of the interval itself, so the usual
# 95% interval is requested with 0.95 (0.05 would ask for a 5% interval,
# which is far too narrow to be informative).
Skew(IDH$idh, conf.level = 0.95)
## skew
## -0.3471514
# NOTE(review): the lwr.ci / upr.ci bounds are not recorded here; DescTools
# documents them as bootstrap-based, so they presumably change slightly
# between runs -- confirm and paste the bounds from an actual run if needed.
# Interquartile range of the HDI.
IQR(IDH$idh, na.rm = TRUE)
## [1] 0.23225
# First and third quartiles, taken directly from quantile() instead of
# indexing into the printed summary by position.
q1 <- quantile(IDH$idh, 0.25, na.rm = TRUE, names = FALSE)
q3 <- quantile(IDH$idh, 0.75, na.rm = TRUE, names = FALSE)
# Outlier fences at 1.5 * IQR beyond the quartiles. Both sides are checked:
# the skewness above is negative, so any outliers would be expected on the
# low side, which the upper fence alone cannot detect.
umbral_inferior <- q1 - 1.5 * IQR(IDH$idh, na.rm = TRUE)
umbral <- q3 + 1.5 * IQR(IDH$idh, na.rm = TRUE)
umbral
## [1] 1.176875
# Rows outside either fence. which() drops NA comparisons so rows with a
# missing HDI are not returned as all-NA rows.
IDH[which(IDH$idh > umbral | IDH$idh < umbral_inferior), ]
## [1] Rank >> 2018 data (2019 report) >> Very high human development >> High human development >> Medium human development >> Low human development
## [2] pais
## [3] idh
## [4] HDI >> Average annual HDI growth (2010–2018) >> Very high human development >> High human development >> Medium human development >> Low human development
## <0 rows> (or 0-length row.names)
# Respuesta: no hay atípicos.
# Split the HDI range into three equal-width, ordered bands.
IDH$grupo <- cut(IDH$idh, 3, labels = c("bajo", "medio", "alto"),
                 include.lowest = TRUE, ordered_result = TRUE)
library(ggplot2)
# One HDI histogram per band. The facets use the grupo column directly:
# melting the table with idh as the id variable and faceting by the melted
# column name would only repeat the same overall histogram once per column
# and never use grupo.
hist <- ggplot(data = IDH, aes(x = idh)) + geom_histogram()
hist + facet_wrap(~ grupo)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.