# Published Google Sheets CSV export holding the human development index table.
LINK <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vSa8xFQmm0Bhkci8Hnsa1sTJw3JECiV1hUE9UYHjpK8DAtiKqqti99vqPFrOdks2owPnCMJv3NUuNBr/pub?output=csv"
# Keep text columns as character (spelled out as FALSE, since `F` is an
# ordinary variable that can be reassigned) and declare the input as UTF-8 so
# accented names are not mangled on platforms whose native encoding differs.
IDH <- read.csv(LINK, stringsAsFactors = FALSE, encoding = "UTF-8")
# Quick look at the first rows; the sheet starts with title/credit rows and a
# year header before the department rows.
head(IDH)
## X X.1 X.2
## 1 NA Ã\215ndice de desarrollo humano (IDH)
## 2 NA (Ãndice entre 0 y 1, donde 1 representa más desarrollo humano)
## 3 NA
## 4 NA
## 5 NA
## 6 NA Amazonas
## X.3 X.4 X.5 X.6 X.7
## 1 Fuente: PNUD
## 2 Elaboración: IPE
## 3
## 4
## 5 1993 2000 2007 2012
## 6 0,47 0,52 0,57 0,38
# Compact overview of the column types read from the sheet.
str(object = IDH)
## 'data.frame': 31 obs. of 8 variables:
## $ X : logi NA NA NA NA NA NA ...
## $ X.1: chr "Ã\215ndice de desarrollo humano (IDH)" "(Ãndice entre 0 y 1, donde 1 representa más desarrollo humano)" "" "" ...
## $ X.2: chr "" "" "" "" ...
## $ X.3: chr "" "" "" "" ...
## $ X.4: chr "" "" "" "" ...
## $ X.5: chr "" "" "" "" ...
## $ X.6: chr "" "" "" "" ...
## $ X.7: chr "Fuente: PNUD" "Elaboración: IPE" "" "" ...
# Treat the department names (X.2) and every year column as categorical.
# Columns are addressed by name so that the 2012 column (X.6) is converted
# together with 1993/2000/2007 (X.3-X.5) instead of being left as character.
factor_cols <- c("X.2", "X.3", "X.4", "X.5", "X.6")
IDH[factor_cols] <- lapply(IDH[factor_cols], as.factor)
# Per-column summary after the conversion.
summary(IDH)
## X X.1 X.2 X.3 X.4
## Mode:logical Length:31 : 6 : 5 : 5
## NA's:31 Class :character Amazonas : 1 0,45 : 3 0,58 : 3
## Mode :character Ancash : 1 0,53 : 3 0,62 : 3
## ApurÃmac: 1 0,52 : 2 0,46 : 2
## Arequipa : 1 0,54 : 2 0,49 : 2
## Ayacucho : 1 0,59 : 2 0,55 : 2
## (Other) :20 (Other):14 (Other):14
## X.5 X.6 X.7
## :5 Length:31 Length:31
## 0,65 :5 Class :character Class :character
## 0,56 :4 Mode :character Mode :character
## 0,60 :4
## 0,59 :3
## 0,62 :3
## (Other):7
# Count how often each entry of the department column occurs (blank cells are
# counted as their own category).
table(IDH[["X.2"]])
##
## Amazonas Ancash ApurÃmac Arequipa
## 6 1 1 1 1
## Ayacucho Cajamarca Cusco Huánuco Huancavelica
## 1 1 1 1 1
## Ica JunÃn La Libertad Lambayeque Lima
## 1 1 1 1 1
## Loreto Madre de Dios Moquegua Pasco Perú
## 1 1 1 1 1
## Piura Puno San Martin Tacna Tumbes
## 1 1 1 1 1
## Ucayali
## 1
# Counts of each distinct entry in X.3 (the column under the 1993 header; the
# header cell and blank cells are counted too).
table(IDH[["X.3"]])
##
## 0,37 0,40 0,42 0,45 0,47 0,48 0,52 0,53 0,54
## 5 1 1 1 3 1 1 2 3 2
## 0,58 0,59 0,60 0,61 0,62 0,64 0,65 0,71 0,75 1993
## 1 2 1 1 1 1 1 1 1 1
# Counts of each distinct entry in X.4 (the column under the 2000 header).
table(IDH[["X.4"]])
##
## 0,46 0,49 0,50 0,51 0,52 0,54 0,55 0,56 0,57
## 5 2 2 1 1 1 1 2 1 1
## 0,58 0,61 0,62 0,63 0,64 0,67 0,68 0,75 2000
## 3 1 3 1 1 2 1 1 1
# Counts of each distinct entry in X.5 (the column under the 2007 header).
table(IDH[["X.5"]])
##
## 0,54 0,56 0,57 0,58 0,59 0,60 0,62 0,63 0,65
## 5 1 4 2 1 3 4 3 1 5
## 0,68 2007
## 1 1
# Counts of each distinct entry in X.6 (the column under the 2012 header).
table(IDH[["X.6"]])
##
## 0,30 0,33 0,34 0,37 0,38 0,39 0,40 0,41 0,43 0,44 0,45 0,46 0,47 0,51 0,52
## 5 1 1 1 1 2 1 1 1 2 3 1 1 1 1 1
## 0,54 0,56 0,58 0,62 0,63 2012
## 1 2 1 1 1 1
library(questionr)
## Warning: package 'questionr' was built under R version 4.0.2
# Frequency table for the department column: counts, percentages and their
# cumulative versions. `TRUE` is spelled out because `T` is a plain variable
# that can be reassigned.
freqOrd <- freq(IDH$X.2, cum = TRUE)
freqOrd
## n % val% %cum val%cum
## 6 19.4 19.4 19.4 19.4
## Amazonas 1 3.2 3.2 22.6 22.6
## Ancash 1 3.2 3.2 25.8 25.8
## ApurÃmac 1 3.2 3.2 29.0 29.0
## Arequipa 1 3.2 3.2 32.3 32.3
## Ayacucho 1 3.2 3.2 35.5 35.5
## Cajamarca 1 3.2 3.2 38.7 38.7
## Cusco 1 3.2 3.2 41.9 41.9
## Huánuco 1 3.2 3.2 45.2 45.2
## Huancavelica 1 3.2 3.2 48.4 48.4
## Ica 1 3.2 3.2 51.6 51.6
## JunÃn 1 3.2 3.2 54.8 54.8
## La Libertad 1 3.2 3.2 58.1 58.1
## Lambayeque 1 3.2 3.2 61.3 61.3
## Lima 1 3.2 3.2 64.5 64.5
## Loreto 1 3.2 3.2 67.7 67.7
## Madre de Dios 1 3.2 3.2 71.0 71.0
## Moquegua 1 3.2 3.2 74.2 74.2
## Pasco 1 3.2 3.2 77.4 77.4
## Perú 1 3.2 3.2 80.6 80.6
## Piura 1 3.2 3.2 83.9 83.9
## Puno 1 3.2 3.2 87.1 87.1
## San Martin 1 3.2 3.2 90.3 90.3
## Tacna 1 3.2 3.2 93.5 93.5
## Tumbes 1 3.2 3.2 96.8 96.8
## Ucayali 1 3.2 3.2 100.0 100.0
library(questionr)
# Frequency table for X.3 (the column under the 1993 header), with cumulative
# percentages. `TRUE` is spelled out because `T` is a plain variable that can
# be reassigned.
freqOrd <- freq(IDH$X.3, cum = TRUE)
freqOrd
## n % val% %cum val%cum
## 5 16.1 16.1 16.1 16.1
## 0,37 1 3.2 3.2 19.4 19.4
## 0,40 1 3.2 3.2 22.6 22.6
## 0,42 1 3.2 3.2 25.8 25.8
## 0,45 3 9.7 9.7 35.5 35.5
## 0,47 1 3.2 3.2 38.7 38.7
## 0,48 1 3.2 3.2 41.9 41.9
## 0,52 2 6.5 6.5 48.4 48.4
## 0,53 3 9.7 9.7 58.1 58.1
## 0,54 2 6.5 6.5 64.5 64.5
## 0,58 1 3.2 3.2 67.7 67.7
## 0,59 2 6.5 6.5 74.2 74.2
## 0,60 1 3.2 3.2 77.4 77.4
## 0,61 1 3.2 3.2 80.6 80.6
## 0,62 1 3.2 3.2 83.9 83.9
## 0,64 1 3.2 3.2 87.1 87.1
## 0,65 1 3.2 3.2 90.3 90.3
## 0,71 1 3.2 3.2 93.5 93.5
## 0,75 1 3.2 3.2 96.8 96.8
## 1993 1 3.2 3.2 100.0 100.0
# Turn the department frequency table into a plain data frame for plotting.
# The table is rebuilt from IDH$X.2 here because `freqOrd` was last assigned
# the X.3 frequencies, which would otherwise end up under the `X.2` column and
# be plotted as if they were departments.
# freq() keeps the category labels in the row names, so they are copied into
# the `X.2` column exactly once; wrapping the result a second time would only
# add a column of row numbers.
dept_freq <- freq(IDH$X.2, cum = TRUE)
freqOrd <- data.frame(
  X.2 = row.names(dept_freq),
  dept_freq,
  row.names = NULL
)
freqOrd
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
# Bar chart built from the frequency data frame: one bar per label in `X.2`,
# bar height taken from `n`, x axis kept in the data frame's row order.
base <- ggplot(data = freqOrd, mapping = aes(x = X.2, y = n))
base1 <- base +
  scale_x_discrete(limits = freqOrd$X.2)
bar1 <- base1 +
  geom_bar(stat = "identity")
bar1

# Add axis labels, title and source credit to the bar chart. The caption
# repeats the credits carried by the sheet itself ("Fuente: PNUD",
# "Elaboración: IPE") rather than "IEP", which the data does not mention.
bar1 + labs(
  x = "Departamentos",
  y = "Cantidad",
  title = "IDH Perú",
  subtitle = "1993-2012",
  caption = "Fuente: PNUD. Elaboración: IPE"
)

library(qcc)
## Warning: package 'qcc' was built under R version 4.0.2
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
# Pareto chart of the X.2 counts; `cumperc` supplies the cumulative-percentage
# values 0, 50, 80 and 100. The table() call is passed inline because the
# printed analysis header echoes the argument expression.
pareto.chart(table(IDH$X.2),cumperc = c(0,50,80,100))

##
## Pareto chart analysis for table(IDH$X.2)
## Frequency Cum.Freq. Percentage Cum.Percent.
## 6.000000 6.000000 19.354839 19.354839
## Amazonas 1.000000 7.000000 3.225806 22.580645
## Ancash 1.000000 8.000000 3.225806 25.806452
## ApurÃmac 1.000000 9.000000 3.225806 29.032258
## Arequipa 1.000000 10.000000 3.225806 32.258065
## Ayacucho 1.000000 11.000000 3.225806 35.483871
## Cajamarca 1.000000 12.000000 3.225806 38.709677
## Cusco 1.000000 13.000000 3.225806 41.935484
## Huánuco 1.000000 14.000000 3.225806 45.161290
## Huancavelica 1.000000 15.000000 3.225806 48.387097
## Ica 1.000000 16.000000 3.225806 51.612903
## JunÃn 1.000000 17.000000 3.225806 54.838710
## La Libertad 1.000000 18.000000 3.225806 58.064516
## Lambayeque 1.000000 19.000000 3.225806 61.290323
## Lima 1.000000 20.000000 3.225806 64.516129
## Loreto 1.000000 21.000000 3.225806 67.741935
## Madre de Dios 1.000000 22.000000 3.225806 70.967742
## Moquegua 1.000000 23.000000 3.225806 74.193548
## Pasco 1.000000 24.000000 3.225806 77.419355
## Perú 1.000000 25.000000 3.225806 80.645161
## Piura 1.000000 26.000000 3.225806 83.870968
## Puno 1.000000 27.000000 3.225806 87.096774
## San Martin 1.000000 28.000000 3.225806 90.322581
## Tacna 1.000000 29.000000 3.225806 93.548387
## Tumbes 1.000000 30.000000 3.225806 96.774194
## Ucayali 1.000000 31.000000 3.225806 100.000000
# Boxplot of the department factor's integer codes (as.numeric() on a factor
# yields the position of each value among the levels).
bar <- ggplot(IDH, aes(y = as.numeric(X.2))) +
  geom_boxplot()
bar

# Label the codes with the factor's own levels. The levels are read from
# IDH$X.2 directly instead of freqOrd$X.2, so the labels always correspond to
# the codes being plotted, whatever `freqOrd` holds at this point.
bar + scale_y_discrete(limits = levels(IDH$X.2))

library(DescTools)
## Warning: package 'DescTools' was built under R version 4.0.2
# Most frequent entry of the department column; the result also carries the
# number of times it occurs as the "freq" attribute.
Mode(IDH[["X.2"]])
## [1]
## attr(,"freq")
## [1] 6
## 26 Levels: Amazonas Ancash ApurÃmac Arequipa Ayacucho Cajamarca ... Ucayali
# Concentration of the department column: Herfindahl index of the counts.
dataTable <- table(IDH[["X.2"]])
Herfindahl(dataTable)
## [1] 0.06347555
# Reciprocal of the sum of squared shares.
1 / sum(prop.table(dataTable)^2)
## [1] 15.7541
# NOTE(review): X.2 is a factor of department names, so the order-based
# statistics below appear to act on level codes rather than on meaningful
# magnitudes (Median returns NA) - confirm they are meant to be reported.
Median(IDH[["X.2"]])
## [1] NA
IQR(IDH[["X.2"]])
## [1] 15
mad(as.numeric(IDH[["X.2"]]))
## [1] 11.8608