Con este código Ud. tendrá la base de datos de LAPOP para el 2017.
# Download the LAPOP 2017 survey (.dta file) from GitHub and read it with rio.
library(rio)
link <- "https://github.com/JoseManuelMagallanes/Estadistica_Para_AnalisisPolitico/raw/master/lapop17.dta"
lapop17 <- import(file = link)
Ahora Ud. tiene la data; son 188 columnas. El cuestionario está en PDF en este link. Cada una de las siguientes preguntas está basada en una columna. Sólo use la columna que necesite.
# Show the column names of lapop17.
colnames(lapop17)
## [1] "pais" "sobremuestra" "idnum" "uniq_id"
## [5] "upm" "prov" "municipio" "cluster"
## [9] "ur" "tamano" "idiomaq" "fecha"
## [13] "wt" "estratopri" "estratosec" "q2"
## [17] "q1" "perprov" "ls3" "a4"
## [21] "soct2" "idio2" "np1" "sgl1"
## [25] "cp6" "cp7" "cp8" "cp13"
## [29] "cp20" "it1" "l1" "prot3"
## [33] "jc10" "jc13" "jc15a" "vic1ext"
## [37] "vic1exta" "vic2new" "aoj11" "aoj12"
## [41] "b1" "b2" "b3" "b4"
## [45] "b6" "b43" "b12" "b13"
## [49] "b18" "b21" "b21a" "b32"
## [53] "b37" "b47a" "m1" "m2"
## [57] "sd2new2" "sd3new2" "sd6new2" "infrax"
## [61] "infra3" "ros1" "ros4" "ing4"
## [65] "eff1" "eff2" "aoj22new" "media3"
## [69] "media4" "exp_a" "dst1b" "drk1"
## [73] "env1c" "env2b" "envp3l" "envp3l_o"
## [77] "envp3n" "envp3n_o" "envp41" "envp42"
## [81] "pn4" "w14a" "e5" "e15"
## [85] "e16" "d1" "d2" "d3"
## [89] "d4" "d5" "d6" "ivv3"
## [93] "lib1" "lib2b" "lib2c" "lib4"
## [97] "exc2" "exc6" "exc20" "exc11"
## [101] "exc13" "exc14" "exc15" "exc16"
## [105] "exc18" "exc7" "exc7new" "vb1"
## [109] "vb2" "vb3n" "vb10" "vb11"
## [113] "pol1" "vb20" "dis7a" "dis8a"
## [117] "dis9a" "dis10a" "dis11a" "for5"
## [121] "mil10a" "mil10e" "ccq1" "ccq2"
## [125] "ccq3" "ccq4" "ie1" "ie2"
## [129] "ie3" "ie6" "ie9" "ie10"
## [133] "envp8" "wf1" "cct1b" "ed"
## [137] "ed2" "q5a" "q5b" "q3c"
## [141] "ocup4a" "ocup1a" "q10g" "q10new"
## [145] "q14" "q10d" "q10e" "q11n"
## [149] "q12c" "q12bn" "q12" "q12m"
## [153] "q12f" "vac1" "etid" "iiet1"
## [157] "iiet2" "iiet3" "www1" "gi0"
## [161] "pr1" "r3" "r4" "r4a"
## [165] "r5" "r6" "r7" "r8"
## [169] "r12" "r14" "r15" "r18"
## [173] "r1" "r16" "colorr" "conocim"
## [177] "iarea1" "iarea2" "iarea3" "iarea4"
## [181] "iarea6" "iarea7" "sexi" "colori"
## [185] "srvyrid" "nationality" "formatq" "sex"
Pregunta:
¿Cree la mayoría de la gente que los servicios que dan los municipios son malos?
# Count how many respondents gave each sgl1 response code.
table(lapop17[["sgl1"]])
##
## 1 2 3 4 5
## 46 504 1100 606 320
# SGL1 labels, listed in the order of the numeric codes 1..5 stored in the data
ordenOK <- c("MUYBUENO", "BUENO", "REGULAR", "MALO", "MUYMALO")
# The column holds the codes 1..5, not the label strings, so the codes go in
# `levels` and the names in `labels`. Passing the names as `levels` matches
# nothing and turns every answer into NA.
lapop17$sgl1 <- factor(
  lapop17$sgl1,
  levels = 1:5,
  labels = ordenOK,
  ordered = TRUE
)
# No gsub() step is needed: the labels contain no commas. The earlier call
# gsub(",", "", "NA", lapop17$sgl1) used the literal string "NA" as its input
# text, which overwrote the whole column with "NA".
table(lapop17$sgl1)
##
## MUYBUENO BUENO REGULAR MALO MUYMALO
## 46 504 1100 606 320
LA MAYORÍA CREE QUE EL SERVICIO ES REGULAR. ¿Esa creencia es relevante o no? SÍ ES RELEVANTE, YA QUE REPRESENTA ALREDEDOR DEL 43% DE LAS RESPUESTAS (1100 DE 2576).
¿Podemos afirmar que alrededor de 70% de los entrevistados cree que los derechos básicos del ciudadano están a lo más medianamente protegidos por el sistema político peruano?
Para esta parte descargue la información de cada país sobre el índice de felicidad
# Scrape the happiness-index table from the Spanish Wikipedia article.
library(htmltab)
linkPage <- "https://es.wikipedia.org/wiki/%C3%8Dndice_global_de_felicidad"
linkTabla <- "///div/table" # XPath handed to htmltab() to locate the table
Ifer <- htmltab(which = linkTabla, doc = linkPage)
# Inspect the structure of the scraped data.
str(Ifer)
## 'data.frame': 156 obs. of 9 variables:
## $ № : chr "1" "2" "3" "4" ...
## $ País : chr "Finlandia" "Noruega" "Dinamarca" "Islandia" ...
## $ Puntuación : chr "7.633" "7.594" "7.555" "7.495" ...
## $ PIB per cápita : chr "1.305" "1.456" "1.351" "1.343" ...
## $ Apoyo social : chr "1.592" "1.582" "1.590" "1.644" ...
## $ Esperanza de años de vida saludable : chr "0.874" "0.861" "0.868" "0.914" ...
## $ Libertad para tomar decisiones vitales: chr "0.681" "0.686" "0.683" "0.677" ...
## $ Generosidad : chr "0.192" "0.286" "0.284" "0.353" ...
## $ Percepción de la corrupción : chr "0.393" "0.340" "0.408" "0.138" ...
LIMPIO DATA Nombres sin espacios
# Keep only the first word of every column name so that no name has spaces.
library(stringr)
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
names(Ifer) <- str_split(names(Ifer), " ", simplify = TRUE)[, 1]
names(Ifer)
## [1] "№" "País" "Puntuación" "PIB" "Apoyo"
## [6] "Esperanza" "Libertad" "Generosidad" "Percepción"
Nombres sin símbolos “raros”
# Delete every non-ASCII character from the column names. Accented letters
# are removed outright, not transliterated.
names(Ifer) <- str_replace_all(
  string = names(Ifer),
  pattern = "[^[:ascii:]]",
  replacement = ""
)
names(Ifer)
## [1] "" "Pas" "Puntuacin" "PIB" "Apoyo"
## [6] "Esperanza" "Libertad" "Generosidad" "Percepcin"
Valores del data frame sin símbolos “raros”
# Apply the same non-ASCII removal to the values of every column.
Ifer[, ] <- lapply(
  Ifer[, ],
  str_replace_all,
  pattern = "[^[:ascii:]]",
  replacement = ""
)
names(Ifer)
## [1] "" "Pas" "Puntuacin" "PIB" "Apoyo"
## [6] "Esperanza" "Libertad" "Generosidad" "Percepcin"
# Convert columns 3 to 9 from text to numbers with readr::parse_number().
library(readr)
Ifer[, 3:9] <- lapply(Ifer[, 3:9], parse_number)
# Check the resulting column types.
str(Ifer)
## 'data.frame': 156 obs. of 9 variables:
## $ : chr "1" "2" "3" "4" ...
## $ Pas : chr "Finlandia" "Noruega" "Dinamarca" "Islandia" ...
## $ Puntuacin : num 7.63 7.59 7.55 7.5 7.49 ...
## $ PIB : num 1.3 1.46 1.35 1.34 1.42 ...
## $ Apoyo : num 1.59 1.58 1.59 1.64 1.55 ...
## $ Esperanza : num 0.874 0.861 0.868 0.914 0.927 0.878 0.896 0.876 0.913 0.91 ...
## $ Libertad : num 0.681 0.686 0.683 0.677 0.66 0.638 0.653 0.669 0.659 0.647 ...
## $ Generosidad: num 0.192 0.286 0.284 0.353 0.256 0.333 0.321 0.365 0.285 0.361 ...
## $ Percepcin : num 0.393 0.34 0.408 0.138 0.357 0.295 0.291 0.389 0.383 0.302 ...
Graficamos en el Boxplot:
# Horizontal boxplot of the happiness score, with axis breaks placed at the
# summary statistics.
library(ggplot2)
# Values returned by summary(), rounded to 2 decimals, used as axis breaks.
estadigrafos <- round(as.vector(summary(Ifer$Puntuacin)), 2)
# Build the plot object first. Without this definition `box` resolves to
# graphics::box(), and adding a scale to that function yields NULL, not a plot.
basePunt <- ggplot(Ifer, aes(y = Puntuacin))
box <- basePunt + geom_boxplot() + coord_flip()
box + scale_y_continuous(breaks = estadigrafos)
Podemos apreciar que sí hay asimetría, para mayor respaldo calculamos el coeficiente
# Skewness coefficient of the happiness score with a confidence interval.
library(DescTools)
# conf.level is the confidence level of the interval, so a 95% interval needs
# 0.95. The earlier value 0.05 requested a 5% interval, which is far too narrow
# to be informative.
# NOTE: output recorded with conf.level = 0.05 must be regenerated.
Skew(Ifer$Puntuacin, conf.level = 0.95)
## skew lwr.ci upr.ci
## 0.014941126 0.004235002 0.016521766
# Median of the happiness score, computed with DescTools::Median().
library(DescTools)
Median(x = Ifer$Puntuacin)
## [1] 5.378
Patrón de referencia: `tablaContinente=prop.table(table(indem$Continent))`; `Herfindahl(tablaContinente)`. ¿Es representativo este valor?
# Relative frequency of each distinct score, then its Herfindahl index.
conteoPuntajes <- table(Ifer$Puntuacin)
tablaInfel <- prop.table(conteoPuntajes)
Herfindahl(tablaInfel)
## [1] 0.006574622
NOTA:
* < 0.01: indica que la moda no es significativa; las categorías tienen pesos similares.
* < 0.15: indica que la moda no es significativa; varias categorías tienen pesos similares.
* entre 0.15 y 0.25: hay una moda.
* > 0.25: la moda se diferencia de las demás.
El Herfindahl es 0.006574622, por lo tanto podemos afirmar que el valor representativo no es robusto.
¿Se puede afirmar que aproximadamente el 80% de los países del mundo suman el 75% del puntaje acumulado de felicidad?
# Frequency table of the happiness score with cumulative percentages.
library(questionr)
library(magrittr)
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
NoO <- freq(Ifer$Puntuacin, cum = TRUE) %>%
  data.frame()
# Move the row names (the distinct scores) into a regular column.
NoO <- data.frame(variable = row.names(NoO), NoO, row.names = NULL)
# Inspect NoO
NoO
## variable n X. val. X.cum val.cum
## 1 2.905 1 0.6 0.6 0.6 0.6
## 2 3.083 1 0.6 0.6 1.3 1.3
## 3 3.254 1 0.6 0.6 1.9 1.9
## 4 3.303 1 0.6 0.6 2.6 2.6
## 5 3.355 1 0.6 0.6 3.2 3.2
## 6 3.408 1 0.6 0.6 3.8 3.8
## 7 3.462 1 0.6 0.6 4.5 4.5
## 8 3.495 1 0.6 0.6 5.1 5.1
## 9 3.582 1 0.6 0.6 5.8 5.8
## 10 3.587 1 0.6 0.6 6.4 6.4
## 11 3.59 1 0.6 0.6 7.1 7.1
## 12 3.632 1 0.6 0.6 7.7 7.7
## 13 3.692 1 0.6 0.6 8.3 8.3
## 14 3.774 1 0.6 0.6 9.0 9.0
## 15 3.795 1 0.6 0.6 9.6 9.6
## 16 3.808 1 0.6 0.6 10.3 10.3
## 17 3.964 1 0.6 0.6 10.9 10.9
## 18 3.999 1 0.6 0.6 11.5 11.5
## 19 4.103 1 0.6 0.6 12.2 12.2
## 20 4.139 1 0.6 0.6 12.8 12.8
## 21 4.141 1 0.6 0.6 13.5 13.5
## 22 4.161 1 0.6 0.6 14.1 14.1
## 23 4.166 1 0.6 0.6 14.7 14.7
## 24 4.19 1 0.6 0.6 15.4 15.4
## 25 4.245 1 0.6 0.6 16.0 16.0
## 26 4.301 1 0.6 0.6 16.7 16.7
## 27 4.308 1 0.6 0.6 17.3 17.3
## 28 4.321 1 0.6 0.6 17.9 17.9
## 29 4.34 1 0.6 0.6 18.6 18.6
## 30 4.35 1 0.6 0.6 19.2 19.2
## 31 4.356 1 0.6 0.6 19.9 19.9
## 32 4.377 1 0.6 0.6 20.5 20.5
## 33 4.41 1 0.6 0.6 21.2 21.2
## 34 4.417 1 0.6 0.6 21.8 21.8
## 35 4.419 1 0.6 0.6 22.4 22.4
## 36 4.424 1 0.6 0.6 23.1 23.1
## 37 4.433 1 0.6 0.6 23.7 23.7
## 38 4.441 1 0.6 0.6 24.4 24.4
## 39 4.447 1 0.6 0.6 25.0 25.0
## 40 4.456 1 0.6 0.6 25.6 25.6
## 41 4.471 1 0.6 0.6 26.3 26.3
## 42 4.5 1 0.6 0.6 26.9 26.9
## 43 4.559 1 0.6 0.6 27.6 27.6
## 44 4.571 1 0.6 0.6 28.2 28.2
## 45 4.586 1 0.6 0.6 28.8 28.8
## 46 4.592 1 0.6 0.6 29.5 29.5
## 47 4.623 1 0.6 0.6 30.1 30.1
## 48 4.631 1 0.6 0.6 30.8 30.8
## 49 4.657 1 0.6 0.6 31.4 31.4
## 50 4.671 1 0.6 0.6 32.1 32.1
## 51 4.707 1 0.6 0.6 32.7 32.7
## 52 4.724 1 0.6 0.6 33.3 33.3
## 53 4.743 1 0.6 0.6 34.0 34.0
## 54 4.758 1 0.6 0.6 34.6 34.6
## 55 4.806 1 0.6 0.6 35.3 35.3
## 56 4.88 1 0.6 0.6 35.9 35.9
## 57 4.933 1 0.6 0.6 36.5 36.5
## 58 4.975 1 0.6 0.6 37.2 37.2
## 59 4.982 1 0.6 0.6 37.8 37.8
## 60 5.082 1 0.6 0.6 38.5 38.5
## 61 5.093 1 0.6 0.6 39.1 39.1
## 62 5.103 1 0.6 0.6 39.7 39.7
## 63 5.125 1 0.6 0.6 40.4 40.4
## 64 5.129 1 0.6 0.6 41.0 41.0
## 65 5.131 1 0.6 0.6 41.7 41.7
## 66 5.155 1 0.6 0.6 42.3 42.3
## 67 5.161 1 0.6 0.6 42.9 42.9
## 68 5.185 1 0.6 0.6 43.6 43.6
## 69 5.199 1 0.6 0.6 44.2 44.2
## 70 5.201 1 0.6 0.6 44.9 44.9
## 71 5.246 1 0.6 0.6 45.5 45.5
## 72 5.254 1 0.6 0.6 46.2 46.2
## 73 5.295 1 0.6 0.6 46.8 46.8
## 74 5.302 1 0.6 0.6 47.4 47.4
## 75 5.321 1 0.6 0.6 48.1 48.1
## 76 5.347 1 0.6 0.6 48.7 48.7
## 77 5.358 2 1.3 1.3 50.0 50.0
## 78 5.398 1 0.6 0.6 50.6 50.6
## 79 5.41 1 0.6 0.6 51.3 51.3
## 80 5.43 1 0.6 0.6 51.9 51.9
## 81 5.472 1 0.6 0.6 52.6 52.6
## 82 5.483 2 1.3 1.3 53.8 53.8
## 83 5.504 1 0.6 0.6 54.5 54.5
## 84 5.524 1 0.6 0.6 55.1 55.1
## 85 5.566 1 0.6 0.6 55.8 55.8
## 86 5.62 1 0.6 0.6 56.4 56.4
## 87 5.636 1 0.6 0.6 57.1 57.1
## 88 5.64 1 0.6 0.6 57.7 57.7
## 89 5.662 1 0.6 0.6 58.3 58.3
## 90 5.663 1 0.6 0.6 59.0 59.0
## 91 5.681 1 0.6 0.6 59.6 59.6
## 92 5.739 1 0.6 0.6 60.3 60.3
## 93 5.752 1 0.6 0.6 60.9 60.9
## 94 5.762 1 0.6 0.6 61.5 61.5
## 95 5.79 1 0.6 0.6 62.2 62.2
## 96 5.81 1 0.6 0.6 62.8 62.8
## 97 5.835 1 0.6 0.6 63.5 63.5
## 98 5.875 1 0.6 0.6 64.1 64.1
## 99 5.89 1 0.6 0.6 64.7 64.7
## 100 5.891 1 0.6 0.6 65.4 65.4
## 101 5.915 1 0.6 0.6 66.0 66.0
## 102 5.933 1 0.6 0.6 66.7 66.7
## 103 5.945 1 0.6 0.6 67.3 67.3
## 104 5.948 1 0.6 0.6 67.9 67.9
## 105 5.952 1 0.6 0.6 68.6 68.6
## 106 5.956 1 0.6 0.6 69.2 69.2
## 107 5.973 1 0.6 0.6 69.9 69.9
## 108 6 1 0.6 0.6 70.5 70.5
## 109 6.072 1 0.6 0.6 71.2 71.2
## 110 6.083 1 0.6 0.6 71.8 71.8
## 111 6.096 1 0.6 0.6 72.4 72.4
## 112 6.105 1 0.6 0.6 73.1 73.1
## 113 6.123 1 0.6 0.6 73.7 73.7
## 114 6.141 1 0.6 0.6 74.4 74.4
## 115 6.167 1 0.6 0.6 75.0 75.0
## 116 6.173 1 0.6 0.6 75.6 75.6
## 117 6.192 1 0.6 0.6 76.3 76.3
## 118 6.26 1 0.6 0.6 76.9 76.9
## 119 6.31 1 0.6 0.6 77.6 77.6
## 120 6.322 1 0.6 0.6 78.2 78.2
## 121 6.343 1 0.6 0.6 78.8 78.8
## 122 6.371 1 0.6 0.6 79.5 79.5
## 123 6.374 1 0.6 0.6 80.1 80.1
## 124 6.379 1 0.6 0.6 80.8 80.8
## 125 6.382 1 0.6 0.6 81.4 81.4
## 126 6.388 1 0.6 0.6 82.1 82.1
## 127 6.419 1 0.6 0.6 82.7 82.7
## 128 6.43 1 0.6 0.6 83.3 83.3
## 129 6.441 1 0.6 0.6 84.0 84.0
## 130 6.476 1 0.6 0.6 84.6 84.6
## 131 6.488 1 0.6 0.6 85.3 85.3
## 132 6.489 1 0.6 0.6 85.9 85.9
## 133 6.627 1 0.6 0.6 86.5 86.5
## 134 6.711 1 0.6 0.6 87.2 87.2
## 135 6.774 1 0.6 0.6 87.8 87.8
## 136 6.814 1 0.6 0.6 88.5 88.5
## 137 6.886 1 0.6 0.6 89.1 89.1
## 138 6.91 1 0.6 0.6 89.7 89.7
## 139 6.927 1 0.6 0.6 90.4 90.4
## 140 6.965 1 0.6 0.6 91.0 91.0
## 141 6.977 1 0.6 0.6 91.7 91.7
## 142 7.072 1 0.6 0.6 92.3 92.3
## 143 7.139 1 0.6 0.6 92.9 92.9
## 144 7.19 1 0.6 0.6 93.6 93.6
## 145 7.272 1 0.6 0.6 94.2 94.2
## 146 7.314 1 0.6 0.6 94.9 94.9
## 147 7.324 1 0.6 0.6 95.5 95.5
## 148 7.328 1 0.6 0.6 96.2 96.2
## 149 7.441 1 0.6 0.6 96.8 96.8
## 150 7.487 1 0.6 0.6 97.4 97.4
## 151 7.495 1 0.6 0.6 98.1 98.1
## 152 7.555 1 0.6 0.6 98.7 98.7
## 153 7.594 1 0.6 0.6 99.4 99.4
## 154 7.633 1 0.6 0.6 100.0 100.0
¿Hay atípicos en el puntaje de felicidad? Los atípicos se ven claramente en un boxplot:
# Horizontal boxplot of the happiness score, used to look for outliers.
base7 <- ggplot(data = Ifer, mapping = aes(y = Puntuacin))
box2 <- base7 +
  geom_boxplot() +
  coord_flip()
box2
No se aprecia ningún atípico