library(openxlsx)
library(rmarkdown)
library(tidyverse)
library(haven)
library(foreign)
library(survey)
library(knitr)
# Ruta hacia la carpeta donde almacenaremos los archivos Excel que se elaborarán
# posteriormente, los cuales contendrán los datasets con la información construida.
# Output folder. In the visible script it is only referenced by the
# commented-out save()/load() calls at the end.
ruta <- "C:/Users/Trabajo/Desktop/RDATA"

# Interactive file picker (Windows-only utils function). The selection is not
# stored, so the call does not influence anything below.
choose.files()
## character(0)

# ENAHO 2022 modules, read from SPSS files.
sumaria2022 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SUMARIA - 759-Modulo34\\Sumaria-2022.sav")
empleo2022 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EMPLEO - 759-Modulo05\\Enaho01a-2022-500.sav")
educacion2022 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EDUACIÓN - 759-Modulo03\\Enaho01A-2022-300.sav")
salud2022 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SALUD - 759-Modulo04\\Enaho01A-2022-400.sav")

# Keep only the join keys plus the variables used later on.
educacion2022 <- subset(
  educacion2022,
  select = c(CODPERSO, CONGLOME, VIVIENDA, HOGAR, P300A)
)
salud2022 <- subset(
  salud2022,
  select = c(
    CODPERSO, CONGLOME, VIVIENDA, HOGAR,
    P401H1, P401H2, P401H3, P401H4, P401H5, P401H6
  )
)
sumaria2022 <- subset(sumaria2022, select = c(CONGLOME, VIVIENDA, HOGAR, POBREZA))

# The employment module is the base table. Education and health are joined on
# the four person-level keys; sumaria is joined on the three household-level
# keys (it carries no CODPERSO after the subset above).
enaho0 <- left_join(empleo2022, educacion2022, by = c("CODPERSO", "CONGLOME", "VIVIENDA", "HOGAR"))
enaho1 <- left_join(enaho0, salud2022, by = c("CODPERSO", "CONGLOME", "VIVIENDA", "HOGAR"))
enaho <- left_join(enaho1, sumaria2022, by = c("CONGLOME", "VIVIENDA", "HOGAR"))

# Text label for P207 (1 -> "hombre", 2 -> "Mujer", anything else -> NA).
enaho <- enaho %>%
  mutate(Sexo = ifelse(P207 == 1, "hombre", ifelse(P207 == 2, "Mujer", NA)))

# Numeric copy of UBIGEO, used for the range comparisons that build regiones2.
enaho$ubigeonum <- as.numeric(enaho$UBIGEO)
# Department label derived from the numeric UBIGEO code. Each department is an
# inclusive range of codes; the province of Lima (1501xx) is split off as
# "Lima Metropolitana" and the rest of department 15 is "Lima Region".
# The literals keep their leading zero so they read like UBIGEO codes; R parses
# them as ordinary decimal numbers (010101 is 10101).
# Codes outside every range, and missing codes, yield NA.
enaho <- enaho %>%
  mutate(
    regiones2 = case_when(
      between(ubigeonum, 010101, 010707) ~ "Amazonas",
      between(ubigeonum, 020101, 022008) ~ "Ancash",
      between(ubigeonum, 030101, 030714) ~ "Apurimac",
      between(ubigeonum, 040101, 040811) ~ "Arequipa",
      between(ubigeonum, 050101, 051108) ~ "Ayacucho",
      between(ubigeonum, 060101, 061311) ~ "Cajamarca",
      between(ubigeonum, 070101, 070107) ~ "Callao",
      between(ubigeonum, 080101, 081307) ~ "Cusco",
      between(ubigeonum, 090101, 090723) ~ "Huancavelica",
      between(ubigeonum, 100101, 101108) ~ "Huanuco",
      between(ubigeonum, 110101, 110508) ~ "Ica",
      between(ubigeonum, 120101, 120909) ~ "Junin",
      between(ubigeonum, 130101, 131203) ~ "La Libertad",
      between(ubigeonum, 140101, 140312) ~ "Lambayeque",
      between(ubigeonum, 150101, 150143) ~ "Lima Metropolitana",
      between(ubigeonum, 150201, 151033) ~ "Lima Region",
      between(ubigeonum, 160101, 160804) ~ "Loreto",
      between(ubigeonum, 170101, 170303) ~ "Madre de Dios",
      between(ubigeonum, 180101, 180303) ~ "Moquegua",
      between(ubigeonum, 190101, 190308) ~ "Pasco",
      between(ubigeonum, 200101, 200806) ~ "Piura",
      between(ubigeonum, 210101, 211307) ~ "Puno",
      between(ubigeonum, 220101, 221005) ~ "San Martín",
      between(ubigeonum, 230101, 230408) ~ "Tacna",
      between(ubigeonum, 240101, 240304) ~ "Tumbes",
      between(ubigeonum, 250101, 250401) ~ "Ucayali",
      TRUE ~ NA_character_
    )
  )
# Frequency check, including the NA count.
table(enaho$regiones2, useNA = "always")
## Amazonas Ancash Apurimac Arequipa
## 3126 3578 2313 3942
## Ayacucho Cajamarca Callao Cusco
## 2593 3527 2804 3069
## Huancavelica Huanuco Ica Junin
## 2584 3043 3812 3660
## La Libertad Lambayeque Lima Metropolitana Lima Region
## 4065 3991 8548 3678
## Loreto Madre de Dios Moquegua Pasco
## 4108 1437 2291 2131
## Piura Puno San Martín Tacna
## 4447 2667 3569 3221
## Tumbes Ucayali <NA>
## 2283 3174 0
# Natural region from DOMINIO: 1-3 and 8 -> "Costa", 4-6 -> "Sierra",
# 7 -> "Selva"; any other or missing value -> NA.
enaho <- enaho %>%
  mutate(
    regnat = case_when(
      (DOMINIO >= 1 & DOMINIO <= 3) | DOMINIO == 8 ~ "Costa",
      DOMINIO >= 4 & DOMINIO <= 6 ~ "Sierra",
      DOMINIO == 7 ~ "Selva",
      TRUE ~ NA_character_
    )
  )
# Frequency check, including the NA count.
table(enaho$regnat, useNA = "always")
## Costa Selva Sierra <NA>
## 38829 18847 29985 0
# Urban/rural area.
#   "Urbano": DOMINIO 8, or DOMINIO 1-7 combined with ESTRATO 1-5.
#   "Rural":  DOMINIO 1-7 combined with ESTRATO 6-8.
#   NA otherwise.
# `&` binds tighter than `|`, so the DOMINIO == 8 test stands alone; the
# parentheses below only make that grouping explicit.
enaho <- enaho %>%
  mutate(
    area = case_when(
      DOMINIO == 8 |
        ((DOMINIO >= 1 & DOMINIO <= 7) & (ESTRATO >= 1 & ESTRATO <= 5)) ~ "Urbano",
      (DOMINIO >= 1 & DOMINIO <= 7) & (ESTRATO >= 6 & ESTRATO <= 8) ~ "Rural",
      TRUE ~ NA_character_
    )
  )
# Frequency check, including the NA count.
table(enaho$area, useNA = "always")
## Rural Urbano <NA>
## 28845 58816 0
# Text label for the POBREZA code (1, 2, 3); any other value becomes NA.
enaho <- enaho %>%
  mutate(
    pobreza3 = case_when(
      POBREZA == 1 ~ "Pobre extremo",
      POBREZA == 2 ~ "Pobre no extremo",
      POBREZA == 3 ~ "No pobre",
      TRUE ~ NA_character_
    )
  )
# Frequency check, including the NA count.
table(enaho$pobreza3, useNA = "always")
## No pobre Pobre extremo Pobre no extremo <NA>
## 67956 4159 15546 0
# Language group from P300A: code 4 -> "Castellano", codes 1-3 -> "Originaria",
# everything else (including missing) -> NA. Stored as a factor.
enaho <- enaho %>%
  mutate(
    lengua = as.factor(case_when(
      P300A == 4 ~ "Castellano",
      P300A == 1 | P300A == 2 | P300A == 3 ~ "Originaria",
      TRUE ~ NA_character_
    ))
  )
# Frequency check, including the NA count.
table(enaho$lengua, useNA = "always")
## Castellano Originaria <NA>
## 68370 17789 1502
# Indicator: 1 when any of P401H1..P401H6 equals 1, 0 when none does.
# The result is NA when no item equals 1 and at least one item is missing,
# because the `|` chain then evaluates to NA.
enaho <- enaho %>%
  mutate(
    discapacidad = if_else(
      P401H1 == 1 | P401H2 == 1 | P401H3 == 1 |
        P401H4 == 1 | P401H5 == 1 | P401H6 == 1,
      1,
      0
    )
  )
# Frequency check, including the NA count.
table(enaho$discapacidad, useNA = "always")
## 0 1 <NA>
## 82328 5266 67
# Ethnic self-identification label from P558C, stored as a factor.
# Codes 3 and 9 are both mapped to the same label; codes outside 1-9 and
# missing values become NA.
enaho <- enaho %>%
  mutate(
    defiet2 = as.factor(case_when(
      P558C == 1 ~ "Quechua",
      P558C == 2 ~ "Aimara",
      P558C == 3 | P558C == 9 ~ "Nativo o indigena de la Amazonia",
      P558C == 4 ~ "Negro/Moreno/Zambo/Mulato/Pueblo Afro peruano o Afrodescendiente",
      P558C == 5 ~ "Blanco",
      P558C == 6 ~ "Mestizo",
      P558C == 7 ~ "otro",
      P558C == 8 ~ "No sabe/No responde",
      TRUE ~ NA_character_
    ))
  )
# Frequency check, including the NA count.
table(enaho$defiet2, useNA = "always")
## Aimara
## 2883
## Blanco
## 3701
## Mestizo
## 44600
## Nativo o indigena de la Amazonia
## 2000
## Negro/Moreno/Zambo/Mulato/Pueblo Afro peruano o Afrodescendiente
## 6047
## No sabe/No responde
## 3517
## otro
## 3626
## Quechua
## 21160
## <NA>
## 127
# Residency flag, stored as a factor: "Residente" when
# (P204 == 1 and P205 == 2) or (P204 == 2 and P206 == 1), else "No residente".
enaho <- enaho %>%
  mutate(
    res = as.factor(if_else(
      (P204 == 1 & P205 == 2) | (P204 == 2 & P206 == 1),
      "Residente",
      "No residente"
    ))
  )
# Frequency check, including the NA count.
table(enaho$res, useNA = "always")
## No residente Residente <NA>
## 1390 86271 0
# Youth flag: "Joven" for P208A between 15 and 29 inclusive, NA otherwise, so
# the factor has a single level.
enaho <- enaho %>%
  mutate(Juventud = ifelse(P208A >= 15 & P208A <= 29, "Joven", NA))
enaho$Juventud <- as.factor(enaho$Juventud)

# Flag for P208A >= 15.
# NOTE(review): the fallback is the *string* "NA", not a missing value, so the
# factor gets a literal "NA" level. This looks unintended (Juventud above uses
# a real NA) but is kept as-is because the variable may be consumed elsewhere.
enaho <- enaho %>%
  mutate(POBTOTAL = ifelse(P208A >= 15, "POB TOTAL", "NA"))
enaho$POBTOTAL <- as.factor(enaho$POBTOTAL)

# Total labour income: row-wise sum of the nine income components, ignoring
# missing components (a row with all components missing sums to 0).
enaho <- enaho %>%
  mutate(
    ingtrabw = rowSums(
      select(enaho, I524A1, D529T, I530A, D536, I538A1, D540T, I541A, D543, D544T),
      na.rm = TRUE
    )
  )

# Income divided by 12 (presumably annual -> monthly; confirm against the
# ENAHO documentation).
enaho <- enaho %>%
  mutate(ingtra_n = ingtrabw / 12)

# Analysis sample: OCU500 == 1 with positive income, then residents only.
enaho_filtrado <- enaho %>% filter(OCU500 == 1 & ingtra_n > 0)
enaho_filtrado2 <- enaho_filtrado %>% filter(res == "Residente")

# Sampling design used to weight the estimates.
# NOTE(review): the rows are filtered before the design is built. The survey
# package generally recommends building the design on the full sample and
# using subset() on the design, since pre-filtering can change the standard
# errors. Confirm whether that matters for this indicator.
encuesta <- svydesign(
  data = enaho_filtrado2, id = ~CONGLOME, strata = ~ESTRATO,
  weights = ~FAC500A
)
# Writes every element of `datos` to its own worksheet of a new Excel workbook
# and saves the workbook.
#
# Arguments:
#   nombre_archivo  Path of the .xlsx file to write.
#   datos           List of objects accepted by writeData(); element i goes to
#                   the sheet named "Datos<i - 1>" (numbering starts at
#                   "Datos0").
#   sobrescribir    Forwarded to saveWorkbook(overwrite = ). The default FALSE
#                   keeps the previous behaviour; pass TRUE to allow replacing
#                   an existing file when the script is re-run.
#
# Returns the value of saveWorkbook().
generar_archivo_excel2 <- function(nombre_archivo, datos, sobrescribir = FALSE) {
  workbook <- createWorkbook()
  for (i in seq_along(datos)) {
    hoja <- paste0("Datos", i - 1)
    addWorksheet(workbook, sheetName = hoja)
    writeData(workbook, sheet = hoja, x = datos[[i]], colNames = TRUE)
  }
  saveWorkbook(workbook, nombre_archivo, overwrite = sobrescribir)
}

# Weighted mean of ingtra_n by Juventud (no further breakdown).
tabla <- svyby(~ingtra_n, ~Juventud, encuesta, svymean, deff = FALSE, na.rm = TRUE)
# Overall table: mean, coefficient of variation and confidence interval.
# The interval and the CV are computed before the `se` column is dropped from
# the svyby result.
ic <- confint(tabla)
# Kept as a one-column data frame (not a bare vector named `cv`) so that
# bind_cols() has a column name to work with and cv() is not shadowed.
coef_var <- as.data.frame(cv(tabla))
# Drop exactly the standard-error column; a "contains" matcher would also
# remove any grouping column whose name happens to include "se".
tabla <- tabla %>% select(-se)
datos <- bind_cols(tabla, coef_var, ic)
# The second column holds the mean of ingtra_n, so it is labelled like the
# other tables instead of "%".
names(datos) <- c("Población Joven", "Promedio Mensual", "Coef. Var", "Int.Inf", "Int.Sup")
kable(datos, format = "markdown")
# Output recorded from the original run, with the header showing the corrected
# label. The first, unlabelled cell of the data row appears to be the row name.
## | Población Joven | Promedio Mensual | Coef. Var | Int.Inf | Int.Sup | |
## |---|---|---|---|---|---|
## | Joven | Joven | 1235.955 | 0.0119544 | 1206.996 | 1264.914 |
# Builds one breakdown table: weighted mean of ingtra_n by Juventud and one
# additional grouping variable, with coefficient of variation and confidence
# interval.
#
# Arguments:
#   diseno    Survey design object passed to svyby().
#   variable  Name (string) of the grouping column combined with Juventud.
#   etiqueta  Column header to use for that grouping column.
#
# Returns a data frame with six columns: "Población Joven", `etiqueta`,
# "Promedio Mensual", "Coef. Var", "Int.Inf", "Int.Sup".
resumir_ingreso_joven <- function(diseno, variable, etiqueta) {
  estimacion <- svyby(
    ~ingtra_n, reformulate(c("Juventud", variable)), diseno, svymean,
    deff = FALSE, na.rm = TRUE
  )
  # Interval and CV are taken from the svyby result before its `se` column is
  # removed.
  intervalos <- confint(estimacion)
  coef_var <- as.data.frame(cv(estimacion))
  # Drop exactly `se`. The previous `!contains("se")` matcher is
  # case-insensitive and would also drop a grouping column such as "Sexo".
  resultado <- bind_cols(select(estimacion, -se), coef_var, intervalos)
  names(resultado) <- c(
    "Población Joven", etiqueta, "Promedio Mensual",
    "Coef. Var", "Int.Inf", "Int.Sup"
  )
  resultado
}

# The commented tables below are the outputs recorded from the original run.
# The first, unlabelled cell of each data row appears to be the row name.
# Headers of datos3 and datos8 show the corrected labels.

# By sex (P207 code).
datos1 <- resumir_ingreso_joven(encuesta, "P207", "Sexo")
kable(datos1, format = "markdown")
## | Población Joven | Sexo | Promedio Mensual | Coef. Var | Int.Inf | Int.Sup | |
## |---|---|---|---|---|---|---|
## | Joven.1 | Joven | 1 | 1352.676 | 0.0143467 | 1314.640 | 1390.711 |
## | Joven.2 | Joven | 2 | 1062.029 | 0.0197609 | 1020.896 | 1103.162 |

# By urban/rural area.
datos2 <- resumir_ingreso_joven(encuesta, "area", "Área")
kable(datos2, format = "markdown")
## | Población Joven | Área | Promedio Mensual | Coef. Var | Int.Inf | Int.Sup | |
## |---|---|---|---|---|---|---|
## | Joven.Rural | Joven | Rural | 883.5046 | 0.0273786 | 836.0948 | 930.9145 |
## | Joven.Urbano | Joven | Urbano | 1309.8735 | 0.0130009 | 1276.4962 | 1343.2509 |

# By natural region (previously mislabelled "Área").
datos3 <- resumir_ingreso_joven(encuesta, "regnat", "Región natural")
kable(datos3, format = "markdown")
## | Población Joven | Región natural | Promedio Mensual | Coef. Var | Int.Inf | Int.Sup | |
## |---|---|---|---|---|---|---|
## | Joven.Costa | Joven | Costa | 1373.566 | 0.0156252 | 1331.5012 | 1415.631 |
## | Joven.Selva | Joven | Selva | 1037.878 | 0.0263198 | 984.3378 | 1091.417 |
## | Joven.Sierra | Joven | Sierra | 1028.463 | 0.0212095 | 985.7102 | 1071.216 |

# By department.
datos4 <- resumir_ingreso_joven(encuesta, "regiones2", "Departamentos")
kable(datos4, format = "markdown")
## | Población Joven | Departamentos | Promedio Mensual | Coef. Var | Int.Inf | Int.Sup | |
## |---|---|---|---|---|---|---|
## | Joven.Amazonas | Joven | Amazonas | 914.4555 | 0.0579629 | 810.5685 | 1018.3425 |
## | Joven.Ancash | Joven | Ancash | 1019.2420 | 0.0556999 | 907.9716 | 1130.5124 |
## | Joven.Apurimac | Joven | Apurimac | 1179.4922 | 0.0889893 | 973.7701 | 1385.2144 |
## | Joven.Arequipa | Joven | Arequipa | 1345.2996 | 0.0371847 | 1247.2532 | 1443.3461 |
## | Joven.Ayacucho | Joven | Ayacucho | 852.4935 | 0.0486932 | 771.1341 | 933.8529 |
## | Joven.Cajamarca | Joven | Cajamarca | 865.0101 | 0.0698523 | 746.5833 | 983.4369 |
## | Joven.Callao | Joven | Callao | 1426.4816 | 0.0494875 | 1288.1219 | 1564.8413 |
## | Joven.Cusco | Joven | Cusco | 1020.6092 | 0.0600792 | 900.4295 | 1140.7890 |
## | Joven.Huancavelica | Joven | Huancavelica | 921.5418 | 0.0719645 | 791.5603 | 1051.5234 |
## | Joven.Huanuco | Joven | Huanuco | 858.8851 | 0.0845274 | 716.5931 | 1001.1772 |
## | Joven.Ica | Joven | Ica | 1550.4441 | 0.0290218 | 1462.2523 | 1638.6360 |
## | Joven.Junin | Joven | Junin | 1151.9122 | 0.0421123 | 1056.8350 | 1246.9894 |
## | Joven.La Libertad | Joven | La Libertad | 1317.5007 | 0.0493638 | 1190.0309 | 1444.9704 |
## | Joven.Lambayeque | Joven | Lambayeque | 1175.1252 | 0.0364441 | 1091.1870 | 1259.0634 |
## | Joven.Lima Metropolitana | Joven | Lima Metropolitana | 1439.6380 | 0.0250200 | 1369.0408 | 1510.2353 |
## | Joven.Lima Region | Joven | Lima Region | 1169.0371 | 0.0427607 | 1071.0608 | 1267.0134 |
## | Joven.Loreto | Joven | Loreto | 1099.4599 | 0.0678588 | 953.2308 | 1245.6890 |
## | Joven.Madre de Dios | Joven | Madre de Dios | 1581.3324 | 0.0583516 | 1400.4802 | 1762.1846 |
## | Joven.Moquegua | Joven | Moquegua | 1379.4675 | 0.0651938 | 1203.2026 | 1555.7325 |
## | Joven.Pasco | Joven | Pasco | 1148.0089 | 0.0665543 | 998.2580 | 1297.7599 |
## | Joven.Piura | Joven | Piura | 1266.6127 | 0.0399346 | 1167.4745 | 1365.7509 |
## | Joven.Puno | Joven | Puno | 938.6172 | 0.0639429 | 820.9842 | 1056.2501 |
## | Joven.San Martín | Joven | San Martín | 946.7961 | 0.0408386 | 871.0125 | 1022.5798 |
## | Joven.Tacna | Joven | Tacna | 1161.1668 | 0.0546692 | 1036.7482 | 1285.5854 |
## | Joven.Tumbes | Joven | Tumbes | 1177.2222 | 0.0541385 | 1052.3077 | 1302.1366 |
## | Joven.Ucayali | Joven | Ucayali | 1118.5491 | 0.0345682 | 1042.7646 | 1194.3336 |

# By poverty status.
datos5 <- resumir_ingreso_joven(encuesta, "pobreza3", "Condición de pobreza")
kable(datos5, format = "markdown")
## | Población Joven | Condición de pobreza | Promedio Mensual | Coef. Var | Int.Inf | Int.Sup | |
## |---|---|---|---|---|---|---|
## | Joven.No pobre | Joven | No pobre | 1337.0935 | 0.0131203 | 1302.7098 | 1371.4771 |
## | Joven.Pobre extremo | Joven | Pobre extremo | 623.8269 | 0.0661097 | 542.9961 | 704.6578 |
## | Joven.Pobre no extremo | Joven | Pobre no extremo | 917.7585 | 0.0241534 | 874.3119 | 961.2050 |

# By disability indicator.
datos6 <- resumir_ingreso_joven(encuesta, "discapacidad", "Discapacidad")
kable(datos6, format = "markdown")
## | Población Joven | Discapacidad | Promedio Mensual | Coef. Var | Int.Inf | Int.Sup | |
## |---|---|---|---|---|---|---|
## | Joven.0 | Joven | 0 | 1241.0316 | 0.0120007 | 1211.8415 | 1270.2218 |
## | Joven.1 | Joven | 1 | 746.3196 | 0.0990564 | 601.4239 | 891.2154 |

# By ethnic self-identification.
datos7 <- resumir_ingreso_joven(encuesta, "defiet2", "Etnicidad")
kable(datos7, format = "markdown")
## | Población Joven | Etnicidad | Promedio Mensual | Coef. Var | Int.Inf | Int.Sup | |
## |---|---|---|---|---|---|---|
## | Joven.Aimara | Joven | Aimara | 1062.2373 | 0.0631598 | 930.7419 | 1193.733 |
## | Joven.Blanco | Joven | Blanco | 1326.0614 | 0.0723060 | 1138.1356 | 1513.987 |
## | Joven.Mestizo | Joven | Mestizo | 1301.0205 | 0.0140420 | 1265.2141 | 1336.827 |
## | Joven.Nativo o indigena de la Amazonia | Joven | Nativo o indigena de la Amazonia | 920.0449 | 0.0747934 | 785.1733 | 1054.916 |
## | Joven.Negro/Moreno/Zambo/Mulato/Pueblo Afro peruano o Afrodescendiente | Joven | Negro/Moreno/Zambo/Mulato/Pueblo Afro peruano o Afrodescendiente | 1149.8836 | 0.0368795 | 1066.7672 | 1233.000 |
## | Joven.No sabe/No responde | Joven | No sabe/No responde | 1216.8124 | 0.0468467 | 1105.0874 | 1328.537 |
## | Joven.otro | Joven | otro | 1285.6051 | 0.0528326 | 1152.4806 | 1418.730 |
## | Joven.Quechua | Joven | Quechua | 1102.8780 | 0.0340571 | 1029.2601 | 1176.496 |

# By language group (previously mislabelled "Etnicidad").
datos8 <- resumir_ingreso_joven(encuesta, "lengua", "Lengua materna")
kable(datos8, format = "markdown")
## | Población Joven | Lengua materna | Promedio Mensual | Coef. Var | Int.Inf | Int.Sup | |
## |---|---|---|---|---|---|---|
## | Joven.Castellano | Joven | Castellano | 1268.7932 | 0.0126064 | 1237.4438 | 1300.143 |
## | Joven.Originaria | Joven | Originaria | 954.4219 | 0.0299115 | 898.4685 | 1010.375 |

# Export: one worksheet per table, in the order datos, datos1, ..., datos8.
generar_archivo_excel2(
  "DINDES-20-EMP-04-INDA.xlsx",
  list(datos, datos1, datos2, datos3, datos4, datos5, datos6, datos7, datos8)
)
#save(enaho,file=paste(ruta,"BASEDEDATOSIndicador1ENDES.RData",sep = "/"))
#BORRAMOS TODO MENOS "RUTA"
#rm(list=setdiff(ls(), c("ruta")))
#VOLVEMOS A CARGAR NUESTRA BD LIMPIA
#load(paste(ruta,"BASEDEDATOSIndicador1ENDES.RData",sep="/"))