library(openxlsx)
library(rmarkdown)
library(tidyverse)
library(haven)
library(foreign)
library(survey)

# Path to the folder where the Excel files produced later in this script
# (holding the constructed datasets) are meant to be stored.
# (This sentence was plain text fused onto the `library(survey)` line, which
# made the file unparseable; it is now a comment.)
ruta <- "C:/Users/Trabajo/Desktop/RDATA" # choose.files()
# ---- Load the ENAHO 2022 modules ----------------------------------------
# Only the 2022 files are read. The calls for 2018-2021 are kept commented out
# for reference.
# NOTE(review): most of the commented-out calls read a file whose year does not
# match the variable name (e.g. `sumaria2018` reads Sumaria-2021.sav and
# `educacion2018` reads the 2022 file) - fix the paths before re-enabling them.

# Module 34 (Sumaria)
# sumaria2018 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO 2021\\SUMARIA - 759-Modulo34\\Sumaria-2021.sav")
# sumaria2019 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO 2021\\SUMARIA - 759-Modulo34\\Sumaria-2021.sav")
# sumaria2020 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO 2021\\SUMARIA - 759-Modulo34\\Sumaria-2021.sav")
# sumaria2021 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO 2021\\SUMARIA - 759-Modulo34\\Sumaria-2021.sav")
sumaria2022 <- haven::read_spss(
  file = "C:\\Users\\DINDES08\\Desktop\\ENAHO\\2022\\Sumaria - 784-Modulo34\\Sumaria-2022.sav"
)

# Module 05 (Empleo)
# empleo2018 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO 2021\\EMPLEO - 759-Modulo05\\Enaho01A-2021-500.sav")
# empleo2019 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO 2021\\EMPLEO - 759-Modulo05\\Enaho01A-2021-500.sav")
# empleo2020 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO 2021\\EMPLEO - 759-Modulo05\\Enaho01A-2021-500.sav")
# empleo2021 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO 2021\\EMPLEO - 759-Modulo05\\Enaho01A-2021-500.sav")
empleo2022 <- haven::read_spss(
  file = "C:\\Users\\DINDES08\\Desktop\\ENAHO\\2022\\Empleo 2022 - 784-Modulo05\\Enaho01a-2022-500.sav"
)

# Module 03 (Educacion)
# educacion2018 <- read_spss("C:\\Users\\DINDES08\\Desktop\\ENAHO\\2022\\Educacion - 2022 - 784-Modulo03\\Enaho01A-2022-300.sav")
# educacion2019 <- read_spss("C:\\Users\\DINDES08\\Desktop\\ENAHO\\2022\\Educacion - 2022 - 784-Modulo03\\Enaho01A-2022-300.sav")
# educacion2020 <- read_spss("C:\\Users\\DINDES08\\Desktop\\ENAHO\\2022\\Educacion - 2022 - 784-Modulo03\\Enaho01A-2022-300.sav")
# educacion2021 <- read_spss("C:\\Users\\DINDES08\\Desktop\\ENAHO\\2022\\Educacion - 2022 - 784-Modulo03\\Enaho01A-2022-300.sav")
educacion2022 <- haven::read_spss(
  file = "C:\\Users\\DINDES08\\Desktop\\ENAHO\\2022\\Educacion - 2022 - 784-Modulo03\\Enaho01A-2022-300.sav"
)

# Module 04 (Salud)
# salud2018 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO 2021\\SALUD - 759-Modulo04\\enaho01a-2018-400.dta")
# salud2019 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO 2021\\SALUD - 759-Modulo04\\enaho01a-2019-400.dta")
# salud2020 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO 2021\\SALUD - 759-Modulo04\\enaho01a-2020-400.dta")
# salud2021 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO 2021\\SALUD - 759-Modulo04\\enaho01a-2021-400.dta")
salud2022 <- haven::read_spss(
  file = "C:\\Users\\DINDES08\\Desktop\\ENAHO\\2022\\Salud - 784-Modulo04\\Enaho01A-2022-400.sav"
)
# ---- Trim the secondary modules and merge everything into `enaho` --------
# Keep only the join keys plus the variables used further below, so the joins
# do not create duplicated (.x / .y) columns.
educacion2022 <- subset(
  educacion2022,
  select = c(CODPERSO, CONGLOME, VIVIENDA, HOGAR, P300A)
)
salud2022 <- subset(
  salud2022,
  select = c(
    CODPERSO, CONGLOME, VIVIENDA, HOGAR,
    P401H1, P401H2, P401H3, P401H4, P401H5, P401H6
  )
)
sumaria2022 <- subset(sumaria2022, select = c(CONGLOME, VIVIENDA, HOGAR, POBREZA))

# The employment module is the base table. Education and health are matched on
# the person key (CODPERSO + household keys); Sumaria is matched on the
# household keys only.
enaho0 <- left_join(
  empleo2022, educacion2022,
  by = c("CODPERSO", "CONGLOME", "VIVIENDA", "HOGAR")
)
enaho1 <- left_join(
  enaho0, salud2022,
  by = c("CODPERSO", "CONGLOME", "VIVIENDA", "HOGAR")
)
enaho <- left_join(enaho1, sumaria2022, by = c("CONGLOME", "VIVIENDA", "HOGAR"))

# ---- Variables used for the later disaggregations ------------------------
# UBIGEO must be converted to numeric so the range comparisons in the region
# recode below work without problems.
# The original note called this column `ubigeo.x` (the ".x" suffix being a
# by-product of the joins). Because the joined modules were trimmed above,
# UBIGEO is not duplicated here and keeps its plain name.
enaho$ubigeonum <- as.numeric(enaho$UBIGEO)
# ---- Region (department) from the numeric UBIGEO code ---------------------
# Each region is an inclusive [first, last] range of `ubigeonum`. Lima is split
# in two: 150101-150143 is "Lima Metropolitana", 150201-151033 is "Lima Region".
# Codes outside every range (and missing codes) get NA.
# The literals keep their leading zero so they read as 6-digit UBIGEO codes;
# R parses them as ordinary decimal numbers (010101 is 10101).
# Written with case_when(), like the `defiet2` recode further below, instead of
# a 26-level nested ifelse().
enaho <- enaho %>%
  mutate(
    regiones2 = case_when(
      between(ubigeonum, 010101, 010707) ~ "Amazonas",
      between(ubigeonum, 020101, 022008) ~ "Ancash",
      between(ubigeonum, 030101, 030714) ~ "Apurimac",
      between(ubigeonum, 040101, 040811) ~ "Arequipa",
      between(ubigeonum, 050101, 051108) ~ "Ayacucho",
      between(ubigeonum, 060101, 061311) ~ "Cajamarca",
      between(ubigeonum, 070101, 070107) ~ "Callao",
      between(ubigeonum, 080101, 081307) ~ "Cusco",
      between(ubigeonum, 090101, 090723) ~ "Huancavelica",
      between(ubigeonum, 100101, 101108) ~ "Huanuco",
      between(ubigeonum, 110101, 110508) ~ "Ica",
      between(ubigeonum, 120101, 120909) ~ "Junin",
      between(ubigeonum, 130101, 131203) ~ "La Libertad",
      between(ubigeonum, 140101, 140312) ~ "Lambayeque",
      between(ubigeonum, 150101, 150143) ~ "Lima Metropolitana",
      between(ubigeonum, 150201, 151033) ~ "Lima Region",
      between(ubigeonum, 160101, 160804) ~ "Loreto",
      between(ubigeonum, 170101, 170303) ~ "Madre de Dios",
      between(ubigeonum, 180101, 180303) ~ "Moquegua",
      between(ubigeonum, 190101, 190308) ~ "Pasco",
      between(ubigeonum, 200101, 200806) ~ "Piura",
      between(ubigeonum, 210101, 211307) ~ "Puno",
      between(ubigeonum, 220101, 221005) ~ "San Martín",
      between(ubigeonum, 230101, 230408) ~ "Tacna",
      between(ubigeonum, 240101, 240304) ~ "Tumbes",
      between(ubigeonum, 250101, 250401) ~ "Ucayali"
    )
  )

# Check: every record should fall in a region (<NA> count of 0).
table(enaho$regiones2, useNA = "always")
# Output recorded from the original run:
##
## Amazonas Ancash Apurimac Arequipa
## 3126 3578 2313 3942
## Ayacucho Cajamarca Callao Cusco
## 2593 3527 2804 3069
## Huancavelica Huanuco Ica Junin
## 2584 3043 3812 3660
## La Libertad Lambayeque Lima Metropolitana Lima Region
## 4065 3991 8548 3678
## Loreto Madre de Dios Moquegua Pasco
## 4108 1437 2291 2131
## Piura Puno San Martín Tacna
## 4447 2667 3569 3221
## Tumbes Ucayali <NA>
## 2283 3174 0
# ---- Natural region (`regnat`) and urban/rural area (`area`) --------------
# regnat: DOMINIO 1-3 or 8 -> "Costa", 4-6 -> "Sierra", 7 -> "Selva",
# anything else -> NA.
enaho <- enaho %>%
  mutate(
    regnat = case_when(
      (DOMINIO >= 1 & DOMINIO <= 3) | DOMINIO == 8 ~ "Costa",
      DOMINIO >= 4 & DOMINIO <= 6 ~ "Sierra",
      DOMINIO == 7 ~ "Selva"
    )
  )
table(enaho$regnat, useNA = "alw")
# Output recorded from the original run:
##
## Costa Selva Sierra <NA>
## 38829 18847 29985 0

# area: DOMINIO 8 is always "Urbano". For DOMINIO 1-7, ESTRATO 1-5 is "Urbano"
# and ESTRATO 6-8 is "Rural". Any other combination -> NA.
enaho <- enaho %>%
  mutate(
    area = case_when(
      DOMINIO == 8 |
        (DOMINIO >= 1 & DOMINIO <= 7 & ESTRATO >= 1 & ESTRATO <= 5) ~ "Urbano",
      DOMINIO >= 1 & DOMINIO <= 7 & ESTRATO >= 6 & ESTRATO <= 8 ~ "Rural"
    )
  )
table(enaho$area, useNA = "alw")
# Output recorded from the original run:
##
## Rural Urbano <NA>
## 28845 58816 0
# ---- Poverty status, language and disability -------------------------------
# pobreza3: text label for the three POBREZA codes; other values -> NA.
enaho <- enaho %>%
  mutate(
    pobreza3 = case_when(
      POBREZA == 1 ~ "Pobre extremo",
      POBREZA == 2 ~ "Pobre no extremo",
      POBREZA == 3 ~ "No pobre"
    )
  )
table(enaho$pobreza3, useNA = "alw")
# Output recorded from the original run:
##
## No pobre Pobre extremo Pobre no extremo <NA>
## 67956 4159 15546 0

# lengua: P300A code 4 -> "Castellano", codes 1-3 -> "Originaria",
# anything else -> NA. Stored as a factor.
enaho <- enaho %>%
  mutate(
    lengua = factor(case_when(
      P300A == 4 ~ "Castellano",
      P300A == 1 | P300A == 2 | P300A == 3 ~ "Originaria"
    ))
  )
table(enaho$lengua, useNA = "alw")
# Output recorded from the original run:
##
## Castellano Originaria <NA>
## 68370 17789 1502

# discapacidad: 1 when any of P401H1..P401H6 equals 1, otherwise 0.
# It is NA when no item equals 1 and at least one item is missing.
enaho <- enaho %>%
  mutate(
    discapacidad = as.numeric(
      P401H1 == 1 | P401H2 == 1 | P401H3 == 1 |
        P401H4 == 1 | P401H5 == 1 | P401H6 == 1
    )
  )
table(enaho$discapacidad, useNA = "alw")
# Output recorded from the original run:
##
## 0 1 <NA>
## 82328 5266 67
# ---- Ethnic self-identification (`defiet2`) from P558C --------------------
# Raw distribution of P558C before recoding.
table(enaho$P558C, useNA = "alw")
# Output recorded from the original run:
##
## 1 2 3 4 5 6 7 8 9 <NA>
## 21160 2883 1883 6047 3701 44600 3626 3517 117 127

# Codes 3 and 9 share the label "Nativo o indigena de la Amazonia"; codes not
# listed (and missing values) become NA. The result is stored as a factor.
enaho <- enaho %>%
  mutate(
    defiet2 = factor(case_when(
      P558C == 1 ~ "Quechua",
      P558C == 2 ~ "Aimara",
      P558C == 3 | P558C == 9 ~ "Nativo o indigena de la Amazonia",
      P558C == 4 ~ "Negro/Moreno/Zambo/Mulato/Pueblo Afro peruano o Afrodescendiente",
      P558C == 5 ~ "Blanco",
      P558C == 6 ~ "Mestizo",
      P558C == 7 ~ "otro",
      P558C == 8 ~ "No sabe/No responde"
    ))
  )
table(enaho$defiet2, useNA = "alw")
# Output recorded from the original run:
##
## Aimara
## 2883
## Blanco
## 3701
## Mestizo
## 44600
## Nativo o indigena de la Amazonia
## 2000
## Negro/Moreno/Zambo/Mulato/Pueblo Afro peruano o Afrodescendiente
## 6047
## No sabe/No responde
## 3517
## otro
## 3626
## Quechua
## 21160
## <NA>
## 127
# ---- Residency, labour-force (PEA) and employment flags --------------------
# All three are two-level factors built in one mutate(); `pea` and `ocupado`
# use the `res` column created just before them in the same call.
#   res     : "Residente" when (P204 == 1 and P205 == 2) or
#             (P204 == 2 and P206 == 1); otherwise "No residente".
#   pea     : "PEA" for residents with P208A >= 14 and OCU500 equal to 1 or 2;
#             otherwise "NO PEA".
#   ocupado : "Ocupado" for residents with P208A between 14 and 98 and
#             OCU500 == 1; otherwise "No Ocupado".
enaho <- enaho %>%
  mutate(
    res = factor(if_else(
      (P204 == 1 & P205 == 2) | (P204 == 2 & P206 == 1),
      "Residente",
      "No residente"
    )),
    pea = factor(if_else(
      P208A >= 14 & res == "Residente" & (OCU500 == 1 | OCU500 == 2),
      "PEA",
      "NO PEA"
    )),
    ocupado = factor(if_else(
      P208A >= 14 & P208A <= 98 & res == "Residente" & OCU500 == 1,
      "Ocupado",
      "No Ocupado"
    ))
  )

table(enaho$res, useNA = "alw")
# Output recorded from the original run:
##
## No residente Residente <NA>
## 1390 86271 0
table(enaho$pea, useNA = "alw")
# Output recorded from the original run:
##
## NO PEA PEA <NA>
## 25294 62367 0
table(enaho$ocupado, useNA = "alw")
# Output recorded from the original run:
##
## No Ocupado Ocupado <NA>
## 27221 60440 0
# ---- Resident aged 14+ flag and informal-employment indicator -------------
#   resid14     : 1 when the person meets the residency rule
#                 ((P204 == 1 and P205 == 2) or (P204 == 2 and P206 == 1)),
#                 has P208A >= 14 and CODINFOR different from "00"; else 0.
#   ocuinformal : 1 when OCUPINF == 1 and resid14 == 1; else 0. It is NA where
#                 OCUPINF is missing and resid14 == 1.
# The original header here read "OCUPADO FORMAL", but the variable built is
# `ocuinformal`, from OCUPINF == 1.
enaho <- enaho %>%
  mutate(
    resid14 = as.numeric(
      ((P204 == 1 & P205 == 2) | (P204 == 2 & P206 == 1)) &
        P208A >= 14 &
        CODINFOR != "00"
    ),
    ocuinformal = as.numeric(OCUPINF == 1 & resid14 == 1)
  )

table(enaho$resid14, useNA = "alw")
# Output recorded from the original run:
##
## 0 1 <NA>
## 1513 86148 0
table(enaho$ocuinformal, useNA = "alw")
# Output recorded from the original run:
##
## 0 1 <NA>
## 14201 47752 25708
# Keep only the records with P208A between 15 and 29 (inclusive).
enaho_filtrado <- enaho %>%
  filter(P208A >= 15, P208A <= 29)

# Sampling design: clusters = CONGLOME, strata = ESTRATO, weights = FAC500A.
# NOTE(review): the design is declared on the already-filtered data. The survey
# package documentation recommends declaring the design on the full sample and
# then using subset() on the design object, so that standard errors for the
# sub-population are computed correctly - confirm which is intended here.
encuesta <- svydesign(
  ids = ~CONGLOME,
  strata = ~ESTRATO,
  weights = ~FAC500A,
  data = enaho_filtrado
)
# Write every table in `datos` to its own worksheet of a new Excel workbook.
#
# Arguments:
#   nombre_archivo  path of the .xlsx file, passed to saveWorkbook().
#   datos           list of tables; element i is written (with column names)
#                   to a sheet named "Datos<i - 1>": "Datos0", "Datos1", ...
# Returns the value of saveWorkbook().
# NOTE(review): saveWorkbook() is called without `overwrite`, so its default
# applies when the file already exists - confirm that is the desired behaviour.
generar_archivo_excel2 <- function(nombre_archivo, datos) {
  libro <- createWorkbook()
  for (pos in seq_along(datos)) {
    hoja <- paste0("Datos", pos - 1)
    addWorksheet(libro, sheetName = hoja)
    writeData(libro, sheet = hoja, x = datos[[pos]], colNames = TRUE)
  }
  saveWorkbook(libro, nombre_archivo)
}
# Compute the mean, coefficient of variation and confidence intervals
# Survey-weighted mean of `ocuinformal` for each group defined by `por`.
# Since `ocuinformal` is coded 0/1, the mean is the share of 1s.
#
# Arguments:
#   por     one-sided formula with the grouping variables, e.g. ~pea + area.
#   diseno  survey design object; defaults to the script-level `encuesta`.
# Returns the svyby() table with two additions bound as extra columns: the
# coefficient of variation (column "Coef. Var.") and the confidence-interval
# bounds returned by confint() at its default level.
resumir_ocuinformal <- function(por, diseno = encuesta) {
  tabla <- svyby(~ocuinformal, por, diseno, svymean, deff = FALSE, na.rm = TRUE)
  coef_var <- cv(tabla)
  # One-column matrix so the CV gets a readable column name when bound.
  cv_col <- matrix(
    coef_var,
    nrow = length(coef_var), ncol = 1,
    dimnames = list(names(coef_var), "Coef. Var.")
  )
  bind_cols(tabla, cv_col, confint(tabla))
}

# One table per disaggregation. Each one is printed after being built, as in
# the original document.
# In the extracted source each print statement was fused onto the next
# assignment (`datos0tabla1 <- ...`), so `tabla1`..`tabla8` were never defined
# and the export function was never called.
datos0 <- resumir_ocuinformal(~pea)
datos0
datos1 <- resumir_ocuinformal(~pea + P207)
datos1
datos2 <- resumir_ocuinformal(~pea + area)
datos2
datos3 <- resumir_ocuinformal(~pea + regnat)
datos3
datos4 <- resumir_ocuinformal(~pea + regiones2)
datos4
datos5 <- resumir_ocuinformal(~pea + pobreza3)
datos5
datos6 <- resumir_ocuinformal(~pea + discapacidad)
datos6
datos7 <- resumir_ocuinformal(~pea + defiet2)
datos7
datos8 <- resumir_ocuinformal(~pea + lengua)
datos8

# Export all tables, one per worksheet (Datos0 ... Datos8).
# NOTE(review): the file name is relative, so the workbook is written to the
# current working directory rather than to `ruta` - confirm that is intended.
generar_archivo_excel2(
  "DINDES-25-EMP-05-INDA.xlsx",
  list(datos0, datos1, datos2, datos3, datos4, datos5, datos6, datos7, datos8)
)
#save(enaho,file=paste(ruta,"BASEDEDATOSIndicador1ENDES.RData",sep = "/"))
#BORRAMOS TODO MENOS "RUTA"
#rm(list=setdiff(ls(), c("ruta")))
#VOLVEMOS A CARGAR NUESTRA BD LIMPIA
#load(paste(ruta,"BASEDEDATOSIndicador1ENDES.RData",sep="/"))