library(openxlsx)
library(rmarkdown)
library(tidyverse)
library(haven)
library(foreign)
library(survey)
# Ruta hacia una carpeta donde almacenaremos los archivos Excel que se elaborarán posteriormente, los cuales contendrán los datasets con la información construida.
# Output folder path.
# NOTE(review): `ruta` is not referenced again in the visible code; confirm
# whether the Excel files written later were meant to be saved under it.
ruta <- "C:/Users/Trabajo/Desktop/RDATA" #choose.files()
# Load the 2022 Stata (.dta) files with haven::read_dta(). Only the 2022 lines
# are active.
# NOTE(review): every commented-out 2018-2021 line below points at the *2022*
# file of its module; fix the paths before enabling any of them.
# Summary file (folder "SUMARIA - 759-Modulo34").
#sumaria2018 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SUMARIA - 759-Modulo34\\sumaria-2022.dta")
#sumaria2019 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SUMARIA - 759-Modulo34\\sumaria-2022.dta")
#sumaria2020 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SUMARIA - 759-Modulo34\\sumaria-2022.dta")
#sumaria2021 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SUMARIA - 759-Modulo34\\sumaria-2022.dta")
sumaria2022 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SUMARIA - 759-Modulo34\\sumaria-2022.dta")
# Education file (folder "...Modulo03"); the folder name is spelled "EDUACIÓN"
# in the path and must match the directory on disk.
#educacion2018 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EDUACIÓN - 759-Modulo03\\enaho01a-2022-300.dta")
#educacion2019 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EDUACIÓN - 759-Modulo03\\enaho01a-2022-300.dta")
#educacion2020 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EDUACIÓN - 759-Modulo03\\enaho01a-2022-300.dta")
#educacion2021 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EDUACIÓN - 759-Modulo03\\enaho01a-2022-300.dta")
educacion2022 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EDUACIÓN - 759-Modulo03\\enaho01a-2022-300.dta")
# Health file (folder "SALUD - 759-Modulo04"); used later as the left-hand
# table of the joins.
#salud2018 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SALUD - 759-Modulo04\\enaho01a-2022-400.dta")
#salud2019 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SALUD - 759-Modulo04\\enaho01a-2022-400.dta")
#salud2020 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SALUD - 759-Modulo04\\enaho01a-2022-400.dta")
#salud2021 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SALUD - 759-Modulo04\\enaho01a-2022-400.dta")
salud2022 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SALUD - 759-Modulo04\\enaho01a-2022-400.dta")
# Employment file (folder "EMPLEO - 759-Modulo05").
#empleo2018 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EMPLEO - 759-Modulo05\\enaho01a-2022-500.dta")
#empleo2019 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EMPLEO - 759-Modulo05\\enaho01a-2022-500.dta")
#empleo2020 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EMPLEO - 759-Modulo05\\enaho01a-2022-500.dta")
#empleo2021 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EMPLEO - 759-Modulo05\\enaho01a-2022-500.dta")
empleo2022 <- read_dta("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EMPLEO - 759-Modulo05\\enaho01a-2022-500.dta")
# Keep only the join keys plus the single variable needed from each auxiliary
# table, so the joins below do not duplicate columns already present in
# `salud2022`.
sumaria2022 <- subset(
  sumaria2022,
  select = c("conglome", "hogar", "vivienda", "pobreza")
)
empleo2022 <- subset(
  empleo2022,
  select = c("codperso", "conglome", "vivienda", "hogar", "p558c")
)
educacion2022 <- subset(
  educacion2022,
  select = c("codperso", "conglome", "vivienda", "hogar", "p300a")
)

# Frequency of each person code in the health table (recorded output below).
table(salud2022$codperso)
##
## 01 02 03 04 05 06 07 08 09 10 11 12 13
## 34213 28994 21882 14890 7872 3614 1721 821 383 172 76 35 16
## 14 15 16 17 18 19 20 21
## 8 4 3 1 1 1 1 1

# Person-level joins: education and employment are matched on the person code
# plus the household keys.
enaho_inicial <- left_join(
  salud2022, educacion2022,
  by = c("codperso", "conglome", "vivienda", "hogar")
)
enaho_inicial2 <- left_join(
  enaho_inicial, empleo2022,
  by = c("codperso", "conglome", "vivienda", "hogar")
)
# Household-level join: `sumaria2022` carries no `codperso`, so its `pobreza`
# value is repeated for every person of the household.
enaho_inicial3 <- left_join(
  enaho_inicial2, sumaria2022,
  by = c("conglome", "vivienda", "hogar")
)
enaho <- enaho_inicial3

# Variables used for the disaggregations further below.
# Text label for sex derived from p207: 1 -> "hombre", 2 -> "Mujer"; any other
# value (or a missing one) stays NA.
enaho <- enaho %>%
  mutate(
    sexo = case_when(
      p207 == 1 ~ "hombre",
      p207 == 2 ~ "Mujer",
      TRUE ~ NA_character_
    )
  )
# Recorded output of the check below.
table(enaho$sexo, useNA = "alw")
##
## hombre Mujer <NA>
## 55733 58976 0
# Hay que convertir la variable ubigeo a numérico para no encontrar problemas al momento de recodificar (si la unión de bases de datos duplicara la columna, esta aparecería como ubigeo.x).
# Region label derived from the numeric ubigeo code.
# The codes are compared as numbers, so the leading zero of the first nine
# ranges is dropped (the literal 010101 is the number 10101). The ranges do not
# overlap; a code outside every range, or a missing code, yields NA.
enaho <- enaho %>%
  mutate(
    ubigeonum = as.numeric(ubigeo),
    regiones2 = case_when(
      between(ubigeonum, 10101, 10707) ~ "Amazonas",
      between(ubigeonum, 20101, 22008) ~ "Ancash",
      between(ubigeonum, 30101, 30714) ~ "Apurimac",
      between(ubigeonum, 40101, 40811) ~ "Arequipa",
      between(ubigeonum, 50101, 51108) ~ "Ayacucho",
      between(ubigeonum, 60101, 61311) ~ "Cajamarca",
      between(ubigeonum, 70101, 70107) ~ "Callao",
      between(ubigeonum, 80101, 81307) ~ "Cusco",
      between(ubigeonum, 90101, 90723) ~ "Huancavelica",
      between(ubigeonum, 100101, 101108) ~ "Huanuco",
      between(ubigeonum, 110101, 110508) ~ "Ica",
      between(ubigeonum, 120101, 120909) ~ "Junin",
      between(ubigeonum, 130101, 131203) ~ "La Libertad",
      between(ubigeonum, 140101, 140312) ~ "Lambayeque",
      # Codes 1501xx are labelled separately from the rest of the 15xxxx range.
      between(ubigeonum, 150101, 150143) ~ "Lima Metropolitana",
      between(ubigeonum, 150201, 151033) ~ "Lima Region",
      between(ubigeonum, 160101, 160804) ~ "Loreto",
      between(ubigeonum, 170101, 170303) ~ "Madre de Dios",
      between(ubigeonum, 180101, 180303) ~ "Moquegua",
      between(ubigeonum, 190101, 190308) ~ "Pasco",
      between(ubigeonum, 200101, 200806) ~ "Piura",
      between(ubigeonum, 210101, 211307) ~ "Puno",
      between(ubigeonum, 220101, 221005) ~ "San Martín",
      between(ubigeonum, 230101, 230408) ~ "Tacna",
      between(ubigeonum, 240101, 240304) ~ "Tumbes",
      between(ubigeonum, 250101, 250401) ~ "Ucayali",
      TRUE ~ NA_character_
    )
  )
# Recorded output of the check below.
table(enaho$regiones2, useNA = "alw")
##
## Amazonas Ancash Apurimac Arequipa
## 4317 4650 2915 5026
## Ayacucho Cajamarca Callao Cusco
## 3442 4601 3573 3877
## Huancavelica Huanuco Ica Junin
## 3362 4099 4950 4703
## La Libertad Lambayeque Lima Metropolitana Lima Region
## 5374 5154 10462 4729
## Loreto Madre de Dios Moquegua Pasco
## 6060 1991 2776 2910
## Piura Puno San Martín Tacna
## 6017 3279 4837 3986
## Tumbes Ucayali <NA>
## 3028 4591 0
# Natural region derived from `dominio`: 1-3 and 8 -> "Costa", 4-6 -> "Sierra",
# 7 -> "Selva"; anything else stays NA.
enaho <- enaho %>%
  mutate(
    regnat = case_when(
      (dominio >= 1 & dominio <= 3) | dominio == 8 ~ "Costa",
      dominio >= 4 & dominio <= 6 ~ "Sierra",
      dominio == 7 ~ "Selva",
      TRUE ~ NA_character_
    )
  )
# Recorded output of the check below.
table(enaho$regnat, useNA = "alw")
##
## Costa Selva Sierra <NA>
## 49579 26587 38543 0
# Urban/rural area from `dominio` and `estrato`:
#   - dominio 8, or dominio 1-7 with estrato 1-5 -> "Urbano"
#   - dominio 1-7 with estrato 6-8               -> "Rural"
#   - anything else                              -> NA
enaho <- enaho %>%
  mutate(
    area = case_when(
      dominio == 8 |
        ((dominio >= 1 & dominio <= 7) & (estrato >= 1 & estrato <= 5)) ~ "Urbano",
      (dominio >= 1 & dominio <= 7) & (estrato >= 6 & estrato <= 8) ~ "Rural",
      TRUE ~ NA_character_
    )
  )
# Recorded output of the check below.
table(enaho$area, useNA = "alw")
##
## Rural Urbano <NA>
## 39033 75676 0
# Text label for the poverty status stored in `pobreza` (codes 1, 2, 3); any
# other value stays NA.
enaho <- enaho %>%
  mutate(
    pobreza3 = case_when(
      pobreza == 1 ~ "Pobre extremo",
      pobreza == 2 ~ "Pobre no extremo",
      pobreza == 3 ~ "No pobre",
      TRUE ~ NA_character_
    )
  )
# Recorded output of the check below.
table(enaho$pobreza3, useNA = "alw")
##
## No pobre Pobre extremo Pobre no extremo <NA>
## 85197 6661 22851 0
# Language group from p300a: 4 -> "Castellano", 1-3 -> "Originaria"; any other
# value stays NA. The result is stored as a factor.
enaho <- enaho %>%
  mutate(
    lengua = as.factor(
      case_when(
        p300a == 4 ~ "Castellano",
        p300a == 1 | p300a == 2 | p300a == 3 ~ "Originaria",
        TRUE ~ NA_character_
      )
    )
  )
# Recorded output of the check below.
table(enaho$lengua, useNA = "alw")
##
## Castellano Originaria <NA>
## 88959 19163 6587
# Indicator built from items p401h1..p401h6: 1 when at least one item equals 1,
# 0 when none does. If no item equals 1 but some are missing, the result is NA.
enaho <- enaho %>%
  mutate(
    discapacidad = if_else(
      p401h1 == 1 | p401h2 == 1 | p401h3 == 1 |
        p401h4 == 1 | p401h5 == 1 | p401h6 == 1,
      1,
      0
    )
  )
# Recorded output of the check below.
table(enaho$discapacidad, useNA = "alw")
##
## 0 1 <NA>
## 108906 5715 88
# Ethnic self-identification label from p558c, stored as a factor. Values
# outside 1-9 (or missing) stay NA.
# NOTE(review): code 9 receives the same label as code 3 -- confirm against the
# questionnaire that merging these two categories is intended.
enaho <- enaho %>%
  mutate(
    defiet2 = as.factor(
      case_when(
        p558c == 3 | p558c == 9 ~ "Nativo o indigena de la Amazonia",
        p558c == 1 ~ "Quechua",
        p558c == 2 ~ "Aimara",
        p558c == 4 ~ "Negro/Moreno/Zambo/Mulato/Pueblo Afro peruano o Afrodescendiente",
        p558c == 5 ~ "Blanco",
        p558c == 6 ~ "Mestizo",
        p558c == 7 ~ "otro",
        p558c == 8 ~ "No sabe/No responde",
        TRUE ~ NA_character_
      )
    )
  )
# Recorded output of the check below.
table(enaho$defiet2, useNA = "alw")
##
## Aimara
## 2883
## Blanco
## 3701
## Mestizo
## 44600
## Nativo o indigena de la Amazonia
## 2000
## Negro/Moreno/Zambo/Mulato/Pueblo Afro peruano o Afrodescendiente
## 6047
## No sabe/No responde
## 3517
## otro
## 3626
## Quechua
## 21160
## <NA>
## 27175
# Indicator built from items p4191..p4198: 1 when at least one item equals 1,
# 0 when none does. If no item equals 1 but some are missing, the result is NA.
enaho <- enaho %>%
  mutate(
    seguro = if_else(
      p4191 == 1 | p4192 == 1 | p4193 == 1 | p4194 == 1 |
        p4195 == 1 | p4196 == 1 | p4197 == 1 | p4198 == 1,
      1,
      0
    )
  )
# Recorded output of the check below.
table(enaho$seguro, useNA = "alw")
##
## 0 1 <NA>
## 14425 100195 89
# Youth flag from p208a: values from 15 to 29 (inclusive) -> "jovenes",
# everything else -> "no jovenes". A missing p208a stays NA.
enaho <- enaho %>%
  mutate(
    Juventud = if_else(p208a >= 15 & p208a <= 29, "jovenes", "no jovenes")
  )
# Recorded output of the check below (NA values are not tabulated here).
table(enaho$Juventud)
##
## jovenes no jovenes
## 25344 89365
# Survey design object used by every estimate below: `conglome` identifies the
# sampling clusters, `estrato` the strata and `factor07` the sampling weights.
# The argument is spelled out as `ids` instead of relying on partial matching
# of `id`.
encuesta <- svydesign(
  ids = ~conglome,
  strata = ~estrato,
  weights = ~factor07,
  data = enaho
)
#' Write a table of estimates to an Excel workbook
#'
#' Column-binds the estimates, their coefficients of variation and their
#' confidence intervals, and saves the result to a single sheet named "Datos".
#'
#' @param nombre_archivo Path of the .xlsx file to write.
#' @param tabla Table of estimates; its columns come first in the sheet.
#' @param cv Coefficients of variation, with as many rows as `tabla`.
#' @param ic Confidence intervals, with as many rows as `tabla`.
#' @param overwrite Replace `nombre_archivo` when it already exists? Defaults
#'   to `TRUE` so the script can be re-run; without it `saveWorkbook()` stops
#'   as soon as the file from a previous run is found. Pass `FALSE` to keep
#'   that protective error.
#' @return Whatever `openxlsx::saveWorkbook()` returns; called for its side
#'   effect of writing the file.
generar_archivo_excel <- function(nombre_archivo, tabla, cv, ic,
                                  overwrite = TRUE) {
  datos_combinados <- bind_cols(tabla, cv, ic)
  workbook <- createWorkbook()
  addWorksheet(workbook, sheetName = "Datos")
  writeData(workbook, sheet = "Datos", x = datos_combinados, colNames = TRUE)
  saveWorkbook(workbook, nombre_archivo, overwrite = overwrite)
}
# Compute the mean, coefficient of variation and confidence intervals
# Estimates of the mean of `seguro` by youth group, alone and crossed with each
# disaggregation variable. For every table this computes the estimate, its
# confidence interval and its coefficient of variation, prints the combined
# table and writes it to "datos<N>.xlsx".

#' Summarise `seguro` for one disaggregation
#'
#' @param desagregacion One-sided formula with the grouping variables.
#' @param diseno Survey design object to estimate from.
#' @return A list with `tabla` (the `svyby()` result), `ic` (its confidence
#'   intervals), `cv` (a one-column matrix named "Coef. Var.") and `datos`
#'   (the three bound column-wise).
resumir_seguro <- function(desagregacion, diseno) {
  tabla <- svyby(~seguro, desagregacion, diseno, svymean,
                 deff = FALSE, na.rm = TRUE)
  ic <- confint(tabla)
  coef_var <- cv(tabla)
  # One-column matrix so the column gets a readable header in the output.
  coef_var <- matrix(coef_var, nrow = length(coef_var), ncol = 1,
                     dimnames = list(names(coef_var), "Coef. Var."))
  list(
    tabla = tabla,
    ic = ic,
    cv = coef_var,
    datos = bind_cols(tabla, coef_var, ic)
  )
}

# Position in this list determines the table number (the first one is 0).
desagregaciones <- list(
  ~Juventud,
  ~Juventud + sexo,
  ~Juventud + area,
  ~Juventud + regnat,
  ~Juventud + regiones2,
  ~Juventud + pobreza3,
  ~Juventud + discapacidad,
  ~Juventud + defiet2,
  ~Juventud + lengua
)

for (i in seq_along(desagregaciones)) {
  sufijo <- i - 1L
  res <- resumir_seguro(desagregaciones[[i]], encuesta)
  print(res$datos)
  generar_archivo_excel(paste0("datos", sufijo, ".xlsx"),
                        res$tabla, res$cv, res$ic)
  # Keep the per-table objects under their original names (tabla0, ic0, cv0,
  # datos0, ..., tabla8, ic8, cv8, datos8) in case later code refers to them.
  for (nombre in names(res)) {
    assign(paste0(nombre, sufijo), res[[nombre]])
  }
}