DESCRIPCIÓN DEL INDICADOR

MODELADO DE BASE DATO

PAQUETES ESTADÍSTICOS

library(openxlsx) 
library(rmarkdown) 
library(tidyverse) 
library(haven) 
library(foreign) 
library(survey)
library(knitr)

GENERAMOS UNA RUTA PARA GUARDAR NUESTROS DATOS

Una ruta hacia una carpeta donde almacenaremos los excel que se elaborarán posteriormente, que contarán con dataset de información construida.

ruta <- "C:/Users/Trabajo/Desktop/RDATA"

CARGAR BASES DE DATOS

cmh <- read_dta("C:\\Users\\DINDES08\\Desktop\\ENAHO\\cmh-784-Modulo02\\enaho01-2022-200.dta")
sumaria2022 <- read_dta("C:\\Users\\DINDES08\\Desktop\\ENAHO\\sumaria-784-Modulo34\\sumaria-2022.dta")
educacion2022 <- read_dta("C:\\Users\\DINDES08\\Desktop\\ENAHO\\educacion-784-Modulo03\\enaho01a-2022-300.dta")
salud2022 <- read_dta("C:\\Users\\DINDES08\\Desktop\\ENAHO\\salud-784-Modulo04\\enaho01a-2022-400.dta")
empleo2022 <- read_dta("C:\\Users\\DINDES08\\Desktop\\ENAHO\\empleo-ingresos-784-Modulo05\\enaho01a-2022-500.dta")

sumaria2022 <- subset(sumaria2022, select= c("conglome","hogar","vivienda","pobreza","estrsocial"))
empleo2022 <- subset(empleo2022, select = c("codperso","conglome","vivienda","hogar","p558c")) 
educacion2022 <- subset(educacion2022, select = c("codperso","conglome","vivienda","hogar","p300a"))
salud2022 <- subset(salud2022, select = c("codperso","conglome","vivienda","hogar","p401h1","p401h2","p401h3","p401h4","p401h5","p401h6"))

UNIÓN DE BASES DE DATOS

enaho0 <- left_join(cmh, educacion2022, by=c("codperso","conglome", "vivienda", "hogar"))
enaho1 <- left_join(enaho0, salud2022, by=c("codperso","conglome", "vivienda", "hogar"))
enaho2 <- left_join(enaho1, empleo2022, by=c("codperso","conglome", "vivienda", "hogar"))
enaho <- left_join(enaho2,sumaria2022, by =c("conglome", "vivienda", "hogar"))

ELABORACIÓN DE VARIABLES PARA DESAGREGACIONES

VARIABLES GENERALES

DEPARTAMENTOS

enaho$ubigeonum <- as.numeric(enaho$ubigeo)
enaho <- enaho %>%
  mutate(regiones2 = 
           ifelse(ubigeonum >= 010101 & ubigeonum <= 010707, "Amazonas",
           ifelse(ubigeonum >= 020101 & ubigeonum <= 022008, "Ancash",
           ifelse(ubigeonum >= 030101 & ubigeonum <= 030714, "Apurimac",
           ifelse(ubigeonum >= 040101 & ubigeonum <= 040811, "Arequipa",
           ifelse(ubigeonum >= 050101 & ubigeonum <= 051108, "Ayacucho",
           ifelse(ubigeonum >= 060101 & ubigeonum <= 061311, "Cajamarca",
           ifelse(ubigeonum >= 070101 & ubigeonum <= 070107, "Callao",
           ifelse(ubigeonum >= 080101 & ubigeonum <= 081307, "Cusco",
           ifelse(ubigeonum >= 090101 & ubigeonum <= 090723, "Huancavelica",
           ifelse(ubigeonum >= 100101 & ubigeonum <= 101108, "Huanuco",
           ifelse(ubigeonum >= 110101 & ubigeonum <= 110508, "Ica",
           ifelse(ubigeonum >= 120101 & ubigeonum <= 120909, "Junin",
           ifelse(ubigeonum >= 130101 & ubigeonum <= 131203, "La Libertad",
           ifelse(ubigeonum >= 140101 & ubigeonum <= 140312, "Lambayeque",
           ifelse(ubigeonum >= 150101 & ubigeonum <= 150143, "Lima Metropolitana",
           ifelse(ubigeonum >= 150201 & ubigeonum <= 151033, "Lima Region",
           ifelse(ubigeonum >= 160101 & ubigeonum <= 160804, "Loreto",
           ifelse(ubigeonum >= 170101 & ubigeonum <= 170303, "Madre de Dios",
           ifelse(ubigeonum >= 180101 & ubigeonum <= 180303, "Moquegua",
           ifelse(ubigeonum >= 190101 & ubigeonum <= 190308, "Pasco",
           ifelse(ubigeonum >= 200101 & ubigeonum <= 200806, "Piura",
           ifelse(ubigeonum >= 210101 & ubigeonum <= 211307, "Puno",
           ifelse(ubigeonum >= 220101 & ubigeonum <= 221005, "San Martín",
           ifelse(ubigeonum >= 230101 & ubigeonum <= 230408, "Tacna",
           ifelse(ubigeonum >= 240101 & ubigeonum <= 240304, "Tumbes",
           ifelse(ubigeonum >= 250101 & ubigeonum <= 250401,"Ucayali",NA)))))))))))))))))))))))))))
table(enaho$regiones2, useNA = "alw")
## 
##           Amazonas             Ancash           Apurimac           Arequipa 
##               4668               5020               3141               5306 
##           Ayacucho          Cajamarca             Callao              Cusco 
##               3632               4870               3684               4016 
##       Huancavelica            Huanuco                Ica              Junin 
##               3524               4497               5178               4998 
##        La Libertad         Lambayeque Lima Metropolitana        Lima Region 
##               5677               5467              10856               4926 
##             Loreto      Madre de Dios           Moquegua              Pasco 
##               6490               2056               2946               3119 
##              Piura               Puno         San Martín              Tacna 
##               6375               3396               5201               4161 
##             Tumbes            Ucayali               <NA> 
##               3148               4901                  0

EDAD: JUVENTUD

enaho <- enaho %>% mutate(Juventud = ifelse(p208a>=15 & p208a<=29,"jovenes",NA))
enaho$Juventud <- as.factor(enaho$Juventud)

enaho <- enaho %>% mutate(pobtotal = ifelse(p208a>=15,"poblacion total",NA))
enaho$pobtotal <- as.factor(enaho$pobtotal)

VARIABLES INDICADOR

CONDICIÓN DE POBREZA

enaho <- enaho %>%
  mutate(pobreza3 = case_when(
    pobreza == 1 ~ "Pobre extremo",
    pobreza == 2 ~ "Pobre no extremo",
    pobreza == 3 ~ "No pobre",
    TRUE ~ NA_character_
  ))

PONDERACIÓN DE DATOS, DATASETS Y TRASLADO A EXCEL

DISEÑO MUESTRAL

# Diseño muestral para la ponderación de valores
encuesta = svydesign(data=enaho, id=~conglome, strata=~estrato,
                     weights=~facpob07)

# Función para generar un archivo excel con todas las desagregaciones en pestañas
generar_archivo_excel2 <- function(nombre_archivo, datos) {
  workbook <- createWorkbook()
  
  for (i in seq_along(datos)) {
    addWorksheet(workbook, sheetName = paste("Datos", i-1, sep = ""))
    writeData(workbook, sheet = paste("Datos", i-1, sep = ""), x = datos[[i]], colNames = TRUE)
  }
  
  saveWorkbook(workbook, nombre_archivo)
}

DESAGREGACIÓN NACIONAL POBLACIÓN JOVEN

tabla1 <- svyby(~pobreza3, ~Juventud, encuesta, svymean, deff=F, na.rm=T)
cv1<-cv(tabla1)

names(cv1) <- c("Coef. Var. No pobre", "Coef. Var. Pobre extremo", "Coef. Var. Pobre No extremo")

tabla1 <- tabla1 %>% select(!contains("se."))
names(tabla1) <- c("Juventud","No pobre","Pobre extremo","Pobre No extremo")

datos1<-bind_cols(tabla1, cv1)
kable(datos1, format = "markdown")
Juventud No pobre Pobre extremo Pobre No extremo Coef. Var. No pobre Coef. Var. Pobre extremo Coef. Var. Pobre No extremo
jovenes jovenes 0.7440863 0.0413697 0.214544 0.0083467 0.0577761 0.0269689

DESAGREGACIÓN SEGÚN DEPARTAMENTOS

tabla4 <- svyby(~pobreza3, ~Juventud+regiones2, encuesta, svymean, deff=F, na.rm=T)
cv4<-cv(tabla4)

names(cv4) <- c("Coef. Var. No pobre", "Coef. Var. Pobre extremo", "Coef. Var. Pobre No extremo")

tabla4 <- tabla4 %>% select(!contains("se."))
names(tabla4) <- c("Juventud","Departamentos","No pobre","Pobre extremo","Pobre No extremo")

datos4<-bind_cols(tabla4, cv4)

kable(datos4, format = "markdown")
Juventud Departamentos No pobre Pobre extremo Pobre No extremo Coef. Var. No pobre Coef. Var. Pobre extremo Coef. Var. Pobre No extremo
jovenes.Amazonas jovenes Amazonas 0.7501534 0.0664109 0.1834357 0.0365106 0.2230240 0.1288610
jovenes.Ancash jovenes Ancash 0.8219702 0.0330884 0.1449414 0.0258168 0.2410685 0.1316352
jovenes.Apurimac jovenes Apurimac 0.7963316 0.0113678 0.1923006 0.0336537 0.3069272 0.1373547
jovenes.Arequipa jovenes Arequipa 0.8915793 0.0140041 0.0944166 0.0195763 0.2991273 0.1816501
jovenes.Ayacucho jovenes Ayacucho 0.6291714 0.0821561 0.2886725 0.0545506 0.1853991 0.1055225
jovenes.Cajamarca jovenes Cajamarca 0.5925035 0.1751356 0.2323608 0.0517138 0.1136795 0.0954782
jovenes.Callao jovenes Callao 0.7137706 0.0365858 0.2496437 0.0421397 0.2737433 0.1101305
jovenes.Cusco jovenes Cusco 0.8110025 0.0241298 0.1648678 0.0276129 0.2795660 0.1269746
jovenes.Huancavelica jovenes Huancavelica 0.6808576 0.0729649 0.2461776 0.0408290 0.1998224 0.1029051
jovenes.Huanuco jovenes Huanuco 0.6366780 0.1068369 0.2564851 0.0460752 0.1669326 0.1041835
jovenes.Ica jovenes Ica 0.9583921 0.0000000 0.0416079 0.0119017 NaN 0.2741425
jovenes.Junin jovenes Junin 0.8052509 0.0311797 0.1635694 0.0262585 0.2343342 0.1178425
jovenes.La Libertad jovenes La Libertad 0.7626948 0.0339905 0.2033148 0.0279273 0.2708315 0.0944823
jovenes.Lambayeque jovenes Lambayeque 0.8622585 0.0073159 0.1304256 0.0186187 0.4583566 0.1194523
jovenes.Lima Metropolitana jovenes Lima Metropolitana 0.7310776 0.0232279 0.2456945 0.0214508 0.2315347 0.0599559
jovenes.Lima Region jovenes Lima Region 0.7715781 0.0309697 0.1974522 0.0310632 0.2391726 0.1148276
jovenes.Loreto jovenes Loreto 0.6423476 0.1125878 0.2450646 0.0396874 0.1485195 0.0816292
jovenes.Madre de Dios jovenes Madre de Dios 0.8773769 0.0100712 0.1125519 0.0336260 0.6160127 0.2457181
jovenes.Moquegua jovenes Moquegua 0.9042411 0.0151948 0.0805641 0.0220417 0.4056900 0.2337884
jovenes.Pasco jovenes Pasco 0.6344877 0.0701374 0.2953749 0.0587293 0.2012609 0.1195261
jovenes.Piura jovenes Piura 0.6905610 0.0308566 0.2785825 0.0309384 0.2387890 0.0740094
jovenes.Puno jovenes Puno 0.6149701 0.0846424 0.3003875 0.0505436 0.1819776 0.0985813
jovenes.San Martín jovenes San Martín 0.7689535 0.0405260 0.1905205 0.0323486 0.2829174 0.1128549
jovenes.Tacna jovenes Tacna 0.7652002 0.0302780 0.2045219 0.0339826 0.5433306 0.1243425
jovenes.Tumbes jovenes Tumbes 0.7709526 0.0003081 0.2287393 0.0421136 0.7156681 0.1419742
jovenes.Ucayali jovenes Ucayali 0.7871739 0.0280066 0.1848195 0.0323515 0.2973910 0.1248744

GENERAR ARCHIVO EXCEL

generar_archivo_excel2("DINDES-42-DEMO-04-CALG.xlsx", list(datos1,datos4))