DETERMINAR LA DISTRIBUCIÓN DE LA POBLACIÓN ECONÓMICAMENTE ACTIVA JOVEN SEGÚN DESAGREGACIONES DE INTERÉS. SE TOMA COMO BASE LA SINTAXIS DE EMPLEO FORMAL. ES DECIR, EL CÁLCULO SE REALIZA SOBRE LOS RESIDENTES.
library(openxlsx)
library(rmarkdown)
library(tidyverse)
library(haven)
library(foreign)
library(survey)
library(knitr)

Una ruta hacia una carpeta donde almacenaremos los archivos Excel que se elaborarán posteriormente, los cuales contendrán los conjuntos de datos construidos.
# Folder path kept in `ruta` for the output files produced by this script.
ruta <- "C:/Users/Trabajo/Desktop/RDATA"

# Read the four 2022 SPSS files used below (sumaria, empleo, educacion, salud).
sumaria2022 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SUMARIA - 759-Modulo34\\Sumaria-2022.sav")
empleo2022 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EMPLEO - 759-Modulo05\\Enaho01a-2022-500.sav")
educacion2022 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO\\EDUACIÓN - 759-Modulo03\\Enaho01A-2022-300.sav")
salud2022 <- read_spss("C:\\Users\\Trabajo\\Desktop\\ENAHO\\SALUD - 759-Modulo04\\Enaho01A-2022-400.sav")

# Keep only the join keys plus the variables that are recoded further down.
educacion2022 <- subset(educacion2022, select = c(CODPERSO, CONGLOME, VIVIENDA, HOGAR, P300A))
salud2022 <- subset(
  salud2022,
  select = c(CODPERSO, CONGLOME, VIVIENDA, HOGAR, P401H1, P401H2, P401H3, P401H4, P401H5, P401H6)
)
sumaria2022 <- subset(sumaria2022, select = c(CONGLOME, VIVIENDA, HOGAR, POBREZA))

# The employment file is the base table. Education and health are joined on
# the four keys that include CODPERSO; sumaria has no CODPERSO, so it is
# joined on CONGLOME/VIVIENDA/HOGAR only.
enaho0 <- left_join(empleo2022, educacion2022, by = c("CODPERSO", "CONGLOME", "VIVIENDA", "HOGAR"))
enaho1 <- left_join(enaho0, salud2022, by = c("CODPERSO", "CONGLOME", "VIVIENDA", "HOGAR"))
enaho <- left_join(enaho1, sumaria2022, by = c("CONGLOME", "VIVIENDA", "HOGAR"))

# The variables built from here on are the ones used for the later
# disaggregations.
Hay que convertir la variable de ubigeo a numérico para evitar problemas al momento de recodificar. El código usa `UBIGEO`; si la unión de bases de datos hubiera duplicado la columna, esta llevaría el sufijo “.x”.
# Numeric copy of UBIGEO. As a number the code reads DDPPdd (department,
# province, district), so integer division recovers the department
# (%/% 10000) and the department+province prefix (%/% 100).
enaho$ubigeonum <- as.numeric(enaho$UBIGEO)

# Department name by department code. Code 15 (Lima) is deliberately absent:
# it is split into "Lima Metropolitana" and "Lima Region" below.
nombres_departamento <- c(
  "1" = "Amazonas", "2" = "Ancash", "3" = "Apurimac", "4" = "Arequipa",
  "5" = "Ayacucho", "6" = "Cajamarca", "7" = "Callao", "8" = "Cusco",
  "9" = "Huancavelica", "10" = "Huanuco", "11" = "Ica", "12" = "Junin",
  "13" = "La Libertad", "14" = "Lambayeque", "16" = "Loreto",
  "17" = "Madre de Dios", "18" = "Moquegua", "19" = "Pasco", "20" = "Piura",
  "21" = "Puno", "22" = "San Martín", "23" = "Tacna", "24" = "Tumbes",
  "25" = "Ucayali"
)

# Region label per row. The previous version listed first/last district codes
# for every department, so a district code beyond a hard-coded upper bound
# silently became NA; deriving the label from the code prefix avoids that.
# Prefix 1501 is "Lima Metropolitana" (the old 150101-150143 range), the rest
# of department 15 is "Lima Region", and unknown or missing codes stay NA.
enaho <- enaho %>%
  mutate(
    regiones2 = case_when(
      ubigeonum %/% 100 == 1501 ~ "Lima Metropolitana",
      ubigeonum %/% 10000 == 15 ~ "Lima Region",
      TRUE ~ unname(nombres_departamento[as.character(ubigeonum %/% 10000)])
    )
  )
table(enaho$regiones2, useNA = "alw")##
## Amazonas Ancash Apurimac Arequipa
## 3126 3578 2313 3942
## Ayacucho Cajamarca Callao Cusco
## 2593 3527 2804 3069
## Huancavelica Huanuco Ica Junin
## 2584 3043 3812 3660
## La Libertad Lambayeque Lima Metropolitana Lima Region
## 4065 3991 8548 3678
## Loreto Madre de Dios Moquegua Pasco
## 4108 1437 2291 2131
## Piura Puno San Martín Tacna
## 4447 2667 3569 3221
## Tumbes Ucayali <NA>
## 2283 3174 0
# Natural region from DOMINIO: 1-3 and 8 are "Costa", 4-6 are "Sierra",
# 7 is "Selva"; any other (or missing) value is NA.
enaho <- enaho %>%
  mutate(
    regnat = case_when(
      (DOMINIO >= 1 & DOMINIO <= 3) | DOMINIO == 8 ~ "Costa",
      DOMINIO >= 4 & DOMINIO <= 6 ~ "Sierra",
      DOMINIO == 7 ~ "Selva",
      TRUE ~ NA_character_
    )
  )
table(enaho$regnat, useNA = "alw")##
## Costa Selva Sierra <NA>
## 38829 18847 29985 0
# Urban/rural flag. DOMINIO 8 is always "Urbano"; for DOMINIO 1-7 the value
# depends on ESTRATO (1-5 "Urbano", 6-8 "Rural"). Anything else is NA.
enaho <- enaho %>%
  mutate(
    area = case_when(
      DOMINIO == 8 |
        ((DOMINIO >= 1 & DOMINIO <= 7) & (ESTRATO >= 1 & ESTRATO <= 5)) ~ "Urbano",
      (DOMINIO >= 1 & DOMINIO <= 7) & (ESTRATO >= 6 & ESTRATO <= 8) ~ "Rural",
      TRUE ~ NA_character_
    )
  )
table(enaho$area, useNA = "alw")##
## Rural Urbano <NA>
## 28845 58816 0
# Text label for the three POBREZA codes (1, 2, 3); other values become NA.
enaho <- enaho %>%
  mutate(
    pobreza3 = case_when(
      POBREZA == 1 ~ "Pobre extremo",
      POBREZA == 2 ~ "Pobre no extremo",
      POBREZA == 3 ~ "No pobre",
      TRUE ~ NA_character_
    )
  )
table(enaho$pobreza3, useNA = "alw")##
## No pobre Pobre extremo Pobre no extremo <NA>
## 67956 4159 15546 0
# Language group from P300A, stored as a factor: code 4 is "Castellano",
# codes 1-3 are "Originaria", every other (or missing) code is NA.
enaho <- enaho %>%
  mutate(
    lengua = as.factor(
      case_when(
        P300A == 4 ~ "Castellano",
        P300A == 1 | P300A == 2 | P300A == 3 ~ "Originaria",
        TRUE ~ NA_character_
      )
    )
  )
table(enaho$lengua, useNA = "alw")##
## Castellano Originaria <NA>
## 68370 17789 1502
# Indicator that is 1 when at least one of P401H1..P401H6 equals 1 and 0 when
# none does. It is NA when no item equals 1 and at least one item is missing.
enaho <- enaho %>%
  mutate(
    discapacidad = if_else(
      P401H1 == 1 | P401H2 == 1 | P401H3 == 1 |
        P401H4 == 1 | P401H5 == 1 | P401H6 == 1,
      1,
      0
    )
  )
table(enaho$discapacidad, useNA = "alw")##
## 0 1 <NA>
## 82328 5266 67
# Collapse P558C into five numeric groups (1-5); unmatched or missing codes
# become NA. The fallback is NA_real_ so that every right-hand side is a
# double: mixing doubles with NA_integer_ is a type error in dplyr releases
# before 1.1.0, and the resulting column is a double either way.
enaho <- enaho %>%
  mutate(
    etnia = case_when(
      P558C < 4 | P558C == 9 ~ 1,
      P558C == 4 ~ 2,
      P558C == 6 ~ 3,
      P558C == 5 | P558C == 7 ~ 4,
      P558C == 8 ~ 5,
      TRUE ~ NA_real_
    )
  )
# Text labels for groups 1-5, in order. They are only defined here; this
# section does not attach them to `etnia`.
etnia_labels <- c("Indigena 1/", "Negro, mulato, Afro peruano", "Mestizo/a", "Otro 2/", "No sabe")
table(enaho$etnia, useNA = "alw")##
## 1 2 3 4 5 <NA>
## 26043 6047 44600 7327 3517 127
# Frequency table of P204, one of the inputs of the `res` flag built next.
table(enaho$P204)##
## 1 2
## 87156 505
# Residency flag (factor): "Residente" when P204 is 1 and P205 is 2, or when
# P204 is 2 and P206 is 1; "No residente" otherwise. A missing condition
# stays NA.
enaho <- enaho %>%
  mutate(
    res = as.factor(
      if_else(
        (P204 == 1 & P205 == 2) | (P204 == 2 & P206 == 1),
        "Residente",
        "No residente"
      )
    )
  )
table(enaho$res, useNA = "alw")##
## No residente Residente <NA>
## 1390 86271 0
# PEA flag (factor): "PEA" for residents with P208A >= 14 whose OCU500 is
# 1 or 2; "NO PEA" for everyone else. A missing condition stays NA.
enaho <- enaho %>%
  mutate(
    pea = as.factor(
      if_else(
        P208A >= 14 & res == "Residente" & (OCU500 == 1 | OCU500 == 2),
        "PEA",
        "NO PEA"
      )
    )
  )
table(enaho$pea, useNA = "alw")##
## NO PEA PEA <NA>
## 25294 62367 0
# Youth flag (factor): "Joven" for P208A between 15 and 29 inclusive, NA for
# everyone else, so svyby() with na.rm = TRUE only reports the youth group.
enaho <- enaho %>%
  mutate(Juventud = as.factor(ifelse(P208A >= 15 & P208A <= 29, "Joven", NA)))

# Flag (factor) for rows with P208A >= 15. Rows outside that range are a real
# missing value, as in `Juventud`; the previous code stored the string "NA",
# which created a spurious factor level instead of a missing value.
enaho <- enaho %>%
  mutate(POBTOTAL = as.factor(ifelse(P208A >= 15, "POB TOTAL", NA)))

# Sampling design: CONGLOME as cluster id, ESTRATO as stratum and FAC500A as
# weight. The argument is spelled `ids` (it used to rely on partial matching
# of `id`).
encuesta <- svydesign(
  data = enaho,
  ids = ~CONGLOME,
  strata = ~ESTRATO,
  weights = ~FAC500A
)
# Write every element of a list to its own sheet of a single Excel workbook.
#
# Arguments:
#   nombre_archivo  Path of the .xlsx file to write.
#   datos           List of tables; element i is written, with column names,
#                   to a sheet called "Datos<i-1>" ("Datos0", "Datos1", ...).
#   overwrite       Passed on to saveWorkbook(). The default FALSE is what
#                   the call used before, when the argument was not set.
#
# Returns whatever saveWorkbook() returns.
generar_archivo_excel2 <- function(nombre_archivo, datos, overwrite = FALSE) {
  workbook <- createWorkbook()
  for (i in seq_along(datos)) {
    # Build the sheet name once so both calls are guaranteed to agree.
    hoja <- paste0("Datos", i - 1)
    addWorksheet(workbook, sheetName = hoja)
    writeData(workbook, sheet = hoja, x = datos[[i]], colNames = TRUE)
  }
  saveWorkbook(workbook, nombre_archivo, overwrite = overwrite)
}

# Survey-weighted means of the `pea` levels within each `Juventud` group.
tabla <- svyby(~pea, ~Juventud, encuesta, svymean, deff = FALSE, na.rm = TRUE)
# Assemble the young-PEA summary from `tabla`: the estimate for the "PEA"
# level, its coefficient of variation and its confidence interval.
ic <- as.data.frame(confint(tabla))
# Drop the "NO PEA" interval by row name instead of by position, which is the
# same rule the per-department table uses.
ic <- ic[!grepl("peaNO", rownames(ic)), , drop = FALSE]
# The result gets its own name so the variable no longer shadows survey::cv().
cv_tabla <- cv(tabla) %>% select(!contains("NO PEA"))
# Remove the standard-error columns ("se" or "se.<stat>") and the "NO PEA"
# estimate. contains("se") was a case-insensitive substring match, so it would
# also have removed any grouping column whose name contains "se".
tabla <- tabla %>% select(!matches("^se(\\.|$)") & !contains("NO PEA"))
datos <- bind_cols(tabla, cv_tabla, ic)
names(datos) <- c("PEA Joven", "%", "Coef. Var", "Int.Inf", "Int.Sup")
kable(datos, format = "markdown")

| | PEA Joven | % | Coef. Var | Int.Inf | Int.Sup |
|---|---|---|---|---|---|
| Joven | Joven | 0.6171033 | 0.0076107 | 0.6078982 | 0.6263084 |
# Same summary as the overall table, now broken down by `regiones2`:
# survey-weighted means of the `pea` levels per Juventud x regiones2 cell.
tabla <- svyby(
  ~pea, ~Juventud + regiones2, encuesta, svymean,
  deff = FALSE, na.rm = TRUE
)
ic <- confint(tabla)
# Keep only the intervals of the "PEA" level (row names containing "peaNO"
# belong to "NO PEA").
ic <- as.data.frame(ic[!grepl("peaNO", rownames(ic)), , drop = FALSE])
# The result gets its own name so the variable no longer shadows survey::cv().
cv_tabla <- cv(tabla) %>% select(!contains("NO PEA"))
# Remove the standard-error columns ("se" or "se.<stat>") and the "NO PEA"
# estimate. contains("se") was a case-insensitive substring match, so it would
# also have removed any grouping column whose name contains "se".
tabla <- tabla %>% select(!matches("^se(\\.|$)") & !contains("NO PEA"))
datos1 <- bind_cols(tabla, cv_tabla, ic)
names(datos1) <- c("PEA Joven", "Departamentos", "%", "Coef. Var", "Int.Inf", "Int.Sup")
kable(datos1, format = "markdown")

| | PEA Joven | Departamentos | % | Coef. Var | Int.Inf | Int.Sup |
|---|---|---|---|---|---|---|
| Joven.Amazonas | Joven | Amazonas | 0.6023364 | 0.0381406 | 0.5573092 | 0.6473636 |
| Joven.Ancash | Joven | Ancash | 0.6906447 | 0.0271950 | 0.6538325 | 0.7274569 |
| Joven.Apurimac | Joven | Apurimac | 0.6930614 | 0.0356936 | 0.6445761 | 0.7415467 |
| Joven.Arequipa | Joven | Arequipa | 0.5750194 | 0.0346838 | 0.5359302 | 0.6141086 |
| Joven.Ayacucho | Joven | Ayacucho | 0.6378894 | 0.0376315 | 0.5908410 | 0.6849378 |
| Joven.Cajamarca | Joven | Cajamarca | 0.6126829 | 0.0332001 | 0.5728150 | 0.6525507 |
| Joven.Callao | Joven | Callao | 0.6433202 | 0.0279301 | 0.6081036 | 0.6785369 |
| Joven.Cusco | Joven | Cusco | 0.5891993 | 0.0445208 | 0.5377862 | 0.6406124 |
| Joven.Huancavelica | Joven | Huancavelica | 0.7360180 | 0.0332591 | 0.6880394 | 0.7839965 |
| Joven.Huanuco | Joven | Huanuco | 0.6446265 | 0.0311654 | 0.6052507 | 0.6840023 |
| Joven.Ica | Joven | Ica | 0.6657781 | 0.0230009 | 0.6357642 | 0.6957920 |
| Joven.Junin | Joven | Junin | 0.6694042 | 0.0273534 | 0.6335164 | 0.7052921 |
| Joven.La Libertad | Joven | La Libertad | 0.6433234 | 0.0250205 | 0.6117753 | 0.6748716 |
| Joven.Lambayeque | Joven | Lambayeque | 0.6171134 | 0.0250025 | 0.5868724 | 0.6473545 |
| Joven.Lima Metropolitana | Joven | Lima Metropolitana | 0.5854520 | 0.0197124 | 0.5628328 | 0.6080712 |
| Joven.Lima Region | Joven | Lima Region | 0.5786129 | 0.0282908 | 0.5465295 | 0.6106964 |
| Joven.Loreto | Joven | Loreto | 0.5634476 | 0.0304914 | 0.5297748 | 0.5971204 |
| Joven.Madre de Dios | Joven | Madre de Dios | 0.6289035 | 0.0571234 | 0.5584916 | 0.6993153 |
| Joven.Moquegua | Joven | Moquegua | 0.6047475 | 0.0420818 | 0.5548687 | 0.6546263 |
| Joven.Pasco | Joven | Pasco | 0.6749379 | 0.0386003 | 0.6238753 | 0.7260005 |
| Joven.Piura | Joven | Piura | 0.6317249 | 0.0217603 | 0.6047822 | 0.6586676 |
| Joven.Puno | Joven | Puno | 0.6315265 | 0.0391726 | 0.5830399 | 0.6800132 |
| Joven.San Martín | Joven | San Martín | 0.6702518 | 0.0239916 | 0.6387348 | 0.7017688 |
| Joven.Tacna | Joven | Tacna | 0.5373515 | 0.0437259 | 0.4912999 | 0.5834031 |
| Joven.Tumbes | Joven | Tumbes | 0.5936581 | 0.0414074 | 0.5454786 | 0.6418376 |
| Joven.Ucayali | Joven | Ucayali | 0.6534202 | 0.0250829 | 0.6212971 | 0.6855434 |
# Export both tables to one workbook inside `ruta`. The file name used to be
# passed without a folder, so the workbook was written to the working
# directory and `ruta` was never used.
generar_archivo_excel2(
  file.path(ruta, "DINDES-27-EMP-05-CALG.xlsx"),
  list(datos, datos1)
)
#save(enaho,file=paste(ruta,"BASEDEDATOSIndicador1ENDES.RData",sep = "/"))
#BORRAMOS TODO MENOS "RUTA"
#rm(list=setdiff(ls(), c("ruta")))
#VOLVEMOS A CARGAR NUESTRA BD LIMPIA
#load(paste(ruta,"BASEDEDATOSIndicador1ENDES.RData",sep="/"))