library(openxlsx)
library(rmarkdown)
library(tidyverse)
library(haven)
library(foreign)
library(survey)
# Ruta hacia la carpeta donde almacenaremos los archivos Excel que se elaborarán
# posteriormente, los cuales contendrán los datasets con la información construida.
# Folder path for the Excel outputs (the author also noted choose.files() as
# an alternative way to obtain a path).
ruta <- "C:/Users/Trabajo/Desktop/RDATA"

# ENDES 2022 source files, read from SPSS format.
# Module 1637: maternal mortality and family violence.
modulo1637 <- read_spss(
  file = "C:\\Users\\Trabajo\\Desktop\\ENDES\\2022\\mort materna y violencia fam - 786-Modulo1637\\REC84DV.sav"
)

# Module 1631: MEF data (two files).
datosmef1 <- read_spss(
  file = "C:\\Users\\Trabajo\\Desktop\\ENDES\\2022\\Datosmef - 786-Modulo1631\\REC0111.sav"
)
datosmef2 <- read_spss(
  file = "C:\\Users\\Trabajo\\Desktop\\ENDES\\2022\\Datosmef - 786-Modulo1631\\REC91.sav"
)

# Module 1635: nuptiality, fertility, spouse and woman.
conyugue <- read_spss(
  file = "C:\\Users\\Trabajo\\Desktop\\ENDES\\2022\\nupcialidad, fec, conyugue y mujer - 786-Modulo1635\\RE516171.sav"
)

# Module 1640: health.
salud2022 <- read_spss(
  file = "C:\\Users\\Trabajo\\Desktop\\ENDES\\2022\\Salud - 786- modulo1640\\CSALUD01.sav"
)
# Keep only the case identifier and the items D105A to D105I from module 1637.
modulo1637 <- modulo1637 %>%
  select(all_of(c("CASEID", paste0("D105", LETTERS[1:9]))))

# Attach the remaining modules to the REC0111 records with left joins, so every
# row of datosmef1 is kept. The first three joins use CASEID; the health file
# is matched on HHID.
# NOTE(review): if salud2022 holds more than one row per HHID this join
# duplicates rows - confirm the key is unique.
endes_inicial <- datosmef1 %>%
  left_join(modulo1637, by = "CASEID") %>%
  left_join(datosmef2, by = "CASEID") %>%
  left_join(conyugue, by = "CASEID") %>%
  left_join(salud2022, by = "HHID")

# Working copy on which the derived variables are built.
endes <- endes_inicial

# The variables built below are the ones used for the later disaggregations.
# Hay que convertir la variable ubigeo.x (el “.x” es producto de la unión de bases de datos) a numérico para evitar problemas al momento de recodificar.
# Numeric copy of UBIGEO so the codes can be compared against numeric ranges.
endes$ubigeonum <- as.numeric(endes$UBIGEO)

# Assign a region label according to the range the numeric code falls in.
# The bounds are written without leading zeros (e.g. 10101 is the same number
# as 010101). Codes that are missing or outside every range give NA.
endes <- endes %>%
  mutate(
    regiones2 = case_when(
      ubigeonum >= 10101 & ubigeonum <= 10707 ~ "Amazonas",
      ubigeonum >= 20101 & ubigeonum <= 22008 ~ "Ancash",
      ubigeonum >= 30101 & ubigeonum <= 30714 ~ "Apurimac",
      ubigeonum >= 40101 & ubigeonum <= 40811 ~ "Arequipa",
      ubigeonum >= 50101 & ubigeonum <= 51108 ~ "Ayacucho",
      ubigeonum >= 60101 & ubigeonum <= 61311 ~ "Cajamarca",
      ubigeonum >= 70101 & ubigeonum <= 70107 ~ "Callao",
      ubigeonum >= 80101 & ubigeonum <= 81307 ~ "Cusco",
      ubigeonum >= 90101 & ubigeonum <= 90723 ~ "Huancavelica",
      ubigeonum >= 100101 & ubigeonum <= 101108 ~ "Huanuco",
      ubigeonum >= 110101 & ubigeonum <= 110508 ~ "Ica",
      ubigeonum >= 120101 & ubigeonum <= 120909 ~ "Junin",
      ubigeonum >= 130101 & ubigeonum <= 131203 ~ "La Libertad",
      ubigeonum >= 140101 & ubigeonum <= 140312 ~ "Lambayeque",
      ubigeonum >= 150101 & ubigeonum <= 150143 ~ "Lima Metropolitana",
      ubigeonum >= 150201 & ubigeonum <= 151033 ~ "Lima Region",
      ubigeonum >= 160101 & ubigeonum <= 160804 ~ "Loreto",
      ubigeonum >= 170101 & ubigeonum <= 170303 ~ "Madre de Dios",
      ubigeonum >= 180101 & ubigeonum <= 180303 ~ "Moquegua",
      ubigeonum >= 190101 & ubigeonum <= 190308 ~ "Pasco",
      ubigeonum >= 200101 & ubigeonum <= 200806 ~ "Piura",
      ubigeonum >= 210101 & ubigeonum <= 211307 ~ "Puno",
      ubigeonum >= 220101 & ubigeonum <= 221005 ~ "San Martín",
      ubigeonum >= 230101 & ubigeonum <= 230408 ~ "Tacna",
      ubigeonum >= 240101 & ubigeonum <= 240304 ~ "Tumbes",
      ubigeonum >= 250101 & ubigeonum <= 250401 ~ "Ucayali",
      TRUE ~ NA_character_
    )
  )

# Frequency check, including the count of missing values.
table(endes$regiones2, useNA = "always")
##
## Amazonas Ancash Apurimac Arequipa
## 1443 1353 1228 1385
## Ayacucho Cajamarca Callao Cusco
## 1554 1377 1562 1093
## Huancavelica Huanuco Ica Junin
## 1237 1457 1422 1238
## La Libertad Lambayeque Lima Metropolitana Lima Region
## 1377 1478 3683 1392
## Loreto Madre de Dios Moquegua Pasco
## 1696 1261 1278 1215
## Piura Puno San Martín Tacna
## 1535 1067 1470 1358
## Tumbes Ucayali <NA>
## 1402 1544 0
# Three categorical breakdown variables, derived in a single pass:
#   regnat   - SREGION 1 or 2 -> "Costa", 3 -> "Sierra", 4 -> "Selva"
#   area     - V025 equal to 1 -> "urbano", any other value -> "rural"
#   pobreza3 - V190 codes 1 to 5 mapped to five labels
# Missing or unlisted codes give NA.
endes <- endes %>%
  mutate(
    regnat = case_when(
      SREGION == 1 | SREGION == 2 ~ "Costa",
      SREGION == 3 ~ "Sierra",
      SREGION == 4 ~ "Selva",
      TRUE ~ NA_character_
    ),
    area = if_else(V025 == 1, "urbano", "rural"),
    pobreza3 = case_when(
      V190 == 1 ~ "El más pobre",
      V190 == 2 ~ "Pobre",
      V190 == 3 ~ "Medio",
      V190 == 4 ~ "Rico",
      V190 == 5 ~ "Más Rico",
      TRUE ~ NA_character_
    )
  )

# Frequency checks, including the count of missing values.
table(endes$regnat, useNA = "always")
##
## Costa Selva Sierra <NA>
## 16443 9381 12281 0

table(endes$area, useNA = "always")
##
## rural urbano <NA>
## 11570 26535 0

table(endes$pobreza3, useNA = "always")
##
## El más pobre Más Rico Medio Pobre Rico <NA>
## 10750 4325 7607 9612 5811 0
# Language group from V131: codes 1 to 9 share one label, 10 is "Castellano",
# 11 and 12 share another label; anything else (including missing) is NA.
# The result is stored as a factor.
endes <- endes %>%
  mutate(
    lengua = case_when(
      V131 %in% 1:9 ~ "Andino/Amazónico",
      V131 == 10 ~ "Castellano",
      V131 %in% 11:12 ~ "lengua extranjera",
      TRUE ~ NA_character_
    ),
    lengua = as.factor(lengua)
  )

# Frequency check, including the count of missing values.
table(endes$lengua, useNA = "always")
##
## Andino/Amazónico Castellano lengua extranjera <NA>
## 3562 32158 67 2318
# Two indicators built from the six QD333_* items:
#   discapacidad  - 1 if any item equals 1, 0 if none does, NA when that
#                   cannot be decided because of missing items.
#   discapacidad1 - same rule, but undecidable cases count as 0.
endes <- endes %>%
  mutate(
    discapacidad = if_else(
      QD333_1 == 1 |
        QD333_2 == 1 |
        QD333_3 == 1 |
        QD333_4 == 1 |
        QD333_5 == 1 |
        QD333_6 == 1,
      1,
      0
    ),
    discapacidad1 = coalesce(discapacidad, 0)
  )

# Frequency check of the version that keeps missing values.
table(endes$discapacidad, useNA = "always")
##
## 0 1 <NA>
## 35526 449 2130
# Indicator built from the six QS25C* items: 1 if any item equals 1, otherwise
# 0 (records with missing items and no item equal to 1 are also 0).
endes <- endes %>%
  mutate(
    discapacidad2 = case_when(
      (QS25C1 == 1 |
        QS25C2 == 1 |
        QS25C3 == 1 |
        QS25C4 == 1 |
        QS25C5 == 1 |
        QS25C6 == 1) ~ 1,
      TRUE ~ 0
    )
  )

# Frequency check of the variable just created.
# The earlier run tabulated `violenciaFS` here, a column that does not exist
# yet at this point; it only produced the warning "Unknown or uninitialised
# column: `violenciaFS`" and an empty table, so no output is recorded.
table(endes$discapacidad2, useNA = "always")

# Raw distribution of QS25BB before recoding.
table(endes$QS25BB)
##
## 1 2 3 4 5 6 7 8 98
## 10198 1069 811 156 3916 2206 14908 274 1765
# Text labels for the QS25BB codes, stored as a factor. Codes not listed
# (including missing) become NA.
# The label for code 5 used to contain an accidental line break in the middle
# of the string (it showed up as "\n" in the tabulated output); it is now a
# single-line label.
endes <- endes %>%
  mutate(
    defiet2 = case_when(
      QS25BB == 1 ~ "Quechua",
      QS25BB == 2 ~ "Aimara",
      QS25BB == 3 ~ "Nativo o indigena de la Amazonia",
      QS25BB == 4 ~ "perteneciente o parte de otro pueblo",
      QS25BB == 5 ~ "Negro/moreno/zambo/mulato/pueblo afroperuano o afrodescendiente",
      QS25BB == 6 ~ "Blanco",
      QS25BB == 7 ~ "Mestizo",
      QS25BB == 8 ~ "Otro",
      QS25BB == 98 ~ "No sabe/No responde",
      TRUE ~ NA_character_
    ),
    defiet2 = as.factor(defiet2)
  )

# Frequency check, including the count of missing values.
table(endes$defiet2, useNA = "always")
##
## Aimara
## 1069
## Blanco
## 2206
## Mestizo
## 14908
## Nativo o indigena de la Amazonia
## 811
## Negro/moreno/zambo/mulato/pueblo afroperuano o afrodescendiente
## 3916
## No sabe/No responde
## 1765
## Otro
## 274
## perteneciente o parte de otro pueblo
## 156
## Quechua
## 10198
## <NA>
## 2802
# WOMEN IN UNION ####
# Flags and labels derived in one pass; the last two reuse the first three.
#   unidas         - 1 when V502 is not 0 and V015 equals 1, else 0
#   Edadcompleta   - 1 when V012 is between 15 and 49 (inclusive), else 0
#   jovenes        - 1 when V012 is between 15 and 29 (inclusive), else 0
#   unidasjoven    - label for records with jovenes == 1 and unidas == 1
#   unidasedadcomp - label for records with Edadcompleta == 1 and unidas == 1
endes <- endes %>%
  mutate(
    unidas = if_else(V502 != 0 & V015 == 1, 1, 0),
    Edadcompleta = if_else(V012 >= 15 & V012 <= 49, 1, 0),
    jovenes = if_else(V012 >= 15 & V012 <= 29, 1, 0),
    unidasjoven = if_else(
      jovenes == 1 & unidas == 1,
      "muj unidas jovenes",
      "otrasedades"
    ),
    unidasedadcomp = if_else(
      Edadcompleta == 1 & unidas == 1,
      "muj unidas edadcompleta",
      "otrasedades"
    )
  )

# Frequency checks, including the count of missing values.
table(endes$unidas, useNA = "always")
##
## 0 1 <NA>
## 12850 25255 0

table(endes$Edadcompleta, useNA = "always")
##
## 0 1 <NA>
## 3506 32281 2318

table(endes$jovenes, useNA = "always")
##
## 0 1 <NA>
## 20518 15269 2318

table(endes$unidasjoven, useNA = "always")
##
## muj unidas jovenes otrasedades <NA>
## 9167 28938 0

table(endes$unidasedadcomp, useNA = "always")
##
## muj unidas edadcompleta otrasedades <NA>
## 25243 12862 0
# Recodes one D105 item to a 0/1 flag: codes 1 and 2 give 1, code 0 and any
# other non-missing code give 0, and missing values stay NA.
codificar_d105 <- function(x) {
  ifelse(x == 0, 0, ifelse(x %in% 1:2, 1, 0))
}

# Item-level flags and the combined indicators. ATACO used to be computed
# twice with the same expression; it is now computed once.
#   VIOL_FIS    - 0 only when all seven D105A-D105G flags are 0, else 1
#   VIOL_SX     - 0 only when both D105H and D105I flags are 0, else 1
#   violenciaFS - 1 when VIOL_FIS or VIOL_SX is 1, else 0
endes <- endes %>%
  mutate(
    ABOFETEO = codificar_d105(D105B),
    PUNHO = codificar_d105(D105C),
    ARRASTRO = codificar_d105(D105D),
    ESTRANGULO = codificar_d105(D105E),
    AMENAZA = codificar_d105(D105F),
    ATACO = codificar_d105(D105G),
    EMPUJO = codificar_d105(D105A),
    VIOL_FIS = ifelse(
      EMPUJO == 0 & ABOFETEO == 0 & PUNHO == 0 & ARRASTRO == 0 &
        ESTRANGULO == 0 & AMENAZA == 0 & ATACO == 0,
      0,
      1
    ),
    violenciasexual1 = codificar_d105(D105H),
    Violenciasexual2 = codificar_d105(D105I),
    VIOL_SX = ifelse(violenciasexual1 == 0 & Violenciasexual2 == 0, 0, 1),
    violenciaFS = ifelse(VIOL_FIS == 1 | VIOL_SX == 1, 1, 0)
  )

# Frequency checks, including the count of missing values.
table(endes$VIOL_FIS, useNA = "always")
##
## 0 1 <NA>
## 19478 1843 16784

table(endes$VIOL_SX, useNA = "always")
##
## 0 1 <NA>
## 20860 461 16784

table(endes$violenciaFS, useNA = "always")
##
## 0 1 <NA>
## 19356 1965 16784
# Survey design used to weight the estimates: V001 is the cluster identifier
# and V005 supplies the weights.
# NOTE(review): strata = NULL means no stratification is declared, so the
# variance estimates ignore it - confirm that is intended.
encuesta <- svydesign(
  ids = ~V001,
  strata = NULL,
  weights = ~V005,
  data = endes
)
# Writes an Excel workbook with one sheet per breakdown table.
#
# nombre_archivo: path of the .xlsx file to save.
# datos:          list of tables; element i is written, with column names, to
#                 a sheet called "Datos<i-1>" ("Datos0", "Datos1", ...).
#
# Returns whatever saveWorkbook() returns.
generar_archivo_excel2 <- function(nombre_archivo, datos) {
  libro <- createWorkbook()
  indices <- seq_along(datos)
  # Sheet numbering starts at 0.
  nombres_hojas <- paste0("Datos", indices - 1)
  for (k in indices) {
    hoja <- nombres_hojas[k]
    addWorksheet(libro, sheetName = hoja)
    writeData(libro, sheet = hoja, x = datos[[k]], colNames = TRUE)
  }
  saveWorkbook(libro, nombre_archivo)
}
# Compute the mean, coefficient of variation and confidence intervals
# Builds the one-column "Coef. Var." matrix for a svyby() result, using the
# names returned by cv() as row names.
columna_cv <- function(tabla) {
  valores <- cv(tabla)
  matrix(
    valores,
    nrow = length(valores),
    ncol = 1,
    dimnames = list(names(valores), "Coef. Var.")
  )
}

# Each section estimates the weighted mean of violenciaFS by unidasjoven
# (alone or crossed with one breakdown variable), then appends the coefficient
# of variation and the confidence interval. A bare `datosN` line displays the
# table; these lines had been fused with the following assignment (for example
# `datos0tabla2 <- ...`), which left tabla2 to tabla8 undefined.

# By unidasjoven only
tabla0 <- svyby(~violenciaFS, ~unidasjoven, encuesta, svymean,
                deff = FALSE, na.rm = TRUE)
ic0 <- confint(tabla0)
cv0 <- columna_cv(tabla0)
datos0 <- bind_cols(tabla0, cv0, ic0)
datos0

# By area
tabla2 <- svyby(~violenciaFS, ~unidasjoven + area, encuesta, svymean,
                deff = FALSE, na.rm = TRUE)
ic2 <- confint(tabla2)
cv2 <- columna_cv(tabla2)
datos2 <- bind_cols(tabla2, cv2, ic2)
datos2

# By regnat
tabla3 <- svyby(~violenciaFS, ~unidasjoven + regnat, encuesta, svymean,
                deff = FALSE, na.rm = TRUE)
ic3 <- confint(tabla3)
cv3 <- columna_cv(tabla3)
datos3 <- bind_cols(tabla3, cv3, ic3)
datos3

# By regiones2
tabla4 <- svyby(~violenciaFS, ~unidasjoven + regiones2, encuesta, svymean,
                deff = FALSE, na.rm = TRUE)
ic4 <- confint(tabla4)
cv4 <- columna_cv(tabla4)
datos4 <- bind_cols(tabla4, cv4, ic4)
datos4

# By pobreza3
tabla5 <- svyby(~violenciaFS, ~unidasjoven + pobreza3, encuesta, svymean,
                deff = FALSE, na.rm = TRUE)
ic5 <- confint(tabla5)
cv5 <- columna_cv(tabla5)
datos5 <- bind_cols(tabla5, cv5, ic5)
datos5

# By discapacidad2
tabla6 <- svyby(~violenciaFS, ~unidasjoven + discapacidad2, encuesta, svymean,
                deff = FALSE, na.rm = TRUE)
ic6 <- confint(tabla6)
cv6 <- columna_cv(tabla6)
datos6 <- bind_cols(tabla6, cv6, ic6)
datos6

# By defiet2
tabla7 <- svyby(~violenciaFS, ~unidasjoven + defiet2, encuesta, svymean,
                deff = FALSE, na.rm = TRUE)
ic7 <- confint(tabla7)
cv7 <- columna_cv(tabla7)
datos7 <- bind_cols(tabla7, cv7, ic7)
datos7

# By lengua
tabla8 <- svyby(~violenciaFS, ~unidasjoven + lengua, encuesta, svymean,
                deff = FALSE, na.rm = TRUE)
ic8 <- confint(tabla8)
cv8 <- columna_cv(tabla8)
datos8 <- bind_cols(tabla8, cv8, ic8)
datos8

# Export every table to one workbook, one sheet per table.
generar_archivo_excel2(
  "DINDES-12-DIPOV-02-PNJ.xlsx",
  list(datos0, datos2, datos3, datos4, datos5, datos6, datos7, datos8)
)