library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the 2019 and 2024 CSV files from the working directory, keeping text
# columns as character vectors rather than factors.
datos2019 <- read.csv("Datos_molec_2019-1.csv", stringsAsFactors = FALSE)
datos2024 <- read.csv("Datos_molec_2024-1.csv", stringsAsFactors = FALSE)
# Print the (rows, columns) dimensions of each table.
dim(datos2019)
## [1] 1980 108
dim(datos2024)
## [1] 2016 108
# Column names of each year's table.
variables2019 <- colnames(datos2019)
variables2024 <- colnames(datos2024)
# Columns present in both tables, and columns that appear in only one of them.
variables_comunes <- intersect(variables2019, variables2024)
variables_solo_2019 <- setdiff(variables2019, variables2024)
variables_solo_2024 <- setdiff(variables2024, variables2019)
# Print the shared column names.
variables_comunes
## [1] "cd" "periodo" "folio" "entidad" "control" "viv_sel"
## [7] "num_hog" "hog_mud" "p1" "p2" "p3_1" "p3_2"
## [13] "p3_3" "p3_4" "p3_5" "p4" "p5" "p5_6esp"
## [19] "p6_1" "p6_2" "p6_3" "p6_4" "p6_5" "p6_6"
## [25] "p6_6esp" "p7" "p7_3" "p8_1" "p8_2" "p9"
## [31] "p9_5esp" "p10" "p11" "p11_6esp" "p12_1" "p12_2"
## [37] "p12_3" "p12_4" "p12_5" "p12_6" "p12_7" "p12_8"
## [43] "p12_9" "p12_9esp" "p13" "p13_3" "p14_1" "p14_2"
## [49] "p15" "p15_5esp" "p16" "p17" "p17_6esp" "p18_1"
## [55] "p18_2" "p18_3" "p18_4" "p18_5" "p19" "p19_3"
## [61] "p20_1" "p20_2" "p21" "p21_5esp" "p22" "p23_1"
## [67] "p23_2" "p24" "p25" "p25_6esp" "p26" "p27"
## [73] "p28" "p28_7esp" "p29" "p30" "p31" "p32"
## [79] "p32_6esp" "p33_1" "p33_2" "p33_3" "p33_4" "p34_1"
## [85] "p34_2" "p34_3" "p34_3_1" "p34_4" "p34_4_1" "p35"
## [91] "p36_1" "p36_2" "p36_3" "p36_4" "factor" "h_lec"
## [97] "mat_lec" "perslec" "edad" "sexo" "anio" "nivel"
# Restrict each year's table to the columns both tables share.
# `drop = FALSE` keeps the result a data frame even when only one column is
# shared; without it the subset collapses to a vector and the `$anio`
# assignment below would fail.
datos2019_comunes <- datos2019[, variables_comunes, drop = FALSE]
datos2024_comunes <- datos2024[, variables_comunes, drop = FALSE]
# Tag every row with its source year. `anio` is one of the shared columns, so
# this overwrites whatever values the files carried in it.
datos2019_comunes$anio <- 2019
datos2024_comunes$anio <- 2024
# Stack the 2019 rows on top of the 2024 rows and print a per-column summary.
datos_combinados <- rbind(datos2019_comunes, datos2024_comunes)
summary(datos_combinados)
## cd periodo folio entidad
## Min. : 1.00 Min. :219.0 Length:3996 Min. : 1.00
## 1st Qu.: 2.00 1st Qu.:219.0 Class :character 1st Qu.: 9.00
## Median : 9.00 Median :224.0 Mode :character Median :15.00
## Mean :13.85 Mean :221.5 Mean :15.66
## 3rd Qu.:25.00 3rd Qu.:224.0 3rd Qu.:21.00
## Max. :43.00 Max. :224.0 Max. :32.00
##
## control viv_sel num_hog hog_mud p1
## Min. :40001 Min. :1.000 Min. :1 Min. :0.00000 Min. :1.000
## 1st Qu.:40130 1st Qu.:2.000 1st Qu.:1 1st Qu.:0.00000 1st Qu.:1.000
## Median :40198 Median :3.000 Median :1 Median :0.00000 Median :1.000
## Mean :40267 Mean :2.497 Mean :1 Mean :0.04079 Mean :1.027
## 3rd Qu.:40356 3rd Qu.:3.000 3rd Qu.:1 3rd Qu.:0.00000 3rd Qu.:1.000
## Max. :41420 Max. :4.000 Max. :1 Max. :2.00000 Max. :2.000
##
## p2 p3_1 p3_2 p3_3
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :1.000 Median :2.000 Median :2.000 Median :2.000
## Mean :1.378 Mean :1.541 Mean :1.679 Mean :1.691
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
##
## p3_4 p3_5 p4 p5
## Min. :0.000 Min. :0.000 Min. : 0.000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.: 0.000 1st Qu.:0.000
## Median :2.000 Median :2.000 Median : 0.000 Median :0.000
## Mean :1.905 Mean :1.585 Mean : 1.305 Mean :1.359
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.: 2.000 3rd Qu.:3.000
## Max. :2.000 Max. :2.000 Max. :70.000 Max. :6.000
##
## p5_6esp p6_1 p6_2 p6_3
## Length:3996 Min. :0.0000 Min. :0.0000 Min. :0.0000
## Class :character 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Mode :character Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.7785 Mean :0.6874 Mean :0.6869
## 3rd Qu.:2.0000 3rd Qu.:2.0000 3rd Qu.:2.0000
## Max. :2.0000 Max. :2.0000 Max. :2.0000
##
## p6_4 p6_5 p6_6 p6_6esp
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Length:3996
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 Class :character
## Median :0.0000 Median :0.0000 Median :0.0000 Mode :character
## Mean :0.6461 Mean :0.7137 Mean :0.8091
## 3rd Qu.:1.0000 3rd Qu.:2.0000 3rd Qu.:2.0000
## Max. :2.0000 Max. :2.0000 Max. :2.0000
##
## p7 p7_3 p8_1 p8_2
## Min. :0.0000 Min. : 0 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.: 0 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median : 0 Median :0.0000 Median :0.0000
## Mean :0.8974 Mean : 1669 Mean :0.7297 Mean :0.4667
## 3rd Qu.:2.0000 3rd Qu.: 0 3rd Qu.:2.0000 3rd Qu.:1.0000
## Max. :3.0000 Max. :999999 Max. :2.0000 Max. :2.0000
##
## p9 p9_5esp p10 p11
## Min. :0.0000 Length:3996 Min. : 0.0000 Min. :0.0000
## 1st Qu.:0.0000 Class :character 1st Qu.: 0.0000 1st Qu.:0.0000
## Median :0.0000 Mode :character Median : 0.0000 Median :0.0000
## Mean :0.4687 Mean : 0.9955 Mean :0.9422
## 3rd Qu.:1.0000 3rd Qu.: 1.0000 3rd Qu.:1.0000
## Max. :5.0000 Max. :99.0000 Max. :6.0000
##
## p11_6esp p12_1 p12_2 p12_3
## Length:3996 Min. :0.0000 Min. :0.000 Min. :0.000
## Class :character 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.000
## Mode :character Median :0.0000 Median :0.000 Median :0.000
## Mean :0.5075 Mean :0.488 Mean :0.482
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.000
## Max. :2.0000 Max. :2.000 Max. :2.000
##
## p12_4 p12_5 p12_6 p12_7
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.4747 Mean :0.5015 Mean :0.4755 Mean :0.4722
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :2.0000 Max. :2.0000 Max. :2.0000 Max. :2.0000
##
## p12_8 p12_9 p12_9esp p13
## Min. :0.0000 Min. :0.000 Length:3996 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 Class :character 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Mode :character Median :0.0000
## Mean :0.4542 Mean :0.533 Mean :0.5911
## 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :2.0000 Max. :2.000 Max. :3.0000
##
## p13_3 p14_1 p14_2 p15
## Min. : 0.0 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 0.0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 0.0 Median :0.0000 Median :0.0000 Median :0.0000
## Mean : 264.6 Mean :0.4947 Mean :0.3006 Mean :0.3386
## 3rd Qu.: 0.0 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :999999.0 Max. :2.0000 Max. :2.0000 Max. :5.0000
##
## p15_5esp p16 p17 p17_6esp
## Length:3996 Min. : 0.0000 Min. :0.0000 Length:3996
## Class :character 1st Qu.: 0.0000 1st Qu.:0.0000 Class :character
## Mode :character Median : 0.0000 Median :0.0000 Mode :character
## Mean : 0.8311 Mean :0.8421
## 3rd Qu.: 1.0000 3rd Qu.:1.0000
## Max. :60.0000 Max. :6.0000
##
## p18_1 p18_2 p18_3 p18_4
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.3206 Mean :0.3471 Mean :0.3476 Mean :0.3774
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :2.0000 Max. :2.0000 Max. :2.0000 Max. :2.0000
##
## p18_5 p19 p19_3 p20_1
## Min. :0.0000 Min. :0.0000 Min. : 0.0 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 0.0 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median : 0.0 Median :0.000
## Mean :0.3073 Mean :0.6021 Mean : 756.5 Mean :0.478
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.: 0.0 3rd Qu.:1.000
## Max. :2.0000 Max. :3.0000 Max. :999999.0 Max. :2.000
##
## p20_2 p21 p21_5esp p22
## Min. :0.0000 Min. :0.0000 Length:3996 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 Class :character 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Mode :character Median :0.0000
## Mean :0.2848 Mean :0.3634 Mean :0.1409
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :2.0000 Max. :5.0000 Max. :5.0000
##
## p23_1 p23_2 p24 p25
## Min. :0.00000 Min. :0.00000 Min. :0.000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.00000 Median :0.00000 Median :0.000 Median :0.000
## Mean :0.06456 Mean :0.05781 Mean :0.781 Mean :1.092
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.000 3rd Qu.:3.000
## Max. :2.00000 Max. :2.00000 Max. :5.000 Max. :6.000
##
## p25_6esp p26 p27 p28
## Length:3996 Min. : 0.00 Min. :0.000 Min. :0.0000
## Class :character 1st Qu.: 0.00 1st Qu.:0.000 1st Qu.:0.0000
## Mode :character Median : 20.00 Median :2.000 Median :0.0000
## Mean : 27.16 Mean :1.276 Mean :0.5688
## 3rd Qu.: 35.00 3rd Qu.:2.000 3rd Qu.:0.0000
## Max. :360.00 Max. :2.000 Max. :7.0000
##
## p28_7esp p29 p30 p31
## Length:3996 Min. :0.000 Min. :0.000 Min. :0.000
## Class :character 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Mode :character Median :2.000 Median :3.000 Median :1.000
## Mean :1.713 Mean :2.052 Mean :1.095
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :4.000 Max. :4.000 Max. :2.000
##
## p32 p32_6esp p33_1 p33_2
## Min. :0.0000 Length:3996 Min. :0.00 Min. :0.000
## 1st Qu.:0.0000 Class :character 1st Qu.:2.00 1st Qu.:2.000
## Median :0.0000 Mode :character Median :2.00 Median :2.000
## Mean :0.7645 Mean :1.75 Mean :1.835
## 3rd Qu.:1.0000 3rd Qu.:2.00 3rd Qu.:2.000
## Max. :6.0000 Max. :2.00 Max. :2.000
##
## p33_3 p33_4 p34_1 p34_2 p34_3
## Min. :0.000 Min. :0.000 Min. :0.00 Min. :0.000 Min. :0.00
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:1.00
## Median :2.000 Median :2.000 Median :2.00 Median :1.000 Median :2.00
## Mean :1.871 Mean :1.796 Mean :1.69 Mean :1.473 Mean :1.64
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.00 3rd Qu.:2.000 3rd Qu.:2.00
## Max. :2.000 Max. :2.000 Max. :3.00 Max. :3.000 Max. :3.00
##
## p34_3_1 p34_4 p34_4_1 p35
## Min. :0.000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.000 Median :1.000 Median :1.0000 Median :1.0000
## Mean :0.538 Mean :1.421 Mean :0.9802 Mean :0.9845
## 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :3.000 Max. :3.000 Max. :6.0000 Max. :2.0000
##
## p36_1 p36_2 p36_3 p36_4
## Min. :0.00 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :1.00 Median :1.000 Median :1.000 Median :1.000
## Mean :1.39 Mean :1.197 Mean :1.313 Mean :1.409
## 3rd Qu.:2.00 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :3.00 Max. :3.000 Max. :3.000 Max. :3.000
##
## factor h_lec mat_lec perslec
## Min. : 2852 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.: 12890 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000
## Median : 17615 Median :1.000 Median :3.000 Median :1.000
## Mean : 20255 Mean :2.093 Mean :2.734 Mean :1.271
## 3rd Qu.: 25718 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:2.000
## Max. :124941 Max. :4.000 Max. :4.000 Max. :2.000
## NA's :60 NA's :60 NA's :60
## edad sexo anio nivel
## Min. :18.00 Min. :1.000 Min. :2019 Min. : 0.000
## 1st Qu.:32.00 1st Qu.:1.000 1st Qu.:2019 1st Qu.: 3.000
## Median :45.00 Median :2.000 Median :2024 Median : 4.000
## Mean :45.79 Mean :1.567 Mean :2022 Mean : 4.515
## 3rd Qu.:58.00 3rd Qu.:2.000 3rd Qu.:2024 3rd Qu.: 7.000
## Max. :95.00 Max. :2.000 Max. :2024 Max. :99.000
##
# Number of missing values in each column of the combined table.
# vapply() guarantees a named integer vector regardless of the input, whereas
# sapply() changes its return type on empty or all-non-numeric input.
na_por_variable <- vapply(
  datos_combinados,
  function(x) sum(is.na(x)),
  integer(1)
)
# Number of exact zeros in each numeric column (NAs ignored); non-numeric
# columns are reported as NA because a zero count is not meaningful for them.
ceros_por_variable <- vapply(
  datos_combinados,
  function(x) {
    if (is.numeric(x)) sum(x == 0, na.rm = TRUE) else NA_integer_
  },
  integer(1)
)
# One row per column: its name, its NA count and its zero count. Row names are
# inherited from the named count vectors.
resumen <- data.frame(
  Variable = names(datos_combinados),
  NAs = na_por_variable,
  Ceros = ceros_por_variable
)
resumen
## Variable NAs Ceros
## cd cd 0 0
## periodo periodo 0 0
## folio folio 0 NA
## entidad entidad 0 0
## control control 0 0
## viv_sel viv_sel 0 0
## num_hog num_hog 0 0
## hog_mud hog_mud 0 3842
## p1 p1 0 0
## p2 p2 0 107
## p3_1 p3_1 0 107
## p3_2 p3_2 0 107
## p3_3 p3_3 0 107
## p3_4 p3_4 0 107
## p3_5 p3_5 0 107
## p4 p4 0 2377
## p5 p5 0 2377
## p5_6esp p5_6esp 0 NA
## p6_1 p6_1 0 2377
## p6_2 p6_2 0 2377
## p6_3 p6_3 0 2377
## p6_4 p6_4 0 2377
## p6_5 p6_5 0 2377
## p6_6 p6_6 0 2377
## p6_6esp p6_6esp 0 NA
## p7 p7 0 2377
## p7_3 p7_3 0 3381
## p8_1 p8_1 0 2377
## p8_2 p8_2 0 2377
## p9 p9 0 2377
## p9_5esp p9_5esp 0 NA
## p10 p10 0 2927
## p11 p11 0 2927
## p11_6esp p11_6esp 0 NA
## p12_1 p12_1 0 2927
## p12_2 p12_2 0 2927
## p12_3 p12_3 0 2927
## p12_4 p12_4 0 2927
## p12_5 p12_5 0 2927
## p12_6 p12_6 0 2927
## p12_7 p12_7 0 2927
## p12_8 p12_8 0 2927
## p12_9 p12_9 0 2927
## p12_9esp p12_9esp 0 NA
## p13 p13 0 2927
## p13_3 p13_3 0 3633
## p14_1 p14_1 0 2927
## p14_2 p14_2 0 2927
## p15 p15 0 2927
## p15_5esp p15_5esp 0 NA
## p16 p16 0 2976
## p17 p17 0 2976
## p17_6esp p17_6esp 0 NA
## p18_1 p18_1 0 2976
## p18_2 p18_2 0 2976
## p18_3 p18_3 0 2976
## p18_4 p18_4 0 2976
## p18_5 p18_5 0 2976
## p19 p19 0 2976
## p19_3 p19_3 0 3516
## p20_1 p20_1 0 2976
## p20_2 p20_2 0 2976
## p21 p21 0 2976
## p21_5esp p21_5esp 0 NA
## p22 p22 0 3830
## p23_1 p23_1 0 3830
## p23_2 p23_2 0 3830
## p24 p24 0 2552
## p25 p25 0 2552
## p25_6esp p25_6esp 0 NA
## p26 p26 0 1222
## p27 p27 0 1222
## p28 p28 0 3545
## p28_7esp p28_7esp 0 NA
## p29 p29 0 1222
## p30 p30 0 1222
## p31 p31 0 1222
## p32 p32 0 2881
## p32_6esp p32_6esp 0 NA
## p33_1 p33_1 0 107
## p33_2 p33_2 0 107
## p33_3 p33_3 0 107
## p33_4 p33_4 0 107
## p34_1 p34_1 0 107
## p34_2 p34_2 0 107
## p34_3 p34_3 0 107
## p34_3_1 p34_3_1 0 2709
## p34_4 p34_4 0 107
## p34_4_1 p34_4_1 0 1777
## p35 p35 0 107
## p36_1 p36_1 0 152
## p36_2 p36_2 0 152
## p36_3 p36_3 0 152
## p36_4 p36_4 0 152
## factor factor 0 0
## h_lec h_lec 60 47
## mat_lec mat_lec 60 47
## perslec perslec 60 47
## edad edad 0 0
## sexo sexo 0 0
## anio anio 0 0
## nivel nivel 0 102
Nota: Se omitieron las variables p34_3_1 y p34_4_1 por tener una gran cantidad de ceros.
# Columns kept for the exported data set.
# NOTE(review): `anio` is not in this selection, so the exported rows carry no
# explicit year marker — confirm this is intended.
variables_seleccionadas <- c(
  "folio", "entidad",
  "p1", "p2", "p5", "p11", "p17",
  "p34_1", "p34_2", "p34_3", "p34_4",
  "p35",
  "p36_1", "p36_2", "p36_3", "p36_4"
)
# Keep only the selected columns of the combined table.
datos_equipo7 <- datos_combinados[variables_seleccionadas]
# Write the selection as CSV without a row-name column.
# NOTE(review): the output file name has no `.csv` extension — confirm this is
# what downstream readers expect.
write.csv(x = datos_equipo7, file = "DatosEquipo7", row.names = FALSE)