library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the two CSV files; text columns are kept as character vectors
# instead of being converted to factors.
datos2019 <- read.csv(
  file = "Datos_molec_2019-1.csv",
  stringsAsFactors = FALSE
)
datos2024 <- read.csv(
  file = "Datos_molec_2024-1.csv",
  stringsAsFactors = FALSE
)

# Print the dimensions (rows, columns) of each table.
dim(datos2019)
## [1] 1980  108
dim(datos2024)
## [1] 2016  108
# Column names of each year's table.
variables2019 <- names(datos2019)
variables2024 <- names(datos2024)

# Columns that appear in only one of the two tables.
variables_solo_2019 <- setdiff(variables2019, variables2024)
variables_solo_2024 <- setdiff(variables2024, variables2019)

# Columns shared by both tables; printed for inspection.
variables_comunes <- intersect(variables2019, variables2024)
variables_comunes
##   [1] "cd"       "periodo"  "folio"    "entidad"  "control"  "viv_sel" 
##   [7] "num_hog"  "hog_mud"  "p1"       "p2"       "p3_1"     "p3_2"    
##  [13] "p3_3"     "p3_4"     "p3_5"     "p4"       "p5"       "p5_6esp" 
##  [19] "p6_1"     "p6_2"     "p6_3"     "p6_4"     "p6_5"     "p6_6"    
##  [25] "p6_6esp"  "p7"       "p7_3"     "p8_1"     "p8_2"     "p9"      
##  [31] "p9_5esp"  "p10"      "p11"      "p11_6esp" "p12_1"    "p12_2"   
##  [37] "p12_3"    "p12_4"    "p12_5"    "p12_6"    "p12_7"    "p12_8"   
##  [43] "p12_9"    "p12_9esp" "p13"      "p13_3"    "p14_1"    "p14_2"   
##  [49] "p15"      "p15_5esp" "p16"      "p17"      "p17_6esp" "p18_1"   
##  [55] "p18_2"    "p18_3"    "p18_4"    "p18_5"    "p19"      "p19_3"   
##  [61] "p20_1"    "p20_2"    "p21"      "p21_5esp" "p22"      "p23_1"   
##  [67] "p23_2"    "p24"      "p25"      "p25_6esp" "p26"      "p27"     
##  [73] "p28"      "p28_7esp" "p29"      "p30"      "p31"      "p32"     
##  [79] "p32_6esp" "p33_1"    "p33_2"    "p33_3"    "p33_4"    "p34_1"   
##  [85] "p34_2"    "p34_3"    "p34_3_1"  "p34_4"    "p34_4_1"  "p35"     
##  [91] "p36_1"    "p36_2"    "p36_3"    "p36_4"    "factor"   "h_lec"   
##  [97] "mat_lec"  "perslec"  "edad"     "sexo"     "anio"     "nivel"
# 2019: keep only the shared columns and stamp every row with its year.
datos2019_comunes <- datos2019[, variables_comunes]
datos2019_comunes[["anio"]] <- 2019

# 2024: same treatment, so both tables end up with identical columns.
datos2024_comunes <- datos2024[, variables_comunes]
datos2024_comunes[["anio"]] <- 2024

# Stack the two years (2019 rows first) and print per-column summaries.
datos_combinados <- rbind(datos2019_comunes, datos2024_comunes)
summary(datos_combinados)
##        cd           periodo         folio              entidad     
##  Min.   : 1.00   Min.   :219.0   Length:3996        Min.   : 1.00  
##  1st Qu.: 2.00   1st Qu.:219.0   Class :character   1st Qu.: 9.00  
##  Median : 9.00   Median :224.0   Mode  :character   Median :15.00  
##  Mean   :13.85   Mean   :221.5                      Mean   :15.66  
##  3rd Qu.:25.00   3rd Qu.:224.0                      3rd Qu.:21.00  
##  Max.   :43.00   Max.   :224.0                      Max.   :32.00  
##                                                                    
##     control         viv_sel         num_hog     hog_mud              p1       
##  Min.   :40001   Min.   :1.000   Min.   :1   Min.   :0.00000   Min.   :1.000  
##  1st Qu.:40130   1st Qu.:2.000   1st Qu.:1   1st Qu.:0.00000   1st Qu.:1.000  
##  Median :40198   Median :3.000   Median :1   Median :0.00000   Median :1.000  
##  Mean   :40267   Mean   :2.497   Mean   :1   Mean   :0.04079   Mean   :1.027  
##  3rd Qu.:40356   3rd Qu.:3.000   3rd Qu.:1   3rd Qu.:0.00000   3rd Qu.:1.000  
##  Max.   :41420   Max.   :4.000   Max.   :1   Max.   :2.00000   Max.   :2.000  
##                                                                               
##        p2             p3_1            p3_2            p3_3      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :1.000   Median :2.000   Median :2.000   Median :2.000  
##  Mean   :1.378   Mean   :1.541   Mean   :1.679   Mean   :1.691  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :2.000   Max.   :2.000   Max.   :2.000   Max.   :2.000  
##                                                                 
##       p3_4            p3_5             p4               p5       
##  Min.   :0.000   Min.   :0.000   Min.   : 0.000   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.: 0.000   1st Qu.:0.000  
##  Median :2.000   Median :2.000   Median : 0.000   Median :0.000  
##  Mean   :1.905   Mean   :1.585   Mean   : 1.305   Mean   :1.359  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.: 2.000   3rd Qu.:3.000  
##  Max.   :2.000   Max.   :2.000   Max.   :70.000   Max.   :6.000  
##                                                                  
##    p5_6esp               p6_1             p6_2             p6_3       
##  Length:3996        Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  Class :character   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Mode  :character   Median :0.0000   Median :0.0000   Median :0.0000  
##                     Mean   :0.7785   Mean   :0.6874   Mean   :0.6869  
##                     3rd Qu.:2.0000   3rd Qu.:2.0000   3rd Qu.:2.0000  
##                     Max.   :2.0000   Max.   :2.0000   Max.   :2.0000  
##                                                                       
##       p6_4             p6_5             p6_6          p6_6esp         
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Length:3996       
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   Class :character  
##  Median :0.0000   Median :0.0000   Median :0.0000   Mode  :character  
##  Mean   :0.6461   Mean   :0.7137   Mean   :0.8091                     
##  3rd Qu.:1.0000   3rd Qu.:2.0000   3rd Qu.:2.0000                     
##  Max.   :2.0000   Max.   :2.0000   Max.   :2.0000                     
##                                                                       
##        p7              p7_3             p8_1             p8_2       
##  Min.   :0.0000   Min.   :     0   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:     0   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :     0   Median :0.0000   Median :0.0000  
##  Mean   :0.8974   Mean   :  1669   Mean   :0.7297   Mean   :0.4667  
##  3rd Qu.:2.0000   3rd Qu.:     0   3rd Qu.:2.0000   3rd Qu.:1.0000  
##  Max.   :3.0000   Max.   :999999   Max.   :2.0000   Max.   :2.0000  
##                                                                     
##        p9           p9_5esp               p10               p11        
##  Min.   :0.0000   Length:3996        Min.   : 0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   Class :character   1st Qu.: 0.0000   1st Qu.:0.0000  
##  Median :0.0000   Mode  :character   Median : 0.0000   Median :0.0000  
##  Mean   :0.4687                      Mean   : 0.9955   Mean   :0.9422  
##  3rd Qu.:1.0000                      3rd Qu.: 1.0000   3rd Qu.:1.0000  
##  Max.   :5.0000                      Max.   :99.0000   Max.   :6.0000  
##                                                                        
##    p11_6esp             p12_1            p12_2           p12_3      
##  Length:3996        Min.   :0.0000   Min.   :0.000   Min.   :0.000  
##  Class :character   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.000  
##  Mode  :character   Median :0.0000   Median :0.000   Median :0.000  
##                     Mean   :0.5075   Mean   :0.488   Mean   :0.482  
##                     3rd Qu.:1.0000   3rd Qu.:1.000   3rd Qu.:1.000  
##                     Max.   :2.0000   Max.   :2.000   Max.   :2.000  
##                                                                     
##      p12_4            p12_5            p12_6            p12_7       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.4747   Mean   :0.5015   Mean   :0.4755   Mean   :0.4722  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :2.0000   Max.   :2.0000   Max.   :2.0000   Max.   :2.0000  
##                                                                     
##      p12_8            p12_9         p12_9esp              p13        
##  Min.   :0.0000   Min.   :0.000   Length:3996        Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   Class :character   1st Qu.:0.0000  
##  Median :0.0000   Median :0.000   Mode  :character   Median :0.0000  
##  Mean   :0.4542   Mean   :0.533                      Mean   :0.5911  
##  3rd Qu.:1.0000   3rd Qu.:2.000                      3rd Qu.:1.0000  
##  Max.   :2.0000   Max.   :2.000                      Max.   :3.0000  
##                                                                      
##      p13_3              p14_1            p14_2             p15        
##  Min.   :     0.0   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:     0.0   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :     0.0   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :   264.6   Mean   :0.4947   Mean   :0.3006   Mean   :0.3386  
##  3rd Qu.:     0.0   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :999999.0   Max.   :2.0000   Max.   :2.0000   Max.   :5.0000  
##                                                                       
##    p15_5esp              p16               p17           p17_6esp        
##  Length:3996        Min.   : 0.0000   Min.   :0.0000   Length:3996       
##  Class :character   1st Qu.: 0.0000   1st Qu.:0.0000   Class :character  
##  Mode  :character   Median : 0.0000   Median :0.0000   Mode  :character  
##                     Mean   : 0.8311   Mean   :0.8421                     
##                     3rd Qu.: 1.0000   3rd Qu.:1.0000                     
##                     Max.   :60.0000   Max.   :6.0000                     
##                                                                          
##      p18_1            p18_2            p18_3            p18_4       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.3206   Mean   :0.3471   Mean   :0.3476   Mean   :0.3774  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :2.0000   Max.   :2.0000   Max.   :2.0000   Max.   :2.0000  
##                                                                     
##      p18_5             p19             p19_3              p20_1      
##  Min.   :0.0000   Min.   :0.0000   Min.   :     0.0   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:     0.0   1st Qu.:0.000  
##  Median :0.0000   Median :0.0000   Median :     0.0   Median :0.000  
##  Mean   :0.3073   Mean   :0.6021   Mean   :   756.5   Mean   :0.478  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:     0.0   3rd Qu.:1.000  
##  Max.   :2.0000   Max.   :3.0000   Max.   :999999.0   Max.   :2.000  
##                                                                      
##      p20_2             p21           p21_5esp              p22        
##  Min.   :0.0000   Min.   :0.0000   Length:3996        Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   Class :character   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Mode  :character   Median :0.0000  
##  Mean   :0.2848   Mean   :0.3634                      Mean   :0.1409  
##  3rd Qu.:1.0000   3rd Qu.:1.0000                      3rd Qu.:0.0000  
##  Max.   :2.0000   Max.   :5.0000                      Max.   :5.0000  
##                                                                       
##      p23_1             p23_2              p24             p25       
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.00000   Median :0.00000   Median :0.000   Median :0.000  
##  Mean   :0.06456   Mean   :0.05781   Mean   :0.781   Mean   :1.092  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:1.000   3rd Qu.:3.000  
##  Max.   :2.00000   Max.   :2.00000   Max.   :5.000   Max.   :6.000  
##                                                                     
##    p25_6esp              p26              p27             p28        
##  Length:3996        Min.   :  0.00   Min.   :0.000   Min.   :0.0000  
##  Class :character   1st Qu.:  0.00   1st Qu.:0.000   1st Qu.:0.0000  
##  Mode  :character   Median : 20.00   Median :2.000   Median :0.0000  
##                     Mean   : 27.16   Mean   :1.276   Mean   :0.5688  
##                     3rd Qu.: 35.00   3rd Qu.:2.000   3rd Qu.:0.0000  
##                     Max.   :360.00   Max.   :2.000   Max.   :7.0000  
##                                                                      
##    p28_7esp              p29             p30             p31       
##  Length:3996        Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  Class :character   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Mode  :character   Median :2.000   Median :3.000   Median :1.000  
##                     Mean   :1.713   Mean   :2.052   Mean   :1.095  
##                     3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:2.000  
##                     Max.   :4.000   Max.   :4.000   Max.   :2.000  
##                                                                    
##       p32           p32_6esp             p33_1          p33_2      
##  Min.   :0.0000   Length:3996        Min.   :0.00   Min.   :0.000  
##  1st Qu.:0.0000   Class :character   1st Qu.:2.00   1st Qu.:2.000  
##  Median :0.0000   Mode  :character   Median :2.00   Median :2.000  
##  Mean   :0.7645                      Mean   :1.75   Mean   :1.835  
##  3rd Qu.:1.0000                      3rd Qu.:2.00   3rd Qu.:2.000  
##  Max.   :6.0000                      Max.   :2.00   Max.   :2.000  
##                                                                    
##      p33_3           p33_4           p34_1          p34_2           p34_3     
##  Min.   :0.000   Min.   :0.000   Min.   :0.00   Min.   :0.000   Min.   :0.00  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:1.00   1st Qu.:1.000   1st Qu.:1.00  
##  Median :2.000   Median :2.000   Median :2.00   Median :1.000   Median :2.00  
##  Mean   :1.871   Mean   :1.796   Mean   :1.69   Mean   :1.473   Mean   :1.64  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.00   3rd Qu.:2.000   3rd Qu.:2.00  
##  Max.   :2.000   Max.   :2.000   Max.   :3.00   Max.   :3.000   Max.   :3.00  
##                                                                               
##     p34_3_1          p34_4          p34_4_1            p35        
##  Min.   :0.000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:1.0000  
##  Median :0.000   Median :1.000   Median :1.0000   Median :1.0000  
##  Mean   :0.538   Mean   :1.421   Mean   :0.9802   Mean   :0.9845  
##  3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :3.000   Max.   :3.000   Max.   :6.0000   Max.   :2.0000  
##                                                                   
##      p36_1          p36_2           p36_3           p36_4      
##  Min.   :0.00   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:1.00   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :1.00   Median :1.000   Median :1.000   Median :1.000  
##  Mean   :1.39   Mean   :1.197   Mean   :1.313   Mean   :1.409  
##  3rd Qu.:2.00   3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :3.00   Max.   :3.000   Max.   :3.000   Max.   :3.000  
##                                                                
##      factor           h_lec          mat_lec         perslec     
##  Min.   :  2852   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.: 12890   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median : 17615   Median :1.000   Median :3.000   Median :1.000  
##  Mean   : 20255   Mean   :2.093   Mean   :2.734   Mean   :1.271  
##  3rd Qu.: 25718   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:2.000  
##  Max.   :124941   Max.   :4.000   Max.   :4.000   Max.   :2.000  
##                   NA's   :60      NA's   :60      NA's   :60     
##       edad            sexo            anio          nivel       
##  Min.   :18.00   Min.   :1.000   Min.   :2019   Min.   : 0.000  
##  1st Qu.:32.00   1st Qu.:1.000   1st Qu.:2019   1st Qu.: 3.000  
##  Median :45.00   Median :2.000   Median :2024   Median : 4.000  
##  Mean   :45.79   Mean   :1.567   Mean   :2022   Mean   : 4.515  
##  3rd Qu.:58.00   3rd Qu.:2.000   3rd Qu.:2024   3rd Qu.: 7.000  
##  Max.   :95.00   Max.   :2.000   Max.   :2024   Max.   :99.000  
## 
# Per-column data-quality summary of the combined table.

# Number of missing values in each column. vapply() pins the result to one
# integer per column, so its type cannot silently change the way sapply()'s
# can (for example, sapply() returns a list when there are no columns).
na_por_variable <- vapply(
  datos_combinados,
  function(x) sum(is.na(x)),
  integer(1)
)

# Number of exact zeros in each numeric column. Non-numeric columns get an
# integer NA so the whole vector stays integer-typed.
ceros_por_variable <- vapply(
  datos_combinados,
  function(x) {
    if (is.numeric(x)) sum(x == 0, na.rm = TRUE) else NA_integer_
  },
  integer(1)
)

# One row per column of datos_combinados. The count vectors are named by
# column, and those names also end up as the row names of the table.
resumen <- data.frame(
  Variable = names(datos_combinados),
  NAs = na_por_variable,
  Ceros = ceros_por_variable
)

resumen
##          Variable NAs Ceros
## cd             cd   0     0
## periodo   periodo   0     0
## folio       folio   0    NA
## entidad   entidad   0     0
## control   control   0     0
## viv_sel   viv_sel   0     0
## num_hog   num_hog   0     0
## hog_mud   hog_mud   0  3842
## p1             p1   0     0
## p2             p2   0   107
## p3_1         p3_1   0   107
## p3_2         p3_2   0   107
## p3_3         p3_3   0   107
## p3_4         p3_4   0   107
## p3_5         p3_5   0   107
## p4             p4   0  2377
## p5             p5   0  2377
## p5_6esp   p5_6esp   0    NA
## p6_1         p6_1   0  2377
## p6_2         p6_2   0  2377
## p6_3         p6_3   0  2377
## p6_4         p6_4   0  2377
## p6_5         p6_5   0  2377
## p6_6         p6_6   0  2377
## p6_6esp   p6_6esp   0    NA
## p7             p7   0  2377
## p7_3         p7_3   0  3381
## p8_1         p8_1   0  2377
## p8_2         p8_2   0  2377
## p9             p9   0  2377
## p9_5esp   p9_5esp   0    NA
## p10           p10   0  2927
## p11           p11   0  2927
## p11_6esp p11_6esp   0    NA
## p12_1       p12_1   0  2927
## p12_2       p12_2   0  2927
## p12_3       p12_3   0  2927
## p12_4       p12_4   0  2927
## p12_5       p12_5   0  2927
## p12_6       p12_6   0  2927
## p12_7       p12_7   0  2927
## p12_8       p12_8   0  2927
## p12_9       p12_9   0  2927
## p12_9esp p12_9esp   0    NA
## p13           p13   0  2927
## p13_3       p13_3   0  3633
## p14_1       p14_1   0  2927
## p14_2       p14_2   0  2927
## p15           p15   0  2927
## p15_5esp p15_5esp   0    NA
## p16           p16   0  2976
## p17           p17   0  2976
## p17_6esp p17_6esp   0    NA
## p18_1       p18_1   0  2976
## p18_2       p18_2   0  2976
## p18_3       p18_3   0  2976
## p18_4       p18_4   0  2976
## p18_5       p18_5   0  2976
## p19           p19   0  2976
## p19_3       p19_3   0  3516
## p20_1       p20_1   0  2976
## p20_2       p20_2   0  2976
## p21           p21   0  2976
## p21_5esp p21_5esp   0    NA
## p22           p22   0  3830
## p23_1       p23_1   0  3830
## p23_2       p23_2   0  3830
## p24           p24   0  2552
## p25           p25   0  2552
## p25_6esp p25_6esp   0    NA
## p26           p26   0  1222
## p27           p27   0  1222
## p28           p28   0  3545
## p28_7esp p28_7esp   0    NA
## p29           p29   0  1222
## p30           p30   0  1222
## p31           p31   0  1222
## p32           p32   0  2881
## p32_6esp p32_6esp   0    NA
## p33_1       p33_1   0   107
## p33_2       p33_2   0   107
## p33_3       p33_3   0   107
## p33_4       p33_4   0   107
## p34_1       p34_1   0   107
## p34_2       p34_2   0   107
## p34_3       p34_3   0   107
## p34_3_1   p34_3_1   0  2709
## p34_4       p34_4   0   107
## p34_4_1   p34_4_1   0  1777
## p35           p35   0   107
## p36_1       p36_1   0   152
## p36_2       p36_2   0   152
## p36_3       p36_3   0   152
## p36_4       p36_4   0   152
## factor     factor   0     0
## h_lec       h_lec  60    47
## mat_lec   mat_lec  60    47
## perslec   perslec  60    47
## edad         edad   0     0
## sexo         sexo   0     0
## anio         anio   0     0
## nivel       nivel   0   102

Nota: Se omitieron las variables p34_3_1 y p34_4_1 por tener una gran cantidad de ceros (2709 y 1777 de 3996 registros, respectivamente).

# Columns kept for the team's working data set: two identifier columns, a
# handful of single questions, and the p34_1..p34_4 / p35 / p36_1..p36_4
# items.
# NOTE(review): neither "anio" nor "periodo" is selected, so unless "folio"
# encodes the year, 2019 and 2024 rows cannot be told apart in the export —
# confirm this is intended.
variables_seleccionadas <- c(
  "folio", "entidad",
  "p1", "p2", "p5", "p11", "p17",
  paste0("p34_", 1:4),
  "p35",
  paste0("p36_", 1:4)
)

datos_equipo7 <- datos_combinados[, variables_seleccionadas]

# Export without row names.
# NOTE(review): the output file name has no ".csv" extension — confirm
# downstream readers expect exactly "DatosEquipo7".
write.csv(x = datos_equipo7, file = "DatosEquipo7", row.names = FALSE)