AVANCE 1

Librerías

library(readr)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(purrr)
## Warning: package 'purrr' was built under R version 4.4.3
library(stringr)
library(janitor)
## Warning: package 'janitor' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ tibble    3.2.1
## ✔ ggplot2   3.5.2     ✔ tidyr     1.3.1
## ✔ lubridate 1.9.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(forcats)
library(scales)
## Warning: package 'scales' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

Bases de datos

m20 <- read.csv("Datos_molec_2020-1.csv")
m22 <- read.csv("Datos_molec_2022-1.csv")
m24<- read.csv("Datos_molec_2024-1.csv")

Exploración de bases de datos

# Año 2020
str(m20)     
## 'data.frame':    2010 obs. of  108 variables:
##  $ folio   : chr  "11A192" "11A192" "11A192" "12A192" ...
##  $ entidad : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ control : int  40060 40060 40060 40091 40091 40131 40131 40131 40131 40132 ...
##  $ viv_sel : int  1 3 2 4 1 2 3 4 1 1 ...
##  $ num_hog : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hog_mud : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_ren_el: int  1 4 2 1 2 1 1 1 1 1 ...
##  $ cd      : int  14 14 14 14 14 14 14 14 14 14 ...
##  $ periodo : int  220 220 220 220 220 220 220 220 220 220 ...
##  $ sexo    : int  1 2 2 1 2 2 1 1 1 1 ...
##  $ edad    : int  48 41 55 55 60 48 67 40 43 68 ...
##  $ anio    : int  6 3 3 5 3 3 3 3 2 6 ...
##  $ nivel   : int  2 3 3 7 6 3 6 3 2 2 ...
##  $ cond_act: int  1 7 1 1 7 1 1 1 1 8 ...
##  $ p1      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ p2      : int  1 2 2 1 1 1 1 1 2 1 ...
##  $ p3_1    : int  1 2 2 1 1 2 2 2 2 1 ...
##  $ p3_2    : int  1 2 2 2 2 1 2 2 2 2 ...
##  $ p3_3    : int  1 2 2 1 2 2 1 1 2 2 ...
##  $ p3_4    : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p3_5    : int  1 1 2 1 2 1 2 1 2 2 ...
##  $ p4      : int  1 0 0 2 2 0 0 0 0 1 ...
##  $ p5      : int  4 0 0 4 4 0 0 0 0 4 ...
##  $ p5_6esp : chr  "" "" "" "" ...
##  $ p6_1    : int  2 0 0 2 2 0 0 0 0 2 ...
##  $ p6_2    : int  2 0 0 2 2 0 0 0 0 2 ...
##  $ p6_3    : int  2 0 0 2 1 0 0 0 0 2 ...
##  $ p6_4    : int  1 0 0 1 2 0 0 0 0 1 ...
##  $ p6_5    : int  2 0 0 2 2 0 0 0 0 2 ...
##  $ p6_6    : int  2 0 0 2 2 0 0 0 0 2 ...
##  $ p6_6esp : chr  "" "" "" "" ...
##  $ p7      : int  2 0 0 2 2 0 0 0 0 2 ...
##  $ p7_3    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p8_1    : int  2 0 0 2 2 0 0 0 0 2 ...
##  $ p8_2    : int  1 0 0 1 1 0 0 0 0 1 ...
##  $ p9      : int  1 0 0 1 1 0 0 0 0 1 ...
##  $ p9_5esp : chr  "" "" "" "" ...
##  $ p10     : int  2 0 0 0 0 2 0 0 0 0 ...
##  $ p11     : int  4 0 0 0 0 4 0 0 0 0 ...
##  $ p11_6esp: chr  "" "" "" "" ...
##  $ p12_1   : int  2 0 0 0 0 2 0 0 0 0 ...
##  $ p12_2   : int  2 0 0 0 0 2 0 0 0 0 ...
##  $ p12_3   : int  2 0 0 0 0 2 0 0 0 0 ...
##  $ p12_4   : int  1 0 0 0 0 1 0 0 0 0 ...
##  $ p12_5   : int  2 0 0 0 0 2 0 0 0 0 ...
##  $ p12_6   : int  2 0 0 0 0 2 0 0 0 0 ...
##  $ p12_7   : int  1 0 0 0 0 2 0 0 0 0 ...
##  $ p12_8   : int  2 0 0 0 0 1 0 0 0 0 ...
##  $ p12_9   : int  2 0 0 0 0 2 0 0 0 0 ...
##  $ p12_9esp: chr  "" "" "" "" ...
##  $ p13     : int  2 0 0 0 0 2 0 0 0 0 ...
##  $ p13_3   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p14_1   : int  2 0 0 0 0 2 0 0 0 0 ...
##  $ p14_2   : int  1 0 0 0 0 1 0 0 0 0 ...
##  $ p15     : int  1 0 0 0 0 2 0 0 0 0 ...
##  $ p15_5esp: chr  "" "" "" "" ...
##  $ p16     : int  1 0 0 1 0 0 1 1 0 0 ...
##  $ p17     : int  4 0 0 4 0 0 3 4 0 0 ...
##  $ p17_6esp: chr  "" "" "" "" ...
##  $ p18_1   : int  2 0 0 1 0 0 1 2 0 0 ...
##  $ p18_2   : int  1 0 0 1 0 0 1 1 0 0 ...
##  $ p18_3   : int  2 0 0 1 0 0 2 2 0 0 ...
##  $ p18_4   : int  1 0 0 1 0 0 2 2 0 0 ...
##  $ p18_5   : int  1 0 0 1 0 0 1 1 0 0 ...
##  $ p19     : int  2 0 0 2 0 0 2 2 0 0 ...
##  $ p19_3   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p20_1   : int  2 0 0 2 0 0 2 2 0 0 ...
##  $ p20_2   : int  1 0 0 1 0 0 1 1 0 0 ...
##  $ p21     : int  2 0 0 2 0 0 2 2 0 0 ...
##  $ p21_5esp: chr  "" "" "" "" ...
##  $ p22     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p23_1   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p23_2   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p24     : int  5 5 0 2 0 3 0 1 0 0 ...
##  $ p25     : int  4 4 0 4 0 4 0 4 0 0 ...
##  $ p25_6esp: chr  "" "" "" "" ...
##  $ p26     : int  30 10 0 20 20 30 20 10 0 60 ...
##  $ p27     : int  2 2 0 2 2 2 1 2 0 2 ...
##  $ p28     : int  0 0 0 0 0 0 6 0 0 0 ...
##  $ p28_7esp: chr  "" "" "" "" ...
##  $ p29     : int  2 2 0 2 2 4 3 4 0 3 ...
##  $ p30     : int  3 3 0 3 3 4 3 4 0 3 ...
##  $ p31     : int  2 2 0 1 2 2 2 2 0 2 ...
##  $ p32     : int  0 0 3 0 0 0 0 0 3 0 ...
##  $ p32_6esp: chr  "" "" "" "" ...
##  $ p33_1   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p33_2   : int  2 2 2 2 2 2 1 2 2 2 ...
##  $ p33_3   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p33_4   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p34_1   : int  2 2 2 1 2 3 3 3 3 1 ...
##  $ p34_2   : int  2 2 2 1 2 3 3 1 3 1 ...
##  $ p34_3   : int  2 2 2 2 2 1 3 3 3 1 ...
##  $ p34_3_1 : int  0 0 0 0 0 2 0 0 0 1 ...
##  $ p34_4   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ p34_4_1 : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ p35     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ p36_1   : int  1 1 2 1 2 1 3 3 3 3 ...
##  $ p36_2   : int  1 1 1 1 2 1 3 3 3 3 ...
##  $ p36_3   : int  1 1 1 1 2 1 1 1 3 1 ...
##   [list output truncated]
names(m20)   
##   [1] "folio"    "entidad"  "control"  "viv_sel"  "num_hog"  "hog_mud" 
##   [7] "n_ren_el" "cd"       "periodo"  "sexo"     "edad"     "anio"    
##  [13] "nivel"    "cond_act" "p1"       "p2"       "p3_1"     "p3_2"    
##  [19] "p3_3"     "p3_4"     "p3_5"     "p4"       "p5"       "p5_6esp" 
##  [25] "p6_1"     "p6_2"     "p6_3"     "p6_4"     "p6_5"     "p6_6"    
##  [31] "p6_6esp"  "p7"       "p7_3"     "p8_1"     "p8_2"     "p9"      
##  [37] "p9_5esp"  "p10"      "p11"      "p11_6esp" "p12_1"    "p12_2"   
##  [43] "p12_3"    "p12_4"    "p12_5"    "p12_6"    "p12_7"    "p12_8"   
##  [49] "p12_9"    "p12_9esp" "p13"      "p13_3"    "p14_1"    "p14_2"   
##  [55] "p15"      "p15_5esp" "p16"      "p17"      "p17_6esp" "p18_1"   
##  [61] "p18_2"    "p18_3"    "p18_4"    "p18_5"    "p19"      "p19_3"   
##  [67] "p20_1"    "p20_2"    "p21"      "p21_5esp" "p22"      "p23_1"   
##  [73] "p23_2"    "p24"      "p25"      "p25_6esp" "p26"      "p27"     
##  [79] "p28"      "p28_7esp" "p29"      "p30"      "p31"      "p32"     
##  [85] "p32_6esp" "p33_1"    "p33_2"    "p33_3"    "p33_4"    "p34_1"   
##  [91] "p34_2"    "p34_3"    "p34_3_1"  "p34_4"    "p34_4_1"  "p35"     
##  [97] "p36_1"    "p36_2"    "p36_3"    "p36_4"    "factor"   "h_lec"   
## [103] "mat_lec"  "perslec"  "l_format" "r_format" "p_format" "perslecl"
head(m20)    
##    folio entidad control viv_sel num_hog hog_mud n_ren_el cd periodo sexo edad
## 1 11A192       1   40060       1       1       0        1 14     220    1   48
## 2 11A192       1   40060       3       1       0        4 14     220    2   41
## 3 11A192       1   40060       2       1       0        2 14     220    2   55
## 4 12A192       1   40091       4       1       0        1 14     220    1   55
## 5 12A192       1   40091       1       1       0        2 14     220    2   60
## 6 11B176       1   40131       2       1       0        1 14     220    2   48
##   anio nivel cond_act p1 p2 p3_1 p3_2 p3_3 p3_4 p3_5 p4 p5 p5_6esp p6_1 p6_2
## 1    6     2        1  1  1    1    1    1    2    1  1  4            2    2
## 2    3     3        7  1  2    2    2    2    2    1  0  0            0    0
## 3    3     3        1  1  2    2    2    2    2    2  0  0            0    0
## 4    5     7        1  1  1    1    2    1    2    1  2  4            2    2
## 5    3     6        7  1  1    1    2    2    2    2  2  4            2    2
## 6    3     3        1  1  1    2    1    2    2    1  0  0            0    0
##   p6_3 p6_4 p6_5 p6_6 p6_6esp p7 p7_3 p8_1 p8_2 p9 p9_5esp p10 p11 p11_6esp
## 1    2    1    2    2          2    0    2    1  1           2   4         
## 2    0    0    0    0          0    0    0    0  0           0   0         
## 3    0    0    0    0          0    0    0    0  0           0   0         
## 4    2    1    2    2          2    0    2    1  1           0   0         
## 5    1    2    2    2          2    0    2    1  1           0   0         
## 6    0    0    0    0          0    0    0    0  0           2   4         
##   p12_1 p12_2 p12_3 p12_4 p12_5 p12_6 p12_7 p12_8 p12_9 p12_9esp p13 p13_3
## 1     2     2     2     1     2     2     1     2     2            2     0
## 2     0     0     0     0     0     0     0     0     0            0     0
## 3     0     0     0     0     0     0     0     0     0            0     0
## 4     0     0     0     0     0     0     0     0     0            0     0
## 5     0     0     0     0     0     0     0     0     0            0     0
## 6     2     2     2     1     2     2     2     1     2            2     0
##   p14_1 p14_2 p15 p15_5esp p16 p17 p17_6esp p18_1 p18_2 p18_3 p18_4 p18_5 p19
## 1     2     1   1            1   4              2     1     2     1     1   2
## 2     0     0   0            0   0              0     0     0     0     0   0
## 3     0     0   0            0   0              0     0     0     0     0   0
## 4     0     0   0            1   4              1     1     1     1     1   2
## 5     0     0   0            0   0              0     0     0     0     0   0
## 6     2     1   2            0   0              0     0     0     0     0   0
##   p19_3 p20_1 p20_2 p21 p21_5esp p22 p23_1 p23_2 p24 p25 p25_6esp p26 p27 p28
## 1     0     2     1   2            0     0     0   5   4           30   2   0
## 2     0     0     0   0            0     0     0   5   4           10   2   0
## 3     0     0     0   0            0     0     0   0   0            0   0   0
## 4     0     2     1   2            0     0     0   2   4           20   2   0
## 5     0     0     0   0            0     0     0   0   0           20   2   0
## 6     0     0     0   0            0     0     0   3   4           30   2   0
##   p28_7esp p29 p30 p31 p32 p32_6esp p33_1 p33_2 p33_3 p33_4 p34_1 p34_2 p34_3
## 1            2   3   2   0              2     2     2     2     2     2     2
## 2            2   3   2   0              2     2     2     2     2     2     2
## 3            0   0   0   3              2     2     2     2     2     2     2
## 4            2   3   1   0              2     2     2     2     1     1     2
## 5            2   3   2   0              2     2     2     2     2     2     2
## 6            4   4   2   0              2     2     2     2     3     3     1
##   p34_3_1 p34_4 p34_4_1 p35 p36_1 p36_2 p36_3 p36_4 factor h_lec mat_lec
## 1       0     1       1   1     1     1     1     2  17463     1       2
## 2       0     1       1   1     1     1     1     1  37353     3       3
## 3       0     1       1   1     2     1     1     2  20668     4       4
## 4       0     1       1   1     1     1     1     1  30309     1       2
## 5       0     1       1   1     2     2     2     2  13886     1       1
## 6       2     1       1   1     1     1     1     1  33374     1       3
##   perslec l_format r_format p_format perslecl
## 1       1        2        2        2        1
## 2       1        0        0        0        2
## 3       2        0        0        0        2
## 4       1        2        0        2        1
## 5       1        2        0        0        1
## 6       1        0        2        0        1
#View(m20)    
dim(m20) 
## [1] 2010  108
summary(m20)
##     folio              entidad        control         viv_sel         num_hog 
##  Length:2010        Min.   : 1.0   Min.   :40025   Min.   :1.000   Min.   :1  
##  Class :character   1st Qu.: 9.0   1st Qu.:40153   1st Qu.:2.000   1st Qu.:1  
##  Mode  :character   Median :15.0   Median :40248   Median :3.000   Median :1  
##                     Mean   :15.6   Mean   :40307   Mean   :2.517   Mean   :1  
##                     3rd Qu.:20.0   3rd Qu.:40398   3rd Qu.:4.000   3rd Qu.:1  
##                     Max.   :32.0   Max.   :41420   Max.   :4.000   Max.   :1  
##                                                                               
##     hog_mud           n_ren_el            cd           periodo   
##  Min.   :0.00000   Min.   : 1.000   Min.   : 1.00   Min.   :220  
##  1st Qu.:0.00000   1st Qu.: 1.000   1st Qu.: 2.00   1st Qu.:220  
##  Median :0.00000   Median : 1.000   Median : 9.00   Median :220  
##  Mean   :0.04726   Mean   : 1.734   Mean   :14.01   Mean   :220  
##  3rd Qu.:0.00000   3rd Qu.: 2.000   3rd Qu.:25.00   3rd Qu.:220  
##  Max.   :2.00000   Max.   :10.000   Max.   :43.00   Max.   :220  
##                                                                  
##       sexo            edad            anio          nivel       
##  Min.   :1.000   Min.   :18.00   Min.   :1.00   Min.   : 0.000  
##  1st Qu.:1.000   1st Qu.:31.00   1st Qu.:3.00   1st Qu.: 3.000  
##  Median :2.000   Median :44.00   Median :3.00   Median : 4.000  
##  Mean   :1.552   Mean   :45.49   Mean   :3.47   Mean   : 4.418  
##  3rd Qu.:2.000   3rd Qu.:58.00   3rd Qu.:4.00   3rd Qu.: 7.000  
##  Max.   :2.000   Max.   :94.00   Max.   :6.00   Max.   :99.000  
##                                  NA's   :65                     
##     cond_act            p1              p2             p3_1      
##  Min.   : 1.000   Min.   :1.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.: 1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median : 1.000   Median :1.000   Median :1.000   Median :2.000  
##  Mean   : 3.289   Mean   :1.026   Mean   :1.367   Mean   :1.545  
##  3rd Qu.: 7.000   3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :99.000   Max.   :2.000   Max.   :2.000   Max.   :2.000  
##                                                                  
##       p3_2            p3_3            p3_4            p3_5      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :2.000   Median :2.000   Median :2.000   Median :2.000  
##  Mean   :1.638   Mean   :1.655   Mean   :1.904   Mean   :1.588  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :2.000   Max.   :2.000   Max.   :2.000   Max.   :2.000  
##                                                                 
##        p4               p5         p5_6esp               p6_1       
##  Min.   : 0.000   Min.   :0.00   Length:2010        Min.   :0.0000  
##  1st Qu.: 0.000   1st Qu.:0.00   Class :character   1st Qu.:0.0000  
##  Median : 0.000   Median :0.00   Mode  :character   Median :0.0000  
##  Mean   : 1.428   Mean   :1.33                      Mean   :0.7781  
##  3rd Qu.: 2.000   3rd Qu.:3.00                      3rd Qu.:2.0000  
##  Max.   :60.000   Max.   :6.00                      Max.   :2.0000  
##                                                                     
##       p6_2             p6_3             p6_4             p6_5      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.000  
##  Mean   :0.6801   Mean   :0.6861   Mean   :0.6363   Mean   :0.696  
##  3rd Qu.:2.0000   3rd Qu.:2.0000   3rd Qu.:1.0000   3rd Qu.:2.000  
##  Max.   :2.0000   Max.   :2.0000   Max.   :2.0000   Max.   :2.000  
##                                                                    
##       p6_6         p6_6esp                p7              p7_3       
##  Min.   :0.000   Length:2010        Min.   :0.0000   Min.   :     0  
##  1st Qu.:0.000   Class :character   1st Qu.:0.0000   1st Qu.:     0  
##  Median :0.000   Mode  :character   Median :0.0000   Median :     0  
##  Mean   :0.805                      Mean   :0.9224   Mean   :  1631  
##  3rd Qu.:2.000                      3rd Qu.:2.0000   3rd Qu.:     0  
##  Max.   :2.000                      Max.   :3.0000   Max.   :999999  
##                                                                      
##       p8_1             p8_2              p9           p9_5esp         
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Length:2010       
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   Class :character  
##  Median :0.0000   Median :0.0000   Median :0.0000   Mode  :character  
##  Mean   :0.7473   Mean   :0.4468   Mean   :0.4905                     
##  3rd Qu.:2.0000   3rd Qu.:1.0000   3rd Qu.:1.0000                     
##  Max.   :2.0000   Max.   :2.0000   Max.   :5.0000                     
##                                                                       
##       p10             p11          p11_6esp             p12_1      
##  Min.   : 0.00   Min.   :0.000   Length:2010        Min.   :0.000  
##  1st Qu.: 0.00   1st Qu.:0.000   Class :character   1st Qu.:0.000  
##  Median : 0.00   Median :0.000   Mode  :character   Median :0.000  
##  Mean   : 1.12   Mean   :1.077                      Mean   :0.594  
##  3rd Qu.: 2.00   3rd Qu.:3.000                      3rd Qu.:2.000  
##  Max.   :90.00   Max.   :6.000                      Max.   :2.000  
##                                                                    
##      p12_2            p12_3            p12_4            p12_5      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.000  
##  Mean   :0.5582   Mean   :0.5697   Mean   :0.5348   Mean   :0.595  
##  3rd Qu.:1.0000   3rd Qu.:2.0000   3rd Qu.:1.0000   3rd Qu.:2.000  
##  Max.   :2.0000   Max.   :2.0000   Max.   :2.0000   Max.   :2.000  
##                                                                    
##      p12_6            p12_7            p12_8            p12_9       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.5473   Mean   :0.5443   Mean   :0.5209   Mean   :0.6194  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:2.0000  
##  Max.   :2.0000   Max.   :2.0000   Max.   :2.0000   Max.   :2.0000  
##                                                                     
##    p12_9esp              p13             p13_3             p14_1       
##  Length:2010        Min.   :0.0000   Min.   :   0.00   Min.   :0.0000  
##  Class :character   1st Qu.:0.0000   1st Qu.:   0.00   1st Qu.:0.0000  
##  Mode  :character   Median :0.0000   Median :   0.00   Median :0.0000  
##                     Mean   :0.7144   Mean   :  19.04   Mean   :0.5841  
##                     3rd Qu.:2.0000   3rd Qu.:   0.00   3rd Qu.:2.0000  
##                     Max.   :3.0000   Max.   :1200.00   Max.   :2.0000  
##                                                                        
##      p14_2             p15           p15_5esp              p16        
##  Min.   :0.0000   Min.   :0.0000   Length:2010        Min.   : 0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   Class :character   1st Qu.: 0.000  
##  Median :0.0000   Median :0.0000   Mode  :character   Median : 0.000  
##  Mean   :0.3428   Mean   :0.4005                      Mean   : 1.077  
##  3rd Qu.:1.0000   3rd Qu.:1.0000                      3rd Qu.: 1.000  
##  Max.   :2.0000   Max.   :5.0000                      Max.   :80.000  
##                                                                       
##       p17           p17_6esp             p18_1            p18_2      
##  Min.   :0.0000   Length:2010        Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   Class :character   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.0000   Mode  :character   Median :0.0000   Median :0.000  
##  Mean   :0.9662                      Mean   :0.3721   Mean   :0.392  
##  3rd Qu.:3.0000                      3rd Qu.:1.0000   3rd Qu.:1.000  
##  Max.   :6.0000                      Max.   :2.0000   Max.   :2.000  
##                                                                      
##      p18_3          p18_4            p18_5             p19        
##  Min.   :0.00   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.39   Mean   :0.4239   Mean   :0.3363   Mean   :0.7065  
##  3rd Qu.:1.00   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:2.0000  
##  Max.   :2.00   Max.   :2.0000   Max.   :2.0000   Max.   :3.0000  
##                                                                   
##      p19_3              p20_1            p20_2             p21        
##  Min.   :   0.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:   0.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :   0.000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :   6.793   Mean   :0.5567   Mean   :0.3184   Mean   :0.4194  
##  3rd Qu.:   0.000   3rd Qu.:2.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1300.000   Max.   :2.0000   Max.   :2.0000   Max.   :5.0000  
##                                                                       
##    p21_5esp              p22             p23_1             p23_2        
##  Length:2010        Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  Class :character   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Mode  :character   Median :0.0000   Median :0.00000   Median :0.00000  
##                     Mean   :0.1433   Mean   :0.07413   Mean   :0.05672  
##                     3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##                     Max.   :5.0000   Max.   :2.00000   Max.   :2.00000  
##                                                                         
##       p24              p25          p25_6esp              p26        
##  Min.   :0.0000   Min.   :0.000   Length:2010        Min.   :  0.00  
##  1st Qu.:0.0000   1st Qu.:0.000   Class :character   1st Qu.:  0.00  
##  Median :0.0000   Median :0.000   Mode  :character   Median : 20.00  
##  Mean   :0.7697   Mean   :1.077                      Mean   : 28.25  
##  3rd Qu.:1.0000   3rd Qu.:3.000                      3rd Qu.: 40.00  
##  Max.   :5.0000   Max.   :6.000                      Max.   :480.00  
##                                                                      
##       p27             p28           p28_7esp              p29       
##  Min.   :0.000   Min.   :0.0000   Length:2010        Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.0000   Class :character   1st Qu.:0.000  
##  Median :2.000   Median :0.0000   Mode  :character   Median :2.000  
##  Mean   :1.286   Mean   :0.6209                      Mean   :1.748  
##  3rd Qu.:2.000   3rd Qu.:0.0000                      3rd Qu.:3.000  
##  Max.   :2.000   Max.   :7.0000                      Max.   :4.000  
##                                                                     
##       p30             p31             p32           p32_6esp        
##  Min.   :0.000   Min.   :0.000   Min.   :0.0000   Length:2010       
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.0000   Class :character  
##  Median :3.000   Median :1.000   Median :0.0000   Mode  :character  
##  Mean   :2.125   Mean   :1.096   Mean   :0.7299                     
##  3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.:1.0000                     
##  Max.   :4.000   Max.   :2.000   Max.   :6.0000                     
##                                                                     
##      p33_1           p33_2           p33_3           p33_4      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :2.000   Median :2.000   Median :2.000  
##  Mean   :1.749   Mean   :1.819   Mean   :1.864   Mean   :1.779  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :2.000   Max.   :2.000   Max.   :2.000   Max.   :2.000  
##                                                                 
##      p34_1           p34_2           p34_3          p34_3_1      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:0.0000  
##  Median :2.000   Median :1.000   Median :2.000   Median :0.0000  
##  Mean   :1.682   Mean   :1.456   Mean   :1.625   Mean   :0.5652  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:1.0000  
##  Max.   :3.000   Max.   :3.000   Max.   :3.000   Max.   :3.0000  
##                                                                  
##      p34_4          p34_4_1           p35             p36_1      
##  Min.   :0.000   Min.   :0.000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:0.000   1st Qu.:1.0000   1st Qu.:1.000  
##  Median :1.000   Median :1.000   Median :1.0000   Median :1.000  
##  Mean   :1.412   Mean   :1.021   Mean   :0.9891   Mean   :1.439  
##  3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:1.0000   3rd Qu.:2.000  
##  Max.   :3.000   Max.   :6.000   Max.   :2.0000   Max.   :3.000  
##                                                                  
##      p36_2          p36_3           p36_4           factor          h_lec      
##  Min.   :0.00   Min.   :0.000   Min.   :0.000   Min.   : 2976   Min.   :0.000  
##  1st Qu.:1.00   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:12684   1st Qu.:1.000  
##  Median :1.00   Median :1.000   Median :1.000   Median :17143   Median :1.000  
##  Mean   :1.21   Mean   :1.321   Mean   :1.402   Mean   :19353   Mean   :2.032  
##  3rd Qu.:1.00   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:24472   3rd Qu.:3.000  
##  Max.   :3.00   Max.   :3.000   Max.   :3.000   Max.   :84991   Max.   :4.000  
##                                                                                
##     mat_lec         perslec         l_format         r_format    
##  Min.   :0.000   Min.   :0.000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :3.000   Median :1.000   Median :0.0000   Median :0.000  
##  Mean   :2.712   Mean   :1.246   Mean   :0.7801   Mean   :0.593  
##  3rd Qu.:4.000   3rd Qu.:2.000   3rd Qu.:2.0000   3rd Qu.:2.000  
##  Max.   :4.000   Max.   :2.000   Max.   :3.0000   Max.   :3.000  
##                                                                  
##     p_format         perslecl    
##  Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:1.000  
##  Median :0.0000   Median :1.000  
##  Mean   :0.5677   Mean   :1.305  
##  3rd Qu.:2.0000   3rd Qu.:2.000  
##  Max.   :3.0000   Max.   :2.000  
## 
str(m22)     
## 'data.frame':    2016 obs. of  108 variables:
##  $ folio   : chr  "12A207" "12A207" "12A207" "12A207" ...
##  $ entidad : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ control : int  40007 40007 40007 40007 40048 40048 40048 40048 40085 40085 ...
##  $ viv_sel : int  4 2 1 3 1 2 3 4 4 1 ...
##  $ num_hog : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hog_mud : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_ren_el: int  1 2 1 2 6 2 1 1 1 2 ...
##  $ cd      : int  14 14 14 14 14 14 14 14 14 14 ...
##  $ periodo : int  222 222 222 222 222 222 222 222 222 222 ...
##  $ sexo    : int  1 2 1 2 2 2 1 1 1 2 ...
##  $ edad    : int  51 73 52 35 28 41 44 69 45 23 ...
##  $ nivel   : int  6 2 3 3 4 2 3 2 4 4 ...
##  $ anio    : int  3 6 3 3 3 6 3 6 3 3 ...
##  $ cond_act: int  1 7 1 1 7 7 1 1 1 7 ...
##  $ p1      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ p2      : int  2 2 2 2 2 1 2 2 1 1 ...
##  $ p3_1    : int  2 2 2 2 2 2 2 2 1 2 ...
##  $ p3_2    : int  2 2 2 2 2 2 2 2 1 2 ...
##  $ p3_3    : int  2 2 2 2 2 2 2 2 1 2 ...
##  $ p3_4    : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p3_5    : int  1 2 1 1 1 2 2 2 1 1 ...
##  $ p4      : int  0 0 0 0 0 0 0 0 3 0 ...
##  $ p5      : int  0 0 0 0 0 0 0 0 4 0 ...
##  $ p5_6esp : chr  "" "" "" "" ...
##  $ p6_1    : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p6_2    : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p6_3    : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p6_4    : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p6_5    : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p6_6    : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p6_6esp : chr  "" "" "" "" ...
##  $ p7      : int  0 0 0 0 0 0 0 0 3 0 ...
##  $ p7_3    : int  0 0 0 0 0 0 0 0 600 0 ...
##  $ p8_1    : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p8_2    : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p9      : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p9_5esp : chr  "" "" "" "" ...
##  $ p10     : int  0 0 0 0 0 0 0 0 10 0 ...
##  $ p11     : int  0 0 0 0 0 0 0 0 4 0 ...
##  $ p11_6esp: chr  "" "" "" "" ...
##  $ p12_1   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p12_2   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_3   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_4   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_5   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_6   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_7   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_8   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p12_9   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_9esp: chr  "" "" "" "" ...
##  $ p13     : int  0 0 0 0 0 0 0 0 3 0 ...
##  $ p13_3   : int  0 0 0 0 0 0 0 0 150 0 ...
##  $ p14_1   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p14_2   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p15     : int  0 0 0 0 0 0 0 0 4 0 ...
##  $ p15_5esp: chr  "" "" "" "" ...
##  $ p16     : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p17     : int  0 0 0 0 0 0 0 0 4 0 ...
##  $ p17_6esp: chr  "" "" "" "" ...
##  $ p18_1   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p18_2   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p18_3   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p18_4   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p18_5   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p19     : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p19_3   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p20_1   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p20_2   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p21     : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p21_5esp: chr  "" "" "" "" ...
##  $ p22     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p23_1   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p23_2   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p24     : int  5 0 5 2 5 0 0 0 1 1 ...
##  $ p25     : int  4 0 4 2 2 0 0 0 4 4 ...
##  $ p25_6esp: chr  "" "" "" "" ...
##  $ p26     : int  20 0 15 20 20 0 0 0 20 60 ...
##  $ p27     : int  2 0 2 2 2 0 0 0 2 2 ...
##  $ p28     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p28_7esp: chr  "" "" "" "" ...
##  $ p29     : int  2 0 2 3 2 0 0 0 3 2 ...
##  $ p30     : int  3 0 3 3 3 0 0 0 4 3 ...
##  $ p31     : int  2 0 2 2 2 0 0 0 2 2 ...
##  $ p32     : int  0 1 0 0 0 1 3 5 0 0 ...
##  $ p32_6esp: chr  "" "" "" "" ...
##  $ p33_1   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p33_2   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p33_3   : int  2 2 2 2 2 2 2 2 1 2 ...
##  $ p33_4   : int  2 2 2 1 2 2 2 2 2 2 ...
##  $ p34_1   : int  2 2 2 1 2 2 2 2 1 1 ...
##  $ p34_2   : int  1 2 2 2 2 2 2 2 1 1 ...
##  $ p34_3   : int  1 2 2 2 2 2 2 2 1 1 ...
##  $ p34_3_1 : int  2 0 0 0 0 0 0 0 1 2 ...
##  $ p34_4   : int  1 1 1 2 1 1 1 2 1 1 ...
##  $ p34_4_1 : int  1 1 1 0 1 1 1 0 1 1 ...
##  $ p35     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ p36_1   : int  1 1 1 1 1 1 1 2 1 1 ...
##  $ p36_2   : int  1 1 1 1 1 1 1 2 1 1 ...
##  $ p36_3   : int  1 1 1 1 1 1 1 2 1 1 ...
##   [list output truncated]
names(m22)   
##   [1] "folio"    "entidad"  "control"  "viv_sel"  "num_hog"  "hog_mud" 
##   [7] "n_ren_el" "cd"       "periodo"  "sexo"     "edad"     "nivel"   
##  [13] "anio"     "cond_act" "p1"       "p2"       "p3_1"     "p3_2"    
##  [19] "p3_3"     "p3_4"     "p3_5"     "p4"       "p5"       "p5_6esp" 
##  [25] "p6_1"     "p6_2"     "p6_3"     "p6_4"     "p6_5"     "p6_6"    
##  [31] "p6_6esp"  "p7"       "p7_3"     "p8_1"     "p8_2"     "p9"      
##  [37] "p9_5esp"  "p10"      "p11"      "p11_6esp" "p12_1"    "p12_2"   
##  [43] "p12_3"    "p12_4"    "p12_5"    "p12_6"    "p12_7"    "p12_8"   
##  [49] "p12_9"    "p12_9esp" "p13"      "p13_3"    "p14_1"    "p14_2"   
##  [55] "p15"      "p15_5esp" "p16"      "p17"      "p17_6esp" "p18_1"   
##  [61] "p18_2"    "p18_3"    "p18_4"    "p18_5"    "p19"      "p19_3"   
##  [67] "p20_1"    "p20_2"    "p21"      "p21_5esp" "p22"      "p23_1"   
##  [73] "p23_2"    "p24"      "p25"      "p25_6esp" "p26"      "p27"     
##  [79] "p28"      "p28_7esp" "p29"      "p30"      "p31"      "p32"     
##  [85] "p32_6esp" "p33_1"    "p33_2"    "p33_3"    "p33_4"    "p34_1"   
##  [91] "p34_2"    "p34_3"    "p34_3_1"  "p34_4"    "p34_4_1"  "p35"     
##  [97] "p36_1"    "p36_2"    "p36_3"    "p36_4"    "factor"   "h_lec"   
## [103] "mat_lec"  "perslec"  "l_format" "r_format" "p_format" "perslecl"
head(m22)    
##    folio entidad control viv_sel num_hog hog_mud n_ren_el cd periodo sexo edad
## 1 12A207       1   40007       4       1       0        1 14     222    1   51
## 2 12A207       1   40007       2       1       0        2 14     222    2   73
## 3 12A207       1   40007       1       1       0        1 14     222    1   52
## 4 12A207       1   40007       3       1       0        2 14     222    2   35
## 5 11B193       1   40048       1       1       0        6 14     222    2   28
## 6 11B193       1   40048       2       1       0        2 14     222    2   41
##   nivel anio cond_act p1 p2 p3_1 p3_2 p3_3 p3_4 p3_5 p4 p5 p5_6esp p6_1 p6_2
## 1     6    3        1  1  2    2    2    2    2    1  0  0            0    0
## 2     2    6        7  1  2    2    2    2    2    2  0  0            0    0
## 3     3    3        1  1  2    2    2    2    2    1  0  0            0    0
## 4     3    3        1  1  2    2    2    2    2    1  0  0            0    0
## 5     4    3        7  1  2    2    2    2    2    1  0  0            0    0
## 6     2    6        7  1  1    2    2    2    2    2  0  0            0    0
##   p6_3 p6_4 p6_5 p6_6 p6_6esp p7 p7_3 p8_1 p8_2 p9 p9_5esp p10 p11 p11_6esp
## 1    0    0    0    0          0    0    0    0  0           0   0         
## 2    0    0    0    0          0    0    0    0  0           0   0         
## 3    0    0    0    0          0    0    0    0  0           0   0         
## 4    0    0    0    0          0    0    0    0  0           0   0         
## 5    0    0    0    0          0    0    0    0  0           0   0         
## 6    0    0    0    0          0    0    0    0  0           0   0         
##   p12_1 p12_2 p12_3 p12_4 p12_5 p12_6 p12_7 p12_8 p12_9 p12_9esp p13 p13_3
## 1     0     0     0     0     0     0     0     0     0            0     0
## 2     0     0     0     0     0     0     0     0     0            0     0
## 3     0     0     0     0     0     0     0     0     0            0     0
## 4     0     0     0     0     0     0     0     0     0            0     0
## 5     0     0     0     0     0     0     0     0     0            0     0
## 6     0     0     0     0     0     0     0     0     0            0     0
##   p14_1 p14_2 p15 p15_5esp p16 p17 p17_6esp p18_1 p18_2 p18_3 p18_4 p18_5 p19
## 1     0     0   0            0   0              0     0     0     0     0   0
## 2     0     0   0            0   0              0     0     0     0     0   0
## 3     0     0   0            0   0              0     0     0     0     0   0
## 4     0     0   0            0   0              0     0     0     0     0   0
## 5     0     0   0            0   0              0     0     0     0     0   0
## 6     0     0   0            0   0              0     0     0     0     0   0
##   p19_3 p20_1 p20_2 p21 p21_5esp p22 p23_1 p23_2 p24 p25 p25_6esp p26 p27 p28
## 1     0     0     0   0            0     0     0   5   4           20   2   0
## 2     0     0     0   0            0     0     0   0   0            0   0   0
## 3     0     0     0   0            0     0     0   5   4           15   2   0
## 4     0     0     0   0            0     0     0   2   2           20   2   0
## 5     0     0     0   0            0     0     0   5   2           20   2   0
## 6     0     0     0   0            0     0     0   0   0            0   0   0
##   p28_7esp p29 p30 p31 p32 p32_6esp p33_1 p33_2 p33_3 p33_4 p34_1 p34_2 p34_3
## 1            2   3   2   0              2     2     2     2     2     1     1
## 2            0   0   0   1              2     2     2     2     2     2     2
## 3            2   3   2   0              2     2     2     2     2     2     2
## 4            3   3   2   0              2     2     2     1     1     2     2
## 5            2   3   2   0              2     2     2     2     2     2     2
## 6            0   0   0   1              2     2     2     2     2     2     2
##   p34_3_1 p34_4 p34_4_1 p35 p36_1 p36_2 p36_3 p36_4 factor h_lec mat_lec
## 1       2     1       1   1     1     1     1     2  25403     3       3
## 2       0     1       1   1     1     1     1     2  11406     4       4
## 3       0     1       1   1     1     1     1     2  19052     3       3
## 4       0     2       0   1     1     1     1     1  20172     3       3
## 5       0     1       1   1     1     1     1     1  41647     3       3
## 6       0     1       1   1     1     1     1     2  14875     2       4
##   perslec l_format r_format p_format perslecl
## 1       1        0        0        0        2
## 2       2        0        0        0        2
## 3       1        0        0        0        2
## 4       1        0        0        0        2
## 5       1        0        0        0        2
## 6       2        0        0        0        2
#View(m22)    
dim(m22) 
## [1] 2016  108
summary(m22)
##     folio              entidad         control         viv_sel         num_hog 
##  Length:2016        Min.   : 1.00   Min.   :22251   Min.   :1.000   Min.   :1  
##  Class :character   1st Qu.: 9.00   1st Qu.:40087   1st Qu.:1.000   1st Qu.:1  
##  Mode  :character   Median :15.00   Median :40211   Median :2.000   Median :1  
##                     Mean   :15.65   Mean   :40227   Mean   :2.475   Mean   :1  
##                     3rd Qu.:20.00   3rd Qu.:40348   3rd Qu.:3.000   3rd Qu.:1  
##                     Max.   :32.00   Max.   :41398   Max.   :4.000   Max.   :1  
##                                                                                
##     hog_mud           n_ren_el            cd           periodo   
##  Min.   :0.00000   Min.   : 1.000   Min.   : 1.00   Min.   :222  
##  1st Qu.:0.00000   1st Qu.: 1.000   1st Qu.: 2.00   1st Qu.:222  
##  Median :0.00000   Median : 1.000   Median : 9.00   Median :222  
##  Mean   :0.04911   Mean   : 1.684   Mean   :13.65   Mean   :222  
##  3rd Qu.:0.00000   3rd Qu.: 2.000   3rd Qu.:25.00   3rd Qu.:222  
##  Max.   :3.00000   Max.   :10.000   Max.   :43.00   Max.   :222  
##                                                                  
##       sexo           edad           nivel             anio     
##  Min.   :1.00   Min.   :18.00   Min.   : 0.000   Min.   :1.00  
##  1st Qu.:1.00   1st Qu.:32.00   1st Qu.: 3.000   1st Qu.:3.00  
##  Median :2.00   Median :44.00   Median : 4.000   Median :3.00  
##  Mean   :1.56   Mean   :45.88   Mean   : 4.655   Mean   :3.47  
##  3rd Qu.:2.00   3rd Qu.:58.00   3rd Qu.: 7.000   3rd Qu.:4.00  
##  Max.   :2.00   Max.   :97.00   Max.   :99.000   Max.   :6.00  
##                                                  NA's   :39    
##     cond_act            p1              p2             p3_1      
##  Min.   : 1.000   Min.   :1.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.: 1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median : 1.000   Median :1.000   Median :1.000   Median :2.000  
##  Mean   : 3.274   Mean   :1.018   Mean   :1.371   Mean   :1.547  
##  3rd Qu.: 7.000   3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :10.000   Max.   :2.000   Max.   :2.000   Max.   :2.000  
##                                                                  
##       p3_2           p3_3            p3_4            p3_5      
##  Min.   :0.00   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:1.00   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :2.00   Median :2.000   Median :2.000   Median :2.000  
##  Mean   :1.72   Mean   :1.731   Mean   :1.909   Mean   :1.583  
##  3rd Qu.:2.00   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :2.00   Max.   :2.000   Max.   :2.000   Max.   :2.000  
##                                                                
##        p4               p5          p5_6esp               p6_1       
##  Min.   : 0.000   Min.   :0.000   Length:2016        Min.   :0.0000  
##  1st Qu.: 0.000   1st Qu.:0.000   Class :character   1st Qu.:0.0000  
##  Median : 0.000   Median :0.000   Mode  :character   Median :0.0000  
##  Mean   : 1.627   Mean   :1.406                      Mean   :0.7996  
##  3rd Qu.: 2.000   3rd Qu.:3.000                      3rd Qu.:2.0000  
##  Max.   :99.000   Max.   :6.000                      Max.   :2.0000  
##                                                                      
##       p6_2             p6_3             p6_4             p6_5       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.7173   Mean   :0.7009   Mean   :0.6711   Mean   :0.7292  
##  3rd Qu.:2.0000   3rd Qu.:2.0000   3rd Qu.:2.0000   3rd Qu.:2.0000  
##  Max.   :2.0000   Max.   :2.0000   Max.   :2.0000   Max.   :2.0000  
##                                                                     
##       p6_6          p6_6esp                p7              p7_3       
##  Min.   :0.0000   Length:2016        Min.   :0.0000   Min.   :     0  
##  1st Qu.:0.0000   Class :character   1st Qu.:0.0000   1st Qu.:     0  
##  Median :0.0000   Mode  :character   Median :0.0000   Median :     0  
##  Mean   :0.8323                      Mean   :0.9062   Mean   :  3699  
##  3rd Qu.:2.0000                      3rd Qu.:2.0000   3rd Qu.:     0  
##  Max.   :2.0000                      Max.   :3.0000   Max.   :999999  
##                                                                       
##       p8_1             p8_2              p9           p9_5esp         
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Length:2016       
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   Class :character  
##  Median :0.0000   Median :0.0000   Median :0.0000   Mode  :character  
##  Mean   :0.7292   Mean   :0.4931   Mean   :0.4712                     
##  3rd Qu.:2.0000   3rd Qu.:1.0000   3rd Qu.:1.0000                     
##  Max.   :2.0000   Max.   :2.0000   Max.   :5.0000                     
##                                                                       
##       p10              p11           p11_6esp             p12_1       
##  Min.   : 0.000   Min.   :0.0000   Length:2016        Min.   :0.0000  
##  1st Qu.: 0.000   1st Qu.:0.0000   Class :character   1st Qu.:0.0000  
##  Median : 0.000   Median :0.0000   Mode  :character   Median :0.0000  
##  Mean   : 0.869   Mean   :0.8318                      Mean   :0.4628  
##  3rd Qu.: 0.000   3rd Qu.:0.0000                      3rd Qu.:0.0000  
##  Max.   :60.000   Max.   :6.0000                      Max.   :2.0000  
##                                                                       
##      p12_2            p12_3            p12_4            p12_5       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.4435   Mean   :0.4375   Mean   :0.4479   Mean   :0.4588  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :2.0000   Max.   :2.0000   Max.   :2.0000   Max.   :2.0000  
##                                                                     
##      p12_6            p12_7            p12_8            p12_9       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.4206   Mean   :0.4291   Mean   :0.4335   Mean   :0.4836  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :2.0000   Max.   :2.0000   Max.   :2.0000   Max.   :2.0000  
##                                                                     
##    p12_9esp              p13             p13_3            p14_1      
##  Length:2016        Min.   :0.0000   Min.   :     0   Min.   :0.000  
##  Class :character   1st Qu.:0.0000   1st Qu.:     0   1st Qu.:0.000  
##  Mode  :character   Median :0.0000   Median :     0   Median :0.000  
##                     Mean   :0.5263   Mean   :  1008   Mean   :0.433  
##                     3rd Qu.:0.0000   3rd Qu.:     0   3rd Qu.:0.000  
##                     Max.   :3.0000   Max.   :999999   Max.   :2.000  
##                                                                      
##      p14_2             p15           p15_5esp              p16         
##  Min.   :0.0000   Min.   :0.0000   Length:2016        Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   Class :character   1st Qu.: 0.0000  
##  Median :0.0000   Median :0.0000   Mode  :character   Median : 0.0000  
##  Mean   :0.2912   Mean   :0.2917                      Mean   : 0.7445  
##  3rd Qu.:0.0000   3rd Qu.:0.0000                      3rd Qu.: 0.0000  
##  Max.   :2.0000   Max.   :5.0000                      Max.   :30.0000  
##                                                                        
##       p17           p17_6esp             p18_1            p18_2       
##  Min.   :0.0000   Length:2016        Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   Class :character   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Mode  :character   Median :0.0000   Median :0.0000  
##  Mean   :0.7733                      Mean   :0.3085   Mean   :0.3175  
##  3rd Qu.:0.0000                      3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :6.0000                      Max.   :2.0000   Max.   :2.0000  
##                                                                       
##      p18_3            p18_4            p18_5             p19        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.3189   Mean   :0.3413   Mean   :0.2951   Mean   :0.5397  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :2.0000   Max.   :2.0000   Max.   :2.0000   Max.   :3.0000  
##                                                                     
##      p19_3              p20_1            p20_2             p21        
##  Min.   :     0.0   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:     0.0   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :     0.0   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :   998.2   Mean   :0.4206   Mean   :0.2753   Mean   :0.3105  
##  3rd Qu.:     0.0   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :999999.0   Max.   :2.0000   Max.   :2.0000   Max.   :5.0000  
##                                                                       
##    p21_5esp              p22             p23_1             p23_2        
##  Length:2016        Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  Class :character   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Mode  :character   Median :0.0000   Median :0.00000   Median :0.00000  
##                     Mean   :0.1969   Mean   :0.08185   Mean   :0.07391  
##                     3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##                     Max.   :5.0000   Max.   :2.00000   Max.   :2.00000  
##                                                                         
##       p24              p25          p25_6esp              p26        
##  Min.   :0.0000   Min.   :0.000   Length:2016        Min.   :  0.00  
##  1st Qu.:0.0000   1st Qu.:0.000   Class :character   1st Qu.:  0.00  
##  Median :0.0000   Median :0.000   Mode  :character   Median : 20.00  
##  Mean   :0.8408   Mean   :1.163                      Mean   : 28.45  
##  3rd Qu.:1.0000   3rd Qu.:3.000                      3rd Qu.: 40.00  
##  Max.   :5.0000   Max.   :6.000                      Max.   :360.00  
##                                                                      
##       p27             p28           p28_7esp              p29      
##  Min.   :0.000   Min.   :0.0000   Length:2016        Min.   :0.00  
##  1st Qu.:0.000   1st Qu.:0.0000   Class :character   1st Qu.:0.00  
##  Median :2.000   Median :0.0000   Mode  :character   Median :2.00  
##  Mean   :1.267   Mean   :0.6141                      Mean   :1.75  
##  3rd Qu.:2.000   3rd Qu.:0.0000                      3rd Qu.:3.00  
##  Max.   :2.000   Max.   :7.0000                      Max.   :4.00  
##                                                                    
##       p30             p31             p32          p32_6esp        
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Length:2016       
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   Class :character  
##  Median :3.000   Median :1.000   Median :0.000   Mode  :character  
##  Mean   :2.129   Mean   :1.077   Mean   :0.754                     
##  3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.:1.000                     
##  Max.   :4.000   Max.   :2.000   Max.   :6.000                     
##                                                                    
##      p33_1           p33_2           p33_3           p33_4      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :2.000   Median :2.000   Median :2.000  
##  Mean   :1.813   Mean   :1.865   Mean   :1.931   Mean   :1.854  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :2.000   Max.   :2.000   Max.   :2.000   Max.   :2.000  
##                                                                 
##      p34_1           p34_2         p34_3          p34_3_1           p34_4      
##  Min.   :0.000   Min.   :0.0   Min.   :0.000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:1.0   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:1.000  
##  Median :2.000   Median :1.0   Median :2.000   Median :0.0000   Median :1.000  
##  Mean   :1.693   Mean   :1.5   Mean   :1.645   Mean   :0.5938   Mean   :1.428  
##  3rd Qu.:2.000   3rd Qu.:2.0   3rd Qu.:2.000   3rd Qu.:1.0000   3rd Qu.:2.000  
##  Max.   :3.000   Max.   :3.0   Max.   :3.000   Max.   :3.0000   Max.   :3.000  
##                                                                                
##     p34_4_1           p35             p36_1           p36_2      
##  Min.   :0.000   Min.   :0.0000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:1.0000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :1.000   Median :1.0000   Median :1.000   Median :1.000  
##  Mean   :1.098   Mean   :0.9896   Mean   :1.426   Mean   :1.243  
##  3rd Qu.:2.000   3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.:1.000  
##  Max.   :6.000   Max.   :2.0000   Max.   :3.000   Max.   :3.000  
##                                                                  
##      p36_3           p36_4           factor           h_lec      
##  Min.   :0.000   Min.   :0.000   Min.   :  1481   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.: 12064   1st Qu.:1.000  
##  Median :1.000   Median :1.000   Median : 17342   Median :1.000  
##  Mean   :1.347   Mean   :1.439   Mean   : 19896   Mean   :2.049  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.: 24978   3rd Qu.:4.000  
##  Max.   :3.000   Max.   :3.000   Max.   :101135   Max.   :4.000  
##                                                                  
##     mat_lec         perslec         l_format         r_format     
##  Min.   :0.000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :3.000   Median :1.000   Median :0.0000   Median :0.0000  
##  Mean   :2.708   Mean   :1.271   Mean   :0.7847   Mean   :0.4459  
##  3rd Qu.:4.000   3rd Qu.:2.000   3rd Qu.:2.0000   3rd Qu.:0.0000  
##  Max.   :4.000   Max.   :2.000   Max.   :3.0000   Max.   :3.0000  
##                                                                   
##     p_format         perslecl    
##  Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:1.000  
##  Median :0.0000   Median :1.000  
##  Mean   :0.4246   Mean   :1.357  
##  3rd Qu.:0.0000   3rd Qu.:2.000  
##  Max.   :3.0000   Max.   :2.000  
## 
str(m24)     
## 'data.frame':    2016 obs. of  108 variables:
##  $ folio   : chr  "12B221" "12B221" "12B221" "12B221" ...
##  $ entidad : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ control : int  40002 40002 40002 40002 40003 40003 40003 40003 40028 40028 ...
##  $ viv_sel : int  2 1 3 4 1 2 4 3 2 3 ...
##  $ num_hog : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hog_mud : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ n_ren_el: int  1 1 1 2 1 1 1 2 1 1 ...
##  $ cd      : int  14 14 14 14 14 14 14 14 14 14 ...
##  $ periodo : int  224 224 224 224 224 224 224 224 224 224 ...
##  $ sexo    : int  1 1 1 1 1 1 2 2 1 2 ...
##  $ edad    : int  52 55 59 30 54 30 66 45 26 41 ...
##  $ nivel   : int  3 3 3 4 3 4 2 3 4 4 ...
##  $ anio    : int  3 3 3 3 3 3 6 3 3 3 ...
##  $ cond_act: int  9 1 1 1 1 1 7 7 1 1 ...
##  $ p1      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ p2      : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p3_1    : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p3_2    : int  2 2 2 2 2 2 2 2 1 2 ...
##  $ p3_3    : int  2 2 2 2 1 2 2 2 1 2 ...
##  $ p3_4    : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p3_5    : int  2 2 2 2 2 1 2 2 1 1 ...
##  $ p4      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p5      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p5_6esp : chr  "" "" "" "" ...
##  $ p6_1    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p6_2    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p6_3    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p6_4    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p6_5    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p6_6    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p6_6esp : chr  "" "" "" "" ...
##  $ p7      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p7_3    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p8_1    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p8_2    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p9      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p9_5esp : chr  "" "" "" "" ...
##  $ p10     : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p11     : int  0 0 0 0 0 0 0 0 4 0 ...
##  $ p11_6esp: chr  "" "" "" "" ...
##  $ p12_1   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_2   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_3   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_4   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p12_5   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_6   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_7   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p12_8   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_9   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p12_9esp: chr  "" "" "" "" ...
##  $ p13     : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p13_3   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p14_1   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p14_2   : int  0 0 0 0 0 0 0 0 2 0 ...
##  $ p15     : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ p15_5esp: chr  "" "" "" "" ...
##  $ p16     : int  0 0 0 0 1 0 0 0 1 0 ...
##  $ p17     : int  0 0 0 0 4 0 0 0 4 0 ...
##  $ p17_6esp: chr  "" "" "" "" ...
##  $ p18_1   : int  0 0 0 0 1 0 0 0 1 0 ...
##  $ p18_2   : int  0 0 0 0 1 0 0 0 2 0 ...
##  $ p18_3   : int  0 0 0 0 1 0 0 0 1 0 ...
##  $ p18_4   : int  0 0 0 0 1 0 0 0 1 0 ...
##  $ p18_5   : int  0 0 0 0 1 0 0 0 1 0 ...
##  $ p19     : int  0 0 0 0 2 0 0 0 2 0 ...
##  $ p19_3   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p20_1   : int  0 0 0 0 1 0 0 0 1 0 ...
##  $ p20_2   : int  0 0 0 0 2 0 0 0 2 0 ...
##  $ p21     : int  0 0 0 0 1 0 0 0 2 0 ...
##  $ p21_5esp: chr  "" "" "" "" ...
##  $ p22     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p23_1   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p23_2   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p24     : int  0 0 0 0 0 5 0 0 5 5 ...
##  $ p25     : int  0 0 0 0 0 4 0 0 1 4 ...
##  $ p25_6esp: chr  "" "" "" "" ...
##  $ p26     : int  0 0 0 0 20 15 0 0 20 10 ...
##  $ p27     : int  0 0 0 0 2 2 0 0 2 2 ...
##  $ p28     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ p28_7esp: chr  "" "" "" "" ...
##  $ p29     : int  0 0 0 0 2 2 0 0 2 2 ...
##  $ p30     : int  0 0 0 0 3 3 0 0 3 3 ...
##  $ p31     : int  0 0 0 0 2 2 0 0 2 2 ...
##  $ p32     : int  5 2 3 4 0 0 2 3 0 0 ...
##  $ p32_6esp: chr  "" "" "" "" ...
##  $ p33_1   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p33_2   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p33_3   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p33_4   : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ p34_1   : int  2 2 2 2 2 1 2 2 1 1 ...
##  $ p34_2   : int  1 2 1 2 2 1 1 2 1 1 ...
##  $ p34_3   : int  2 2 2 2 2 1 2 2 1 1 ...
##  $ p34_3_1 : int  0 0 0 0 0 1 0 0 2 1 ...
##  $ p34_4   : int  1 2 1 1 1 1 1 1 1 1 ...
##  $ p34_4_1 : int  1 0 1 1 1 1 1 1 1 1 ...
##  $ p35     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ p36_1   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ p36_2   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ p36_3   : int  1 1 1 1 1 1 1 1 1 1 ...
##   [list output truncated]
names(m24)   
##   [1] "folio"    "entidad"  "control"  "viv_sel"  "num_hog"  "hog_mud" 
##   [7] "n_ren_el" "cd"       "periodo"  "sexo"     "edad"     "nivel"   
##  [13] "anio"     "cond_act" "p1"       "p2"       "p3_1"     "p3_2"    
##  [19] "p3_3"     "p3_4"     "p3_5"     "p4"       "p5"       "p5_6esp" 
##  [25] "p6_1"     "p6_2"     "p6_3"     "p6_4"     "p6_5"     "p6_6"    
##  [31] "p6_6esp"  "p7"       "p7_3"     "p8_1"     "p8_2"     "p9"      
##  [37] "p9_5esp"  "p10"      "p11"      "p11_6esp" "p12_1"    "p12_2"   
##  [43] "p12_3"    "p12_4"    "p12_5"    "p12_6"    "p12_7"    "p12_8"   
##  [49] "p12_9"    "p12_9esp" "p13"      "p13_3"    "p14_1"    "p14_2"   
##  [55] "p15"      "p15_5esp" "p16"      "p17"      "p17_6esp" "p18_1"   
##  [61] "p18_2"    "p18_3"    "p18_4"    "p18_5"    "p19"      "p19_3"   
##  [67] "p20_1"    "p20_2"    "p21"      "p21_5esp" "p22"      "p23_1"   
##  [73] "p23_2"    "p24"      "p25"      "p25_6esp" "p26"      "p27"     
##  [79] "p28"      "p28_7esp" "p29"      "p30"      "p31"      "p32"     
##  [85] "p32_6esp" "p33_1"    "p33_2"    "p33_3"    "p33_4"    "p34_1"   
##  [91] "p34_2"    "p34_3"    "p34_3_1"  "p34_4"    "p34_4_1"  "p35"     
##  [97] "p36_1"    "p36_2"    "p36_3"    "p36_4"    "factor"   "h_lec"   
## [103] "mat_lec"  "perslec"  "l_format" "r_format" "p_format" "perslecl"
head(m24)    
##    folio entidad control viv_sel num_hog hog_mud n_ren_el cd periodo sexo edad
## 1 12B221       1   40002       2       1       0        1 14     224    1   52
## 2 12B221       1   40002       1       1       0        1 14     224    1   55
## 3 12B221       1   40002       3       1       0        1 14     224    1   59
## 4 12B221       1   40002       4       1       0        2 14     224    1   30
## 5 11B213       1   40003       1       1       0        1 14     224    1   54
## 6 11B213       1   40003       2       1       0        1 14     224    1   30
##   nivel anio cond_act p1 p2 p3_1 p3_2 p3_3 p3_4 p3_5 p4 p5 p5_6esp p6_1 p6_2
## 1     3    3        9  1  2    2    2    2    2    2  0  0            0    0
## 2     3    3        1  1  2    2    2    2    2    2  0  0            0    0
## 3     3    3        1  1  2    2    2    2    2    2  0  0            0    0
## 4     4    3        1  1  2    2    2    2    2    2  0  0            0    0
## 5     3    3        1  1  2    2    2    1    2    2  0  0            0    0
## 6     4    3        1  1  2    2    2    2    2    1  0  0            0    0
##   p6_3 p6_4 p6_5 p6_6 p6_6esp p7 p7_3 p8_1 p8_2 p9 p9_5esp p10 p11 p11_6esp
## 1    0    0    0    0          0    0    0    0  0           0   0         
## 2    0    0    0    0          0    0    0    0  0           0   0         
## 3    0    0    0    0          0    0    0    0  0           0   0         
## 4    0    0    0    0          0    0    0    0  0           0   0         
## 5    0    0    0    0          0    0    0    0  0           0   0         
## 6    0    0    0    0          0    0    0    0  0           0   0         
##   p12_1 p12_2 p12_3 p12_4 p12_5 p12_6 p12_7 p12_8 p12_9 p12_9esp p13 p13_3
## 1     0     0     0     0     0     0     0     0     0            0     0
## 2     0     0     0     0     0     0     0     0     0            0     0
## 3     0     0     0     0     0     0     0     0     0            0     0
## 4     0     0     0     0     0     0     0     0     0            0     0
## 5     0     0     0     0     0     0     0     0     0            0     0
## 6     0     0     0     0     0     0     0     0     0            0     0
##   p14_1 p14_2 p15 p15_5esp p16 p17 p17_6esp p18_1 p18_2 p18_3 p18_4 p18_5 p19
## 1     0     0   0            0   0              0     0     0     0     0   0
## 2     0     0   0            0   0              0     0     0     0     0   0
## 3     0     0   0            0   0              0     0     0     0     0   0
## 4     0     0   0            0   0              0     0     0     0     0   0
## 5     0     0   0            1   4              1     1     1     1     1   2
## 6     0     0   0            0   0              0     0     0     0     0   0
##   p19_3 p20_1 p20_2 p21 p21_5esp p22 p23_1 p23_2 p24 p25 p25_6esp p26 p27 p28
## 1     0     0     0   0            0     0     0   0   0            0   0   0
## 2     0     0     0   0            0     0     0   0   0            0   0   0
## 3     0     0     0   0            0     0     0   0   0            0   0   0
## 4     0     0     0   0            0     0     0   0   0            0   0   0
## 5     0     1     2   1            0     0     0   0   0           20   2   0
## 6     0     0     0   0            0     0     0   5   4           15   2   0
##   p28_7esp p29 p30 p31 p32 p32_6esp p33_1 p33_2 p33_3 p33_4 p34_1 p34_2 p34_3
## 1            0   0   0   5              2     2     2     2     2     1     2
## 2            0   0   0   2              2     2     2     2     2     2     2
## 3            0   0   0   3              2     2     2     2     2     1     2
## 4            0   0   0   4              2     2     2     2     2     2     2
## 5            2   3   2   0              2     2     2     2     2     2     2
## 6            2   3   2   0              2     2     2     2     1     1     1
##   p34_3_1 p34_4 p34_4_1 p35 p36_1 p36_2 p36_3 p36_4 factor h_lec mat_lec
## 1       0     1       1   1     1     1     1     2  36731     4       4
## 2       0     2       0   1     1     1     1     2   9183     4       4
## 3       0     1       1   1     1     1     1     2  36731     4       4
## 4       0     1       1   1     1     1     1     1  18365     4       4
## 5       0     1       1   1     1     1     1     2   7488     3       3
## 6       1     1       1   1     1     1     1     1   7488     3       3
##   perslec l_format r_format p_format perslecl
## 1       2        0        0        0        2
## 2       2        0        0        0        2
## 3       2        0        0        0        2
## 4       2        0        0        0        2
## 5       1        0        0        1        1
## 6       1        0        0        0        2
#View(m24)    
dim(m24) 
## [1] 2016  108
summary(m24)
##     folio              entidad         control         viv_sel         num_hog 
##  Length:2016        Min.   : 1.00   Min.   :40001   Min.   :1.000   Min.   :1  
##  Class :character   1st Qu.: 9.00   1st Qu.:40095   1st Qu.:1.750   1st Qu.:1  
##  Mode  :character   Median :15.00   Median :40191   Median :2.000   Median :1  
##                     Mean   :15.58   Mean   :40244   Mean   :2.493   Mean   :1  
##                     3rd Qu.:20.25   3rd Qu.:40315   3rd Qu.:3.000   3rd Qu.:1  
##                     Max.   :32.00   Max.   :41419   Max.   :4.000   Max.   :1  
##                                                                                
##     hog_mud           n_ren_el           cd           periodo   
##  Min.   :0.00000   Min.   :1.000   Min.   : 1.00   Min.   :224  
##  1st Qu.:0.00000   1st Qu.:1.000   1st Qu.: 2.00   1st Qu.:224  
##  Median :0.00000   Median :1.000   Median : 9.00   Median :224  
##  Mean   :0.03472   Mean   :1.673   Mean   :13.74   Mean   :224  
##  3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:25.00   3rd Qu.:224  
##  Max.   :2.00000   Max.   :8.000   Max.   :43.00   Max.   :224  
##                                                                 
##       sexo            edad           nivel            anio      
##  Min.   :1.000   Min.   :18.00   Min.   : 0.00   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:33.00   1st Qu.: 3.00   1st Qu.:3.000  
##  Median :2.000   Median :46.00   Median : 4.00   Median :3.000  
##  Mean   :1.574   Mean   :46.49   Mean   : 4.66   Mean   :3.456  
##  3rd Qu.:2.000   3rd Qu.:59.00   3rd Qu.: 7.00   3rd Qu.:4.000  
##  Max.   :2.000   Max.   :94.00   Max.   :99.00   Max.   :9.000  
##                                                  NA's   :48     
##     cond_act            p1              p2             p3_1      
##  Min.   : 1.000   Min.   :1.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.: 1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median : 1.000   Median :1.000   Median :1.000   Median :2.000  
##  Mean   : 3.325   Mean   :1.023   Mean   :1.405   Mean   :1.552  
##  3rd Qu.: 7.000   3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :10.000   Max.   :2.000   Max.   :2.000   Max.   :2.000  
##                                                                  
##       p3_2            p3_3            p3_4            p3_5      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :2.000   Median :2.000   Median :2.000   Median :2.000  
##  Mean   :1.745   Mean   :1.781   Mean   :1.912   Mean   :1.588  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :2.000   Max.   :2.000   Max.   :2.000   Max.   :2.000  
##                                                                 
##        p4               p5          p5_6esp               p6_1       
##  Min.   : 0.000   Min.   :0.000   Length:2016        Min.   :0.0000  
##  1st Qu.: 0.000   1st Qu.:0.000   Class :character   1st Qu.:0.0000  
##  Median : 0.000   Median :0.000   Mode  :character   Median :0.0000  
##  Mean   : 1.275   Mean   :1.348                      Mean   :0.7723  
##  3rd Qu.: 2.000   3rd Qu.:3.000                      3rd Qu.:2.0000  
##  Max.   :70.000   Max.   :6.000                      Max.   :2.0000  
##                                                                      
##       p6_2            p6_3             p6_4             p6_5       
##  Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.686   Mean   :0.6756   Mean   :0.6458   Mean   :0.7143  
##  3rd Qu.:2.000   3rd Qu.:2.0000   3rd Qu.:1.0000   3rd Qu.:2.0000  
##  Max.   :2.000   Max.   :2.0000   Max.   :2.0000   Max.   :2.0000  
##                                                                    
##       p6_6          p6_6esp                p7              p7_3       
##  Min.   :0.0000   Length:2016        Min.   :0.0000   Min.   :     0  
##  1st Qu.:0.0000   Class :character   1st Qu.:0.0000   1st Qu.:     0  
##  Median :0.0000   Mode  :character   Median :0.0000   Median :     0  
##  Mean   :0.8006                      Mean   :0.8591   Mean   :  3115  
##  3rd Qu.:2.0000                      3rd Qu.:2.0000   3rd Qu.:     0  
##  Max.   :2.0000                      Max.   :3.0000   Max.   :999999  
##                                                                       
##       p8_1             p8_2              p9           p9_5esp         
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Length:2016       
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   Class :character  
##  Median :0.0000   Median :0.0000   Median :0.0000   Mode  :character  
##  Mean   :0.7054   Mean   :0.4772   Mean   :0.4529                     
##  3rd Qu.:2.0000   3rd Qu.:1.0000   3rd Qu.:1.0000                     
##  Max.   :2.0000   Max.   :2.0000   Max.   :5.0000                     
##                                                                       
##       p10               p11           p11_6esp             p12_1       
##  Min.   : 0.0000   Min.   :0.0000   Length:2016        Min.   :0.0000  
##  1st Qu.: 0.0000   1st Qu.:0.0000   Class :character   1st Qu.:0.0000  
##  Median : 0.0000   Median :0.0000   Mode  :character   Median :0.0000  
##  Mean   : 0.6349   Mean   :0.7326                      Mean   :0.3973  
##  3rd Qu.: 0.0000   3rd Qu.:0.0000                      3rd Qu.:0.0000  
##  Max.   :25.0000   Max.   :6.0000                      Max.   :2.0000  
##                                                                        
##      p12_2            p12_3           p12_4           p12_5       
##  Min.   :0.0000   Min.   :0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.000   Median :0.000   Median :0.0000  
##  Mean   :0.3824   Mean   :0.371   Mean   :0.377   Mean   :0.3879  
##  3rd Qu.:0.0000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.0000  
##  Max.   :2.0000   Max.   :2.000   Max.   :2.000   Max.   :2.0000  
##                                                                   
##      p12_6           p12_7           p12_8            p12_9       
##  Min.   :0.000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.000   Median :0.000   Median :0.0000   Median :0.0000  
##  Mean   :0.371   Mean   :0.369   Mean   :0.3562   Mean   :0.4172  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :2.000   Max.   :2.000   Max.   :2.0000   Max.   :2.0000  
##                                                                   
##    p12_9esp              p13            p13_3              p14_1      
##  Length:2016        Min.   :0.000   Min.   :     0.0   Min.   :0.000  
##  Class :character   1st Qu.:0.000   1st Qu.:     0.0   1st Qu.:0.000  
##  Mode  :character   Median :0.000   Median :     0.0   Median :0.000  
##                     Mean   :0.433   Mean   :   506.5   Mean   :0.372  
##                     3rd Qu.:0.000   3rd Qu.:     0.0   3rd Qu.:0.000  
##                     Max.   :3.000   Max.   :999999.0   Max.   :2.000  
##                                                                       
##      p14_2            p15           p15_5esp              p16         
##  Min.   :0.000   Min.   :0.0000   Length:2016        Min.   : 0.0000  
##  1st Qu.:0.000   1st Qu.:0.0000   Class :character   1st Qu.: 0.0000  
##  Median :0.000   Median :0.0000   Mode  :character   Median : 0.0000  
##  Mean   :0.247   Mean   :0.2688                      Mean   : 0.5749  
##  3rd Qu.:0.000   3rd Qu.:0.0000                      3rd Qu.: 0.0000  
##  Max.   :2.000   Max.   :5.0000                      Max.   :60.0000  
##                                                                       
##       p17           p17_6esp             p18_1            p18_2       
##  Min.   :0.0000   Length:2016        Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   Class :character   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Mode  :character   Median :0.0000   Median :0.0000  
##  Mean   :0.5794                      Mean   :0.2163   Mean   :0.2272  
##  3rd Qu.:0.0000                      3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :6.0000                      Max.   :2.0000   Max.   :2.0000  
##                                                                       
##      p18_3            p18_4            p18_5             p19        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.2361   Mean   :0.2594   Mean   :0.2133   Mean   :0.3904  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :2.0000   Max.   :2.0000   Max.   :2.0000   Max.   :3.0000  
##                                                                     
##      p19_3              p20_1            p20_2             p21        
##  Min.   :     0.0   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:     0.0   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :     0.0   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :   995.8   Mean   :0.3115   Mean   :0.2039   Mean   :0.2411  
##  3rd Qu.:     0.0   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :999999.0   Max.   :2.0000   Max.   :2.0000   Max.   :5.0000  
##                                                                       
##    p21_5esp              p22             p23_1             p23_2      
##  Length:2016        Min.   :0.0000   Min.   :0.00000   Min.   :0.000  
##  Class :character   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.000  
##  Mode  :character   Median :0.0000   Median :0.00000   Median :0.000  
##                     Mean   :0.1443   Mean   :0.05804   Mean   :0.063  
##                     3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.000  
##                     Max.   :5.0000   Max.   :2.00000   Max.   :2.000  
##                                                                       
##       p24              p25          p25_6esp              p26        
##  Min.   :0.0000   Min.   :0.000   Length:2016        Min.   :  0.00  
##  1st Qu.:0.0000   1st Qu.:0.000   Class :character   1st Qu.:  0.00  
##  Median :0.0000   Median :0.000   Mode  :character   Median : 20.00  
##  Mean   :0.7986   Mean   :1.117                      Mean   : 26.78  
##  3rd Qu.:1.0000   3rd Qu.:3.000                      3rd Qu.: 30.00  
##  Max.   :5.0000   Max.   :6.000                      Max.   :360.00  
##                                                                      
##       p27            p28          p28_7esp              p29      
##  Min.   :0.00   Min.   :0.000   Length:2016        Min.   :0.00  
##  1st Qu.:0.00   1st Qu.:0.000   Class :character   1st Qu.:0.00  
##  Median :2.00   Median :0.000   Mode  :character   Median :2.00  
##  Mean   :1.23   Mean   :0.557                      Mean   :1.68  
##  3rd Qu.:2.00   3rd Qu.:0.000                      3rd Qu.:3.00  
##  Max.   :2.00   Max.   :7.000                      Max.   :4.00  
##                                                                  
##       p30             p31             p32           p32_6esp        
##  Min.   :0.000   Min.   :0.000   Min.   :0.0000   Length:2016       
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.0000   Class :character  
##  Median :3.000   Median :1.000   Median :0.0000   Mode  :character  
##  Mean   :1.995   Mean   :1.056   Mean   :0.8175                     
##  3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.:1.0000                     
##  Max.   :4.000   Max.   :2.000   Max.   :6.0000                     
##                                                                     
##      p33_1           p33_2           p33_3           p33_4           p34_1     
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.00  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:1.00  
##  Median :2.000   Median :2.000   Median :2.000   Median :2.000   Median :2.00  
##  Mean   :1.787   Mean   :1.868   Mean   :1.887   Mean   :1.824   Mean   :1.67  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.00  
##  Max.   :2.000   Max.   :2.000   Max.   :2.000   Max.   :2.000   Max.   :3.00  
##                                                                                
##      p34_2           p34_3         p34_3_1           p34_4      
##  Min.   :0.000   Min.   :0.00   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:1.00   1st Qu.:0.0000   1st Qu.:1.000  
##  Median :1.000   Median :2.00   Median :0.0000   Median :1.000  
##  Mean   :1.477   Mean   :1.64   Mean   :0.5437   Mean   :1.427  
##  3rd Qu.:2.000   3rd Qu.:2.00   3rd Qu.:1.0000   3rd Qu.:2.000  
##  Max.   :3.000   Max.   :3.00   Max.   :3.0000   Max.   :3.000  
##                                                                 
##     p34_4_1           p35             p36_1           p36_2      
##  Min.   :0.000   Min.   :0.0000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:1.0000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :1.000   Median :1.0000   Median :1.000   Median :1.000  
##  Mean   :0.998   Mean   :0.9871   Mean   :1.357   Mean   :1.188  
##  3rd Qu.:1.000   3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.:1.000  
##  Max.   :6.000   Max.   :2.0000   Max.   :3.000   Max.   :3.000  
##                                                                  
##      p36_3           p36_4           factor           h_lec      
##  Min.   :0.000   Min.   :0.000   Min.   :  3576   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.: 12994   1st Qu.:1.000  
##  Median :1.000   Median :1.000   Median : 18445   Median :1.000  
##  Mean   :1.293   Mean   :1.397   Mean   : 21196   Mean   :2.141  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.: 26839   3rd Qu.:4.000  
##  Max.   :3.000   Max.   :3.000   Max.   :124941   Max.   :4.000  
##                                                                  
##     mat_lec         perslec         l_format        r_format     
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :3.000   Median :1.000   Median :0.000   Median :0.0000  
##  Mean   :2.714   Mean   :1.284   Mean   :0.748   Mean   :0.3869  
##  3rd Qu.:4.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:0.0000  
##  Max.   :4.000   Max.   :2.000   Max.   :3.000   Max.   :3.0000  
##                                                                  
##     p_format         perslecl    
##  Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:1.000  
##  Median :0.0000   Median :1.000  
##  Mean   :0.3165   Mean   :1.376  
##  3rd Qu.:0.0000   3rd Qu.:2.000  
##  Max.   :3.0000   Max.   :2.000  
## 
files <- c(
  "Datos_molec_2020-1.csv",
  "Datos_molec_2022-1.csv",
  "Datos_molec_2024-1.csv"
)

years <- c(2020, 2022, 2024)  

aliases <- list(
  sabe_leer_escribir = c("p1", "p_1", "sabe_leer", "sabe_leer_escribir", "leer_escribir"),
  nivel_aprobado     = c("nivel", "nivel_aprobado", "escolaridad", "grado_aprob", "ultimo_grado"),
  condicion_actividad= c("cond_act", "condicion_actividad", "cond_act_pea"),
  libros_gasto       = c("p7_3", "p07_3", "p7_03", "gasto_libros", "libros_gasto"),
 libros_leidos_12m = c(
  "p4","p04","libros_leidos","num_libros","cuantos_libros",
  "libros_12m","libros_ultimos_12_meses","libros_12_meses"
),
  no_lectura_motivo  = c("p32", "p032", "no_lectura_motivo", "motivo_no_lectura"),
no_lectura_otro    = c("p32_6esp", "p032_6esp", "p32_06esp", "no_lectura_otro_esp", "no_lectura_otro"),
  lectura_minutos    = c("p26", "p026", "p26_minutos", "minutos_lectura","minutos_continuos", "p26_min", "minutos")
)

standard_vars <- names(aliases)

harmonize_one <- function(path, year, aliases) {

  df_raw <- suppressMessages(read_csv(path, col_types = cols(.default = col_character())))
  df <- clean_names(df_raw)
  nm <- names(df)

  pick_first <- function(cands) {
    cands_clean <- cands |>
      tolower() |>
      str_replace_all("[^a-z0-9_]", "_")
    hit <- intersect(cands_clean, nm)
    if (length(hit) > 0) hit[[1]] else NA_character_
  }

  n <- nrow(df)
  out <- tibble(year = rep(year, n))

  for (std in standard_vars) {
    src <- pick_first(aliases[[std]])
    if (!is.na(src)) {
      out[[std]] <- df[[src]]
    } else {
      out[[std]] <- NA_character_
      message("Año ", year, ": no se encontró columna para '", std, "'.")
    }
  }

  out
}

molec_list <- map2(files, years, ~ harmonize_one(.x, .y, aliases))
molec <- bind_rows(molec_list)

molec <- molec %>%
  mutate(
    libros_gasto_num = suppressWarnings(as.numeric(libros_gasto)),
    libros_leidos_12m_num = suppressWarnings(as.numeric(libros_leidos_12m)),
    lectura_minutos_num = suppressWarnings(as.numeric(lectura_minutos)),
    sabe_leer_bin = case_when(
      str_detect(str_trim(tolower(sabe_leer_escribir)), "^(si|sí|1)$") ~ 1,
      str_detect(str_trim(tolower(sabe_leer_escribir)), "^(no|0)$") ~ 0,
      TRUE ~ NA_real_
    )
  )

molec <- molec %>%
  mutate(
    no_lectura_motivo = case_when(
      !is.na(no_lectura_otro) & str_trim(no_lectura_otro) != "" ~ "Otro",
      TRUE ~ no_lectura_motivo
    )
  ) %>%
  select(-no_lectura_otro,-sabe_leer_bin,-libros_gasto,-libros_leidos_12m,-lectura_minutos)
molec<- na.omit(molec)

for (yy in years) {
  df_y <- subset(molec, year == yy)
  write.csv(df_y, paste0("DatosEquipo#_", yy, ".csv"), row.names = FALSE)
}

numify <- function(x) {
  y <- readr::parse_number(x, locale = readr::locale(decimal_mark = ".", grouping_mark = ","))
  if (mean(is.na(y)) > 0.9) {
    y <- readr::parse_number(x, locale = readr::locale(decimal_mark = ",", grouping_mark = "."))
  }
  y
}

write.csv(molec, "DatosEquipo#1.csv", row.names = FALSE)
str(molec)     
## tibble [6,042 × 8] (S3: tbl_df/tbl/data.frame)
##  $ year                 : num [1:6042] 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
##  $ sabe_leer_escribir   : chr [1:6042] "1" "1" "1" "1" ...
##  $ nivel_aprobado       : chr [1:6042] "2" "3" "3" "7" ...
##  $ condicion_actividad  : chr [1:6042] "1" "7" "1" "1" ...
##  $ no_lectura_motivo    : chr [1:6042] "0" "0" "3" "0" ...
##  $ libros_gasto_num     : num [1:6042] 0 0 0 0 0 0 0 0 0 0 ...
##  $ libros_leidos_12m_num: num [1:6042] 1 0 0 2 2 0 0 0 0 1 ...
##  $ lectura_minutos_num  : num [1:6042] 30 10 0 20 20 30 20 10 0 60 ...
names(molec)   
## [1] "year"                  "sabe_leer_escribir"    "nivel_aprobado"       
## [4] "condicion_actividad"   "no_lectura_motivo"     "libros_gasto_num"     
## [7] "libros_leidos_12m_num" "lectura_minutos_num"
head(molec)    
## # A tibble: 6 × 8
##    year sabe_leer_escribir nivel_aprobado condicion_actividad no_lectura_motivo
##   <dbl> <chr>              <chr>          <chr>               <chr>            
## 1  2020 1                  2              1                   0                
## 2  2020 1                  3              7                   0                
## 3  2020 1                  3              1                   3                
## 4  2020 1                  7              1                   0                
## 5  2020 1                  6              7                   0                
## 6  2020 1                  3              1                   0                
## # ℹ 3 more variables: libros_gasto_num <dbl>, libros_leidos_12m_num <dbl>,
## #   lectura_minutos_num <dbl>
#View(molec)    
dim(molec)     
## [1] 6042    8
summary(molec)
##       year      sabe_leer_escribir nivel_aprobado     condicion_actividad
##  Min.   :2020   Length:6042        Length:6042        Length:6042        
##  1st Qu.:2020   Class :character   Class :character   Class :character   
##  Median :2022   Mode  :character   Mode  :character   Mode  :character   
##  Mean   :2022                                                            
##  3rd Qu.:2024                                                            
##  Max.   :2024                                                            
##  no_lectura_motivo  libros_gasto_num libros_leidos_12m_num lectura_minutos_num
##  Length:6042        Min.   :     0   Min.   : 0.000        Min.   :  0.00     
##  Class :character   1st Qu.:     0   1st Qu.: 0.000        1st Qu.:  0.00     
##  Mode  :character   Median :     0   Median : 0.000        Median : 20.00     
##                     Mean   :  2816   Mean   : 1.443        Mean   : 27.83     
##                     3rd Qu.:     0   3rd Qu.: 2.000        3rd Qu.: 40.00     
##                     Max.   :999999   Max.   :99.000        Max.   :480.00
colSums(is.na(molec))
##                  year    sabe_leer_escribir        nivel_aprobado 
##                     0                     0                     0 
##   condicion_actividad     no_lectura_motivo      libros_gasto_num 
##                     0                     0                     0 
## libros_leidos_12m_num   lectura_minutos_num 
##                     0                     0
round(colSums(is.na(molec)) / nrow(molec) * 100, 2)
##                  year    sabe_leer_escribir        nivel_aprobado 
##                     0                     0                     0 
##   condicion_actividad     no_lectura_motivo      libros_gasto_num 
##                     0                     0                     0 
## libros_leidos_12m_num   lectura_minutos_num 
##                     0                     0
table(molec$libros_gasto_num)           +
prop.table(table(molec$libros_gasto_num))  
## 
##           0           1          10          15          20          25 
## 5128.848726    1.000166    1.000166    1.000166    3.000497    1.000166 
##          30          40          45          50          55          60 
##    4.000662    7.001159    1.000166    8.001324    1.000166    5.000828 
##          70          75          85          89          90         100 
##    2.000331    1.000166    1.000166    1.000166    3.000497   26.004303 
##         110         120         130         140         150         160 
##    1.000166    7.001159    1.000166    1.000166   36.005958    2.000331 
##         170         180         185         200         215         219 
##    2.000331    7.001159    1.000166   63.010427    1.000166    1.000166 
##         220         230         239         240         250         270 
##    1.000166    1.000166    1.000166    2.000331   24.003972    2.000331 
##         280         299         300         311         329         340 
##    4.000662    1.000166   77.012744    1.000166    1.000166    1.000166 
##         350         360         370         380         390         400 
##   17.002814    1.000166    1.000166    2.000331    1.000166   46.007613 
##         420         430         450         480         500         530 
##    1.000166    1.000166   10.001655    2.000331   78.012910    1.000166 
##         550         580         600         650         700         750 
##    3.000497    1.000166   69.011420    4.000662   20.003310    6.000993 
##         780         800         850         860         875         900 
##    1.000166   37.006124    1.000166    1.000166    1.000166   18.002979 
##         950         960        1000        1100        1200        1300 
##    3.000497    1.000166   54.008937    3.000497   29.004800    6.000993 
##        1400        1500        1560        1600        1700        1800 
##    3.000497   43.007117    1.000166    8.001324    2.000331    6.000993 
##        1900        2000        2200        2300        2400        2500 
##    1.000166   31.005131    1.000166    2.000331    2.000331    9.001490 
##        2599        3000        3400        3500        4000        4200 
##    1.000166   17.002814    1.000166    7.001159    7.001159    1.000166 
##        5000        5200        6000        7000        7200        7500 
##   13.002152    1.000166    5.000828    1.000166    1.000166    1.000166 
##        8000        9999       10000       15000       50000      120000 
##    2.000331    1.000166    1.000166    2.000331    1.000166    1.000166 
##      999999 
##   16.002648
table(molec$libros_leidos_12m_num)           +
prop.table(table(molec$libros_leidos_12m_num))  
## 
##           0           1           2           3           4           5 
## 3582.592850  758.125455  635.105098  399.066038  214.035419  158.026150 
##           6           7           8           9          10          11 
##   75.012413   19.003145   37.006124    8.001324   46.007613    4.000662 
##          12          13          14          15          16          17 
##   30.004965    3.000497    2.000331   18.002979    1.000166    1.000166 
##          18          20          24          25          30          35 
##    1.000166   19.003145    5.000828    2.000331    9.001490    1.000166 
##          40          42          45          50          60          66 
##    4.000662    1.000166    1.000166    5.000828    1.000166    1.000166 
##          70          99 
##    1.000166    1.000166
table(molec$lectura_minutos_num)           +
prop.table(table(molec$lectura_minutos_num)) 
## 
##           0           1           2           3           4           5 
## 1886.312148    1.000166    4.000662    4.000662    1.000166   55.009103 
##           6           7          10          15          16          18 
##    2.000331    1.000166  243.040218  322.053294    1.000166    1.000166 
##          20          21          25          26          30          35 
##  658.108904    1.000166  117.019364    1.000166 1156.191327   61.010096 
##          38          40          44          45          48          50 
##    2.000331  211.034922    1.000166  154.025488    1.000166   33.005462 
##          60          70          80          90         120         160 
##  773.127938    5.000828    2.000331   97.016054  191.031612    1.000166 
##         180         200         240         300         360         480 
##   38.006289    2.000331   11.001821    2.000331    2.000331    1.000166

AVANCE 2

datos <- read.csv("DatosEquipo#1.csv", stringsAsFactors = FALSE)
#View(datos)
datos <- datos %>% mutate(
  year = as.integer(year),                       
  lee_algo = libros_leidos_12m_num > 0           
)

cat("Filas:", nrow(datos), " Columnas:", ncol(datos), "\n")
## Filas: 6042  Columnas: 9
print(names(datos))
## [1] "year"                  "sabe_leer_escribir"    "nivel_aprobado"       
## [4] "condicion_actividad"   "no_lectura_motivo"     "libros_gasto_num"     
## [7] "libros_leidos_12m_num" "lectura_minutos_num"   "lee_algo"
str(datos)
## 'data.frame':    6042 obs. of  9 variables:
##  $ year                 : int  2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
##  $ sabe_leer_escribir   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ nivel_aprobado       : int  2 3 3 7 6 3 6 3 2 2 ...
##  $ condicion_actividad  : int  1 7 1 1 7 1 1 1 1 8 ...
##  $ no_lectura_motivo    : chr  "0" "0" "3" "0" ...
##  $ libros_gasto_num     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ libros_leidos_12m_num: int  1 0 0 2 2 0 0 0 0 1 ...
##  $ lectura_minutos_num  : int  30 10 0 20 20 30 20 10 0 60 ...
##  $ lee_algo             : logi  TRUE FALSE FALSE TRUE TRUE FALSE ...
porc_nulos <- sapply(datos, function(x) mean(is.na(x))*100)
print(round(porc_nulos, 2))
##                  year    sabe_leer_escribir        nivel_aprobado 
##                     0                     0                     0 
##   condicion_actividad     no_lectura_motivo      libros_gasto_num 
##                     0                     0                     0 
## libros_leidos_12m_num   lectura_minutos_num              lee_algo 
##                     0                     0                     0
dup_count <- sum(duplicated(datos))
cat("Duplicados:", dup_count, "\n")
## Duplicados: 3419
datos <- datos[!duplicated(datos), ]

es_texto <- sapply(datos, is.character)
datos[es_texto] <- lapply(datos[es_texto], function(x) trimws(tolower(x)))

if ("libros_leidos_12m_num" %in% names(datos)) datos$libros_leidos_12m_num[datos$libros_leidos_12m_num %in% c(99)] <- NA
if ("libros_gasto_num"  %in% names(datos)) datos$libros_gasto_num[datos$libros_gasto_num %in% c(999999)] <- NA
if ("lee_algo" %in% names(datos))
datos$lee_algo[is.na(datos$lee_algo)] <- FALSE
diccionario <- data.frame(
  variable    = names(datos),
  tipo        = sapply(datos, function(x) class(x)[1]),
  porc_nulos  = round(colMeans(is.na(datos)) * 100, 2),
  stringsAsFactors = FALSE
)

desc <- c(
  year = "Año del dato.",
  saber_leer_escribir = "Se pregunta por alfabetización",
  nivel_aprobado = "Nivel de escolaridad. ¿Hasta qué año o grado aprobó en la escuela?.",
  condicion_actividad = "Condición de actividad: ¿La semana pasada...?",
  no_lectura_motivo = "Motivo principal por el que no lee los materiales de lectura mencionados.",
  libros_gasto_num = "Aproximadamente, ¿cuánto gastó?",
  libros_leidos_12m_num = "¿Cuántos libros leyó en los últimos doce meses?",
  lectura_minutos_num = "¿Cuántos minutos lee?",
  lee_algo = "Si la persona sabe leer, ¿ha leido algo en los últimos 12m?"
)

#View(diccionario)
tabla <- datos[,c(3,6,7,8)]
nums <- Filter(is.numeric, tabla)
res_num <- t(sapply(nums, function(x) c(
  n = sum(!is.na(x)),
  media = mean(x, na.rm=TRUE),
  mediana = median(x, na.rm=TRUE),
  min = min(x, na.rm=TRUE),
  max = max(x, na.rm=TRUE),
  rango_medio = (min(x, na.rm=TRUE) + max(x, na.rm=TRUE))/2,
  sd = sd(x, na.rm=TRUE),
  coef_var = ifelse(mean(x,na.rm=TRUE)==0, NA, sd(x,na.rm=TRUE)/mean(x,na.rm=TRUE)),
  q25 = quantile(x, .25, na.rm=TRUE),
  q75 = quantile(x, .75, na.rm=TRUE)
)))
round(res_num, 3)
##                          n   media mediana min    max rango_medio       sd
## nivel_aprobado        2623   5.253       5   0     99        49.5    5.615
## libros_gasto_num      2607 381.250       0   0 120000     60000.0 2696.898
## libros_leidos_12m_num 2622   2.815       2   0     70        35.0    5.105
## lectura_minutos_num   2623  41.997      30   0    480       240.0   40.337
##                       coef_var q25.25% q75.75%
## nivel_aprobado           1.069       3       7
## libros_gasto_num         7.074       0     250
## libros_leidos_12m_num    1.813       0       3
## lectura_minutos_num      0.960      20      60
cats <- datos[,c(1,2,4,5,9)]
res_cat <- lapply(cats, function(v){
  tb <- table(v, useNA="ifany")
  list(freq = tb, prop = round(prop.table(tb), 4))
})
res_cat
## $year
## $year$freq
## v
## 2020 2022 2024 
##  861  890  872 
## 
## $year$prop
## v
##   2020   2022   2024 
## 0.3283 0.3393 0.3324 
## 
## 
## $sabe_leer_escribir
## $sabe_leer_escribir$freq
## v
##    1    2 
## 2586   37 
## 
## $sabe_leer_escribir$prop
## v
##      1      2 
## 0.9859 0.0141 
## 
## 
## $condicion_actividad
## $condicion_actividad$freq
## v
##    1    2    3    4    5    6    7    8    9   10   99 
## 1309   27   46  110   17  192  551  317   21   32    1 
## 
## $condicion_actividad$prop
## v
##      1      2      3      4      5      6      7      8      9     10     99 
## 0.4990 0.0103 0.0175 0.0419 0.0065 0.0732 0.2101 0.1209 0.0080 0.0122 0.0004 
## 
## 
## $no_lectura_motivo
## $no_lectura_motivo$freq
## v
##    0    1    2    3    4    5 otro 
## 2312   76   56   81   15   69   14 
## 
## $no_lectura_motivo$prop
## v
##      0      1      2      3      4      5   otro 
## 0.8814 0.0290 0.0213 0.0309 0.0057 0.0263 0.0053 
## 
## 
## $lee_algo
## $lee_algo$freq
## v
## FALSE  TRUE 
##   778  1845 
## 
## $lee_algo$prop
## v
##  FALSE   TRUE 
## 0.2966 0.7034
theme_set(theme_minimal(base_size = 12))
datos %>%
  group_by(year) %>%
  summarise(prom_libros = mean(libros_leidos_12m_num, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(year, prom_libros)) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  geom_text(aes(label = round(prom_libros, 2)),
            vjust = -0.6, size = 3) +             
  scale_y_continuous(expand = expansion(mult = c(0.02, 0.12))) +  
  labs(title = "Promedio de libros leídos por año",
       x = "Año", y = "Libros (promedio)")

datos %>%
  group_by(year) %>%
  summarise(pct_lee = mean(lee_algo, na.rm = TRUE)) %>%
  ggplot(aes(year, pct_lee)) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  geom_text(aes(label = round(pct_lee, 4)),
            vjust = -0.6, size = 3) + 
  scale_y_continuous(labels = percent_format(accuracy = 1.2)) +
  labs(title = "% que leyó ≥1 libro en los últimos 12 meses",
       x = "Año", y = "Porcentaje")

datos <- datos %>%
  mutate(no_lectura_motivo = recode(as.character(no_lectura_motivo),
    "1" = "Falta interés/gusto",
    "2" = "Prefiere otras actividades",
    "3" = "Falta de tiempo",
    "4" = "Falta de dinero",
    "5" = "Problemas de salud",
    "6" = "otro",
    .default = "pase"
  ))

datos %>%
  filter(!is.na(no_lectura_motivo)) %>%
  count(no_lectura_motivo) %>%
  mutate(
    p = n / sum(n),  # calcula el porcentaje
    no_lectura_motivo = fct_reorder(no_lectura_motivo, p)  # ordena por porcentaje
  ) %>%             
  ggplot(aes(no_lectura_motivo, n, fill = no_lectura_motivo)) +
  geom_col() +
  geom_text(aes(label = percent(p)), hjust = -0.2, size = 3.8) +
  coord_flip(clip = "off") +
  scale_y_continuous(expand = expansion(mult = c(0, 0.12))) +
  guides(fill = "none") +
  labs(title = "Motivos para NO leer", x = NULL, y = "Porcentaje")

datos %>%
  filter(!is.na(no_lectura_motivo), no_lectura_motivo != "pase") %>%
  count(no_lectura_motivo) %>%
  mutate(
    p = n / sum(n), 
    no_lectura_motivo = fct_reorder(no_lectura_motivo, p) 
  ) %>%
  ggplot(aes(no_lectura_motivo, n, fill = no_lectura_motivo)) +
  geom_col() +
  geom_text(aes(label = percent(p)), hjust = -0.2, size = 3.8) +
  coord_flip(clip = "off") +
  scale_y_continuous(expand = expansion(mult = c(0, 0.12))) +
  guides(fill = "none") +
  labs(title = "Motivos para NO leer (sin “PASE”)", x = NULL, y = "Porcentaje")

datos <- datos %>%
  mutate(nivel_aprobado = recode(as.character(nivel_aprobado),
    "0" = "Ninguno",
    "1" = "Preescolar",
    "2" = "Primaria",
    "3" = "Secundaria",
    "4" = "Preparatoria",
    "5" = "Normal básica",
    "6" = "Carrera técnica",
    "7" = "Profesional",
    "8" = "Maestría",
    "9" = "Doctorado",
    "99" = "No sabe",
    .default = "Otro"
  ))

df_plot <- datos %>%
  filter(!is.na(nivel_aprobado)) %>%
  group_by(nivel_aprobado) %>%
  filter(condicion_actividad != "No sabe") %>%
  summarise(total_libros = sum(libros_leidos_12m_num, na.rm = TRUE), .groups = "drop") %>%
  mutate(nivel_aprobado = fct_reorder(nivel_aprobado, total_libros))  

ggplot(df_plot, aes(nivel_aprobado, total_libros, fill = nivel_aprobado)) +
  geom_col() +
  geom_text(aes(label = comma(round(total_libros, 0))),
            hjust = -0.2, size = 3) +     
  coord_flip() +
  scale_y_continuous(expand = expansion(mult = c(0, 0.12))) +  
  guides(fill = "none") +
  labs(title = "Libros leídos por nivel educativo",
       x = "Nivel aprobado", y = "Libros en 12 meses (3 años)")

datos <- datos %>%
  mutate(condicion_actividad = recode(as.character(condicion_actividad),
    "1" = "trabajo para obtener ingresos",
    "2" = "trabajo sin pago",
    "3" = "tenía trabajo",
    "4" = "busca trabajo",
    "5" = "espera de solicitud de trabajo",
    "6" = "estudiante",
    "7" = "quehaceres del hogar",
    "8" = "jubilado(a)/pensionado(a)",
    "9" = "incapacitado(a)",
    "10" = "Otra situación",
    "99" = "No especificado",
    .default = "Otro"
  ))

datos %>%
  filter(!is.na(condicion_actividad)) %>%
  group_by(condicion_actividad) %>%
  summarise(pct = mean(lee_algo, na.rm = TRUE)) %>%
  mutate(condicion_actividad = fct_reorder(condicion_actividad, pct)) %>%
  ggplot(aes(condicion_actividad, pct, fill=condicion_actividad)) +
  geom_col() +
  geom_text(aes(label = percent(pct, 1)), hjust = -0.05, size = 3.5) +
  coord_flip(clip = "off") +
  scale_y_continuous(labels = percent_format(), expand = expansion(mult = c(0, .1))) +
  labs(title = "% que leyó ≥1 libro por condición de actividad",
       x = NULL, y = "Porcentaje")

datos %>%
  ggplot(aes(x = libros_leidos_12m_num)) +
  geom_histogram(bins = 30, fill = "#69b3a2", color = "white") +
  facet_wrap(~ year, scales = "free_y") +
  labs(
    title = "Distribución de libros leídos por año",
    x = "Libros en 12 meses",
    y = "Frecuencia"
  ) +
  theme_minimal(base_size = 13)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

res_nivel <- datos%>%
  group_by(nivel_aprobado) %>%
  summarise(
    n = n(),
    pct_lee = mean(lee_algo, na.rm=TRUE)*100,
    media_lectores   = mean(libros_leidos_12m_num[lee_algo], na.rm=TRUE),
    mediana_lectores = median(libros_leidos_12m_num[lee_algo], na.rm=TRUE)
  )
res_nivel
## # A tibble: 11 × 5
##    nivel_aprobado      n pct_lee media_lectores mediana_lectores
##    <chr>           <int>   <dbl>          <dbl>            <dbl>
##  1 Carrera técnica   241    63.5           2.95              2  
##  2 Doctorado          30    86.7           9.15              4  
##  3 Maestría          152    83.6           5.98              4  
##  4 Ninguno            62    16.1           1.1               1  
##  5 No sabe             8    12.5           1                 1  
##  6 Normal básica      34    52.9           3.89              2.5
##  7 Preescolar          2     0           NaN                NA  
##  8 Preparatoria      492    73.6           3.78              2  
##  9 Primaria          315    53.0           3.32              2  
## 10 Profesional       856    83.4           4.32              3  
## 11 Secundaria        431    61.9           3.18              2
res_act <- datos %>%
  group_by(condicion_actividad) %>%
  summarise(
    n = n(),
    pct_lee = mean(lee_algo, na.rm=TRUE)*100,
    media_lectores   = mean(libros_leidos_12m_num[lee_algo], na.rm=TRUE),
    mediana_lectores = median(libros_leidos_12m_num[lee_algo], na.rm=TRUE)
  )
res_act
## # A tibble: 11 × 5
##    condicion_actividad                n pct_lee media_lectores mediana_lectores
##    <chr>                          <int>   <dbl>          <dbl>            <dbl>
##  1 No especificado                    1   100             1                 1  
##  2 Otra situación                    32    25             7.75              2.5
##  3 busca trabajo                    110    48.2           3.42              2  
##  4 espera de solicitud de trabajo    17    41.2           4.57              5  
##  5 estudiante                       192    87.5           4.52              4  
##  6 incapacitado(a)                   21    19.0           1.5               1  
##  7 jubilado(a)/pensionado(a)        317    61.8           3.48              2  
##  8 quehaceres del hogar             551    65.7           3.13              2  
##  9 tenía trabajo                     46    37.0           2.76              2  
## 10 trabajo para obtener ingresos   1309    77.8           4.37              3  
## 11 trabajo sin pago                  27    37.0           2.7               3
datos %>% filter(nivel_aprobado != "No sabe") %>% 
ggplot(aes(nivel_aprobado, libros_leidos_12m_num, fill=nivel_aprobado)) +
  geom_boxplot(outlier.alpha=.3) + coord_flip() +
  labs(title="Libros en 12m por nivel educativo", x=NULL, y="Libros en 12 meses")
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

datos %>% filter(condicion_actividad != "No especificado") %>% 
ggplot(aes(condicion_actividad, libros_leidos_12m_num,fill=condicion_actividad)) +
  geom_boxplot(outlier.alpha=.3) + coord_flip() +
  labs(title="Libros en 12m por condición de actividad", x=NULL, y="Libros en 12 meses")
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

datos %>%
  group_by(nivel_aprobado) %>%
  filter(nivel_aprobado != "No sabe") %>% 
  summarise(pct = mean(lee_algo, na.rm=TRUE)) %>%
  ggplot(aes(fct_reorder(nivel_aprobado, pct), pct,fill=nivel_aprobado)) +
  geom_col() +
  geom_text(aes(label=percent(pct,1)), hjust=-0.05, size=3.5) +
  coord_flip(clip="off") +
  labs(title="% que leyó ≥1 libro por nivel educativo", x=NULL, y="Porcentaje")

datos %>%
  group_by(condicion_actividad) %>%
  filter(condicion_actividad != "no especificado") %>%
  summarise(pct = mean(lee_algo, na.rm=TRUE)) %>%
  ggplot(aes(fct_reorder(condicion_actividad, pct), pct,fill=condicion_actividad)) +
  geom_col() +
  geom_text(aes(label=percent(pct,1)), hjust=-0.05, size=3.5) +
  coord_flip(clip="off") +
  labs(title="% que leyó ≥1 libro por condición de actividad", x=NULL, y="Porcentaje")

AVANCE 3

datos <- datos %>%
  mutate(
    lee_f = factor(lee_algo, levels = c(FALSE,TRUE), labels = c("No","Sí")),
    educacion_grupo = case_when(
      nivel_aprobado %in% c("Ninguno","Preescolar") ~ "Sin educación",
      nivel_aprobado %in% "Primaria" ~ "Básica",
      nivel_aprobado %in% c("Secundaria","Preparatoria","Normal básica","Carrera técnica") ~ "Media",
      nivel_aprobado %in% c("Profesional","Maestría","Doctorado") ~ "Superior y Posgrado",
      TRUE ~ NA_character_
    ),
    actividad_grupo = case_when(
      condicion_actividad %in% c("trabajo con ingresos","trabajo sin pago","tenía trabajo") ~ "Trabaja",
      condicion_actividad %in% "estudiante" ~ "Estudia",
      condicion_actividad %in% c("busca trabajo","espera trabajo","quehaceres del hogar",
                                 "jubilado/pensionado","incapacitado","otra situación",
                                 "no especificado","Otro") ~ "No trabaja",
      TRUE ~ NA_character_
    ),
  )

theme_set(theme_minimal(base_size = 12))

Nuevas gráficas descriptivas por categorías:

stats_educacion <- datos %>%
  filter(!is.na(educacion_grupo), !is.na(libros_leidos_12m_num)) %>%
  group_by(educacion_grupo) %>%
  summarise(
    media   = mean(libros_leidos_12m_num, na.rm = TRUE),
    mediana = median(libros_leidos_12m_num, na.rm = TRUE),
    n = n()
  )

p_edu <- datos %>%
  filter(!is.na(educacion_grupo), !is.na(libros_leidos_12m_num)) %>%
  ggplot(aes(
    x = fct_reorder(educacion_grupo, libros_leidos_12m_num, .fun = median, .na_rm = TRUE),
    y = libros_leidos_12m_num,
    fill = educacion_grupo
  )) +
  geom_boxplot(outlier.alpha = .3)
p_edu +
  geom_text(
    data = stats_educacion,
    aes(x = educacion_grupo, y = mediana,
        label = paste0("Mediana=", round(mediana, 1))),
    vjust = -1.6, hjust = -3.0, size = 3,
    inherit.aes = FALSE
  ) +
  geom_text(
    data = stats_educacion,
    aes(x = educacion_grupo, y = media,
        label = paste0("Media=", round(media, 1))),
    vjust = 2.5, hjust = -3.0, size = 3, color = "gray20",
    inherit.aes = FALSE
  ) +
  coord_flip() +
  guides(fill = "none") +
  labs(
    title = "Libros en 12 meses por nivel educativo (grupos)",
    x = NULL,
    y = "Libros en 12 meses"
  )

stats_actividad <- datos %>%
  filter(!is.na(actividad_grupo), !is.na(libros_leidos_12m_num)) %>%
  group_by(actividad_grupo) %>%
  summarise(
    media   = mean(libros_leidos_12m_num, na.rm = TRUE),
    mediana = median(libros_leidos_12m_num, na.rm = TRUE),
    n = n()
  )

p <- datos %>%
  filter(!is.na(actividad_grupo), !is.na(libros_leidos_12m_num)) %>%
  ggplot(aes(
    x = fct_reorder(actividad_grupo, libros_leidos_12m_num, .fun = median, .na_rm = TRUE),
    y = libros_leidos_12m_num, fill = actividad_grupo
  )) +
  geom_boxplot(outlier.alpha = .3)

p +
  geom_text(
    data = stats_actividad,
    aes(x = actividad_grupo, y = mediana,
        label = paste0("Mediana=", round(mediana, 1))),
    vjust = -2.6,hjust = -3.0, size = 3,
    inherit.aes = FALSE
  ) +
  geom_text(
    data = stats_actividad,
    aes(x = actividad_grupo, y = media,
        label = paste0("Media=", round(media, 1))),
    vjust = 3.5,hjust = -3.0, size = 3, color = "gray20",
    inherit.aes = FALSE
  ) +
  coord_flip() +
  guides(fill = "none") +
  labs(
    title = "Libros en 12 meses por condición de actividad (grupos)",
    x = NULL, y = "Libros en 12 meses"
  )

datos %>%
  filter(!is.na(educacion_grupo), !is.na(libros_leidos_12m_num)) %>%
  group_by(educacion_grupo) %>%
  summarise(pct = mean(libros_leidos_12m_num > 0, na.rm = TRUE), n = n(), .groups = "drop") %>%
  ggplot(aes(fct_reorder(educacion_grupo, pct), pct, fill = educacion_grupo)) +
  geom_col() +
  geom_text(aes(label = percent(pct, accuracy = 0.1)), hjust = -0.12, size = 3.8) +
  coord_flip(clip = "off") +
  scale_y_continuous(labels = percent_format(),
                     expand = expansion(mult = c(0, 0.12))) +
  guides(fill = "none") +
  labs(title = "% que leyó ≥1 libro por nivel educativo (grupos)",
       x = NULL, y = "Porcentaje")

datos %>%
  filter(!is.na(actividad_grupo), !is.na(libros_leidos_12m_num)) %>%
  group_by(actividad_grupo) %>%
  summarise(pct = mean(libros_leidos_12m_num > 0, na.rm = TRUE), n = n(), .groups = "drop") %>%
  ggplot(aes(fct_reorder(actividad_grupo, pct), pct, fill = actividad_grupo)) +
  geom_col() +
  geom_text(aes(label = percent(pct, accuracy = 0.1)), hjust = -0.12, size = 3.8) +
  coord_flip(clip = "off") +
  scale_y_continuous(labels = percent_format(),
                     expand = expansion(mult = c(0, 0.12))) +
  guides(fill = "none") +
  labs(title = "% que leyó ≥1 libro por condición de actividad (grupos)",
       x = NULL, y = "Porcentaje")

alpha <- 0.04
conf  <- 1 - alpha

Intervalos de confianza

n_por_grupo <- datos %>%
  filter(!is.na(educacion_grupo), !is.na(libros_leidos_12m_num)) %>%
  count(educacion_grupo, name = "n") %>%
  mutate(use_t = n < 30) %>%      
  arrange(n)

n_por_grupo
##       educacion_grupo    n use_t
## 1       Sin educación   64 FALSE
## 2              Básica  315 FALSE
## 3 Superior y Posgrado 1037 FALSE
## 4               Media 1198 FALSE
res_edu <- datos %>%
  filter(!is.na(educacion_grupo), !is.na(libros_leidos_12m_num)) %>%
  group_by(educacion_grupo) %>%
  summarise(
    n = n(),
    media = mean(libros_leidos_12m_num),
    sd = sd(libros_leidos_12m_num),
    se = sd/sqrt(n),
    t_crit = qt(1 - alpha/2,df=n-1),
    IC_li = media - t_crit * se,
    IC_ls = media + t_crit * se,
    .groups = "drop"
  )

print(res_edu)
## # A tibble: 4 × 8
##   educacion_grupo         n media    sd     se t_crit  IC_li IC_ls
##   <chr>               <int> <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>
## 1 Básica                315 1.76  5.21  0.293    2.06 1.15   2.36 
## 2 Media                1198 2.29  4.39  0.127    2.06 2.03   2.55 
## 3 Sin educación          64 0.172 0.420 0.0525   2.10 0.0618 0.282
## 4 Superior y Posgrado  1037 3.93  5.74  0.178    2.06 3.56   4.30
res_edu$educacion_grupo <- factor(res_edu$educacion_grupo,
  levels = c("Sin educación", "Básica", "Media", "Superior y Posgrado")
)

ggplot(res_edu, aes(x = educacion_grupo, y = media)) +
  geom_point(size=3, color="blue") +
  geom_errorbar(aes(ymin=IC_li, ymax=IC_ls), width=0.2, color="darkblue") +
  geom_text(aes(label = round(media, 2)),
            vjust = -2.0, size = 3) +
  labs(title="Promedio de libros leídos por nivel educativo (96% IC)",
       x="Nivel educativo", y="Media de libros") +
  theme_minimal()

n_por_grupo <- datos %>%
  filter(!is.na(actividad_grupo), !is.na(libros_leidos_12m_num)) %>%
  count(actividad_grupo, name = "n") %>%
  mutate(use_t = n < 30) %>%       # regla práctica: t si n<30
  arrange(n)

n_por_grupo
##   actividad_grupo   n use_t
## 1         Trabaja  73 FALSE
## 2         Estudia 192 FALSE
## 3      No trabaja 661 FALSE
res_act <- datos %>%
  filter(!is.na(actividad_grupo), !is.na(libros_leidos_12m_num)) %>%
  group_by(actividad_grupo) %>%
  summarise(
    n = n(),
    media = mean(libros_leidos_12m_num),
    sd = sd(libros_leidos_12m_num),
    se = sd/sqrt(n),
    t_crit = qt(1 - alpha/2,df=n-1),
    IC_li = media - t_crit * se,
    IC_ls = media + t_crit * se,
    .groups = "drop"
  )

print(res_act)
## # A tibble: 3 × 8
##   actividad_grupo     n media    sd    se t_crit IC_li IC_ls
##   <chr>           <int> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1 Estudia           192  3.95  4.30 0.310   2.07 3.31   4.59
## 2 No trabaja        661  1.99  4.39 0.171   2.06 1.64   2.34
## 3 Trabaja            73  1.01  1.76 0.206   2.09 0.583  1.44
res_act$actividad_grupo <- factor(res_act$actividad_grupo,
  levels = c("No trabaja", "Trabaja","Estudia")
)

ggplot(res_act, aes(x = actividad_grupo, y = media)) +
  geom_point(size=3, color="green") +
  geom_errorbar(aes(ymin=IC_li, ymax=IC_ls), width=0.2, color="darkgreen") +
  geom_text(aes(label = round(media, 2)),
            vjust = -2.0, size = 3) +
  labs(title="Promedio de libros leídos por condición de actividad (96% IC)",
       x="Condición de actividad", y="Media de libros") +
  theme_minimal()

datos <- datos %>%
  filter(!is.na(educacion_grupo))%>%
  mutate(con_educacion = ifelse(educacion_grupo == "Sin educación",
                                "Sin educación", "Con educación"))

datos %>%
  group_by(con_educacion) %>%
  summarise(
    cantidad = n(),
    promedio_libros = mean(libros_leidos_12m_num, na.rm = TRUE),
    desviacion = sd(libros_leidos_12m_num, na.rm = TRUE)
  )
## # A tibble: 2 × 4
##   con_educacion cantidad promedio_libros desviacion
##   <chr>            <int>           <dbl>      <dbl>
## 1 Con educación     2551           2.89       5.16 
## 2 Sin educación       64           0.172      0.420
resultado <- t.test(libros_leidos_12m_num ~ con_educacion,
                    data = datos,
                    alternative = "greater",
                    conf.level=0.96, # Una cola derecha
                    var.equal = FALSE)         # Welch (no asumimos varianzas iguales)

print(resultado)
## 
##  Welch Two Sample t-test
## 
## data:  libros_leidos_12m_num by con_educacion
## t = 23.674, df = 1065.2, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group Con educación and group Sin educación is greater than 0
## 96 percent confidence interval:
##  2.517113      Inf
## sample estimates:
## mean in group Con educación mean in group Sin educación 
##                    2.890196                    0.171875
t_stat <- resultado$statistic    
df <- resultado$parameter   

alpha <- 0.04 
t_crit <- qt(1 - alpha, df)

x <- seq(-25, 25, length=400)
y <- dt(x, df)

plot(x, y, type="l", lwd=2, xlab="t", ylab="Densidad", 
     main="Prueba t de Student (una cola derecha)\nCon educación > Sin educación")

polygon(c(x[x>t_crit], t_crit, max(x)), 
        c(y[x>t_crit], 0, 0), col=rgb(1,0,0,0.2))

abline(v=t_crit, col="red", lwd=2, lty=2)       # Valor crítico
abline(v=t_stat, col="blue", lwd=2)             # t calculado

legend("topright", legend=c("Valor crítico", "Estadístico t"), 
       col=c("red","blue"), lty=2:1, lwd=2)

ic_medias <- datos %>%
  group_by(con_educacion) %>%
  summarise(
    n = n(),
    media = mean(libros_leidos_12m_num, na.rm = TRUE),
    sd = sd(libros_leidos_12m_num, na.rm = TRUE)
  ) %>%
  mutate(
    t_crit = qt(1 - alpha/2, df = n - 1),              
    error = t_crit * sd / sqrt(n),                     
    LI = media - error,
    LS = media + error
  )

ggplot(ic_medias, aes(x = con_educacion, y = media)) +
  geom_point(size = 3, color = "orange") +
  geom_errorbar(aes(ymin = LI, ymax = LS), width = 0.2, color = "orange", lwd = 1) +
  geom_text(aes(label = round(media, 2)),
            vjust = -2.0, size = 3) +
  labs(
    title = "Intervalos de confianza (96%) de libros leídos por grupo educativo",
    x = "Nivel educativo",
    y = "Media de libros leídos"
  ) +
  theme_minimal()

tabla1 <- table(datos$educacion_grupo, datos$lee_f)
tabla2 <- table(datos$actividad_grupo, datos$lee_f)

tabla1
##                      
##                        No  Sí
##   Básica              148 167
##   Media               398 800
##   Sin educación        54  10
##   Superior y Posgrado 171 867
tabla2
##             
##               No  Sí
##   Estudia     24 168
##   No trabaja 245 415
##   Trabaja     46  27
chi1 <- chisq.test(tabla1, correct = FALSE)
chi2 <- chisq.test(tabla2, correct = FALSE)

print(chi1); print(chi2)
## 
##  Pearson's Chi-squared test
## 
## data:  tabla1
## X-squared = 231.71, df = 3, p-value < 2.2e-16
## 
##  Pearson's Chi-squared test
## 
## data:  tabla2
## X-squared = 69.746, df = 2, p-value = 7.16e-16
datos_barras <- datos %>% drop_na(educacion_grupo, actividad_grupo, lee_f)

ggplot(datos_barras, aes(x = educacion_grupo, fill = lee_f)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Proporción de lectores por nivel educativo",
    x = "Nivel educativo",
    y = "Proporción",
    fill = "Lee"
  ) +
  geom_text(
    aes(
      label = paste0(round((..count..) / tapply(..count.., ..x.., sum)[as.character(..x..)] * 100, 1), "%")
    ),
    stat = "count", position = position_fill(vjust = 0.5), size = 3
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1))
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ggplot(datos_barras, aes(x = actividad_grupo, fill = lee_f)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Proporción de lectores por condición de actividad",
    x = "Condición de actividad",
    y = "Proporción",
    fill = "Lee"
  ) +
  geom_text(
    aes(
      label = paste0(round((..count..) / tapply(..count.., ..x.., sum)[as.character(..x..)] * 100, 1), "%")
    ),
    stat = "count", position = position_fill(vjust = 0.5), size = 3
  )  +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1))

alpha <- 0.04        
z <- qnorm(1 - alpha/2)  

no_lectores <- datos %>%
  filter(lee_algo == FALSE, !is.na(no_lectura_motivo), no_lectura_motivo != "pase")

ic_motivos <- no_lectores %>%
  count(no_lectura_motivo, name = "x") %>%
  mutate(
    n = sum(x),
    p = x / n,
    error = z * sqrt((p * (1 - p)) / n),
    LI = p - error,
    LS = p + error
  )

ic_motivos
##            no_lectura_motivo  x   n          p      error         LI         LS
## 1            Falta de dinero 15 292 0.05136986 0.02653130 0.02483856 0.07790117
## 2            Falta de tiempo 78 292 0.26712329 0.05317742 0.21394587 0.32030071
## 3        Falta interés/gusto 74 292 0.25342466 0.05227778 0.20114687 0.30570244
## 4 Prefiere otras actividades 56 292 0.19178082 0.04731764 0.14446318 0.23909846
## 5         Problemas de salud 69 292 0.23630137 0.05105638 0.18524499 0.28735775
ic_motivos <- ic_motivos %>%
  arrange(p) %>%
  mutate(no_lectura_motivo = factor(no_lectura_motivo, levels = no_lectura_motivo))

ggplot(ic_motivos, aes(x = no_lectura_motivo, y = p)) +
  geom_col(fill = "steelblue", alpha = 0.8) +  # barras de proporciones
  geom_errorbar(aes(ymin = LI, ymax = LS), width = 0.2, color = "black") +  # IC 96%
  geom_text(aes(label = scales::percent(p, accuracy = 0.1)), 
            vjust = -0.5, size = 3.5) +  
  labs(
    title = "Proporción de motivos de no lectura (con IC del 96%)",
    x = "Motivo principal de no lectura",
    y = "Proporción de personas"
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 30, hjust = 1),
    plot.title = element_text(face = "bold")
  )

tabla3 <- table(datos$educacion_grupo, datos$no_lectura_motivo)
tabla4 <- table(datos$actividad_grupo, datos$no_lectura_motivo)

tabla3
##                      
##                       Falta de dinero Falta de tiempo Falta interés/gusto pase
##   Básica                            4              15                  14  254
##   Media                            10              38                  37 1048
##   Sin educación                     1               5                   7   40
##   Superior y Posgrado               0              20                  16  981
##                      
##                       Prefiere otras actividades Problemas de salud
##   Básica                                      12                 16
##   Media                                       30                 35
##   Sin educación                                3                  8
##   Superior y Posgrado                         11                 10
tabla4
##             
##              Falta de dinero Falta de tiempo Falta interés/gusto pase
##   Estudia                  0               5                   2  183
##   No trabaja               2              24                  26  569
##   Trabaja                  0              11                   5   53
##             
##              Prefiere otras actividades Problemas de salud
##   Estudia                             2                  0
##   No trabaja                         19                 20
##   Trabaja                             2                  2
chi3 <- chisq.test(tabla3, correct = FALSE)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
chi4 <- chisq.test(tabla4, correct = FALSE)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
print(chi3); print(chi4)
## 
##  Pearson's Chi-squared test
## 
## data:  tabla3
## X-squared = 113.57, df = 15, p-value < 2.2e-16
## 
##  Pearson's Chi-squared test
## 
## data:  tabla4
## X-squared = 39.355, df = 10, p-value = 2.2e-05
datos_barras <- datos %>% drop_na(educacion_grupo, actividad_grupo, no_lectura_motivo)

# (a) Nivel educativo vs Lector
datos_barras %>%
  filter(!is.na(educacion_grupo),
         !is.na(no_lectura_motivo),
         no_lectura_motivo != "pase") %>%
  count(educacion_grupo, no_lectura_motivo) %>%   
  group_by(educacion_grupo) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x = educacion_grupo, y = prop, fill = no_lectura_motivo)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  geom_text(aes(label = scales::percent(prop, accuracy = 1)),
            position = position_fill(vjust = 0.5), size = 3) +
  labs(
    title = "Motivos para NO leer por nivel educativo",
    x = "Nivel educativo",
    y = "Proporción",
    fill = "Motivo"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1))

datos_barras %>%
  filter(!is.na(actividad_grupo),
         !is.na(no_lectura_motivo),
         no_lectura_motivo != "pase") %>%
  count(actividad_grupo, no_lectura_motivo) %>%
  group_by(actividad_grupo) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x = actividad_grupo, y = prop, fill = no_lectura_motivo)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  geom_text(aes(label = scales::percent(prop, accuracy = 1)),
            position = position_fill(vjust = 0.5), size = 3) +
  labs(
    title = "Motivos para NO leer por condición de actividad",
    x = "Condición de actividad",
    y = "Proporción",
    fill = "Motivo"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1))

n_inicial <- nrow(molec)
n_final <- nrow(datos)
descartados <- n_inicial - n_final
porcentaje_descartados <- (descartados / n_inicial) * 100
cat("Porcentaje de datos descartados:", round(porcentaje_descartados, 2), "%")
## Porcentaje de datos descartados: 56.72 %