library(readr)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(purrr)
## Warning: package 'purrr' was built under R version 4.4.3
library(stringr)
library(janitor)
## Warning: package 'janitor' was built under R version 4.4.3
##
## Adjuntando el paquete: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ tibble 3.2.1
## ✔ ggplot2 3.5.2 ✔ tidyr 1.3.1
## ✔ lubridate 1.9.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(forcats)
library(scales)
## Warning: package 'scales' was built under R version 4.4.3
##
## Adjuntando el paquete: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
m20 <- read.csv("Datos_molec_2020-1.csv")
m22 <- read.csv("Datos_molec_2022-1.csv")
m24<- read.csv("Datos_molec_2024-1.csv")
# Año 2020
str(m20)
## 'data.frame': 2010 obs. of 108 variables:
## $ folio : chr "11A192" "11A192" "11A192" "12A192" ...
## $ entidad : int 1 1 1 1 1 1 1 1 1 1 ...
## $ control : int 40060 40060 40060 40091 40091 40131 40131 40131 40131 40132 ...
## $ viv_sel : int 1 3 2 4 1 2 3 4 1 1 ...
## $ num_hog : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hog_mud : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_ren_el: int 1 4 2 1 2 1 1 1 1 1 ...
## $ cd : int 14 14 14 14 14 14 14 14 14 14 ...
## $ periodo : int 220 220 220 220 220 220 220 220 220 220 ...
## $ sexo : int 1 2 2 1 2 2 1 1 1 1 ...
## $ edad : int 48 41 55 55 60 48 67 40 43 68 ...
## $ anio : int 6 3 3 5 3 3 3 3 2 6 ...
## $ nivel : int 2 3 3 7 6 3 6 3 2 2 ...
## $ cond_act: int 1 7 1 1 7 1 1 1 1 8 ...
## $ p1 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ p2 : int 1 2 2 1 1 1 1 1 2 1 ...
## $ p3_1 : int 1 2 2 1 1 2 2 2 2 1 ...
## $ p3_2 : int 1 2 2 2 2 1 2 2 2 2 ...
## $ p3_3 : int 1 2 2 1 2 2 1 1 2 2 ...
## $ p3_4 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p3_5 : int 1 1 2 1 2 1 2 1 2 2 ...
## $ p4 : int 1 0 0 2 2 0 0 0 0 1 ...
## $ p5 : int 4 0 0 4 4 0 0 0 0 4 ...
## $ p5_6esp : chr "" "" "" "" ...
## $ p6_1 : int 2 0 0 2 2 0 0 0 0 2 ...
## $ p6_2 : int 2 0 0 2 2 0 0 0 0 2 ...
## $ p6_3 : int 2 0 0 2 1 0 0 0 0 2 ...
## $ p6_4 : int 1 0 0 1 2 0 0 0 0 1 ...
## $ p6_5 : int 2 0 0 2 2 0 0 0 0 2 ...
## $ p6_6 : int 2 0 0 2 2 0 0 0 0 2 ...
## $ p6_6esp : chr "" "" "" "" ...
## $ p7 : int 2 0 0 2 2 0 0 0 0 2 ...
## $ p7_3 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p8_1 : int 2 0 0 2 2 0 0 0 0 2 ...
## $ p8_2 : int 1 0 0 1 1 0 0 0 0 1 ...
## $ p9 : int 1 0 0 1 1 0 0 0 0 1 ...
## $ p9_5esp : chr "" "" "" "" ...
## $ p10 : int 2 0 0 0 0 2 0 0 0 0 ...
## $ p11 : int 4 0 0 0 0 4 0 0 0 0 ...
## $ p11_6esp: chr "" "" "" "" ...
## $ p12_1 : int 2 0 0 0 0 2 0 0 0 0 ...
## $ p12_2 : int 2 0 0 0 0 2 0 0 0 0 ...
## $ p12_3 : int 2 0 0 0 0 2 0 0 0 0 ...
## $ p12_4 : int 1 0 0 0 0 1 0 0 0 0 ...
## $ p12_5 : int 2 0 0 0 0 2 0 0 0 0 ...
## $ p12_6 : int 2 0 0 0 0 2 0 0 0 0 ...
## $ p12_7 : int 1 0 0 0 0 2 0 0 0 0 ...
## $ p12_8 : int 2 0 0 0 0 1 0 0 0 0 ...
## $ p12_9 : int 2 0 0 0 0 2 0 0 0 0 ...
## $ p12_9esp: chr "" "" "" "" ...
## $ p13 : int 2 0 0 0 0 2 0 0 0 0 ...
## $ p13_3 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p14_1 : int 2 0 0 0 0 2 0 0 0 0 ...
## $ p14_2 : int 1 0 0 0 0 1 0 0 0 0 ...
## $ p15 : int 1 0 0 0 0 2 0 0 0 0 ...
## $ p15_5esp: chr "" "" "" "" ...
## $ p16 : int 1 0 0 1 0 0 1 1 0 0 ...
## $ p17 : int 4 0 0 4 0 0 3 4 0 0 ...
## $ p17_6esp: chr "" "" "" "" ...
## $ p18_1 : int 2 0 0 1 0 0 1 2 0 0 ...
## $ p18_2 : int 1 0 0 1 0 0 1 1 0 0 ...
## $ p18_3 : int 2 0 0 1 0 0 2 2 0 0 ...
## $ p18_4 : int 1 0 0 1 0 0 2 2 0 0 ...
## $ p18_5 : int 1 0 0 1 0 0 1 1 0 0 ...
## $ p19 : int 2 0 0 2 0 0 2 2 0 0 ...
## $ p19_3 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p20_1 : int 2 0 0 2 0 0 2 2 0 0 ...
## $ p20_2 : int 1 0 0 1 0 0 1 1 0 0 ...
## $ p21 : int 2 0 0 2 0 0 2 2 0 0 ...
## $ p21_5esp: chr "" "" "" "" ...
## $ p22 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p23_1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p23_2 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p24 : int 5 5 0 2 0 3 0 1 0 0 ...
## $ p25 : int 4 4 0 4 0 4 0 4 0 0 ...
## $ p25_6esp: chr "" "" "" "" ...
## $ p26 : int 30 10 0 20 20 30 20 10 0 60 ...
## $ p27 : int 2 2 0 2 2 2 1 2 0 2 ...
## $ p28 : int 0 0 0 0 0 0 6 0 0 0 ...
## $ p28_7esp: chr "" "" "" "" ...
## $ p29 : int 2 2 0 2 2 4 3 4 0 3 ...
## $ p30 : int 3 3 0 3 3 4 3 4 0 3 ...
## $ p31 : int 2 2 0 1 2 2 2 2 0 2 ...
## $ p32 : int 0 0 3 0 0 0 0 0 3 0 ...
## $ p32_6esp: chr "" "" "" "" ...
## $ p33_1 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p33_2 : int 2 2 2 2 2 2 1 2 2 2 ...
## $ p33_3 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p33_4 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p34_1 : int 2 2 2 1 2 3 3 3 3 1 ...
## $ p34_2 : int 2 2 2 1 2 3 3 1 3 1 ...
## $ p34_3 : int 2 2 2 2 2 1 3 3 3 1 ...
## $ p34_3_1 : int 0 0 0 0 0 2 0 0 0 1 ...
## $ p34_4 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ p34_4_1 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ p35 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ p36_1 : int 1 1 2 1 2 1 3 3 3 3 ...
## $ p36_2 : int 1 1 1 1 2 1 3 3 3 3 ...
## $ p36_3 : int 1 1 1 1 2 1 1 1 3 1 ...
## [list output truncated]
names(m20)
## [1] "folio" "entidad" "control" "viv_sel" "num_hog" "hog_mud"
## [7] "n_ren_el" "cd" "periodo" "sexo" "edad" "anio"
## [13] "nivel" "cond_act" "p1" "p2" "p3_1" "p3_2"
## [19] "p3_3" "p3_4" "p3_5" "p4" "p5" "p5_6esp"
## [25] "p6_1" "p6_2" "p6_3" "p6_4" "p6_5" "p6_6"
## [31] "p6_6esp" "p7" "p7_3" "p8_1" "p8_2" "p9"
## [37] "p9_5esp" "p10" "p11" "p11_6esp" "p12_1" "p12_2"
## [43] "p12_3" "p12_4" "p12_5" "p12_6" "p12_7" "p12_8"
## [49] "p12_9" "p12_9esp" "p13" "p13_3" "p14_1" "p14_2"
## [55] "p15" "p15_5esp" "p16" "p17" "p17_6esp" "p18_1"
## [61] "p18_2" "p18_3" "p18_4" "p18_5" "p19" "p19_3"
## [67] "p20_1" "p20_2" "p21" "p21_5esp" "p22" "p23_1"
## [73] "p23_2" "p24" "p25" "p25_6esp" "p26" "p27"
## [79] "p28" "p28_7esp" "p29" "p30" "p31" "p32"
## [85] "p32_6esp" "p33_1" "p33_2" "p33_3" "p33_4" "p34_1"
## [91] "p34_2" "p34_3" "p34_3_1" "p34_4" "p34_4_1" "p35"
## [97] "p36_1" "p36_2" "p36_3" "p36_4" "factor" "h_lec"
## [103] "mat_lec" "perslec" "l_format" "r_format" "p_format" "perslecl"
head(m20)
## folio entidad control viv_sel num_hog hog_mud n_ren_el cd periodo sexo edad
## 1 11A192 1 40060 1 1 0 1 14 220 1 48
## 2 11A192 1 40060 3 1 0 4 14 220 2 41
## 3 11A192 1 40060 2 1 0 2 14 220 2 55
## 4 12A192 1 40091 4 1 0 1 14 220 1 55
## 5 12A192 1 40091 1 1 0 2 14 220 2 60
## 6 11B176 1 40131 2 1 0 1 14 220 2 48
## anio nivel cond_act p1 p2 p3_1 p3_2 p3_3 p3_4 p3_5 p4 p5 p5_6esp p6_1 p6_2
## 1 6 2 1 1 1 1 1 1 2 1 1 4 2 2
## 2 3 3 7 1 2 2 2 2 2 1 0 0 0 0
## 3 3 3 1 1 2 2 2 2 2 2 0 0 0 0
## 4 5 7 1 1 1 1 2 1 2 1 2 4 2 2
## 5 3 6 7 1 1 1 2 2 2 2 2 4 2 2
## 6 3 3 1 1 1 2 1 2 2 1 0 0 0 0
## p6_3 p6_4 p6_5 p6_6 p6_6esp p7 p7_3 p8_1 p8_2 p9 p9_5esp p10 p11 p11_6esp
## 1 2 1 2 2 2 0 2 1 1 2 4
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 2 1 2 2 2 0 2 1 1 0 0
## 5 1 2 2 2 2 0 2 1 1 0 0
## 6 0 0 0 0 0 0 0 0 0 2 4
## p12_1 p12_2 p12_3 p12_4 p12_5 p12_6 p12_7 p12_8 p12_9 p12_9esp p13 p13_3
## 1 2 2 2 1 2 2 1 2 2 2 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 2 2 2 1 2 2 2 1 2 2 0
## p14_1 p14_2 p15 p15_5esp p16 p17 p17_6esp p18_1 p18_2 p18_3 p18_4 p18_5 p19
## 1 2 1 1 1 4 2 1 2 1 1 2
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 1 4 1 1 1 1 1 2
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 2 1 2 0 0 0 0 0 0 0 0
## p19_3 p20_1 p20_2 p21 p21_5esp p22 p23_1 p23_2 p24 p25 p25_6esp p26 p27 p28
## 1 0 2 1 2 0 0 0 5 4 30 2 0
## 2 0 0 0 0 0 0 0 5 4 10 2 0
## 3 0 0 0 0 0 0 0 0 0 0 0 0
## 4 0 2 1 2 0 0 0 2 4 20 2 0
## 5 0 0 0 0 0 0 0 0 0 20 2 0
## 6 0 0 0 0 0 0 0 3 4 30 2 0
## p28_7esp p29 p30 p31 p32 p32_6esp p33_1 p33_2 p33_3 p33_4 p34_1 p34_2 p34_3
## 1 2 3 2 0 2 2 2 2 2 2 2
## 2 2 3 2 0 2 2 2 2 2 2 2
## 3 0 0 0 3 2 2 2 2 2 2 2
## 4 2 3 1 0 2 2 2 2 1 1 2
## 5 2 3 2 0 2 2 2 2 2 2 2
## 6 4 4 2 0 2 2 2 2 3 3 1
## p34_3_1 p34_4 p34_4_1 p35 p36_1 p36_2 p36_3 p36_4 factor h_lec mat_lec
## 1 0 1 1 1 1 1 1 2 17463 1 2
## 2 0 1 1 1 1 1 1 1 37353 3 3
## 3 0 1 1 1 2 1 1 2 20668 4 4
## 4 0 1 1 1 1 1 1 1 30309 1 2
## 5 0 1 1 1 2 2 2 2 13886 1 1
## 6 2 1 1 1 1 1 1 1 33374 1 3
## perslec l_format r_format p_format perslecl
## 1 1 2 2 2 1
## 2 1 0 0 0 2
## 3 2 0 0 0 2
## 4 1 2 0 2 1
## 5 1 2 0 0 1
## 6 1 0 2 0 1
#View(m20)
dim(m20)
## [1] 2010 108
summary(m20)
## folio entidad control viv_sel num_hog
## Length:2010 Min. : 1.0 Min. :40025 Min. :1.000 Min. :1
## Class :character 1st Qu.: 9.0 1st Qu.:40153 1st Qu.:2.000 1st Qu.:1
## Mode :character Median :15.0 Median :40248 Median :3.000 Median :1
## Mean :15.6 Mean :40307 Mean :2.517 Mean :1
## 3rd Qu.:20.0 3rd Qu.:40398 3rd Qu.:4.000 3rd Qu.:1
## Max. :32.0 Max. :41420 Max. :4.000 Max. :1
##
## hog_mud n_ren_el cd periodo
## Min. :0.00000 Min. : 1.000 Min. : 1.00 Min. :220
## 1st Qu.:0.00000 1st Qu.: 1.000 1st Qu.: 2.00 1st Qu.:220
## Median :0.00000 Median : 1.000 Median : 9.00 Median :220
## Mean :0.04726 Mean : 1.734 Mean :14.01 Mean :220
## 3rd Qu.:0.00000 3rd Qu.: 2.000 3rd Qu.:25.00 3rd Qu.:220
## Max. :2.00000 Max. :10.000 Max. :43.00 Max. :220
##
## sexo edad anio nivel
## Min. :1.000 Min. :18.00 Min. :1.00 Min. : 0.000
## 1st Qu.:1.000 1st Qu.:31.00 1st Qu.:3.00 1st Qu.: 3.000
## Median :2.000 Median :44.00 Median :3.00 Median : 4.000
## Mean :1.552 Mean :45.49 Mean :3.47 Mean : 4.418
## 3rd Qu.:2.000 3rd Qu.:58.00 3rd Qu.:4.00 3rd Qu.: 7.000
## Max. :2.000 Max. :94.00 Max. :6.00 Max. :99.000
## NA's :65
## cond_act p1 p2 p3_1
## Min. : 1.000 Min. :1.000 Min. :0.000 Min. :0.000
## 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median : 1.000 Median :1.000 Median :1.000 Median :2.000
## Mean : 3.289 Mean :1.026 Mean :1.367 Mean :1.545
## 3rd Qu.: 7.000 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :99.000 Max. :2.000 Max. :2.000 Max. :2.000
##
## p3_2 p3_3 p3_4 p3_5
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :2.000 Median :2.000
## Mean :1.638 Mean :1.655 Mean :1.904 Mean :1.588
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
##
## p4 p5 p5_6esp p6_1
## Min. : 0.000 Min. :0.00 Length:2010 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.:0.00 Class :character 1st Qu.:0.0000
## Median : 0.000 Median :0.00 Mode :character Median :0.0000
## Mean : 1.428 Mean :1.33 Mean :0.7781
## 3rd Qu.: 2.000 3rd Qu.:3.00 3rd Qu.:2.0000
## Max. :60.000 Max. :6.00 Max. :2.0000
##
## p6_2 p6_3 p6_4 p6_5
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.6801 Mean :0.6861 Mean :0.6363 Mean :0.696
## 3rd Qu.:2.0000 3rd Qu.:2.0000 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :2.0000 Max. :2.0000 Max. :2.0000 Max. :2.000
##
## p6_6 p6_6esp p7 p7_3
## Min. :0.000 Length:2010 Min. :0.0000 Min. : 0
## 1st Qu.:0.000 Class :character 1st Qu.:0.0000 1st Qu.: 0
## Median :0.000 Mode :character Median :0.0000 Median : 0
## Mean :0.805 Mean :0.9224 Mean : 1631
## 3rd Qu.:2.000 3rd Qu.:2.0000 3rd Qu.: 0
## Max. :2.000 Max. :3.0000 Max. :999999
##
## p8_1 p8_2 p9 p9_5esp
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Length:2010
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 Class :character
## Median :0.0000 Median :0.0000 Median :0.0000 Mode :character
## Mean :0.7473 Mean :0.4468 Mean :0.4905
## 3rd Qu.:2.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :2.0000 Max. :2.0000 Max. :5.0000
##
## p10 p11 p11_6esp p12_1
## Min. : 0.00 Min. :0.000 Length:2010 Min. :0.000
## 1st Qu.: 0.00 1st Qu.:0.000 Class :character 1st Qu.:0.000
## Median : 0.00 Median :0.000 Mode :character Median :0.000
## Mean : 1.12 Mean :1.077 Mean :0.594
## 3rd Qu.: 2.00 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :90.00 Max. :6.000 Max. :2.000
##
## p12_2 p12_3 p12_4 p12_5
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.5582 Mean :0.5697 Mean :0.5348 Mean :0.595
## 3rd Qu.:1.0000 3rd Qu.:2.0000 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :2.0000 Max. :2.0000 Max. :2.0000 Max. :2.000
##
## p12_6 p12_7 p12_8 p12_9
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.5473 Mean :0.5443 Mean :0.5209 Mean :0.6194
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:2.0000
## Max. :2.0000 Max. :2.0000 Max. :2.0000 Max. :2.0000
##
## p12_9esp p13 p13_3 p14_1
## Length:2010 Min. :0.0000 Min. : 0.00 Min. :0.0000
## Class :character 1st Qu.:0.0000 1st Qu.: 0.00 1st Qu.:0.0000
## Mode :character Median :0.0000 Median : 0.00 Median :0.0000
## Mean :0.7144 Mean : 19.04 Mean :0.5841
## 3rd Qu.:2.0000 3rd Qu.: 0.00 3rd Qu.:2.0000
## Max. :3.0000 Max. :1200.00 Max. :2.0000
##
## p14_2 p15 p15_5esp p16
## Min. :0.0000 Min. :0.0000 Length:2010 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 Class :character 1st Qu.: 0.000
## Median :0.0000 Median :0.0000 Mode :character Median : 0.000
## Mean :0.3428 Mean :0.4005 Mean : 1.077
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.: 1.000
## Max. :2.0000 Max. :5.0000 Max. :80.000
##
## p17 p17_6esp p18_1 p18_2
## Min. :0.0000 Length:2010 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 Class :character 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Mode :character Median :0.0000 Median :0.000
## Mean :0.9662 Mean :0.3721 Mean :0.392
## 3rd Qu.:3.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :6.0000 Max. :2.0000 Max. :2.000
##
## p18_3 p18_4 p18_5 p19
## Min. :0.00 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.39 Mean :0.4239 Mean :0.3363 Mean :0.7065
## 3rd Qu.:1.00 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:2.0000
## Max. :2.00 Max. :2.0000 Max. :2.0000 Max. :3.0000
##
## p19_3 p20_1 p20_2 p21
## Min. : 0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 0.000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean : 6.793 Mean :0.5567 Mean :0.3184 Mean :0.4194
## 3rd Qu.: 0.000 3rd Qu.:2.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1300.000 Max. :2.0000 Max. :2.0000 Max. :5.0000
##
## p21_5esp p22 p23_1 p23_2
## Length:2010 Min. :0.0000 Min. :0.00000 Min. :0.00000
## Class :character 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Mode :character Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.1433 Mean :0.07413 Mean :0.05672
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :5.0000 Max. :2.00000 Max. :2.00000
##
## p24 p25 p25_6esp p26
## Min. :0.0000 Min. :0.000 Length:2010 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.:0.000 Class :character 1st Qu.: 0.00
## Median :0.0000 Median :0.000 Mode :character Median : 20.00
## Mean :0.7697 Mean :1.077 Mean : 28.25
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.: 40.00
## Max. :5.0000 Max. :6.000 Max. :480.00
##
## p27 p28 p28_7esp p29
## Min. :0.000 Min. :0.0000 Length:2010 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.0000 Class :character 1st Qu.:0.000
## Median :2.000 Median :0.0000 Mode :character Median :2.000
## Mean :1.286 Mean :0.6209 Mean :1.748
## 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:3.000
## Max. :2.000 Max. :7.0000 Max. :4.000
##
## p30 p31 p32 p32_6esp
## Min. :0.000 Min. :0.000 Min. :0.0000 Length:2010
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000 Class :character
## Median :3.000 Median :1.000 Median :0.0000 Mode :character
## Mean :2.125 Mean :1.096 Mean :0.7299
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :4.000 Max. :2.000 Max. :6.0000
##
## p33_1 p33_2 p33_3 p33_4
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :2.000 Median :2.000 Median :2.000
## Mean :1.749 Mean :1.819 Mean :1.864 Mean :1.779
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
##
## p34_1 p34_2 p34_3 p34_3_1
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.0000
## Median :2.000 Median :1.000 Median :2.000 Median :0.0000
## Mean :1.682 Mean :1.456 Mean :1.625 Mean :0.5652
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :3.000 Max. :3.000 Max. :3.000 Max. :3.0000
##
## p34_4 p34_4_1 p35 p36_1
## Min. :0.000 Min. :0.000 Min. :0.0000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:1.0000 1st Qu.:1.000
## Median :1.000 Median :1.000 Median :1.0000 Median :1.000
## Mean :1.412 Mean :1.021 Mean :0.9891 Mean :1.439
## 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :3.000 Max. :6.000 Max. :2.0000 Max. :3.000
##
## p36_2 p36_3 p36_4 factor h_lec
## Min. :0.00 Min. :0.000 Min. :0.000 Min. : 2976 Min. :0.000
## 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:12684 1st Qu.:1.000
## Median :1.00 Median :1.000 Median :1.000 Median :17143 Median :1.000
## Mean :1.21 Mean :1.321 Mean :1.402 Mean :19353 Mean :2.032
## 3rd Qu.:1.00 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:24472 3rd Qu.:3.000
## Max. :3.00 Max. :3.000 Max. :3.000 Max. :84991 Max. :4.000
##
## mat_lec perslec l_format r_format
## Min. :0.000 Min. :0.000 Min. :0.0000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.000
## Median :3.000 Median :1.000 Median :0.0000 Median :0.000
## Mean :2.712 Mean :1.246 Mean :0.7801 Mean :0.593
## 3rd Qu.:4.000 3rd Qu.:2.000 3rd Qu.:2.0000 3rd Qu.:2.000
## Max. :4.000 Max. :2.000 Max. :3.0000 Max. :3.000
##
## p_format perslecl
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:1.000
## Median :0.0000 Median :1.000
## Mean :0.5677 Mean :1.305
## 3rd Qu.:2.0000 3rd Qu.:2.000
## Max. :3.0000 Max. :2.000
##
str(m22)
## 'data.frame': 2016 obs. of 108 variables:
## $ folio : chr "12A207" "12A207" "12A207" "12A207" ...
## $ entidad : int 1 1 1 1 1 1 1 1 1 1 ...
## $ control : int 40007 40007 40007 40007 40048 40048 40048 40048 40085 40085 ...
## $ viv_sel : int 4 2 1 3 1 2 3 4 4 1 ...
## $ num_hog : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hog_mud : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_ren_el: int 1 2 1 2 6 2 1 1 1 2 ...
## $ cd : int 14 14 14 14 14 14 14 14 14 14 ...
## $ periodo : int 222 222 222 222 222 222 222 222 222 222 ...
## $ sexo : int 1 2 1 2 2 2 1 1 1 2 ...
## $ edad : int 51 73 52 35 28 41 44 69 45 23 ...
## $ nivel : int 6 2 3 3 4 2 3 2 4 4 ...
## $ anio : int 3 6 3 3 3 6 3 6 3 3 ...
## $ cond_act: int 1 7 1 1 7 7 1 1 1 7 ...
## $ p1 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ p2 : int 2 2 2 2 2 1 2 2 1 1 ...
## $ p3_1 : int 2 2 2 2 2 2 2 2 1 2 ...
## $ p3_2 : int 2 2 2 2 2 2 2 2 1 2 ...
## $ p3_3 : int 2 2 2 2 2 2 2 2 1 2 ...
## $ p3_4 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p3_5 : int 1 2 1 1 1 2 2 2 1 1 ...
## $ p4 : int 0 0 0 0 0 0 0 0 3 0 ...
## $ p5 : int 0 0 0 0 0 0 0 0 4 0 ...
## $ p5_6esp : chr "" "" "" "" ...
## $ p6_1 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p6_2 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p6_3 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p6_4 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p6_5 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p6_6 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p6_6esp : chr "" "" "" "" ...
## $ p7 : int 0 0 0 0 0 0 0 0 3 0 ...
## $ p7_3 : int 0 0 0 0 0 0 0 0 600 0 ...
## $ p8_1 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p8_2 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p9 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p9_5esp : chr "" "" "" "" ...
## $ p10 : int 0 0 0 0 0 0 0 0 10 0 ...
## $ p11 : int 0 0 0 0 0 0 0 0 4 0 ...
## $ p11_6esp: chr "" "" "" "" ...
## $ p12_1 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p12_2 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_3 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_4 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_5 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_6 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_7 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_8 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p12_9 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_9esp: chr "" "" "" "" ...
## $ p13 : int 0 0 0 0 0 0 0 0 3 0 ...
## $ p13_3 : int 0 0 0 0 0 0 0 0 150 0 ...
## $ p14_1 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p14_2 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p15 : int 0 0 0 0 0 0 0 0 4 0 ...
## $ p15_5esp: chr "" "" "" "" ...
## $ p16 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p17 : int 0 0 0 0 0 0 0 0 4 0 ...
## $ p17_6esp: chr "" "" "" "" ...
## $ p18_1 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p18_2 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p18_3 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p18_4 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p18_5 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p19 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p19_3 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p20_1 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p20_2 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p21 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p21_5esp: chr "" "" "" "" ...
## $ p22 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p23_1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p23_2 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p24 : int 5 0 5 2 5 0 0 0 1 1 ...
## $ p25 : int 4 0 4 2 2 0 0 0 4 4 ...
## $ p25_6esp: chr "" "" "" "" ...
## $ p26 : int 20 0 15 20 20 0 0 0 20 60 ...
## $ p27 : int 2 0 2 2 2 0 0 0 2 2 ...
## $ p28 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p28_7esp: chr "" "" "" "" ...
## $ p29 : int 2 0 2 3 2 0 0 0 3 2 ...
## $ p30 : int 3 0 3 3 3 0 0 0 4 3 ...
## $ p31 : int 2 0 2 2 2 0 0 0 2 2 ...
## $ p32 : int 0 1 0 0 0 1 3 5 0 0 ...
## $ p32_6esp: chr "" "" "" "" ...
## $ p33_1 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p33_2 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p33_3 : int 2 2 2 2 2 2 2 2 1 2 ...
## $ p33_4 : int 2 2 2 1 2 2 2 2 2 2 ...
## $ p34_1 : int 2 2 2 1 2 2 2 2 1 1 ...
## $ p34_2 : int 1 2 2 2 2 2 2 2 1 1 ...
## $ p34_3 : int 1 2 2 2 2 2 2 2 1 1 ...
## $ p34_3_1 : int 2 0 0 0 0 0 0 0 1 2 ...
## $ p34_4 : int 1 1 1 2 1 1 1 2 1 1 ...
## $ p34_4_1 : int 1 1 1 0 1 1 1 0 1 1 ...
## $ p35 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ p36_1 : int 1 1 1 1 1 1 1 2 1 1 ...
## $ p36_2 : int 1 1 1 1 1 1 1 2 1 1 ...
## $ p36_3 : int 1 1 1 1 1 1 1 2 1 1 ...
## [list output truncated]
names(m22)
## [1] "folio" "entidad" "control" "viv_sel" "num_hog" "hog_mud"
## [7] "n_ren_el" "cd" "periodo" "sexo" "edad" "nivel"
## [13] "anio" "cond_act" "p1" "p2" "p3_1" "p3_2"
## [19] "p3_3" "p3_4" "p3_5" "p4" "p5" "p5_6esp"
## [25] "p6_1" "p6_2" "p6_3" "p6_4" "p6_5" "p6_6"
## [31] "p6_6esp" "p7" "p7_3" "p8_1" "p8_2" "p9"
## [37] "p9_5esp" "p10" "p11" "p11_6esp" "p12_1" "p12_2"
## [43] "p12_3" "p12_4" "p12_5" "p12_6" "p12_7" "p12_8"
## [49] "p12_9" "p12_9esp" "p13" "p13_3" "p14_1" "p14_2"
## [55] "p15" "p15_5esp" "p16" "p17" "p17_6esp" "p18_1"
## [61] "p18_2" "p18_3" "p18_4" "p18_5" "p19" "p19_3"
## [67] "p20_1" "p20_2" "p21" "p21_5esp" "p22" "p23_1"
## [73] "p23_2" "p24" "p25" "p25_6esp" "p26" "p27"
## [79] "p28" "p28_7esp" "p29" "p30" "p31" "p32"
## [85] "p32_6esp" "p33_1" "p33_2" "p33_3" "p33_4" "p34_1"
## [91] "p34_2" "p34_3" "p34_3_1" "p34_4" "p34_4_1" "p35"
## [97] "p36_1" "p36_2" "p36_3" "p36_4" "factor" "h_lec"
## [103] "mat_lec" "perslec" "l_format" "r_format" "p_format" "perslecl"
head(m22)
## folio entidad control viv_sel num_hog hog_mud n_ren_el cd periodo sexo edad
## 1 12A207 1 40007 4 1 0 1 14 222 1 51
## 2 12A207 1 40007 2 1 0 2 14 222 2 73
## 3 12A207 1 40007 1 1 0 1 14 222 1 52
## 4 12A207 1 40007 3 1 0 2 14 222 2 35
## 5 11B193 1 40048 1 1 0 6 14 222 2 28
## 6 11B193 1 40048 2 1 0 2 14 222 2 41
## nivel anio cond_act p1 p2 p3_1 p3_2 p3_3 p3_4 p3_5 p4 p5 p5_6esp p6_1 p6_2
## 1 6 3 1 1 2 2 2 2 2 1 0 0 0 0
## 2 2 6 7 1 2 2 2 2 2 2 0 0 0 0
## 3 3 3 1 1 2 2 2 2 2 1 0 0 0 0
## 4 3 3 1 1 2 2 2 2 2 1 0 0 0 0
## 5 4 3 7 1 2 2 2 2 2 1 0 0 0 0
## 6 2 6 7 1 1 2 2 2 2 2 0 0 0 0
## p6_3 p6_4 p6_5 p6_6 p6_6esp p7 p7_3 p8_1 p8_2 p9 p9_5esp p10 p11 p11_6esp
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## p12_1 p12_2 p12_3 p12_4 p12_5 p12_6 p12_7 p12_8 p12_9 p12_9esp p13 p13_3
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## p14_1 p14_2 p15 p15_5esp p16 p17 p17_6esp p18_1 p18_2 p18_3 p18_4 p18_5 p19
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## p19_3 p20_1 p20_2 p21 p21_5esp p22 p23_1 p23_2 p24 p25 p25_6esp p26 p27 p28
## 1 0 0 0 0 0 0 0 5 4 20 2 0
## 2 0 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 5 4 15 2 0
## 4 0 0 0 0 0 0 0 2 2 20 2 0
## 5 0 0 0 0 0 0 0 5 2 20 2 0
## 6 0 0 0 0 0 0 0 0 0 0 0 0
## p28_7esp p29 p30 p31 p32 p32_6esp p33_1 p33_2 p33_3 p33_4 p34_1 p34_2 p34_3
## 1 2 3 2 0 2 2 2 2 2 1 1
## 2 0 0 0 1 2 2 2 2 2 2 2
## 3 2 3 2 0 2 2 2 2 2 2 2
## 4 3 3 2 0 2 2 2 1 1 2 2
## 5 2 3 2 0 2 2 2 2 2 2 2
## 6 0 0 0 1 2 2 2 2 2 2 2
## p34_3_1 p34_4 p34_4_1 p35 p36_1 p36_2 p36_3 p36_4 factor h_lec mat_lec
## 1 2 1 1 1 1 1 1 2 25403 3 3
## 2 0 1 1 1 1 1 1 2 11406 4 4
## 3 0 1 1 1 1 1 1 2 19052 3 3
## 4 0 2 0 1 1 1 1 1 20172 3 3
## 5 0 1 1 1 1 1 1 1 41647 3 3
## 6 0 1 1 1 1 1 1 2 14875 2 4
## perslec l_format r_format p_format perslecl
## 1 1 0 0 0 2
## 2 2 0 0 0 2
## 3 1 0 0 0 2
## 4 1 0 0 0 2
## 5 1 0 0 0 2
## 6 2 0 0 0 2
#View(m22)
dim(m22)
## [1] 2016 108
summary(m22)
## folio entidad control viv_sel num_hog
## Length:2016 Min. : 1.00 Min. :22251 Min. :1.000 Min. :1
## Class :character 1st Qu.: 9.00 1st Qu.:40087 1st Qu.:1.000 1st Qu.:1
## Mode :character Median :15.00 Median :40211 Median :2.000 Median :1
## Mean :15.65 Mean :40227 Mean :2.475 Mean :1
## 3rd Qu.:20.00 3rd Qu.:40348 3rd Qu.:3.000 3rd Qu.:1
## Max. :32.00 Max. :41398 Max. :4.000 Max. :1
##
## hog_mud n_ren_el cd periodo
## Min. :0.00000 Min. : 1.000 Min. : 1.00 Min. :222
## 1st Qu.:0.00000 1st Qu.: 1.000 1st Qu.: 2.00 1st Qu.:222
## Median :0.00000 Median : 1.000 Median : 9.00 Median :222
## Mean :0.04911 Mean : 1.684 Mean :13.65 Mean :222
## 3rd Qu.:0.00000 3rd Qu.: 2.000 3rd Qu.:25.00 3rd Qu.:222
## Max. :3.00000 Max. :10.000 Max. :43.00 Max. :222
##
## sexo edad nivel anio
## Min. :1.00 Min. :18.00 Min. : 0.000 Min. :1.00
## 1st Qu.:1.00 1st Qu.:32.00 1st Qu.: 3.000 1st Qu.:3.00
## Median :2.00 Median :44.00 Median : 4.000 Median :3.00
## Mean :1.56 Mean :45.88 Mean : 4.655 Mean :3.47
## 3rd Qu.:2.00 3rd Qu.:58.00 3rd Qu.: 7.000 3rd Qu.:4.00
## Max. :2.00 Max. :97.00 Max. :99.000 Max. :6.00
## NA's :39
## cond_act p1 p2 p3_1
## Min. : 1.000 Min. :1.000 Min. :0.000 Min. :0.000
## 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median : 1.000 Median :1.000 Median :1.000 Median :2.000
## Mean : 3.274 Mean :1.018 Mean :1.371 Mean :1.547
## 3rd Qu.: 7.000 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :10.000 Max. :2.000 Max. :2.000 Max. :2.000
##
## p3_2 p3_3 p3_4 p3_5
## Min. :0.00 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000
## Median :2.00 Median :2.000 Median :2.000 Median :2.000
## Mean :1.72 Mean :1.731 Mean :1.909 Mean :1.583
## 3rd Qu.:2.00 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.00 Max. :2.000 Max. :2.000 Max. :2.000
##
## p4 p5 p5_6esp p6_1
## Min. : 0.000 Min. :0.000 Length:2016 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.:0.000 Class :character 1st Qu.:0.0000
## Median : 0.000 Median :0.000 Mode :character Median :0.0000
## Mean : 1.627 Mean :1.406 Mean :0.7996
## 3rd Qu.: 2.000 3rd Qu.:3.000 3rd Qu.:2.0000
## Max. :99.000 Max. :6.000 Max. :2.0000
##
## p6_2 p6_3 p6_4 p6_5
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.7173 Mean :0.7009 Mean :0.6711 Mean :0.7292
## 3rd Qu.:2.0000 3rd Qu.:2.0000 3rd Qu.:2.0000 3rd Qu.:2.0000
## Max. :2.0000 Max. :2.0000 Max. :2.0000 Max. :2.0000
##
## p6_6 p6_6esp p7 p7_3
## Min. :0.0000 Length:2016 Min. :0.0000 Min. : 0
## 1st Qu.:0.0000 Class :character 1st Qu.:0.0000 1st Qu.: 0
## Median :0.0000 Mode :character Median :0.0000 Median : 0
## Mean :0.8323 Mean :0.9062 Mean : 3699
## 3rd Qu.:2.0000 3rd Qu.:2.0000 3rd Qu.: 0
## Max. :2.0000 Max. :3.0000 Max. :999999
##
## p8_1 p8_2 p9 p9_5esp
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Length:2016
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 Class :character
## Median :0.0000 Median :0.0000 Median :0.0000 Mode :character
## Mean :0.7292 Mean :0.4931 Mean :0.4712
## 3rd Qu.:2.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :2.0000 Max. :2.0000 Max. :5.0000
##
## p10 p11 p11_6esp p12_1
## Min. : 0.000 Min. :0.0000 Length:2016 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.:0.0000 Class :character 1st Qu.:0.0000
## Median : 0.000 Median :0.0000 Mode :character Median :0.0000
## Mean : 0.869 Mean :0.8318 Mean :0.4628
## 3rd Qu.: 0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :60.000 Max. :6.0000 Max. :2.0000
##
## p12_2 p12_3 p12_4 p12_5
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.4435 Mean :0.4375 Mean :0.4479 Mean :0.4588
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :2.0000 Max. :2.0000 Max. :2.0000 Max. :2.0000
##
## p12_6 p12_7 p12_8 p12_9
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.4206 Mean :0.4291 Mean :0.4335 Mean :0.4836
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :2.0000 Max. :2.0000 Max. :2.0000 Max. :2.0000
##
## p12_9esp p13 p13_3 p14_1
## Length:2016 Min. :0.0000 Min. : 0 Min. :0.000
## Class :character 1st Qu.:0.0000 1st Qu.: 0 1st Qu.:0.000
## Mode :character Median :0.0000 Median : 0 Median :0.000
## Mean :0.5263 Mean : 1008 Mean :0.433
## 3rd Qu.:0.0000 3rd Qu.: 0 3rd Qu.:0.000
## Max. :3.0000 Max. :999999 Max. :2.000
##
## p14_2 p15 p15_5esp p16
## Min. :0.0000 Min. :0.0000 Length:2016 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 Class :character 1st Qu.: 0.0000
## Median :0.0000 Median :0.0000 Mode :character Median : 0.0000
## Mean :0.2912 Mean :0.2917 Mean : 0.7445
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.: 0.0000
## Max. :2.0000 Max. :5.0000 Max. :30.0000
##
## p17 p17_6esp p18_1 p18_2
## Min. :0.0000 Length:2016 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 Class :character 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Mode :character Median :0.0000 Median :0.0000
## Mean :0.7733 Mean :0.3085 Mean :0.3175
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :6.0000 Max. :2.0000 Max. :2.0000
##
## p18_3 p18_4 p18_5 p19
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.3189 Mean :0.3413 Mean :0.2951 Mean :0.5397
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :2.0000 Max. :2.0000 Max. :2.0000 Max. :3.0000
##
## p19_3 p20_1 p20_2 p21
## Min. : 0.0 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 0.0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 0.0 Median :0.0000 Median :0.0000 Median :0.0000
## Mean : 998.2 Mean :0.4206 Mean :0.2753 Mean :0.3105
## 3rd Qu.: 0.0 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :999999.0 Max. :2.0000 Max. :2.0000 Max. :5.0000
##
## p21_5esp p22 p23_1 p23_2
## Length:2016 Min. :0.0000 Min. :0.00000 Min. :0.00000
## Class :character 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Mode :character Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.1969 Mean :0.08185 Mean :0.07391
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :5.0000 Max. :2.00000 Max. :2.00000
##
## p24 p25 p25_6esp p26
## Min. :0.0000 Min. :0.000 Length:2016 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.:0.000 Class :character 1st Qu.: 0.00
## Median :0.0000 Median :0.000 Mode :character Median : 20.00
## Mean :0.8408 Mean :1.163 Mean : 28.45
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.: 40.00
## Max. :5.0000 Max. :6.000 Max. :360.00
##
## p27 p28 p28_7esp p29
## Min. :0.000 Min. :0.0000 Length:2016 Min. :0.00
## 1st Qu.:0.000 1st Qu.:0.0000 Class :character 1st Qu.:0.00
## Median :2.000 Median :0.0000 Mode :character Median :2.00
## Mean :1.267 Mean :0.6141 Mean :1.75
## 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:3.00
## Max. :2.000 Max. :7.0000 Max. :4.00
##
## p30 p31 p32 p32_6esp
## Min. :0.000 Min. :0.000 Min. :0.000 Length:2016
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 Class :character
## Median :3.000 Median :1.000 Median :0.000 Mode :character
## Mean :2.129 Mean :1.077 Mean :0.754
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :4.000 Max. :2.000 Max. :6.000
##
## p33_1 p33_2 p33_3 p33_4
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :2.000 Median :2.000 Median :2.000
## Mean :1.813 Mean :1.865 Mean :1.931 Mean :1.854
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
##
## p34_1 p34_2 p34_3 p34_3_1 p34_4
## Min. :0.000 Min. :0.0 Min. :0.000 Min. :0.0000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.0 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:1.000
## Median :2.000 Median :1.0 Median :2.000 Median :0.0000 Median :1.000
## Mean :1.693 Mean :1.5 Mean :1.645 Mean :0.5938 Mean :1.428
## 3rd Qu.:2.000 3rd Qu.:2.0 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :3.000 Max. :3.0 Max. :3.000 Max. :3.0000 Max. :3.000
##
## p34_4_1 p35 p36_1 p36_2
## Min. :0.000 Min. :0.0000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:1.0000 1st Qu.:1.000 1st Qu.:1.000
## Median :1.000 Median :1.0000 Median :1.000 Median :1.000
## Mean :1.098 Mean :0.9896 Mean :1.426 Mean :1.243
## 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :6.000 Max. :2.0000 Max. :3.000 Max. :3.000
##
## p36_3 p36_4 factor h_lec
## Min. :0.000 Min. :0.000 Min. : 1481 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.: 12064 1st Qu.:1.000
## Median :1.000 Median :1.000 Median : 17342 Median :1.000
## Mean :1.347 Mean :1.439 Mean : 19896 Mean :2.049
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.: 24978 3rd Qu.:4.000
## Max. :3.000 Max. :3.000 Max. :101135 Max. :4.000
##
## mat_lec perslec l_format r_format
## Min. :0.000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :3.000 Median :1.000 Median :0.0000 Median :0.0000
## Mean :2.708 Mean :1.271 Mean :0.7847 Mean :0.4459
## 3rd Qu.:4.000 3rd Qu.:2.000 3rd Qu.:2.0000 3rd Qu.:0.0000
## Max. :4.000 Max. :2.000 Max. :3.0000 Max. :3.0000
##
## p_format perslecl
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:1.000
## Median :0.0000 Median :1.000
## Mean :0.4246 Mean :1.357
## 3rd Qu.:0.0000 3rd Qu.:2.000
## Max. :3.0000 Max. :2.000
##
str(m24)
## 'data.frame': 2016 obs. of 108 variables:
## $ folio : chr "12B221" "12B221" "12B221" "12B221" ...
## $ entidad : int 1 1 1 1 1 1 1 1 1 1 ...
## $ control : int 40002 40002 40002 40002 40003 40003 40003 40003 40028 40028 ...
## $ viv_sel : int 2 1 3 4 1 2 4 3 2 3 ...
## $ num_hog : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hog_mud : int 0 0 0 0 0 0 0 0 0 0 ...
## $ n_ren_el: int 1 1 1 2 1 1 1 2 1 1 ...
## $ cd : int 14 14 14 14 14 14 14 14 14 14 ...
## $ periodo : int 224 224 224 224 224 224 224 224 224 224 ...
## $ sexo : int 1 1 1 1 1 1 2 2 1 2 ...
## $ edad : int 52 55 59 30 54 30 66 45 26 41 ...
## $ nivel : int 3 3 3 4 3 4 2 3 4 4 ...
## $ anio : int 3 3 3 3 3 3 6 3 3 3 ...
## $ cond_act: int 9 1 1 1 1 1 7 7 1 1 ...
## $ p1 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ p2 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p3_1 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p3_2 : int 2 2 2 2 2 2 2 2 1 2 ...
## $ p3_3 : int 2 2 2 2 1 2 2 2 1 2 ...
## $ p3_4 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p3_5 : int 2 2 2 2 2 1 2 2 1 1 ...
## $ p4 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p5 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p5_6esp : chr "" "" "" "" ...
## $ p6_1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p6_2 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p6_3 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p6_4 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p6_5 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p6_6 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p6_6esp : chr "" "" "" "" ...
## $ p7 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p7_3 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p8_1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p8_2 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p9 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p9_5esp : chr "" "" "" "" ...
## $ p10 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p11 : int 0 0 0 0 0 0 0 0 4 0 ...
## $ p11_6esp: chr "" "" "" "" ...
## $ p12_1 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_2 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_3 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_4 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p12_5 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_6 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_7 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p12_8 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_9 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p12_9esp: chr "" "" "" "" ...
## $ p13 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p13_3 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p14_1 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p14_2 : int 0 0 0 0 0 0 0 0 2 0 ...
## $ p15 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ p15_5esp: chr "" "" "" "" ...
## $ p16 : int 0 0 0 0 1 0 0 0 1 0 ...
## $ p17 : int 0 0 0 0 4 0 0 0 4 0 ...
## $ p17_6esp: chr "" "" "" "" ...
## $ p18_1 : int 0 0 0 0 1 0 0 0 1 0 ...
## $ p18_2 : int 0 0 0 0 1 0 0 0 2 0 ...
## $ p18_3 : int 0 0 0 0 1 0 0 0 1 0 ...
## $ p18_4 : int 0 0 0 0 1 0 0 0 1 0 ...
## $ p18_5 : int 0 0 0 0 1 0 0 0 1 0 ...
## $ p19 : int 0 0 0 0 2 0 0 0 2 0 ...
## $ p19_3 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p20_1 : int 0 0 0 0 1 0 0 0 1 0 ...
## $ p20_2 : int 0 0 0 0 2 0 0 0 2 0 ...
## $ p21 : int 0 0 0 0 1 0 0 0 2 0 ...
## $ p21_5esp: chr "" "" "" "" ...
## $ p22 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p23_1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p23_2 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p24 : int 0 0 0 0 0 5 0 0 5 5 ...
## $ p25 : int 0 0 0 0 0 4 0 0 1 4 ...
## $ p25_6esp: chr "" "" "" "" ...
## $ p26 : int 0 0 0 0 20 15 0 0 20 10 ...
## $ p27 : int 0 0 0 0 2 2 0 0 2 2 ...
## $ p28 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ p28_7esp: chr "" "" "" "" ...
## $ p29 : int 0 0 0 0 2 2 0 0 2 2 ...
## $ p30 : int 0 0 0 0 3 3 0 0 3 3 ...
## $ p31 : int 0 0 0 0 2 2 0 0 2 2 ...
## $ p32 : int 5 2 3 4 0 0 2 3 0 0 ...
## $ p32_6esp: chr "" "" "" "" ...
## $ p33_1 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p33_2 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p33_3 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p33_4 : int 2 2 2 2 2 2 2 2 2 2 ...
## $ p34_1 : int 2 2 2 2 2 1 2 2 1 1 ...
## $ p34_2 : int 1 2 1 2 2 1 1 2 1 1 ...
## $ p34_3 : int 2 2 2 2 2 1 2 2 1 1 ...
## $ p34_3_1 : int 0 0 0 0 0 1 0 0 2 1 ...
## $ p34_4 : int 1 2 1 1 1 1 1 1 1 1 ...
## $ p34_4_1 : int 1 0 1 1 1 1 1 1 1 1 ...
## $ p35 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ p36_1 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ p36_2 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ p36_3 : int 1 1 1 1 1 1 1 1 1 1 ...
## [list output truncated]
names(m24)
## [1] "folio" "entidad" "control" "viv_sel" "num_hog" "hog_mud"
## [7] "n_ren_el" "cd" "periodo" "sexo" "edad" "nivel"
## [13] "anio" "cond_act" "p1" "p2" "p3_1" "p3_2"
## [19] "p3_3" "p3_4" "p3_5" "p4" "p5" "p5_6esp"
## [25] "p6_1" "p6_2" "p6_3" "p6_4" "p6_5" "p6_6"
## [31] "p6_6esp" "p7" "p7_3" "p8_1" "p8_2" "p9"
## [37] "p9_5esp" "p10" "p11" "p11_6esp" "p12_1" "p12_2"
## [43] "p12_3" "p12_4" "p12_5" "p12_6" "p12_7" "p12_8"
## [49] "p12_9" "p12_9esp" "p13" "p13_3" "p14_1" "p14_2"
## [55] "p15" "p15_5esp" "p16" "p17" "p17_6esp" "p18_1"
## [61] "p18_2" "p18_3" "p18_4" "p18_5" "p19" "p19_3"
## [67] "p20_1" "p20_2" "p21" "p21_5esp" "p22" "p23_1"
## [73] "p23_2" "p24" "p25" "p25_6esp" "p26" "p27"
## [79] "p28" "p28_7esp" "p29" "p30" "p31" "p32"
## [85] "p32_6esp" "p33_1" "p33_2" "p33_3" "p33_4" "p34_1"
## [91] "p34_2" "p34_3" "p34_3_1" "p34_4" "p34_4_1" "p35"
## [97] "p36_1" "p36_2" "p36_3" "p36_4" "factor" "h_lec"
## [103] "mat_lec" "perslec" "l_format" "r_format" "p_format" "perslecl"
head(m24)
## folio entidad control viv_sel num_hog hog_mud n_ren_el cd periodo sexo edad
## 1 12B221 1 40002 2 1 0 1 14 224 1 52
## 2 12B221 1 40002 1 1 0 1 14 224 1 55
## 3 12B221 1 40002 3 1 0 1 14 224 1 59
## 4 12B221 1 40002 4 1 0 2 14 224 1 30
## 5 11B213 1 40003 1 1 0 1 14 224 1 54
## 6 11B213 1 40003 2 1 0 1 14 224 1 30
## nivel anio cond_act p1 p2 p3_1 p3_2 p3_3 p3_4 p3_5 p4 p5 p5_6esp p6_1 p6_2
## 1 3 3 9 1 2 2 2 2 2 2 0 0 0 0
## 2 3 3 1 1 2 2 2 2 2 2 0 0 0 0
## 3 3 3 1 1 2 2 2 2 2 2 0 0 0 0
## 4 4 3 1 1 2 2 2 2 2 2 0 0 0 0
## 5 3 3 1 1 2 2 2 1 2 2 0 0 0 0
## 6 4 3 1 1 2 2 2 2 2 1 0 0 0 0
## p6_3 p6_4 p6_5 p6_6 p6_6esp p7 p7_3 p8_1 p8_2 p9 p9_5esp p10 p11 p11_6esp
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## p12_1 p12_2 p12_3 p12_4 p12_5 p12_6 p12_7 p12_8 p12_9 p12_9esp p13 p13_3
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## p14_1 p14_2 p15 p15_5esp p16 p17 p17_6esp p18_1 p18_2 p18_3 p18_4 p18_5 p19
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 1 4 1 1 1 1 1 2
## 6 0 0 0 0 0 0 0 0 0 0 0
## p19_3 p20_1 p20_2 p21 p21_5esp p22 p23_1 p23_2 p24 p25 p25_6esp p26 p27 p28
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0 0
## 5 0 1 2 1 0 0 0 0 0 20 2 0
## 6 0 0 0 0 0 0 0 5 4 15 2 0
## p28_7esp p29 p30 p31 p32 p32_6esp p33_1 p33_2 p33_3 p33_4 p34_1 p34_2 p34_3
## 1 0 0 0 5 2 2 2 2 2 1 2
## 2 0 0 0 2 2 2 2 2 2 2 2
## 3 0 0 0 3 2 2 2 2 2 1 2
## 4 0 0 0 4 2 2 2 2 2 2 2
## 5 2 3 2 0 2 2 2 2 2 2 2
## 6 2 3 2 0 2 2 2 2 1 1 1
## p34_3_1 p34_4 p34_4_1 p35 p36_1 p36_2 p36_3 p36_4 factor h_lec mat_lec
## 1 0 1 1 1 1 1 1 2 36731 4 4
## 2 0 2 0 1 1 1 1 2 9183 4 4
## 3 0 1 1 1 1 1 1 2 36731 4 4
## 4 0 1 1 1 1 1 1 1 18365 4 4
## 5 0 1 1 1 1 1 1 2 7488 3 3
## 6 1 1 1 1 1 1 1 1 7488 3 3
## perslec l_format r_format p_format perslecl
## 1 2 0 0 0 2
## 2 2 0 0 0 2
## 3 2 0 0 0 2
## 4 2 0 0 0 2
## 5 1 0 0 1 1
## 6 1 0 0 0 2
#View(m24)
dim(m24)
## [1] 2016 108
summary(m24)
## folio entidad control viv_sel num_hog
## Length:2016 Min. : 1.00 Min. :40001 Min. :1.000 Min. :1
## Class :character 1st Qu.: 9.00 1st Qu.:40095 1st Qu.:1.750 1st Qu.:1
## Mode :character Median :15.00 Median :40191 Median :2.000 Median :1
## Mean :15.58 Mean :40244 Mean :2.493 Mean :1
## 3rd Qu.:20.25 3rd Qu.:40315 3rd Qu.:3.000 3rd Qu.:1
## Max. :32.00 Max. :41419 Max. :4.000 Max. :1
##
## hog_mud n_ren_el cd periodo
## Min. :0.00000 Min. :1.000 Min. : 1.00 Min. :224
## 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.: 2.00 1st Qu.:224
## Median :0.00000 Median :1.000 Median : 9.00 Median :224
## Mean :0.03472 Mean :1.673 Mean :13.74 Mean :224
## 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:25.00 3rd Qu.:224
## Max. :2.00000 Max. :8.000 Max. :43.00 Max. :224
##
## sexo edad nivel anio
## Min. :1.000 Min. :18.00 Min. : 0.00 Min. :1.000
## 1st Qu.:1.000 1st Qu.:33.00 1st Qu.: 3.00 1st Qu.:3.000
## Median :2.000 Median :46.00 Median : 4.00 Median :3.000
## Mean :1.574 Mean :46.49 Mean : 4.66 Mean :3.456
## 3rd Qu.:2.000 3rd Qu.:59.00 3rd Qu.: 7.00 3rd Qu.:4.000
## Max. :2.000 Max. :94.00 Max. :99.00 Max. :9.000
## NA's :48
## cond_act p1 p2 p3_1
## Min. : 1.000 Min. :1.000 Min. :0.000 Min. :0.000
## 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median : 1.000 Median :1.000 Median :1.000 Median :2.000
## Mean : 3.325 Mean :1.023 Mean :1.405 Mean :1.552
## 3rd Qu.: 7.000 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :10.000 Max. :2.000 Max. :2.000 Max. :2.000
##
## p3_2 p3_3 p3_4 p3_5
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.000
## Median :2.000 Median :2.000 Median :2.000 Median :2.000
## Mean :1.745 Mean :1.781 Mean :1.912 Mean :1.588
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
##
## p4 p5 p5_6esp p6_1
## Min. : 0.000 Min. :0.000 Length:2016 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.:0.000 Class :character 1st Qu.:0.0000
## Median : 0.000 Median :0.000 Mode :character Median :0.0000
## Mean : 1.275 Mean :1.348 Mean :0.7723
## 3rd Qu.: 2.000 3rd Qu.:3.000 3rd Qu.:2.0000
## Max. :70.000 Max. :6.000 Max. :2.0000
##
## p6_2 p6_3 p6_4 p6_5
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.686 Mean :0.6756 Mean :0.6458 Mean :0.7143
## 3rd Qu.:2.000 3rd Qu.:2.0000 3rd Qu.:1.0000 3rd Qu.:2.0000
## Max. :2.000 Max. :2.0000 Max. :2.0000 Max. :2.0000
##
## p6_6 p6_6esp p7 p7_3
## Min. :0.0000 Length:2016 Min. :0.0000 Min. : 0
## 1st Qu.:0.0000 Class :character 1st Qu.:0.0000 1st Qu.: 0
## Median :0.0000 Mode :character Median :0.0000 Median : 0
## Mean :0.8006 Mean :0.8591 Mean : 3115
## 3rd Qu.:2.0000 3rd Qu.:2.0000 3rd Qu.: 0
## Max. :2.0000 Max. :3.0000 Max. :999999
##
## p8_1 p8_2 p9 p9_5esp
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Length:2016
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 Class :character
## Median :0.0000 Median :0.0000 Median :0.0000 Mode :character
## Mean :0.7054 Mean :0.4772 Mean :0.4529
## 3rd Qu.:2.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :2.0000 Max. :2.0000 Max. :5.0000
##
## p10 p11 p11_6esp p12_1
## Min. : 0.0000 Min. :0.0000 Length:2016 Min. :0.0000
## 1st Qu.: 0.0000 1st Qu.:0.0000 Class :character 1st Qu.:0.0000
## Median : 0.0000 Median :0.0000 Mode :character Median :0.0000
## Mean : 0.6349 Mean :0.7326 Mean :0.3973
## 3rd Qu.: 0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :25.0000 Max. :6.0000 Max. :2.0000
##
## p12_2 p12_3 p12_4 p12_5
## Min. :0.0000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.000 Median :0.0000
## Mean :0.3824 Mean :0.371 Mean :0.377 Mean :0.3879
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :2.0000 Max. :2.000 Max. :2.000 Max. :2.0000
##
## p12_6 p12_7 p12_8 p12_9
## Min. :0.000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.371 Mean :0.369 Mean :0.3562 Mean :0.4172
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :2.000 Max. :2.000 Max. :2.0000 Max. :2.0000
##
## p12_9esp p13 p13_3 p14_1
## Length:2016 Min. :0.000 Min. : 0.0 Min. :0.000
## Class :character 1st Qu.:0.000 1st Qu.: 0.0 1st Qu.:0.000
## Mode :character Median :0.000 Median : 0.0 Median :0.000
## Mean :0.433 Mean : 506.5 Mean :0.372
## 3rd Qu.:0.000 3rd Qu.: 0.0 3rd Qu.:0.000
## Max. :3.000 Max. :999999.0 Max. :2.000
##
## p14_2 p15 p15_5esp p16
## Min. :0.000 Min. :0.0000 Length:2016 Min. : 0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 Class :character 1st Qu.: 0.0000
## Median :0.000 Median :0.0000 Mode :character Median : 0.0000
## Mean :0.247 Mean :0.2688 Mean : 0.5749
## 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.: 0.0000
## Max. :2.000 Max. :5.0000 Max. :60.0000
##
## p17 p17_6esp p18_1 p18_2
## Min. :0.0000 Length:2016 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 Class :character 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Mode :character Median :0.0000 Median :0.0000
## Mean :0.5794 Mean :0.2163 Mean :0.2272
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :6.0000 Max. :2.0000 Max. :2.0000
##
## p18_3 p18_4 p18_5 p19
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2361 Mean :0.2594 Mean :0.2133 Mean :0.3904
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :2.0000 Max. :2.0000 Max. :2.0000 Max. :3.0000
##
## p19_3 p20_1 p20_2 p21
## Min. : 0.0 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 0.0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 0.0 Median :0.0000 Median :0.0000 Median :0.0000
## Mean : 995.8 Mean :0.3115 Mean :0.2039 Mean :0.2411
## 3rd Qu.: 0.0 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :999999.0 Max. :2.0000 Max. :2.0000 Max. :5.0000
##
## p21_5esp p22 p23_1 p23_2
## Length:2016 Min. :0.0000 Min. :0.00000 Min. :0.000
## Class :character 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.000
## Mode :character Median :0.0000 Median :0.00000 Median :0.000
## Mean :0.1443 Mean :0.05804 Mean :0.063
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.000
## Max. :5.0000 Max. :2.00000 Max. :2.000
##
## p24 p25 p25_6esp p26
## Min. :0.0000 Min. :0.000 Length:2016 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.:0.000 Class :character 1st Qu.: 0.00
## Median :0.0000 Median :0.000 Mode :character Median : 20.00
## Mean :0.7986 Mean :1.117 Mean : 26.78
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.: 30.00
## Max. :5.0000 Max. :6.000 Max. :360.00
##
## p27 p28 p28_7esp p29
## Min. :0.00 Min. :0.000 Length:2016 Min. :0.00
## 1st Qu.:0.00 1st Qu.:0.000 Class :character 1st Qu.:0.00
## Median :2.00 Median :0.000 Mode :character Median :2.00
## Mean :1.23 Mean :0.557 Mean :1.68
## 3rd Qu.:2.00 3rd Qu.:0.000 3rd Qu.:3.00
## Max. :2.00 Max. :7.000 Max. :4.00
##
## p30 p31 p32 p32_6esp
## Min. :0.000 Min. :0.000 Min. :0.0000 Length:2016
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000 Class :character
## Median :3.000 Median :1.000 Median :0.0000 Mode :character
## Mean :1.995 Mean :1.056 Mean :0.8175
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :4.000 Max. :2.000 Max. :6.0000
##
## p33_1 p33_2 p33_3 p33_4 p34_1
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.00
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.00
## Median :2.000 Median :2.000 Median :2.000 Median :2.000 Median :2.00
## Mean :1.787 Mean :1.868 Mean :1.887 Mean :1.824 Mean :1.67
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.00
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000 Max. :3.00
##
## p34_2 p34_3 p34_3_1 p34_4
## Min. :0.000 Min. :0.00 Min. :0.0000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.00 1st Qu.:0.0000 1st Qu.:1.000
## Median :1.000 Median :2.00 Median :0.0000 Median :1.000
## Mean :1.477 Mean :1.64 Mean :0.5437 Mean :1.427
## 3rd Qu.:2.000 3rd Qu.:2.00 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :3.000 Max. :3.00 Max. :3.0000 Max. :3.000
##
## p34_4_1 p35 p36_1 p36_2
## Min. :0.000 Min. :0.0000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:1.0000 1st Qu.:1.000 1st Qu.:1.000
## Median :1.000 Median :1.0000 Median :1.000 Median :1.000
## Mean :0.998 Mean :0.9871 Mean :1.357 Mean :1.188
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :6.000 Max. :2.0000 Max. :3.000 Max. :3.000
##
## p36_3 p36_4 factor h_lec
## Min. :0.000 Min. :0.000 Min. : 3576 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.: 12994 1st Qu.:1.000
## Median :1.000 Median :1.000 Median : 18445 Median :1.000
## Mean :1.293 Mean :1.397 Mean : 21196 Mean :2.141
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.: 26839 3rd Qu.:4.000
## Max. :3.000 Max. :3.000 Max. :124941 Max. :4.000
##
## mat_lec perslec l_format r_format
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:0.0000
## Median :3.000 Median :1.000 Median :0.000 Median :0.0000
## Mean :2.714 Mean :1.284 Mean :0.748 Mean :0.3869
## 3rd Qu.:4.000 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:0.0000
## Max. :4.000 Max. :2.000 Max. :3.000 Max. :3.0000
##
## p_format perslecl
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:1.000
## Median :0.0000 Median :1.000
## Mean :0.3165 Mean :1.376
## 3rd Qu.:0.0000 3rd Qu.:2.000
## Max. :3.0000 Max. :2.000
##
files <- c(
"Datos_molec_2020-1.csv",
"Datos_molec_2022-1.csv",
"Datos_molec_2024-1.csv"
)
years <- c(2020, 2022, 2024)
aliases <- list(
sabe_leer_escribir = c("p1", "p_1", "sabe_leer", "sabe_leer_escribir", "leer_escribir"),
nivel_aprobado = c("nivel", "nivel_aprobado", "escolaridad", "grado_aprob", "ultimo_grado"),
condicion_actividad= c("cond_act", "condicion_actividad", "cond_act_pea"),
libros_gasto = c("p7_3", "p07_3", "p7_03", "gasto_libros", "libros_gasto"),
libros_leidos_12m = c(
"p4","p04","libros_leidos","num_libros","cuantos_libros",
"libros_12m","libros_ultimos_12_meses","libros_12_meses"
),
no_lectura_motivo = c("p32", "p032", "no_lectura_motivo", "motivo_no_lectura"),
no_lectura_otro = c("p32_6esp", "p032_6esp", "p32_06esp", "no_lectura_otro_esp", "no_lectura_otro"),
lectura_minutos = c("p26", "p026", "p26_minutos", "minutos_lectura","minutos_continuos", "p26_min", "minutos")
)
standard_vars <- names(aliases)
harmonize_one <- function(path, year, aliases) {
df_raw <- suppressMessages(read_csv(path, col_types = cols(.default = col_character())))
df <- clean_names(df_raw)
nm <- names(df)
pick_first <- function(cands) {
cands_clean <- cands |>
tolower() |>
str_replace_all("[^a-z0-9_]", "_")
hit <- intersect(cands_clean, nm)
if (length(hit) > 0) hit[[1]] else NA_character_
}
n <- nrow(df)
out <- tibble(year = rep(year, n))
for (std in standard_vars) {
src <- pick_first(aliases[[std]])
if (!is.na(src)) {
out[[std]] <- df[[src]]
} else {
out[[std]] <- NA_character_
message("Año ", year, ": no se encontró columna para '", std, "'.")
}
}
out
}
molec_list <- map2(files, years, ~ harmonize_one(.x, .y, aliases))
molec <- bind_rows(molec_list)
molec <- molec %>%
mutate(
libros_gasto_num = suppressWarnings(as.numeric(libros_gasto)),
libros_leidos_12m_num = suppressWarnings(as.numeric(libros_leidos_12m)),
lectura_minutos_num = suppressWarnings(as.numeric(lectura_minutos)),
sabe_leer_bin = case_when(
str_detect(str_trim(tolower(sabe_leer_escribir)), "^(si|sí|1)$") ~ 1,
str_detect(str_trim(tolower(sabe_leer_escribir)), "^(no|0)$") ~ 0,
TRUE ~ NA_real_
)
)
molec <- molec %>%
mutate(
no_lectura_motivo = case_when(
!is.na(no_lectura_otro) & str_trim(no_lectura_otro) != "" ~ "Otro",
TRUE ~ no_lectura_motivo
)
) %>%
select(-no_lectura_otro,-sabe_leer_bin,-libros_gasto,-libros_leidos_12m,-lectura_minutos)
molec<- na.omit(molec)
for (yy in years) {
df_y <- subset(molec, year == yy)
write.csv(df_y, paste0("DatosEquipo#_", yy, ".csv"), row.names = FALSE)
}
numify <- function(x) {
y <- readr::parse_number(x, locale = readr::locale(decimal_mark = ".", grouping_mark = ","))
if (mean(is.na(y)) > 0.9) {
y <- readr::parse_number(x, locale = readr::locale(decimal_mark = ",", grouping_mark = "."))
}
y
}
write.csv(molec, "DatosEquipo#1.csv", row.names = FALSE)
str(molec)
## tibble [6,042 × 8] (S3: tbl_df/tbl/data.frame)
## $ year : num [1:6042] 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ sabe_leer_escribir : chr [1:6042] "1" "1" "1" "1" ...
## $ nivel_aprobado : chr [1:6042] "2" "3" "3" "7" ...
## $ condicion_actividad : chr [1:6042] "1" "7" "1" "1" ...
## $ no_lectura_motivo : chr [1:6042] "0" "0" "3" "0" ...
## $ libros_gasto_num : num [1:6042] 0 0 0 0 0 0 0 0 0 0 ...
## $ libros_leidos_12m_num: num [1:6042] 1 0 0 2 2 0 0 0 0 1 ...
## $ lectura_minutos_num : num [1:6042] 30 10 0 20 20 30 20 10 0 60 ...
names(molec)
## [1] "year" "sabe_leer_escribir" "nivel_aprobado"
## [4] "condicion_actividad" "no_lectura_motivo" "libros_gasto_num"
## [7] "libros_leidos_12m_num" "lectura_minutos_num"
head(molec)
## # A tibble: 6 × 8
## year sabe_leer_escribir nivel_aprobado condicion_actividad no_lectura_motivo
## <dbl> <chr> <chr> <chr> <chr>
## 1 2020 1 2 1 0
## 2 2020 1 3 7 0
## 3 2020 1 3 1 3
## 4 2020 1 7 1 0
## 5 2020 1 6 7 0
## 6 2020 1 3 1 0
## # ℹ 3 more variables: libros_gasto_num <dbl>, libros_leidos_12m_num <dbl>,
## # lectura_minutos_num <dbl>
#View(molec)
dim(molec)
## [1] 6042 8
summary(molec)
## year sabe_leer_escribir nivel_aprobado condicion_actividad
## Min. :2020 Length:6042 Length:6042 Length:6042
## 1st Qu.:2020 Class :character Class :character Class :character
## Median :2022 Mode :character Mode :character Mode :character
## Mean :2022
## 3rd Qu.:2024
## Max. :2024
## no_lectura_motivo libros_gasto_num libros_leidos_12m_num lectura_minutos_num
## Length:6042 Min. : 0 Min. : 0.000 Min. : 0.00
## Class :character 1st Qu.: 0 1st Qu.: 0.000 1st Qu.: 0.00
## Mode :character Median : 0 Median : 0.000 Median : 20.00
## Mean : 2816 Mean : 1.443 Mean : 27.83
## 3rd Qu.: 0 3rd Qu.: 2.000 3rd Qu.: 40.00
## Max. :999999 Max. :99.000 Max. :480.00
colSums(is.na(molec))
## year sabe_leer_escribir nivel_aprobado
## 0 0 0
## condicion_actividad no_lectura_motivo libros_gasto_num
## 0 0 0
## libros_leidos_12m_num lectura_minutos_num
## 0 0
round(colSums(is.na(molec)) / nrow(molec) * 100, 2)
## year sabe_leer_escribir nivel_aprobado
## 0 0 0
## condicion_actividad no_lectura_motivo libros_gasto_num
## 0 0 0
## libros_leidos_12m_num lectura_minutos_num
## 0 0
table(molec$libros_gasto_num) +
prop.table(table(molec$libros_gasto_num))
##
## 0 1 10 15 20 25
## 5128.848726 1.000166 1.000166 1.000166 3.000497 1.000166
## 30 40 45 50 55 60
## 4.000662 7.001159 1.000166 8.001324 1.000166 5.000828
## 70 75 85 89 90 100
## 2.000331 1.000166 1.000166 1.000166 3.000497 26.004303
## 110 120 130 140 150 160
## 1.000166 7.001159 1.000166 1.000166 36.005958 2.000331
## 170 180 185 200 215 219
## 2.000331 7.001159 1.000166 63.010427 1.000166 1.000166
## 220 230 239 240 250 270
## 1.000166 1.000166 1.000166 2.000331 24.003972 2.000331
## 280 299 300 311 329 340
## 4.000662 1.000166 77.012744 1.000166 1.000166 1.000166
## 350 360 370 380 390 400
## 17.002814 1.000166 1.000166 2.000331 1.000166 46.007613
## 420 430 450 480 500 530
## 1.000166 1.000166 10.001655 2.000331 78.012910 1.000166
## 550 580 600 650 700 750
## 3.000497 1.000166 69.011420 4.000662 20.003310 6.000993
## 780 800 850 860 875 900
## 1.000166 37.006124 1.000166 1.000166 1.000166 18.002979
## 950 960 1000 1100 1200 1300
## 3.000497 1.000166 54.008937 3.000497 29.004800 6.000993
## 1400 1500 1560 1600 1700 1800
## 3.000497 43.007117 1.000166 8.001324 2.000331 6.000993
## 1900 2000 2200 2300 2400 2500
## 1.000166 31.005131 1.000166 2.000331 2.000331 9.001490
## 2599 3000 3400 3500 4000 4200
## 1.000166 17.002814 1.000166 7.001159 7.001159 1.000166
## 5000 5200 6000 7000 7200 7500
## 13.002152 1.000166 5.000828 1.000166 1.000166 1.000166
## 8000 9999 10000 15000 50000 120000
## 2.000331 1.000166 1.000166 2.000331 1.000166 1.000166
## 999999
## 16.002648
table(molec$libros_leidos_12m_num) +
prop.table(table(molec$libros_leidos_12m_num))
##
## 0 1 2 3 4 5
## 3582.592850 758.125455 635.105098 399.066038 214.035419 158.026150
## 6 7 8 9 10 11
## 75.012413 19.003145 37.006124 8.001324 46.007613 4.000662
## 12 13 14 15 16 17
## 30.004965 3.000497 2.000331 18.002979 1.000166 1.000166
## 18 20 24 25 30 35
## 1.000166 19.003145 5.000828 2.000331 9.001490 1.000166
## 40 42 45 50 60 66
## 4.000662 1.000166 1.000166 5.000828 1.000166 1.000166
## 70 99
## 1.000166 1.000166
table(molec$lectura_minutos_num) +
prop.table(table(molec$lectura_minutos_num))
##
## 0 1 2 3 4 5
## 1886.312148 1.000166 4.000662 4.000662 1.000166 55.009103
## 6 7 10 15 16 18
## 2.000331 1.000166 243.040218 322.053294 1.000166 1.000166
## 20 21 25 26 30 35
## 658.108904 1.000166 117.019364 1.000166 1156.191327 61.010096
## 38 40 44 45 48 50
## 2.000331 211.034922 1.000166 154.025488 1.000166 33.005462
## 60 70 80 90 120 160
## 773.127938 5.000828 2.000331 97.016054 191.031612 1.000166
## 180 200 240 300 360 480
## 38.006289 2.000331 11.001821 2.000331 2.000331 1.000166
datos <- read.csv("DatosEquipo#1.csv", stringsAsFactors = FALSE)
#View(datos)
datos <- datos %>% mutate(
year = as.integer(year),
lee_algo = libros_leidos_12m_num > 0
)
cat("Filas:", nrow(datos), " Columnas:", ncol(datos), "\n")
## Filas: 6042 Columnas: 9
print(names(datos))
## [1] "year" "sabe_leer_escribir" "nivel_aprobado"
## [4] "condicion_actividad" "no_lectura_motivo" "libros_gasto_num"
## [7] "libros_leidos_12m_num" "lectura_minutos_num" "lee_algo"
str(datos)
## 'data.frame': 6042 obs. of 9 variables:
## $ year : int 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ sabe_leer_escribir : int 1 1 1 1 1 1 1 1 1 1 ...
## $ nivel_aprobado : int 2 3 3 7 6 3 6 3 2 2 ...
## $ condicion_actividad : int 1 7 1 1 7 1 1 1 1 8 ...
## $ no_lectura_motivo : chr "0" "0" "3" "0" ...
## $ libros_gasto_num : int 0 0 0 0 0 0 0 0 0 0 ...
## $ libros_leidos_12m_num: int 1 0 0 2 2 0 0 0 0 1 ...
## $ lectura_minutos_num : int 30 10 0 20 20 30 20 10 0 60 ...
## $ lee_algo : logi TRUE FALSE FALSE TRUE TRUE FALSE ...
porc_nulos <- sapply(datos, function(x) mean(is.na(x))*100)
print(round(porc_nulos, 2))
## year sabe_leer_escribir nivel_aprobado
## 0 0 0
## condicion_actividad no_lectura_motivo libros_gasto_num
## 0 0 0
## libros_leidos_12m_num lectura_minutos_num lee_algo
## 0 0 0
dup_count <- sum(duplicated(datos))
cat("Duplicados:", dup_count, "\n")
## Duplicados: 3419
datos <- datos[!duplicated(datos), ]
es_texto <- sapply(datos, is.character)
datos[es_texto] <- lapply(datos[es_texto], function(x) trimws(tolower(x)))
if ("libros_leidos_12m_num" %in% names(datos)) datos$libros_leidos_12m_num[datos$libros_leidos_12m_num %in% c(99)] <- NA
if ("libros_gasto_num" %in% names(datos)) datos$libros_gasto_num[datos$libros_gasto_num %in% c(999999)] <- NA
if ("lee_algo" %in% names(datos))
datos$lee_algo[is.na(datos$lee_algo)] <- FALSE
diccionario <- data.frame(
variable = names(datos),
tipo = sapply(datos, function(x) class(x)[1]),
porc_nulos = round(colMeans(is.na(datos)) * 100, 2),
stringsAsFactors = FALSE
)
desc <- c(
year = "Año del dato.",
saber_leer_escribir = "Se pregunta por alfabetización",
nivel_aprobado = "Nivel de escolaridad. ¿Hasta qué año o grado aprobó en la escuela?.",
condicion_actividad = "Condición de actividad: ¿La semana pasada...?",
no_lectura_motivo = "Motivo principal por el que no lee los materiales de lectura mencionados.",
libros_gasto_num = "Aproximadamente, ¿cuánto gastó?",
libros_leidos_12m_num = "¿Cuántos libros leyó en los últimos doce meses?",
lectura_minutos_num = "¿Cuántos minutos lee?",
lee_algo = "Si la persona sabe leer, ¿ha leido algo en los últimos 12m?"
)
#View(diccionario)
tabla <- datos[,c(3,6,7,8)]
nums <- Filter(is.numeric, tabla)
res_num <- t(sapply(nums, function(x) c(
n = sum(!is.na(x)),
media = mean(x, na.rm=TRUE),
mediana = median(x, na.rm=TRUE),
min = min(x, na.rm=TRUE),
max = max(x, na.rm=TRUE),
rango_medio = (min(x, na.rm=TRUE) + max(x, na.rm=TRUE))/2,
sd = sd(x, na.rm=TRUE),
coef_var = ifelse(mean(x,na.rm=TRUE)==0, NA, sd(x,na.rm=TRUE)/mean(x,na.rm=TRUE)),
q25 = quantile(x, .25, na.rm=TRUE),
q75 = quantile(x, .75, na.rm=TRUE)
)))
round(res_num, 3)
## n media mediana min max rango_medio sd
## nivel_aprobado 2623 5.253 5 0 99 49.5 5.615
## libros_gasto_num 2607 381.250 0 0 120000 60000.0 2696.898
## libros_leidos_12m_num 2622 2.815 2 0 70 35.0 5.105
## lectura_minutos_num 2623 41.997 30 0 480 240.0 40.337
## coef_var q25.25% q75.75%
## nivel_aprobado 1.069 3 7
## libros_gasto_num 7.074 0 250
## libros_leidos_12m_num 1.813 0 3
## lectura_minutos_num 0.960 20 60
cats <- datos[,c(1,2,4,5,9)]
res_cat <- lapply(cats, function(v){
tb <- table(v, useNA="ifany")
list(freq = tb, prop = round(prop.table(tb), 4))
})
res_cat
## $year
## $year$freq
## v
## 2020 2022 2024
## 861 890 872
##
## $year$prop
## v
## 2020 2022 2024
## 0.3283 0.3393 0.3324
##
##
## $sabe_leer_escribir
## $sabe_leer_escribir$freq
## v
## 1 2
## 2586 37
##
## $sabe_leer_escribir$prop
## v
## 1 2
## 0.9859 0.0141
##
##
## $condicion_actividad
## $condicion_actividad$freq
## v
## 1 2 3 4 5 6 7 8 9 10 99
## 1309 27 46 110 17 192 551 317 21 32 1
##
## $condicion_actividad$prop
## v
## 1 2 3 4 5 6 7 8 9 10 99
## 0.4990 0.0103 0.0175 0.0419 0.0065 0.0732 0.2101 0.1209 0.0080 0.0122 0.0004
##
##
## $no_lectura_motivo
## $no_lectura_motivo$freq
## v
## 0 1 2 3 4 5 otro
## 2312 76 56 81 15 69 14
##
## $no_lectura_motivo$prop
## v
## 0 1 2 3 4 5 otro
## 0.8814 0.0290 0.0213 0.0309 0.0057 0.0263 0.0053
##
##
## $lee_algo
## $lee_algo$freq
## v
## FALSE TRUE
## 778 1845
##
## $lee_algo$prop
## v
## FALSE TRUE
## 0.2966 0.7034
theme_set(theme_minimal(base_size = 12))
datos %>%
group_by(year) %>%
summarise(prom_libros = mean(libros_leidos_12m_num, na.rm = TRUE), .groups = "drop") %>%
ggplot(aes(year, prom_libros)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_text(aes(label = round(prom_libros, 2)),
vjust = -0.6, size = 3) +
scale_y_continuous(expand = expansion(mult = c(0.02, 0.12))) +
labs(title = "Promedio de libros leídos por año",
x = "Año", y = "Libros (promedio)")
datos %>%
group_by(year) %>%
summarise(pct_lee = mean(lee_algo, na.rm = TRUE)) %>%
ggplot(aes(year, pct_lee)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
geom_text(aes(label = round(pct_lee, 4)),
vjust = -0.6, size = 3) +
scale_y_continuous(labels = percent_format(accuracy = 1.2)) +
labs(title = "% que leyó ≥1 libro en los últimos 12 meses",
x = "Año", y = "Porcentaje")
datos <- datos %>%
mutate(no_lectura_motivo = recode(as.character(no_lectura_motivo),
"1" = "Falta interés/gusto",
"2" = "Prefiere otras actividades",
"3" = "Falta de tiempo",
"4" = "Falta de dinero",
"5" = "Problemas de salud",
"6" = "otro",
.default = "pase"
))
datos %>%
filter(!is.na(no_lectura_motivo)) %>%
count(no_lectura_motivo) %>%
mutate(
p = n / sum(n), # calcula el porcentaje
no_lectura_motivo = fct_reorder(no_lectura_motivo, p) # ordena por porcentaje
) %>%
ggplot(aes(no_lectura_motivo, n, fill = no_lectura_motivo)) +
geom_col() +
geom_text(aes(label = percent(p)), hjust = -0.2, size = 3.8) +
coord_flip(clip = "off") +
scale_y_continuous(expand = expansion(mult = c(0, 0.12))) +
guides(fill = "none") +
labs(title = "Motivos para NO leer", x = NULL, y = "Porcentaje")
datos %>%
filter(!is.na(no_lectura_motivo), no_lectura_motivo != "pase") %>%
count(no_lectura_motivo) %>%
mutate(
p = n / sum(n),
no_lectura_motivo = fct_reorder(no_lectura_motivo, p)
) %>%
ggplot(aes(no_lectura_motivo, n, fill = no_lectura_motivo)) +
geom_col() +
geom_text(aes(label = percent(p)), hjust = -0.2, size = 3.8) +
coord_flip(clip = "off") +
scale_y_continuous(expand = expansion(mult = c(0, 0.12))) +
guides(fill = "none") +
labs(title = "Motivos para NO leer (sin “PASE”)", x = NULL, y = "Porcentaje")
datos <- datos %>%
mutate(nivel_aprobado = recode(as.character(nivel_aprobado),
"0" = "Ninguno",
"1" = "Preescolar",
"2" = "Primaria",
"3" = "Secundaria",
"4" = "Preparatoria",
"5" = "Normal básica",
"6" = "Carrera técnica",
"7" = "Profesional",
"8" = "Maestría",
"9" = "Doctorado",
"99" = "No sabe",
.default = "Otro"
))
df_plot <- datos %>%
filter(!is.na(nivel_aprobado)) %>%
group_by(nivel_aprobado) %>%
filter(condicion_actividad != "No sabe") %>%
summarise(total_libros = sum(libros_leidos_12m_num, na.rm = TRUE), .groups = "drop") %>%
mutate(nivel_aprobado = fct_reorder(nivel_aprobado, total_libros))
ggplot(df_plot, aes(nivel_aprobado, total_libros, fill = nivel_aprobado)) +
geom_col() +
geom_text(aes(label = comma(round(total_libros, 0))),
hjust = -0.2, size = 3) +
coord_flip() +
scale_y_continuous(expand = expansion(mult = c(0, 0.12))) +
guides(fill = "none") +
labs(title = "Libros leídos por nivel educativo",
x = "Nivel aprobado", y = "Libros en 12 meses (3 años)")
datos <- datos %>%
mutate(condicion_actividad = recode(as.character(condicion_actividad),
"1" = "trabajo para obtener ingresos",
"2" = "trabajo sin pago",
"3" = "tenía trabajo",
"4" = "busca trabajo",
"5" = "espera de solicitud de trabajo",
"6" = "estudiante",
"7" = "quehaceres del hogar",
"8" = "jubilado(a)/pensionado(a)",
"9" = "incapacitado(a)",
"10" = "Otra situación",
"99" = "No especificado",
.default = "Otro"
))
datos %>%
filter(!is.na(condicion_actividad)) %>%
group_by(condicion_actividad) %>%
summarise(pct = mean(lee_algo, na.rm = TRUE)) %>%
mutate(condicion_actividad = fct_reorder(condicion_actividad, pct)) %>%
ggplot(aes(condicion_actividad, pct, fill=condicion_actividad)) +
geom_col() +
geom_text(aes(label = percent(pct, 1)), hjust = -0.05, size = 3.5) +
coord_flip(clip = "off") +
scale_y_continuous(labels = percent_format(), expand = expansion(mult = c(0, .1))) +
labs(title = "% que leyó ≥1 libro por condición de actividad",
x = NULL, y = "Porcentaje")
datos %>%
ggplot(aes(x = libros_leidos_12m_num)) +
geom_histogram(bins = 30, fill = "#69b3a2", color = "white") +
facet_wrap(~ year, scales = "free_y") +
labs(
title = "Distribución de libros leídos por año",
x = "Libros en 12 meses",
y = "Frecuencia"
) +
theme_minimal(base_size = 13)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
res_nivel <- datos%>%
group_by(nivel_aprobado) %>%
summarise(
n = n(),
pct_lee = mean(lee_algo, na.rm=TRUE)*100,
media_lectores = mean(libros_leidos_12m_num[lee_algo], na.rm=TRUE),
mediana_lectores = median(libros_leidos_12m_num[lee_algo], na.rm=TRUE)
)
res_nivel
## # A tibble: 11 × 5
## nivel_aprobado n pct_lee media_lectores mediana_lectores
## <chr> <int> <dbl> <dbl> <dbl>
## 1 Carrera técnica 241 63.5 2.95 2
## 2 Doctorado 30 86.7 9.15 4
## 3 Maestría 152 83.6 5.98 4
## 4 Ninguno 62 16.1 1.1 1
## 5 No sabe 8 12.5 1 1
## 6 Normal básica 34 52.9 3.89 2.5
## 7 Preescolar 2 0 NaN NA
## 8 Preparatoria 492 73.6 3.78 2
## 9 Primaria 315 53.0 3.32 2
## 10 Profesional 856 83.4 4.32 3
## 11 Secundaria 431 61.9 3.18 2
res_act <- datos %>%
group_by(condicion_actividad) %>%
summarise(
n = n(),
pct_lee = mean(lee_algo, na.rm=TRUE)*100,
media_lectores = mean(libros_leidos_12m_num[lee_algo], na.rm=TRUE),
mediana_lectores = median(libros_leidos_12m_num[lee_algo], na.rm=TRUE)
)
res_act
## # A tibble: 11 × 5
## condicion_actividad n pct_lee media_lectores mediana_lectores
## <chr> <int> <dbl> <dbl> <dbl>
## 1 No especificado 1 100 1 1
## 2 Otra situación 32 25 7.75 2.5
## 3 busca trabajo 110 48.2 3.42 2
## 4 espera de solicitud de trabajo 17 41.2 4.57 5
## 5 estudiante 192 87.5 4.52 4
## 6 incapacitado(a) 21 19.0 1.5 1
## 7 jubilado(a)/pensionado(a) 317 61.8 3.48 2
## 8 quehaceres del hogar 551 65.7 3.13 2
## 9 tenía trabajo 46 37.0 2.76 2
## 10 trabajo para obtener ingresos 1309 77.8 4.37 3
## 11 trabajo sin pago 27 37.0 2.7 3
datos %>% filter(nivel_aprobado != "No sabe") %>%
ggplot(aes(nivel_aprobado, libros_leidos_12m_num, fill=nivel_aprobado)) +
geom_boxplot(outlier.alpha=.3) + coord_flip() +
labs(title="Libros en 12m por nivel educativo", x=NULL, y="Libros en 12 meses")
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
datos %>% filter(condicion_actividad != "No especificado") %>%
ggplot(aes(condicion_actividad, libros_leidos_12m_num,fill=condicion_actividad)) +
geom_boxplot(outlier.alpha=.3) + coord_flip() +
labs(title="Libros en 12m por condición de actividad", x=NULL, y="Libros en 12 meses")
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
datos %>%
group_by(nivel_aprobado) %>%
filter(nivel_aprobado != "No sabe") %>%
summarise(pct = mean(lee_algo, na.rm=TRUE)) %>%
ggplot(aes(fct_reorder(nivel_aprobado, pct), pct,fill=nivel_aprobado)) +
geom_col() +
geom_text(aes(label=percent(pct,1)), hjust=-0.05, size=3.5) +
coord_flip(clip="off") +
labs(title="% que leyó ≥1 libro por nivel educativo", x=NULL, y="Porcentaje")
datos %>%
group_by(condicion_actividad) %>%
filter(condicion_actividad != "no especificado") %>%
summarise(pct = mean(lee_algo, na.rm=TRUE)) %>%
ggplot(aes(fct_reorder(condicion_actividad, pct), pct,fill=condicion_actividad)) +
geom_col() +
geom_text(aes(label=percent(pct,1)), hjust=-0.05, size=3.5) +
coord_flip(clip="off") +
labs(title="% que leyó ≥1 libro por condición de actividad", x=NULL, y="Porcentaje")
datos <- datos %>%
mutate(
lee_f = factor(lee_algo, levels = c(FALSE,TRUE), labels = c("No","Sí")),
educacion_grupo = case_when(
nivel_aprobado %in% c("Ninguno","Preescolar") ~ "Sin educación",
nivel_aprobado %in% "Primaria" ~ "Básica",
nivel_aprobado %in% c("Secundaria","Preparatoria","Normal básica","Carrera técnica") ~ "Media",
nivel_aprobado %in% c("Profesional","Maestría","Doctorado") ~ "Superior y Posgrado",
TRUE ~ NA_character_
),
actividad_grupo = case_when(
condicion_actividad %in% c("trabajo con ingresos","trabajo sin pago","tenía trabajo") ~ "Trabaja",
condicion_actividad %in% "estudiante" ~ "Estudia",
condicion_actividad %in% c("busca trabajo","espera trabajo","quehaceres del hogar",
"jubilado/pensionado","incapacitado","otra situación",
"no especificado","Otro") ~ "No trabaja",
TRUE ~ NA_character_
),
)
theme_set(theme_minimal(base_size = 12))
stats_educacion <- datos %>%
filter(!is.na(educacion_grupo), !is.na(libros_leidos_12m_num)) %>%
group_by(educacion_grupo) %>%
summarise(
media = mean(libros_leidos_12m_num, na.rm = TRUE),
mediana = median(libros_leidos_12m_num, na.rm = TRUE),
n = n()
)
p_edu <- datos %>%
filter(!is.na(educacion_grupo), !is.na(libros_leidos_12m_num)) %>%
ggplot(aes(
x = fct_reorder(educacion_grupo, libros_leidos_12m_num, .fun = median, .na_rm = TRUE),
y = libros_leidos_12m_num,
fill = educacion_grupo
)) +
geom_boxplot(outlier.alpha = .3)
p_edu +
geom_text(
data = stats_educacion,
aes(x = educacion_grupo, y = mediana,
label = paste0("Mediana=", round(mediana, 1))),
vjust = -1.6, hjust = -3.0, size = 3,
inherit.aes = FALSE
) +
geom_text(
data = stats_educacion,
aes(x = educacion_grupo, y = media,
label = paste0("Media=", round(media, 1))),
vjust = 2.5, hjust = -3.0, size = 3, color = "gray20",
inherit.aes = FALSE
) +
coord_flip() +
guides(fill = "none") +
labs(
title = "Libros en 12 meses por nivel educativo (grupos)",
x = NULL,
y = "Libros en 12 meses"
)
stats_actividad <- datos %>%
filter(!is.na(actividad_grupo), !is.na(libros_leidos_12m_num)) %>%
group_by(actividad_grupo) %>%
summarise(
media = mean(libros_leidos_12m_num, na.rm = TRUE),
mediana = median(libros_leidos_12m_num, na.rm = TRUE),
n = n()
)
p <- datos %>%
filter(!is.na(actividad_grupo), !is.na(libros_leidos_12m_num)) %>%
ggplot(aes(
x = fct_reorder(actividad_grupo, libros_leidos_12m_num, .fun = median, .na_rm = TRUE),
y = libros_leidos_12m_num, fill = actividad_grupo
)) +
geom_boxplot(outlier.alpha = .3)
p +
geom_text(
data = stats_actividad,
aes(x = actividad_grupo, y = mediana,
label = paste0("Mediana=", round(mediana, 1))),
vjust = -2.6,hjust = -3.0, size = 3,
inherit.aes = FALSE
) +
geom_text(
data = stats_actividad,
aes(x = actividad_grupo, y = media,
label = paste0("Media=", round(media, 1))),
vjust = 3.5,hjust = -3.0, size = 3, color = "gray20",
inherit.aes = FALSE
) +
coord_flip() +
guides(fill = "none") +
labs(
title = "Libros en 12 meses por condición de actividad (grupos)",
x = NULL, y = "Libros en 12 meses"
)
datos %>%
filter(!is.na(educacion_grupo), !is.na(libros_leidos_12m_num)) %>%
group_by(educacion_grupo) %>%
summarise(pct = mean(libros_leidos_12m_num > 0, na.rm = TRUE), n = n(), .groups = "drop") %>%
ggplot(aes(fct_reorder(educacion_grupo, pct), pct, fill = educacion_grupo)) +
geom_col() +
geom_text(aes(label = percent(pct, accuracy = 0.1)), hjust = -0.12, size = 3.8) +
coord_flip(clip = "off") +
scale_y_continuous(labels = percent_format(),
expand = expansion(mult = c(0, 0.12))) +
guides(fill = "none") +
labs(title = "% que leyó ≥1 libro por nivel educativo (grupos)",
x = NULL, y = "Porcentaje")
datos %>%
filter(!is.na(actividad_grupo), !is.na(libros_leidos_12m_num)) %>%
group_by(actividad_grupo) %>%
summarise(pct = mean(libros_leidos_12m_num > 0, na.rm = TRUE), n = n(), .groups = "drop") %>%
ggplot(aes(fct_reorder(actividad_grupo, pct), pct, fill = actividad_grupo)) +
geom_col() +
geom_text(aes(label = percent(pct, accuracy = 0.1)), hjust = -0.12, size = 3.8) +
coord_flip(clip = "off") +
scale_y_continuous(labels = percent_format(),
expand = expansion(mult = c(0, 0.12))) +
guides(fill = "none") +
labs(title = "% que leyó ≥1 libro por condición de actividad (grupos)",
x = NULL, y = "Porcentaje")
alpha <- 0.04
conf <- 1 - alpha
n_por_grupo <- datos %>%
filter(!is.na(educacion_grupo), !is.na(libros_leidos_12m_num)) %>%
count(educacion_grupo, name = "n") %>%
mutate(use_t = n < 30) %>%
arrange(n)
n_por_grupo
## educacion_grupo n use_t
## 1 Sin educación 64 FALSE
## 2 Básica 315 FALSE
## 3 Superior y Posgrado 1037 FALSE
## 4 Media 1198 FALSE
res_edu <- datos %>%
filter(!is.na(educacion_grupo), !is.na(libros_leidos_12m_num)) %>%
group_by(educacion_grupo) %>%
summarise(
n = n(),
media = mean(libros_leidos_12m_num),
sd = sd(libros_leidos_12m_num),
se = sd/sqrt(n),
t_crit = qt(1 - alpha/2,df=n-1),
IC_li = media - t_crit * se,
IC_ls = media + t_crit * se,
.groups = "drop"
)
print(res_edu)
## # A tibble: 4 × 8
## educacion_grupo n media sd se t_crit IC_li IC_ls
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Básica 315 1.76 5.21 0.293 2.06 1.15 2.36
## 2 Media 1198 2.29 4.39 0.127 2.06 2.03 2.55
## 3 Sin educación 64 0.172 0.420 0.0525 2.10 0.0618 0.282
## 4 Superior y Posgrado 1037 3.93 5.74 0.178 2.06 3.56 4.30
res_edu$educacion_grupo <- factor(res_edu$educacion_grupo,
levels = c("Sin educación", "Básica", "Media", "Superior y Posgrado")
)
ggplot(res_edu, aes(x = educacion_grupo, y = media)) +
geom_point(size=3, color="blue") +
geom_errorbar(aes(ymin=IC_li, ymax=IC_ls), width=0.2, color="darkblue") +
geom_text(aes(label = round(media, 2)),
vjust = -2.0, size = 3) +
labs(title="Promedio de libros leídos por nivel educativo (96% IC)",
x="Nivel educativo", y="Media de libros") +
theme_minimal()
n_por_grupo <- datos %>%
filter(!is.na(actividad_grupo), !is.na(libros_leidos_12m_num)) %>%
count(actividad_grupo, name = "n") %>%
mutate(use_t = n < 30) %>% # regla práctica: t si n<30
arrange(n)
n_por_grupo
## actividad_grupo n use_t
## 1 Trabaja 73 FALSE
## 2 Estudia 192 FALSE
## 3 No trabaja 661 FALSE
res_act <- datos %>%
filter(!is.na(actividad_grupo), !is.na(libros_leidos_12m_num)) %>%
group_by(actividad_grupo) %>%
summarise(
n = n(),
media = mean(libros_leidos_12m_num),
sd = sd(libros_leidos_12m_num),
se = sd/sqrt(n),
t_crit = qt(1 - alpha/2,df=n-1),
IC_li = media - t_crit * se,
IC_ls = media + t_crit * se,
.groups = "drop"
)
print(res_act)
## # A tibble: 3 × 8
## actividad_grupo n media sd se t_crit IC_li IC_ls
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Estudia 192 3.95 4.30 0.310 2.07 3.31 4.59
## 2 No trabaja 661 1.99 4.39 0.171 2.06 1.64 2.34
## 3 Trabaja 73 1.01 1.76 0.206 2.09 0.583 1.44
res_act$actividad_grupo <- factor(res_act$actividad_grupo,
levels = c("No trabaja", "Trabaja","Estudia")
)
ggplot(res_act, aes(x = actividad_grupo, y = media)) +
geom_point(size=3, color="green") +
geom_errorbar(aes(ymin=IC_li, ymax=IC_ls), width=0.2, color="darkgreen") +
geom_text(aes(label = round(media, 2)),
vjust = -2.0, size = 3) +
labs(title="Promedio de libros leídos por condición de actividad (96% IC)",
x="Condición de actividad", y="Media de libros") +
theme_minimal()
datos <- datos %>%
filter(!is.na(educacion_grupo))%>%
mutate(con_educacion = ifelse(educacion_grupo == "Sin educación",
"Sin educación", "Con educación"))
datos %>%
group_by(con_educacion) %>%
summarise(
cantidad = n(),
promedio_libros = mean(libros_leidos_12m_num, na.rm = TRUE),
desviacion = sd(libros_leidos_12m_num, na.rm = TRUE)
)
## # A tibble: 2 × 4
## con_educacion cantidad promedio_libros desviacion
## <chr> <int> <dbl> <dbl>
## 1 Con educación 2551 2.89 5.16
## 2 Sin educación 64 0.172 0.420
resultado <- t.test(libros_leidos_12m_num ~ con_educacion,
data = datos,
alternative = "greater",
conf.level=0.96, # Una cola derecha
var.equal = FALSE) # Welch (no asumimos varianzas iguales)
print(resultado)
##
## Welch Two Sample t-test
##
## data: libros_leidos_12m_num by con_educacion
## t = 23.674, df = 1065.2, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group Con educación and group Sin educación is greater than 0
## 96 percent confidence interval:
## 2.517113 Inf
## sample estimates:
## mean in group Con educación mean in group Sin educación
## 2.890196 0.171875
t_stat <- resultado$statistic
df <- resultado$parameter
alpha <- 0.04
t_crit <- qt(1 - alpha, df)
x <- seq(-25, 25, length=400)
y <- dt(x, df)
plot(x, y, type="l", lwd=2, xlab="t", ylab="Densidad",
main="Prueba t de Student (una cola derecha)\nCon educación > Sin educación")
polygon(c(x[x>t_crit], t_crit, max(x)),
c(y[x>t_crit], 0, 0), col=rgb(1,0,0,0.2))
abline(v=t_crit, col="red", lwd=2, lty=2) # Valor crítico
abline(v=t_stat, col="blue", lwd=2) # t calculado
legend("topright", legend=c("Valor crítico", "Estadístico t"),
col=c("red","blue"), lty=2:1, lwd=2)
ic_medias <- datos %>%
group_by(con_educacion) %>%
summarise(
n = n(),
media = mean(libros_leidos_12m_num, na.rm = TRUE),
sd = sd(libros_leidos_12m_num, na.rm = TRUE)
) %>%
mutate(
t_crit = qt(1 - alpha/2, df = n - 1),
error = t_crit * sd / sqrt(n),
LI = media - error,
LS = media + error
)
ggplot(ic_medias, aes(x = con_educacion, y = media)) +
geom_point(size = 3, color = "orange") +
geom_errorbar(aes(ymin = LI, ymax = LS), width = 0.2, color = "orange", lwd = 1) +
geom_text(aes(label = round(media, 2)),
vjust = -2.0, size = 3) +
labs(
title = "Intervalos de confianza (96%) de libros leídos por grupo educativo",
x = "Nivel educativo",
y = "Media de libros leídos"
) +
theme_minimal()
tabla1 <- table(datos$educacion_grupo, datos$lee_f)
tabla2 <- table(datos$actividad_grupo, datos$lee_f)
tabla1
##
## No Sí
## Básica 148 167
## Media 398 800
## Sin educación 54 10
## Superior y Posgrado 171 867
tabla2
##
## No Sí
## Estudia 24 168
## No trabaja 245 415
## Trabaja 46 27
chi1 <- chisq.test(tabla1, correct = FALSE)
chi2 <- chisq.test(tabla2, correct = FALSE)
print(chi1); print(chi2)
##
## Pearson's Chi-squared test
##
## data: tabla1
## X-squared = 231.71, df = 3, p-value < 2.2e-16
##
## Pearson's Chi-squared test
##
## data: tabla2
## X-squared = 69.746, df = 2, p-value = 7.16e-16
datos_barras <- datos %>% drop_na(educacion_grupo, actividad_grupo, lee_f)
ggplot(datos_barras, aes(x = educacion_grupo, fill = lee_f)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(
title = "Proporción de lectores por nivel educativo",
x = "Nivel educativo",
y = "Proporción",
fill = "Lee"
) +
geom_text(
aes(
label = paste0(round((..count..) / tapply(..count.., ..x.., sum)[as.character(..x..)] * 100, 1), "%")
),
stat = "count", position = position_fill(vjust = 0.5), size = 3
) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 20, hjust = 1))
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplot(datos_barras, aes(x = actividad_grupo, fill = lee_f)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(
title = "Proporción de lectores por condición de actividad",
x = "Condición de actividad",
y = "Proporción",
fill = "Lee"
) +
geom_text(
aes(
label = paste0(round((..count..) / tapply(..count.., ..x.., sum)[as.character(..x..)] * 100, 1), "%")
),
stat = "count", position = position_fill(vjust = 0.5), size = 3
) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 20, hjust = 1))
alpha <- 0.04
z <- qnorm(1 - alpha/2)
no_lectores <- datos %>%
filter(lee_algo == FALSE, !is.na(no_lectura_motivo), no_lectura_motivo != "pase")
ic_motivos <- no_lectores %>%
count(no_lectura_motivo, name = "x") %>%
mutate(
n = sum(x),
p = x / n,
error = z * sqrt((p * (1 - p)) / n),
LI = p - error,
LS = p + error
)
ic_motivos
## no_lectura_motivo x n p error LI LS
## 1 Falta de dinero 15 292 0.05136986 0.02653130 0.02483856 0.07790117
## 2 Falta de tiempo 78 292 0.26712329 0.05317742 0.21394587 0.32030071
## 3 Falta interés/gusto 74 292 0.25342466 0.05227778 0.20114687 0.30570244
## 4 Prefiere otras actividades 56 292 0.19178082 0.04731764 0.14446318 0.23909846
## 5 Problemas de salud 69 292 0.23630137 0.05105638 0.18524499 0.28735775
ic_motivos <- ic_motivos %>%
arrange(p) %>%
mutate(no_lectura_motivo = factor(no_lectura_motivo, levels = no_lectura_motivo))
ggplot(ic_motivos, aes(x = no_lectura_motivo, y = p)) +
geom_col(fill = "steelblue", alpha = 0.8) + # barras de proporciones
geom_errorbar(aes(ymin = LI, ymax = LS), width = 0.2, color = "black") + # IC 96%
geom_text(aes(label = scales::percent(p, accuracy = 0.1)),
vjust = -0.5, size = 3.5) +
labs(
title = "Proporción de motivos de no lectura (con IC del 96%)",
x = "Motivo principal de no lectura",
y = "Proporción de personas"
) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 30, hjust = 1),
plot.title = element_text(face = "bold")
)
tabla3 <- table(datos$educacion_grupo, datos$no_lectura_motivo)
tabla4 <- table(datos$actividad_grupo, datos$no_lectura_motivo)
tabla3
##
## Falta de dinero Falta de tiempo Falta interés/gusto pase
## Básica 4 15 14 254
## Media 10 38 37 1048
## Sin educación 1 5 7 40
## Superior y Posgrado 0 20 16 981
##
## Prefiere otras actividades Problemas de salud
## Básica 12 16
## Media 30 35
## Sin educación 3 8
## Superior y Posgrado 11 10
tabla4
##
## Falta de dinero Falta de tiempo Falta interés/gusto pase
## Estudia 0 5 2 183
## No trabaja 2 24 26 569
## Trabaja 0 11 5 53
##
## Prefiere otras actividades Problemas de salud
## Estudia 2 0
## No trabaja 19 20
## Trabaja 2 2
chi3 <- chisq.test(tabla3, correct = FALSE)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
chi4 <- chisq.test(tabla4, correct = FALSE)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
print(chi3); print(chi4)
##
## Pearson's Chi-squared test
##
## data: tabla3
## X-squared = 113.57, df = 15, p-value < 2.2e-16
##
## Pearson's Chi-squared test
##
## data: tabla4
## X-squared = 39.355, df = 10, p-value = 2.2e-05
datos_barras <- datos %>% drop_na(educacion_grupo, actividad_grupo, no_lectura_motivo)
# (a) Nivel educativo vs Lector
datos_barras %>%
filter(!is.na(educacion_grupo),
!is.na(no_lectura_motivo),
no_lectura_motivo != "pase") %>%
count(educacion_grupo, no_lectura_motivo) %>%
group_by(educacion_grupo) %>%
mutate(prop = n / sum(n)) %>%
ungroup() %>%
ggplot(aes(x = educacion_grupo, y = prop, fill = no_lectura_motivo)) +
geom_col(position = "fill") +
scale_y_continuous(labels = scales::percent) +
geom_text(aes(label = scales::percent(prop, accuracy = 1)),
position = position_fill(vjust = 0.5), size = 3) +
labs(
title = "Motivos para NO leer por nivel educativo",
x = "Nivel educativo",
y = "Proporción",
fill = "Motivo"
) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 20, hjust = 1))
datos_barras %>%
filter(!is.na(actividad_grupo),
!is.na(no_lectura_motivo),
no_lectura_motivo != "pase") %>%
count(actividad_grupo, no_lectura_motivo) %>%
group_by(actividad_grupo) %>%
mutate(prop = n / sum(n)) %>%
ungroup() %>%
ggplot(aes(x = actividad_grupo, y = prop, fill = no_lectura_motivo)) +
geom_col(position = "fill") +
scale_y_continuous(labels = scales::percent) +
geom_text(aes(label = scales::percent(prop, accuracy = 1)),
position = position_fill(vjust = 0.5), size = 3) +
labs(
title = "Motivos para NO leer por condición de actividad",
x = "Condición de actividad",
y = "Proporción",
fill = "Motivo"
) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 20, hjust = 1))
n_inicial <- nrow(molec)
n_final <- nrow(datos)
descartados <- n_inicial - n_final
porcentaje_descartados <- (descartados / n_inicial) * 100
cat("Porcentaje de datos descartados:", round(porcentaje_descartados, 2), "%")
## Porcentaje de datos descartados: 56.72 %