Práctica de uso de posit.cloud para la gestión y generación de reportes

Microdato de la Encuesta de Hogares publicada por el INE

# Attach readxl (run the commented install line once if it is missing).
# Note: the import below uses base R's read.csv(), not readxl.
# install.packages("readxl")
library("readxl")

# Read the 2022 microdata directly from its URL; the file has a header row
# and uses ";" as the field separator.
eph22 <- read.csv(
  file = "https://www.ine.gov.py/datos/encuestas/eph/Poblacion/EPH-2022/data/REG02_EPHC2022.csv",
  header = TRUE,
  sep = ";"
)
# Optional, left disabled: save a local copy of the download.
#write.csv(eph22,"eph2022.csv")

Verificar los nombres y propiedades de los campos

# Optional interactive inspection of the loaded data (left disabled):
# column names, object structure, and the data viewer.
#names(eph22)
#str(eph22)
#View(eph22)

Explorar algunos de los campos

# Unweighted record count for each distinct value of P06.
table(eph22[["P06"]])
## 
##    1    6 
## 8579 8800

Codificación de P06: 1 = Hombres, 6 = Mujeres

# Turn P06 into a labelled factor. Labels are matched positionally to the
# sorted distinct values, so the lowest code receives "Hombres".
eph22[["P06"]] <- factor(eph22[["P06"]], labels = c("Hombres", "Mujeres"))
# Tabulate the labelled variable.
table(eph22[["P06"]])
## 
## Hombres Mujeres 
##    8579    8800

Resultados poblacionales

# Weighted totals: sum the expansion factor (FEX) within each P06 group.
tabla1 <- aggregate(
  x = eph22[["FEX"]],
  by = list(eph22[["P06"]]),
  FUN = sum
)
tabla1
##   Group.1       x
## 1 Hombres 3705758
## 2 Mujeres 3659784

ÁREA DE RESIDENCIA

# Label AREA as a factor; labels follow the sorted order of the distinct codes.
# NOTE(review): the raw AREA codes are not shown here; confirm that the lower
# code corresponds to the urban area.
eph22[["AREA"]] <- factor(eph22[["AREA"]], labels = c("Urbana", "Rural"))
# Unweighted record count per area.
table(eph22[["AREA"]])
## 
## Urbana  Rural 
##   9199   8180
# Weighted totals by area: sum the expansion factor (FEX) per AREA group.
tabla2 <- aggregate(
  x = eph22[["FEX"]],
  by = list(eph22[["AREA"]]),
  FUN = sum
)
tabla2
##   Group.1       x
## 1  Urbana 4683468
## 2   Rural 2682074
# Bar chart of the weighted totals in tabla2, one bar per area.
# Supplying the table through `data =` lets the formula use plain column
# names, and the explicit axis titles replace the raw "tabla2$..." expressions
# that the formula method would otherwise print as labels.
barplot(
  x ~ Group.1,
  data = tabla2,
  xlab = "Área de residencia",
  ylab = "Población (expandida)"
)

Incorporar las bases de los años anteriores

# Load the microdata for the earlier years.
# One-time setup, left disabled: each pair of lines reads one year of the
# survey from its URL and writes it to a local CSV file. The compilation step
# later in this script reads eph2018.csv ... eph2022.csv from the working
# directory, so these files must exist before that step runs.

#eph21=read.csv("https://www.ine.gov.py/datos/encuestas/eph/Poblacion/EPH-2021/data/9e824reg02_ephc2021.csv", header=TRUE, sep=";")
#write.csv(eph21,"eph2021.csv")
#eph20=read.csv("https://www.ine.gov.py/datos/encuestas/eph/Poblacion/EPH-2020/data/55f07reg02_ephc2020.csv", header=TRUE, sep=";")
#write.csv(eph20,"eph2020.csv")
#eph19=read.csv("https://www.ine.gov.py/datos/encuestas/eph/Poblacion/EPH-2019/data/4edb7reg02_ephc2019.csv", header=TRUE, sep=";")
#write.csv(eph19,"eph2019.csv")
#eph18=read.csv("https://www.ine.gov.py/datos/encuestas/eph/Poblacion/EPH-2018/data/3493ereg02_ephc2018.csv", header=TRUE, sep=";")
#write.csv(eph18,"eph2018.csv")

Compilar los microdatos en una única base

library("readxl")
library("readr")

# Names of the columns kept from every yearly file.
variables <- c("P06", "P02", "AREA", "FEX", "PEAA", "e01aimde", "añoest")
variables
## [1] "P06"      "P02"      "AREA"     "FEX"      "PEAA"     "e01aimde" "añoest"
# Read only the selected columns of the 2018 file. The character vector is
# wrapped in all_of() because passing an external vector directly to a
# selection is deprecated since tidyselect 1.1.0 and emits a warning.
eph2018 <- read_csv("eph2018.csv", col_select = all_of(variables))
## New names:
## • `` -> `...1`
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(variables)
## 
##   # Now:
##   data %>% select(all_of(variables))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## Rows: 18563 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): AREA, P02, P06, PEAA, FEX, añoest
## num (1): e01aimde
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the selected columns of the 2019 file; all_of() replaces the
# deprecated selection by a bare external character vector.
eph2019 <- read_csv("eph2019.csv", col_select = all_of(variables))
## New names:
## Rows: 18233 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (6): AREA, P02, P06, PEAA, FEX, añoest num (1): e01aimde
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Read the selected columns of the 2020 file; all_of() replaces the
# deprecated selection by a bare external character vector.
eph2020 <- read_csv("eph2020.csv", col_select = all_of(variables))
## New names:
## Rows: 17582 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (6): AREA, P02, P06, PEAA, FEX, añoest num (1): e01aimde
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Read the selected columns of the 2021 file; all_of() replaces the
# deprecated selection by a bare external character vector.
eph2021 <- read_csv("eph2021.csv", col_select = all_of(variables))
## New names:
## Rows: 16569 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (6): AREA, P02, P06, PEAA, FEX, añoest num (1): e01aimde
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Read the selected columns of the 2022 file; all_of() replaces the
# deprecated selection by a bare external character vector.
eph2022 <- read_csv("eph2022.csv", col_select = all_of(variables))
## New names:
## Rows: 17379 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (6): AREA, P02, P06, PEAA, FEX, añoest num (1): e01aimde
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Tag each yearly table with a constant `year` column so the rows remain
# identifiable once the tables are stacked.
eph2018[["year"]] <- 2018
eph2019[["year"]] <- 2019
eph2020[["year"]] <- 2020
eph2021[["year"]] <- 2021
eph2022[["year"]] <- 2022
# Print the column names of each yearly table to confirm they all match.
colnames(eph2018)
## [1] "P06"      "P02"      "AREA"     "FEX"      "PEAA"     "e01aimde" "añoest"  
## [8] "year"
colnames(eph2019)
## [1] "P06"      "P02"      "AREA"     "FEX"      "PEAA"     "e01aimde" "añoest"  
## [8] "year"
colnames(eph2020)
## [1] "P06"      "P02"      "AREA"     "FEX"      "PEAA"     "e01aimde" "añoest"  
## [8] "year"
colnames(eph2021)
## [1] "P06"      "P02"      "AREA"     "FEX"      "PEAA"     "e01aimde" "añoest"  
## [8] "year"
colnames(eph2022)
## [1] "P06"      "P02"      "AREA"     "FEX"      "PEAA"     "e01aimde" "añoest"  
## [8] "year"
# Stack the five yearly tables row-wise into a single data set.
eph <- do.call(
  rbind,
  list(eph2018, eph2019, eph2020, eph2021, eph2022)
)
# Unweighted record count by P06 value and year.
table(eph[["P06"]], eph[["year"]])
##    
##     2018 2019 2020 2021 2022
##   1 9174 9131 8762 8149 8579
##   6 9389 9102 8820 8420 8800

Etiquetas

# Replace the numeric codes with labels. Each label vector is matched
# positionally to the sorted distinct values of its column.
# NOTE(review): the AREA and PEAA code lists are not shown here; confirm the
# label order against the survey's data dictionary.
eph[["P06"]] <- factor(eph[["P06"]], labels = c("Hombres", "Mujeres"))
eph[["AREA"]] <- factor(eph[["AREA"]], labels = c("Urbana", "Rural"))
eph[["PEAA"]] <- factor(
  eph[["PEAA"]],
  labels = c("Ocupados", "Desocupados", "Inactivos")
)
# Unweighted record count by labelled P06 and year.
table(eph[["P06"]], eph[["year"]])
##          
##           2018 2019 2020 2021 2022
##   Hombres 9174 9131 8762 8149 8579
##   Mujeres 9389 9102 8820 8420 8800
# Unweighted record count by AREA and year.
table(eph[["AREA"]], eph[["year"]])
##         
##           2018  2019  2020  2021  2022
##   Urbana 10001  9870  9316  8483  9199
##   Rural   8562  8363  8266  8086  8180
# Unweighted record count by PEAA and year.
table(eph[["PEAA"]], eph[["year"]])
##              
##               2018 2019 2020 2021 2022
##   Ocupados    9186 9201 8732 8182 8492
##   Desocupados  469  490  537  507  458
##   Inactivos   5514 5295 5245 4959 5417
# Weighted totals: sum of FEX for every combination of P06 and year.
tabla3 <- aggregate(
  x = eph[["FEX"]],
  by = list(eph[["P06"]], eph[["year"]]),
  FUN = sum
)
tabla3
##    Group.1 Group.2       x
## 1  Hombres    2018 3512556
## 2  Mujeres    2018 3458673
## 3  Hombres    2019 3560800
## 4  Mujeres    2019 3508527
## 5  Hombres    2020 3608906
## 6  Mujeres    2020 3558610
## 7  Hombres    2021 3657340
## 8  Mujeres    2021 3609103
## 9  Hombres    2022 3705758
## 10 Mujeres    2022 3659784
# Weighted totals: sum of FEX for every combination of AREA and year.
tabla4 <- aggregate(
  x = eph[["FEX"]],
  by = list(eph[["AREA"]], eph[["year"]]),
  FUN = sum
)
tabla4
##    Group.1 Group.2       x
## 1   Urbana    2018 4321796
## 2    Rural    2018 2649433
## 3   Urbana    2019 4410912
## 4    Rural    2019 2658415
## 5   Urbana    2020 4500165
## 6    Rural    2020 2667351
## 7   Urbana    2021 4591760
## 8    Rural    2021 2674683
## 9   Urbana    2022 4683468
## 10   Rural    2022 2682074
# Weighted totals: sum of FEX for every combination of PEAA and year.
tabla5 <- aggregate(
  x = eph[["FEX"]],
  by = list(eph[["PEAA"]], eph[["year"]]),
  FUN = sum
)
tabla5
##        Group.1 Group.2       x
## 1     Ocupados    2018 3375265
## 2  Desocupados    2018  203038
## 3    Inactivos    2018 2010226
## 4     Ocupados    2019 3478441
## 5  Desocupados    2019  208220
## 6    Inactivos    2019 1997285
## 7     Ocupados    2020 3475918
## 8  Desocupados    2020  258975
## 9    Inactivos    2020 2044881
## 10    Ocupados    2021 3540009
## 11 Desocupados    2021  255681
## 12   Inactivos    2021 2080725
## 13    Ocupados    2022 3552129
## 14 Desocupados    2022  215184
## 15   Inactivos    2022 2206323