# Load the package intended for accessing the microdata.
# NOTE(review): no readxl function is called in the visible script; the data
# below are read with base R's read.csv() -- confirm readxl is still needed.
# install.packages("readxl")
library(readxl)
# Download the EPH 2022 microdata: a semicolon-separated file with a header row.
eph22 <- read.csv(
  file = "https://www.ine.gov.py/datos/encuestas/eph/Poblacion/EPH-2022/data/REG02_EPHC2022.csv",
  sep = ";",
  header = TRUE
)
# Optional: keep a local copy of the download.
#write.csv(eph22,"eph2022.csv")
# Verificar los nombres y propiedades de los campos
#names(eph22)
#str(eph22)
#View(eph22)
# Explorar algunos de los campos
# Unweighted number of sample records for each code of P06.
table(eph22[["P06"]])
##
## 1 6
## 8579 8800
# Códigos de P06: 1 = Hombres, 6 = Mujeres
# Convert P06 into a labelled factor. factor() assigns the labels, in order,
# to the sorted distinct values of the column, so the smaller code gets
# "Hombres" and the larger one "Mujeres".
eph22[["P06"]] <- factor(eph22[["P06"]], labels = c("Hombres", "Mujeres"))
# Same sample counts as before, now displayed by label.
table(eph22[["P06"]])
##
## Hombres Mujeres
## 8579 8800
# Resultados poblacionales
# Weighted totals by sex: add up the expansion factor (FEX) within each
# P06 category instead of counting sample rows.
tabla1 <- aggregate(
  x = eph22[["FEX"]],
  by = list(eph22[["P06"]]),
  FUN = sum
)
tabla1
## Group.1 x
## 1 Hombres 3705758
## 2 Mujeres 3659784
# Área de residencia
# Convert AREA into a labelled factor; labels are assigned, in order, to the
# sorted distinct values of the column.
eph22[["AREA"]] <- factor(eph22[["AREA"]], labels = c("Urbana", "Rural"))
# Unweighted sample counts per area.
table(eph22[["AREA"]])
##
## Urbana Rural
## 9199 8180
# Weighted totals per area: sum of the expansion factor (FEX) in each group.
tabla2 <- aggregate(
  x = eph22[["FEX"]],
  by = list(eph22[["AREA"]]),
  FUN = sum
)
tabla2
## Group.1 x
## 1 Urbana 4683468
## 2 Rural 2682074
# Bar chart of the weighted totals, one bar per area.
barplot(tabla2$x ~ tabla2$Group.1)
# Load the microdata for the earlier years (2018-2021).
# Each pair of commented-out commands downloads one year's semicolon-separated
# file and saves it locally as eph<year>.csv. The read_csv() calls further
# down read files with these names, so the files must already exist in the
# working directory (presumably these lines were run once and then disabled).
#eph21=read.csv("https://www.ine.gov.py/datos/encuestas/eph/Poblacion/EPH-2021/data/9e824reg02_ephc2021.csv", header=TRUE, sep=";")
#write.csv(eph21,"eph2021.csv")
#eph20=read.csv("https://www.ine.gov.py/datos/encuestas/eph/Poblacion/EPH-2020/data/55f07reg02_ephc2020.csv", header=TRUE, sep=";")
#write.csv(eph20,"eph2020.csv")
#eph19=read.csv("https://www.ine.gov.py/datos/encuestas/eph/Poblacion/EPH-2019/data/4edb7reg02_ephc2019.csv", header=TRUE, sep=";")
#write.csv(eph19,"eph2019.csv")
#eph18=read.csv("https://www.ine.gov.py/datos/encuestas/eph/Poblacion/EPH-2018/data/3493ereg02_ephc2018.csv", header=TRUE, sep=";")
#write.csv(eph18,"eph2018.csv")
# Compilar los microdatos en una única base
library("readxl")
library("readr")
# Names of the columns kept from every yearly file.
variables <- c("P06", "P02", "AREA", "FEX", "PEAA", "e01aimde", "añoest")
variables
## [1] "P06" "P02" "AREA" "FEX" "PEAA" "e01aimde" "añoest"
# Read the local CSV copy of each year, keeping only the columns listed in
# `variables`. The character vector is wrapped in tidyselect::all_of()
# because passing an external vector bare to a tidyselect argument is
# deprecated (it produced a deprecation warning on the first read).
# The "New names" message refers to the unnamed first column of each file,
# which is not among the selected columns.
eph2018 <- read_csv("eph2018.csv", col_select = tidyselect::all_of(variables))
## New names:
## • `` -> `...1`
## Rows: 18563 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): AREA, P02, P06, PEAA, FEX, añoest
## num (1): e01aimde
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
eph2019 <- read_csv("eph2019.csv", col_select = tidyselect::all_of(variables))
## New names:
## Rows: 18233 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (6): AREA, P02, P06, PEAA, FEX, añoest num (1): e01aimde
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
eph2020 <- read_csv("eph2020.csv", col_select = tidyselect::all_of(variables))
## New names:
## Rows: 17582 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (6): AREA, P02, P06, PEAA, FEX, añoest num (1): e01aimde
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
eph2021 <- read_csv("eph2021.csv", col_select = tidyselect::all_of(variables))
## New names:
## Rows: 16569 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (6): AREA, P02, P06, PEAA, FEX, añoest num (1): e01aimde
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
eph2022 <- read_csv("eph2022.csv", col_select = tidyselect::all_of(variables))
## New names:
## Rows: 17379 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (6): AREA, P02, P06, PEAA, FEX, añoest num (1): e01aimde
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Tag every yearly data set with its year so the rows stay identifiable
# once the data sets are combined.
eph2018$year <- 2018
eph2019$year <- 2019
eph2020$year <- 2020
eph2021$year <- 2021
eph2022$year <- 2022
# Confirm that all five yearly data sets expose the same columns in the
# same order before stacking them.
colnames(eph2018)
## [1] "P06" "P02" "AREA" "FEX" "PEAA" "e01aimde" "añoest"
## [8] "year"
colnames(eph2019)
## [1] "P06" "P02" "AREA" "FEX" "PEAA" "e01aimde" "añoest"
## [8] "year"
colnames(eph2020)
## [1] "P06" "P02" "AREA" "FEX" "PEAA" "e01aimde" "añoest"
## [8] "year"
colnames(eph2021)
## [1] "P06" "P02" "AREA" "FEX" "PEAA" "e01aimde" "añoest"
## [8] "year"
colnames(eph2022)
## [1] "P06" "P02" "AREA" "FEX" "PEAA" "e01aimde" "añoest"
## [8] "year"
# Stack the yearly data sets row-wise into a single table.
eph <- rbind(
  eph2018,
  eph2019,
  eph2020,
  eph2021,
  eph2022
)
# Unweighted sample counts of each P06 code per year.
table(eph[["P06"]], eph[["year"]])
##
## 2018 2019 2020 2021 2022
## 1 9174 9131 8762 8149 8579
## 6 9389 9102 8820 8420 8800
# Etiquetas
# Replace the numeric codes of the categorical columns with descriptive
# labels. factor() assigns the labels, in order, to the sorted distinct
# values found in each column.
eph[["P06"]] <- factor(eph[["P06"]], labels = c("Hombres", "Mujeres"))
eph[["AREA"]] <- factor(eph[["AREA"]], labels = c("Urbana", "Rural"))
eph[["PEAA"]] <- factor(
  eph[["PEAA"]],
  labels = c("Ocupados", "Desocupados", "Inactivos")
)
# Unweighted sample counts per year for each labelled variable.
table(eph[["P06"]], eph[["year"]])
##
## 2018 2019 2020 2021 2022
## Hombres 9174 9131 8762 8149 8579
## Mujeres 9389 9102 8820 8420 8800
table(eph[["AREA"]], eph[["year"]])
##
## 2018 2019 2020 2021 2022
## Urbana 10001 9870 9316 8483 9199
## Rural 8562 8363 8266 8086 8180
# NOTE(review): the PEAA counts add up to fewer records per year than the
# P06 and AREA counts, so some records presumably have no PEAA value.
table(eph[["PEAA"]], eph[["year"]])
##
## 2018 2019 2020 2021 2022
## Ocupados 9186 9201 8732 8182 8492
## Desocupados 469 490 537 507 458
## Inactivos 5514 5295 5245 4959 5417
# Weighted totals per year: in each table the expansion factor (FEX) is
# summed within every combination of category (Group.1) and year (Group.2).

# By sex (P06).
tabla3 <- aggregate(
  x = eph[["FEX"]],
  by = list(eph[["P06"]], eph[["year"]]),
  FUN = sum
)
tabla3
## Group.1 Group.2 x
## 1 Hombres 2018 3512556
## 2 Mujeres 2018 3458673
## 3 Hombres 2019 3560800
## 4 Mujeres 2019 3508527
## 5 Hombres 2020 3608906
## 6 Mujeres 2020 3558610
## 7 Hombres 2021 3657340
## 8 Mujeres 2021 3609103
## 9 Hombres 2022 3705758
## 10 Mujeres 2022 3659784
# By area of residence (AREA).
tabla4 <- aggregate(
  x = eph[["FEX"]],
  by = list(eph[["AREA"]], eph[["year"]]),
  FUN = sum
)
tabla4
## Group.1 Group.2 x
## 1 Urbana 2018 4321796
## 2 Rural 2018 2649433
## 3 Urbana 2019 4410912
## 4 Rural 2019 2658415
## 5 Urbana 2020 4500165
## 6 Rural 2020 2667351
## 7 Urbana 2021 4591760
## 8 Rural 2021 2674683
## 9 Urbana 2022 4683468
## 10 Rural 2022 2682074
# By activity status (PEAA).
tabla5 <- aggregate(
  x = eph[["FEX"]],
  by = list(eph[["PEAA"]], eph[["year"]]),
  FUN = sum
)
tabla5
## Group.1 Group.2 x
## 1 Ocupados 2018 3375265
## 2 Desocupados 2018 203038
## 3 Inactivos 2018 2010226
## 4 Ocupados 2019 3478441
## 5 Desocupados 2019 208220
## 6 Inactivos 2019 1997285
## 7 Ocupados 2020 3475918
## 8 Desocupados 2020 258975
## 9 Inactivos 2020 2044881
## 10 Ocupados 2021 3540009
## 11 Desocupados 2021 255681
## 12 Inactivos 2021 2080725
## 13 Ocupados 2022 3552129
## 14 Desocupados 2022 215184
## 15 Inactivos 2022 2206323