#setwd("~/OD/OneDrive/FACEN_MScEstad/Dia1")
# en caso de trabajar en la version de escritorio
#base=read.csv2("https://www.ine.gov.py/datos/encuestas/eph/Poblacion/EPH-2021/data/9e824reg02_ephc2021.csv", header=TRUE,sep=";",dec = ",")
#save(base, file = "D:/OD/OneDrive/FACEN_MScEstad/Dia2/base_ephc2021.Rdata")
# en caso de trabajar en la version cloud
# Semicolon-separated survey file, read from the current working directory.
base <- read.csv(file = "9e824reg02_ephc2021.csv", header = TRUE, sep = ";")
# Keep a binary copy of the data frame next to the script.
save(list = "base", file = "base_ephc2021.Rdata")
colnames(base)
## [1] "UPM" "NVIVI" "NHOGA"
## [4] "DPTOREP" "AREA" "L02"
## [7] "P02" "P03" "P04"
## [10] "P04A" "P04B" "P05C"
## [13] "P05P" "P05M" "P06"
## [16] "P08D" "P08M" "P08A"
## [19] "P09" "P10A" "P10AB"
## [22] "P10Z" "P11A" "P11AB"
## [25] "P11Z" "P12" "A01"
## [28] "A01A" "A02" "A03"
## [31] "A04" "A04B" "A04A"
## [34] "A05" "A07" "A08"
## [37] "A10" "A11A" "A11M"
## [40] "A11S" "A12" "A13REC"
## [43] "A14REC" "A15" "A16"
## [46] "A17A" "A17M" "A17S"
## [49] "A18" "A18A" "B01REC"
## [52] "B02REC" "B03LU" "B03MA"
## [55] "B03MI" "B03JU" "B03VI"
## [58] "B03SA" "B03DO" "B04"
## [61] "B05" "B05A" "B06"
## [64] "B07A" "B07M" "B07S"
## [67] "B08" "B09A" "B09M"
## [70] "B09S" "B10" "B11"
## [73] "B12" "B12A" "B12B"
## [76] "B12C" "B13" "B14"
## [79] "B15" "B16G" "B16U"
## [82] "B16D" "B16T" "B17"
## [85] "B18AG" "B18AU" "B18BG"
## [88] "B18BU" "B19" "B20G"
## [91] "B20U" "B20D" "B20T"
## [94] "B21" "B22" "B23"
## [97] "B24" "B25" "B26"
## [100] "B271" "B272" "B28"
## [103] "B29" "B30" "B31"
## [106] "C01REC" "C02REC" "C03"
## [109] "C04" "C05" "C06"
## [112] "C07" "C08" "C09"
## [115] "C101" "C102" "C11G"
## [118] "C11U" "C11D" "C11T"
## [121] "C12" "C13AG" "C13AU"
## [124] "C13BG" "C13BU" "C14"
## [127] "C14A" "C14B" "C14C"
## [130] "C15" "C16REC" "C17REC"
## [133] "C18" "C18A" "C18B"
## [136] "C19" "D01" "D02"
## [139] "D03" "D04" "D05"
## [142] "E01A" "E01B" "E01C"
## [145] "E01D" "E01E" "E01F"
## [148] "E01G" "E01H" "E01I"
## [151] "E01J" "E01K" "E01L"
## [154] "E01M" "E02D1" "E02D2"
## [157] "E02B" "ED01" "ED02"
## [160] "ED03" "ED0504" "ED06C"
## [163] "ED08" "ED09" "ED10"
## [166] "ED11F1" "ED11F1A" "ED11GH1"
## [169] "ED11GH1A" "ED12" "ED13"
## [172] "ED14" "ED14A" "ED15"
## [175] "S01A" "S01B" "S02"
## [178] "S03" "S03A" "S03B"
## [181] "S03C" "S04" "S05"
## [184] "S06" "S07" "S08"
## [187] "S09" "CATE_PEA" "TAMA_PEA"
## [190] "OCUP_PEA" "RAMA_PEA" "HORAB"
## [193] "HORABC" "HORABCO" "PEAD"
## [196] "PEAA" "TIPOHOGA" "FEX"
## [199] "NJEF" "NCON" "NPAD"
## [202] "NMAD" "TIC01" "TIC02"
## [205] "TIC03" "TIC0401" "TIC0402"
## [208] "TIC0403" "TIC0404" "TIC0405"
## [211] "TIC0406" "TIC0407" "TIC0408"
## [214] "TIC0409" "TIC0501" "TIC0502"
## [217] "TIC0503" "TIC0504" "TIC0505"
## [220] "TIC0506" "TIC0507" "TIC0508"
## [223] "TIC0509" "TIC0510" "TIC0511"
## [226] "TIC0512" "TIC0513" "TIC06"
## [229] "TIC07" "añoest" "ra06ya09"
## [232] "e01aimde" "e01bimde" "e01cimde"
## [235] "e01dde" "e01ede" "e01fde"
## [238] "e01gde" "e01hde" "e01ide"
## [241] "e01jde" "e01kde" "e01lde"
## [244] "e01mde" "e01kjde" "e02bde"
## [247] "ingrevasode" "ingrepytyvõde" "ingresect_privadode"
## [250] "ipcm" "pobrezai" "pobnopoi"
## [253] "quintili" "decili" "quintiai"
## [256] "decilai" "informalidad"
# Explore and recode the variables of interest.
# P06: attach the labels "Hombres"/"Mujeres" to its two codes and tabulate.
base[["P06"]] <- factor(base[["P06"]], labels = c("Hombres", "Mujeres"))
table(base[["P06"]])
##
## Hombres Mujeres
## 8149 8420
# PEAA: raw codes first, then the labelled version.
table(base[["PEAA"]])
##
## 1 2 3
## 8182 507 4959
base[["PEAA"]] <- factor(
  base[["PEAA"]],
  labels = c("Ocupados", "Desocupados", "Inactivos")
)
table(base[["PEAA"]])
##
## Ocupados Desocupados Inactivos
## 8182 507 4959
# Códigos de CATE_PEA: 1 = Empleado / obrero público; 2 = Empleado / obrero privado; 3 = Empleador o patrón; 4 = Trabajador por cuenta propia; 5 = Trabajador familiar no remunerado; 6 = Trabajador/a doméstico/a; 9 = NR
# CATE_PEA: frequency of the raw codes.
table(base[["CATE_PEA"]])
##
## 1 2 3 4 5 6 9
## 712 2939 352 3080 921 618 1
# Attach one label per code, in the order the codes are tabulated above.
base[["CATE_PEA"]] <- factor(
  base[["CATE_PEA"]],
  labels = c(
    "Pùblicos", "Privados", "Patròn", "Independiente",
    "No remunerado", "Domèsticos", "NR"
  )
)
table(base[["CATE_PEA"]])
##
## Pùblicos Privados Patròn Independiente No remunerado
## 712 2939 352 3080 921
## Domèsticos NR
## 618 1
# Cross-tabulation: CATE_PEA in rows, P06 in columns.
table(base[["CATE_PEA"]], base[["P06"]])
##
## Hombres Mujeres
## Pùblicos 320 392
## Privados 2112 827
## Patròn 278 74
## Independiente 1866 1214
## No remunerado 430 491
## Domèsticos 38 580
## NR 1 0
# e01aimde is held as character, as the summary output below shows.
summary(base[["e01aimde"]])
## Length Class Mode
## 16569 character character
#tab1=aggregate(base$e01aimde,list(base$CATE_PEA,base$P06),mean)
# Check the recoded PEAA factor: structure, then frequencies.
str(base[["PEAA"]])
## Factor w/ 3 levels "Ocupados","Desocupados",..: 3 3 1 1 1 2 3 NA NA NA ...
table(base[["PEAA"]])
##
## Ocupados Desocupados Inactivos
## 8182 507 4959
# Keep only rows labelled "Ocupados" in PEAA and "Pùblicos" in CATE_PEA;
# subset() also drops rows where the condition evaluates to NA.
basefil <- subset(base, PEAA == "Ocupados" & CATE_PEA == "Pùblicos")
#View(basefil)
table(basefil[["CATE_PEA"]])
##
## Pùblicos Privados Patròn Independiente No remunerado
## 701 0 0 0 0
## Domèsticos NR
## 0 0
summary(basefil$e01aimde)
## Length Class Mode
## 701 character character
# e01aimde is stored as character (see the summary above), so calling mean()
# on it directly returned NA for both groups with the warning
# "argument is not numeric or logical".
# Convert the values to numeric before averaging. gsub() turns a decimal comma
# into a decimal point -- presumably the reason the column was parsed as text,
# since the file is read without dec = "," (TODO confirm on the raw data).
# Values that still cannot be converted become NA with an explicit coercion
# warning, and na.rm = TRUE keeps them from turning the group mean into NA.
# Result columns: Group.1 = CATE_PEA, Group.2 = P06, x = mean of e01aimde.
tab1 <- aggregate(
  as.numeric(gsub(",", ".", basefil$e01aimde, fixed = TRUE)),
  list(basefil$CATE_PEA, basefil$P06),
  mean,
  na.rm = TRUE
)
tab1
#setwd("~/OD/OneDrive/FACEN_MScEstad/Dia2")
#basemh=read.csv("D:/OD/OneDrive/FACEN_MScEstad/Dia2/nomina-2021-11/nomina_2021-11.csv",sep=",",header=T, comment.char = "", strip.white = TRUE,stringsAsFactors = TRUE)
#save(basemh, file = "D:/OD/OneDrive/FACEN_MScEstad/Dia2/basemh.Rdata")
#load("D:/OD/OneDrive/FACEN_MScEstad/Dia2/basemh.Rdata")
# Restore the saved objects from the local .Rdata file; `basemh` is used below.
load(file = "basemh.Rdata")
colnames(basemh)
## [1] "anio" "mes"
## [3] "codigoNivel" "descripcionNivel"
## [5] "codigoEntidad" "descripcionEntidad"
## [7] "codigoPrograma" "descripcionPrograma"
## [9] "codigoSubprograma" "descripcionSubprograma"
## [11] "codigoProyecto" "descripcionProyecto"
## [13] "codigoUnidadResponsable" "descripcionUnidadResponsable"
## [15] "codigoObjetoGasto" "conceptoGasto"
## [17] "fuenteFinanciamiento" "linea"
## [19] "codigoPersona" "nombres"
## [21] "apellidos" "sexo"
## [23] "discapacidad" "codigoCategoria"
## [25] "cargo" "horasCatedra"
## [27] "fechaIngreso" "tipoPersonal"
## [29] "lugar" "montoPresupuestado"
## [31] "montoDevengado" "mesCorte"
## [33] "anioCorte" "fechaCorte"
## [35] "nivelAbr" "entidadAbr"
## [37] "programaAbr" "subprogramaAbr"
## [39] "proyectoAbr" "unidadAbr"
#str(basemh)
#table (basemh$entidadAbr)
# Relative frequency of each descripcionNivel value.
prop.table(table(basemh[["descripcionNivel"]]))
##
## 11-PODER LEGISLATIVO
## 0.0057897509
## 12-PODER EJECUTIVO
## 0.8910931222
## 14-CONTRALOR\xcdA GENERAL DE LA REP\xdaBLICA
## 0.0019155169
## 23-ENTES AUT\xd3NOMOS Y AUT\xc1RQUICOS
## 0.0049762996
## 13-PODER JUDICIAL
## 0.0414005333
## 15-OTROS ORGANISMOS DEL ESTADO
## 0.0002830994
## 27-ENTIDADES FINANCIERAS OFICIALES
## 0.0002968867
## 28-UNIVERSIDADES NACIONALES
## 0.0542447909
prop.table(table(basemh$tipoPersonal))
##
## COM CON PER
## 0.001724333 0.003234503 0.065713075 0.929328089
# Keep the seven columns used below, and only the rows whose montoDevengado is
# positive and below 50000001 and whose tipoPersonal is "PER" or "CON".
# The previous condition wrapped the tipoPersonal test in any(), which
# collapses it to a single TRUE that is recycled over every row, so no row was
# ever excluded by tipoPersonal. %in% evaluates the test row by row.
basemhfil <- subset(
  basemh,
  subset = montoDevengado > 0 & montoDevengado < 50000001 &
    tipoPersonal %in% c("PER", "CON"),
  select = c(
    "codigoPersona", "montoDevengado", "codigoEntidad", "descripcionEntidad",
    "sexo", "tipoPersonal", "entidadAbr"
  )
)
# NOTE(review): the cached basemhfilYdesdup.Rdata loaded further down still
# contains "" and "COM" groups, so it was presumably built from the unfiltered
# data -- regenerate it from this corrected basemhfil.
str(basemhfil)
# The listing below was recorded with the old, ineffective filter; the row
# count will be lower after the fix. Re-run to refresh it.
## 'data.frame': 1087768 obs. of 7 variables:
## $ codigoPersona : Factor w/ 255392 levels "1000000","1000019",..: 7814 80670 244442 106613 12860 142102 91808 55234 1406 67839 ...
## $ montoDevengado : int 2800000 3700000 5400000 9300000 2800000 4600000 2600000 5700000 6000000 2400000 ...
## $ codigoEntidad : int 2 2 2 2 2 2 2 2 2 2 ...
## $ descripcionEntidad: Factor w/ 55 levels "002-VICEPRESIDENCIA DE LA REP\xdaBLICA",..: 33 33 33 33 33 33 33 33 33 33 ...
## $ sexo : Factor w/ 2 levels "F","M": 1 1 2 2 2 2 1 2 2 2 ...
## $ tipoPersonal : Factor w/ 4 levels "","COM","CON",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ entidadAbr : Factor w/ 55 levels "ANEAES","ANTSV",..: 27 27 27 27 27 27 27 27 27 27 ...
#table(basemhfil$descripcionEntidad)
#suma de todos los conceptos
library("plyr")
#PROCESO LENTO
#basemhfilYdesdup<-ddply(basemhfil, .(codigoPersona, sexo,tipoPersonal,entidadAbr), summarise, saltot=sum(montoDevengado))
#save(basemhfilYdesdup, file = "basemhfilYdesdup.Rdata")
#save(basemh, file = "D:/OD/OneDrive/FACEN_MScEstad/Dia2/basemhfilYdesdup.Rdata")
#load("D:/OD/OneDrive/FACEN_MScEstad/Dia2/basemhfilYdesdup.Rdata")
# Load the cached aggregate instead of re-running the slow ddply step that is
# commented out above.
load(file = "basemhfilYdesdup.Rdata")
#FPsel_desdup
summary(basemhfilYdesdup[["saltot"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 25000 3200000 5130000 5585739 6981491 116291160
# Ranking of the highest saltot values, separately for each sexo.
# NOTE(review): the original heading announced the top 10, but 11 rows per
# group are kept -- confirm which one is intended.
library(data.table)
# Sort by saltot in decreasing order and convert to a data.table keyed by sexo.
baseorder <- data.table(
  basemhfilYdesdup[order(basemhfilYdesdup$saltot, decreasing = TRUE), ],
  key = "sexo"
)
# First 11 rows of every sexo group.
baseorder <- baseorder[, head(.SD, n = 11L), by = "sexo"]
print(baseorder) # show the result
## sexo codigoPersona tipoPersonal entidadAbr saltot
## 1: F 797425 PER MRE 91556808
## 2: F 759897 PER MRE 73919115
## 3: F 1002073 PER MRE 65197278
## 4: F 1301934 PER MRE 63011664
## 5: F 1740087 PER UNA 58152566
## 6: F 643504 PER MRE 54984000
## 7: F 866548 PER MRE 51224469
## 8: F 2877710 PER MRE 50730654
## 9: F 1263776 PER MRE 50528113
## 10: F 1192965 PER MRE 49554330
## 11: F 2292997 PER MRE 48304061
## 12: M 1004539 PER MRE 116291160
## 13: M 2321703 PER MRE 94558734
## 14: M 678245 PER MRE 81993968
## 15: M 3658821 PER MRE 76489617
## 16: M 854193 PER MRE 75603000
## 17: M 824495 PER MRE 70228314
## 18: M 999359 PER MRE 69417300
## 19: M 752011 PER MAG 68767178
## 20: M 1122551 PER MRE 67602828
## 21: M 1040348 PER MRE 66001419
## 22: M 993329 PER MRE 65980800
## sexo codigoPersona tipoPersonal entidadAbr saltot
# Box plot of saltot for every sexo x tipoPersonal combination.
boxplot(
  basemhfilYdesdup$saltot ~ basemhfilYdesdup$sexo + basemhfilYdesdup$tipoPersonal
)
# Mean saltot by sexo (Group.1) and tipoPersonal (Group.2).
tabsalmed <- aggregate(
  x = basemhfilYdesdup$saltot,
  by = list(basemhfilYdesdup$sexo, basemhfilYdesdup$tipoPersonal),
  FUN = mean
)
print(tabsalmed)
## Group.1 Group.2 x
## 1 F 1712275
## 2 M 2015482
## 3 F COM 2433331
## 4 M COM 1844975
## 5 F CON 4137064
## 6 M CON 4214396
## 7 F PER 5822918
## 8 M PER 6052574
# Number of saltot entries in each sexo (Group.1) x tipoPersonal (Group.2) group.
tabsalcount <- aggregate(
  x = basemhfilYdesdup$saltot,
  by = list(basemhfilYdesdup$sexo, basemhfilYdesdup$tipoPersonal),
  FUN = length
)
print(tabsalcount)
## Group.1 Group.2 x
## 1 F 311
## 2 M 653
## 3 F COM 529
## 4 M COM 2036
## 5 F CON 28069
## 6 M CON 17776
## 7 F PER 112688
## 8 M PER 109530
# Bar chart of the tipoPersonal x sexo count table.
barplot(table(basemhfil[["tipoPersonal"]], basemhfil[["sexo"]]))
# Ranking of entities by mean saltot: first the mean per entidadAbr.
library(data.table)
tabsalmedenti <- aggregate(
  x = basemhfilYdesdup$saltot,
  by = list(basemhfilYdesdup$entidadAbr),
  FUN = mean
)
print(tabsalmedenti)
## Group.1 x
## 1 ANEAES 5316432
## 2 ANTSV 5820687
## 3 ARRN 8467087
## 4 CAH 5020433
## 5 CGR 9397104
## 6 CM 6463278
## 7 CN 6938223
## 8 CNV 7715010
## 9 CONACOM 10014621
## 10 CONES 5220558
## 11 CSJ 5265166
## 12 DIBEN 7014857
## 13 DINACOPA 3645817
## 14 DINAPI 4475428
## 15 DP 4145913
## 16 ERSSAN 8710000
## 17 FONDEC 6196420
## 18 HCD 7129069
## 19 HCS 8615710
## 20 INCOOP 6194309
## 21 INDERT 6348662
## 22 INDI 8840491
## 23 INFONA 4769778
## 24 INTN 4481768
## 25 IPA 5820992
## 26 IPTA 5009311
## 27 JE 4186092
## 28 JEM 8543182
## 29 MAG 4645881
## 30 MDN 5344299
## 31 MDP 6077744
## 32 MEC 5910132
## 33 MH 6877361
## 34 MI 6029024
## 35 MIC 7639034
## 36 MJ 3951414
## 37 MM 4903271
## 38 MNPT 10932819
## 39 MOPC 4622488
## 40 MP 6889804
## 41 MRE 14779353
## 42 MSPYBS 5158047
## 43 MTESS 4182931
## 44 PR 6068721
## 45 SEDECO 5293580
## 46 SENACSA 6186697
## 47 UNA 5140841
## 48 UNC 4341667
## 49 UNCA 3694196
## 50 UNCANI 5522952
## 51 UNE 4649771
## 52 UNI 3644561
## 53 UNP 4555630
## 54 UNVES 2440415
## 55 VPR 3308999
# Entities sorted from the highest to the lowest mean saltot (column x).
baseorder <- tabsalmedenti[order(tabsalmedenti[["x"]], decreasing = TRUE), ]
print(baseorder) # show the sorted table
## Group.1 x
## 41 MRE 14779353
## 38 MNPT 10932819
## 9 CONACOM 10014621
## 5 CGR 9397104
## 22 INDI 8840491
## 16 ERSSAN 8710000
## 19 HCS 8615710
## 28 JEM 8543182
## 3 ARRN 8467087
## 8 CNV 7715010
## 35 MIC 7639034
## 18 HCD 7129069
## 12 DIBEN 7014857
## 7 CN 6938223
## 40 MP 6889804
## 33 MH 6877361
## 6 CM 6463278
## 21 INDERT 6348662
## 17 FONDEC 6196420
## 20 INCOOP 6194309
## 46 SENACSA 6186697
## 31 MDP 6077744
## 44 PR 6068721
## 34 MI 6029024
## 32 MEC 5910132
## 25 IPA 5820992
## 2 ANTSV 5820687
## 50 UNCANI 5522952
## 30 MDN 5344299
## 1 ANEAES 5316432
## 45 SEDECO 5293580
## 11 CSJ 5265166
## 10 CONES 5220558
## 42 MSPYBS 5158047
## 47 UNA 5140841
## 4 CAH 5020433
## 26 IPTA 5009311
## 37 MM 4903271
## 23 INFONA 4769778
## 51 UNE 4649771
## 29 MAG 4645881
## 39 MOPC 4622488
## 53 UNP 4555630
## 24 INTN 4481768
## 14 DINAPI 4475428
## 48 UNC 4341667
## 27 JE 4186092
## 43 MTESS 4182931
## 15 DP 4145913
## 36 MJ 3951414
## 49 UNCA 3694196
## 13 DINACOPA 3645817
## 52 UNI 3644561
## 55 VPR 3308999
## 54 UNVES 2440415