true
#setwd("~/OD/OneDrive/FACEN_MScEstad/Dia1")

Verificar si el salario medio de las mujeres y el de los hombres en el sector público son iguales

 # en caso de trabajar en la version de escritorio
#base=read.csv2("https://www.ine.gov.py/datos/encuestas/eph/Poblacion/EPH-2021/data/9e824reg02_ephc2021.csv", header=TRUE,sep=";",dec = ",")
#save(base, file = "D:/OD/OneDrive/FACEN_MScEstad/Dia2/base_ephc2021.Rdata")

# When working in the cloud version: read the EPH 2021 microdata from the
# working directory. The file is ';'-separated; dec = "," mirrors the desktop
# variant (read.csv2(..., dec = ",")) so that values written with a decimal
# comma are parsed as numbers rather than kept as character strings.
base <- read.csv(
  "9e824reg02_ephc2021.csv",
  header = TRUE,
  sep = ";",
  dec = ","
)
# Store a binary copy of the table next to the script for later reloads.
save("base", file = "base_ephc2021.Rdata")
# List every column name available in the survey table.
names(base)
##   [1] "UPM"                 "NVIVI"               "NHOGA"              
##   [4] "DPTOREP"             "AREA"                "L02"                
##   [7] "P02"                 "P03"                 "P04"                
##  [10] "P04A"                "P04B"                "P05C"               
##  [13] "P05P"                "P05M"                "P06"                
##  [16] "P08D"                "P08M"                "P08A"               
##  [19] "P09"                 "P10A"                "P10AB"              
##  [22] "P10Z"                "P11A"                "P11AB"              
##  [25] "P11Z"                "P12"                 "A01"                
##  [28] "A01A"                "A02"                 "A03"                
##  [31] "A04"                 "A04B"                "A04A"               
##  [34] "A05"                 "A07"                 "A08"                
##  [37] "A10"                 "A11A"                "A11M"               
##  [40] "A11S"                "A12"                 "A13REC"             
##  [43] "A14REC"              "A15"                 "A16"                
##  [46] "A17A"                "A17M"                "A17S"               
##  [49] "A18"                 "A18A"                "B01REC"             
##  [52] "B02REC"              "B03LU"               "B03MA"              
##  [55] "B03MI"               "B03JU"               "B03VI"              
##  [58] "B03SA"               "B03DO"               "B04"                
##  [61] "B05"                 "B05A"                "B06"                
##  [64] "B07A"                "B07M"                "B07S"               
##  [67] "B08"                 "B09A"                "B09M"               
##  [70] "B09S"                "B10"                 "B11"                
##  [73] "B12"                 "B12A"                "B12B"               
##  [76] "B12C"                "B13"                 "B14"                
##  [79] "B15"                 "B16G"                "B16U"               
##  [82] "B16D"                "B16T"                "B17"                
##  [85] "B18AG"               "B18AU"               "B18BG"              
##  [88] "B18BU"               "B19"                 "B20G"               
##  [91] "B20U"                "B20D"                "B20T"               
##  [94] "B21"                 "B22"                 "B23"                
##  [97] "B24"                 "B25"                 "B26"                
## [100] "B271"                "B272"                "B28"                
## [103] "B29"                 "B30"                 "B31"                
## [106] "C01REC"              "C02REC"              "C03"                
## [109] "C04"                 "C05"                 "C06"                
## [112] "C07"                 "C08"                 "C09"                
## [115] "C101"                "C102"                "C11G"               
## [118] "C11U"                "C11D"                "C11T"               
## [121] "C12"                 "C13AG"               "C13AU"              
## [124] "C13BG"               "C13BU"               "C14"                
## [127] "C14A"                "C14B"                "C14C"               
## [130] "C15"                 "C16REC"              "C17REC"             
## [133] "C18"                 "C18A"                "C18B"               
## [136] "C19"                 "D01"                 "D02"                
## [139] "D03"                 "D04"                 "D05"                
## [142] "E01A"                "E01B"                "E01C"               
## [145] "E01D"                "E01E"                "E01F"               
## [148] "E01G"                "E01H"                "E01I"               
## [151] "E01J"                "E01K"                "E01L"               
## [154] "E01M"                "E02D1"               "E02D2"              
## [157] "E02B"                "ED01"                "ED02"               
## [160] "ED03"                "ED0504"              "ED06C"              
## [163] "ED08"                "ED09"                "ED10"               
## [166] "ED11F1"              "ED11F1A"             "ED11GH1"            
## [169] "ED11GH1A"            "ED12"                "ED13"               
## [172] "ED14"                "ED14A"               "ED15"               
## [175] "S01A"                "S01B"                "S02"                
## [178] "S03"                 "S03A"                "S03B"               
## [181] "S03C"                "S04"                 "S05"                
## [184] "S06"                 "S07"                 "S08"                
## [187] "S09"                 "CATE_PEA"            "TAMA_PEA"           
## [190] "OCUP_PEA"            "RAMA_PEA"            "HORAB"              
## [193] "HORABC"              "HORABCO"             "PEAD"               
## [196] "PEAA"                "TIPOHOGA"            "FEX"                
## [199] "NJEF"                "NCON"                "NPAD"               
## [202] "NMAD"                "TIC01"               "TIC02"              
## [205] "TIC03"               "TIC0401"             "TIC0402"            
## [208] "TIC0403"             "TIC0404"             "TIC0405"            
## [211] "TIC0406"             "TIC0407"             "TIC0408"            
## [214] "TIC0409"             "TIC0501"             "TIC0502"            
## [217] "TIC0503"             "TIC0504"             "TIC0505"            
## [220] "TIC0506"             "TIC0507"             "TIC0508"            
## [223] "TIC0509"             "TIC0510"             "TIC0511"            
## [226] "TIC0512"             "TIC0513"             "TIC06"              
## [229] "TIC07"               "añoest"              "ra06ya09"           
## [232] "e01aimde"            "e01bimde"            "e01cimde"           
## [235] "e01dde"              "e01ede"              "e01fde"             
## [238] "e01gde"              "e01hde"              "e01ide"             
## [241] "e01jde"              "e01kde"              "e01lde"             
## [244] "e01mde"              "e01kjde"             "e02bde"             
## [247] "ingrevasode"         "ingrepytyvõde"       "ingresect_privadode"
## [250] "ipcm"                "pobrezai"            "pobnopoi"           
## [253] "quintili"            "decili"              "quintiai"           
## [256] "decilai"             "informalidad"

Variables de interes en la EPH 2021

P06 sexo

# Recode sex (P06) as a labelled factor and check the resulting counts.
# factor() attaches the labels to the sorted distinct codes, in that order.

base[["P06"]] <- factor(base[["P06"]], labels = c("Hombres", "Mujeres"))
table(base[["P06"]])
## 
## Hombres Mujeres 
##    8149    8420

PEAA: situación de ocupación

# Frequency of the raw PEAA codes before recoding.
table(base[["PEAA"]])
## 
##    1    2    3 
## 8182  507 4959
# Recode the three codes as labelled occupation-status levels, then recount.

base[["PEAA"]] <- factor(
  base[["PEAA"]],
  labels = c("Ocupados", "Desocupados", "Inactivos")
)
table(base[["PEAA"]])
## 
##    Ocupados Desocupados   Inactivos 
##        8182         507        4959

CATEGORIA DE OCUPACION CATE_PEA

Códigos de CATE_PEA:

- 1: Empleado / obrero público
- 2: Empleado / obrero privado
- 3: Empleador o patrón
- 4: Trabajador por cuenta propia
- 5: Trabajador familiar no remunerado
- 6: Trabajador/a doméstico/a
- 9: NR

# Frequency of the raw occupation-category codes before recoding.
table(base[["CATE_PEA"]])
## 
##    1    2    3    4    5    6    9 
##  712 2939  352 3080  921  618    1
# Attach one label per observed code (1-6 and 9), in ascending code order,
# then recount to confirm the totals are unchanged.

base[["CATE_PEA"]] <- factor(
  base[["CATE_PEA"]],
  labels = c(
    "Pùblicos", "Privados", "Patròn", "Independiente",
    "No remunerado", "Domèsticos", "NR"
  )
)
table(base[["CATE_PEA"]])
## 
##      Pùblicos      Privados        Patròn Independiente No remunerado 
##           712          2939           352          3080           921 
##    Domèsticos            NR 
##           618             1

Seleccion de una submuestra

# Cross-tabulate occupation category (rows) against sex (columns).
table(base[["CATE_PEA"]], base[["P06"]])
##                
##                 Hombres Mujeres
##   Pùblicos          320     392
##   Privados         2112     827
##   Patròn            278      74
##   Independiente    1866    1214
##   No remunerado     430     491
##   Domèsticos         38     580
##   NR                  1       0
# Inspect the income column; the recorded summary shows it is character.
summary(base[["e01aimde"]])
##    Length     Class      Mode 
##     16569 character character
#tab1=aggregate(base$e01aimde,list(base$CATE_PEA,base$P06),mean)
# Check the structure and the level counts of the recoded status factor.
str(base[["PEAA"]])
##  Factor w/ 3 levels "Ocupados","Desocupados",..: 3 3 1 1 1 2 3 NA NA NA ...
table(base[["PEAA"]])
## 
##    Ocupados Desocupados   Inactivos 
##        8182         507        4959

Filtrar solo a los ocupados del sector público (el filtro por ingresos no nulos todavía no se aplica en el código)

# Keep only employed respondents whose occupation category is the public
# sector; subset() drops rows where the condition evaluates to NA.
basefil <- subset(base, PEAA == "Ocupados" & CATE_PEA == "Pùblicos")
#View(basefil)
# Unused factor levels are retained, so the other categories appear with 0.
table(basefil[["CATE_PEA"]])
## 
##      Pùblicos      Privados        Patròn Independiente No remunerado 
##           701             0             0             0             0 
##    Domèsticos            NR 
##             0             0

Ingreso en la ocupacion principal

# Mean income in the main occupation (e01aimde) by category and sex for the
# filtered public-sector sample.
summary(basefil$e01aimde)
##    Length     Class      Mode 
##       701 character character
# The column is stored as character (see the summary recorded above), so
# mean() cannot average it directly. Convert it to numeric first, accepting a
# decimal comma; values that cannot be parsed become NA (with a warning) and
# are excluded from the mean through na.rm = TRUE.
# NOTE(review): check whether this column uses sentinel codes for
# non-response before interpreting the means -- TODO confirm.
ingreso_principal <- basefil$e01aimde
if (!is.numeric(ingreso_principal)) {
  ingreso_principal <- as.numeric(
    sub(",", ".", trimws(ingreso_principal), fixed = TRUE)
  )
}
tab1 <- aggregate(
  ingreso_principal,
  list(basefil$CATE_PEA, basefil$P06),
  mean,
  na.rm = TRUE
)
# Previously recorded result, computed on the character column: mean() warned
# "argument is not numeric or logical: returning NA" once per group and both
# rows (Pùblicos/Hombres and Pùblicos/Mujeres) were NA.
tab1

Verificamos comparando los resultados obtenidos con los promedios estimados usando los registros administrativos

# Desktop workflow (kept for reference): read the November 2021 payroll csv
# once and cache it as an .Rdata file.
#setwd("~/OD/OneDrive/FACEN_MScEstad/Dia2")
#basemh=read.csv("D:/OD/OneDrive/FACEN_MScEstad/Dia2/nomina-2021-11/nomina_2021-11.csv",sep=",",header=T, comment.char = "", strip.white = TRUE,stringsAsFactors = TRUE)
#save(basemh, file = "D:/OD/OneDrive/FACEN_MScEstad/Dia2/basemh.Rdata")
#load("D:/OD/OneDrive/FACEN_MScEstad/Dia2/basemh.Rdata")
# Cloud workflow: restore the cached payroll table (object `basemh`) from the
# working directory and list its columns.
load(file = "basemh.Rdata")
names(basemh)
##  [1] "anio"                         "mes"                         
##  [3] "codigoNivel"                  "descripcionNivel"            
##  [5] "codigoEntidad"                "descripcionEntidad"          
##  [7] "codigoPrograma"               "descripcionPrograma"         
##  [9] "codigoSubprograma"            "descripcionSubprograma"      
## [11] "codigoProyecto"               "descripcionProyecto"         
## [13] "codigoUnidadResponsable"      "descripcionUnidadResponsable"
## [15] "codigoObjetoGasto"            "conceptoGasto"               
## [17] "fuenteFinanciamiento"         "linea"                       
## [19] "codigoPersona"                "nombres"                     
## [21] "apellidos"                    "sexo"                        
## [23] "discapacidad"                 "codigoCategoria"             
## [25] "cargo"                        "horasCatedra"                
## [27] "fechaIngreso"                 "tipoPersonal"                
## [29] "lugar"                        "montoPresupuestado"          
## [31] "montoDevengado"               "mesCorte"                    
## [33] "anioCorte"                    "fechaCorte"                  
## [35] "nivelAbr"                     "entidadAbr"                  
## [37] "programaAbr"                  "subprogramaAbr"              
## [39] "proyectoAbr"                  "unidadAbr"
# Optional exploration, disabled by default.
#str(basemh)
#table (basemh$entidadAbr)
# Share of payroll rows that belongs to each government level.
prop.table(table(basemh[["descripcionNivel"]]))
## 
##                         11-PODER LEGISLATIVO 
##                                 0.0057897509 
##                           12-PODER EJECUTIVO 
##                                 0.8910931222 
## 14-CONTRALOR\xcdA GENERAL DE LA REP\xdaBLICA 
##                                 0.0019155169 
##       23-ENTES AUT\xd3NOMOS Y AUT\xc1RQUICOS 
##                                 0.0049762996 
##                            13-PODER JUDICIAL 
##                                 0.0414005333 
##               15-OTROS ORGANISMOS DEL ESTADO 
##                                 0.0002830994 
##           27-ENTIDADES FINANCIERAS OFICIALES 
##                                 0.0002968867 
##                  28-UNIVERSIDADES NACIONALES 
##                                 0.0542447909
# Share of payroll rows by personnel type (one level is the empty string).
prop.table(table(basemh$tipoPersonal))
## 
##                     COM         CON         PER 
## 0.001724333 0.003234503 0.065713075 0.929328089
# Keep the columns needed for the salary comparison and only the rows with a
# positive accrued amount below 50,000,001 whose personnel type is PER or CON.
# The previous condition wrapped the type test in any(), which collapses it to
# a single TRUE that is recycled over every row, so no row was ever excluded
# by personnel type. %in% tests each row individually.
basemhfil <- subset(
  basemh,
  subset = montoDevengado > 0 &
    montoDevengado < 50000001 &
    tipoPersonal %in% c("PER", "CON"),
  select = c(
    "codigoPersona", "montoDevengado", "codigoEntidad",
    "descripcionEntidad", "sexo", "tipoPersonal", "entidadAbr"
  )
)
str(basemhfil)
# NOTE: the str() output recorded below was produced with the earlier filter,
# which did not restrict tipoPersonal; expect fewer rows after this fix.
## 'data.frame':    1087768 obs. of  7 variables:
##  $ codigoPersona     : Factor w/ 255392 levels "1000000","1000019",..: 7814 80670 244442 106613 12860 142102 91808 55234 1406 67839 ...
##  $ montoDevengado    : int  2800000 3700000 5400000 9300000 2800000 4600000 2600000 5700000 6000000 2400000 ...
##  $ codigoEntidad     : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ descripcionEntidad: Factor w/ 55 levels "002-VICEPRESIDENCIA DE LA REP\xdaBLICA",..: 33 33 33 33 33 33 33 33 33 33 ...
##  $ sexo              : Factor w/ 2 levels "F","M": 1 1 2 2 2 2 1 2 2 2 ...
##  $ tipoPersonal      : Factor w/ 4 levels "","COM","CON",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ entidadAbr        : Factor w/ 55 levels "ANEAES","ANTSV",..: 27 27 27 27 27 27 27 27 27 27 ...
#table(basemhfil$descripcionEntidad)
# Sum of all pay concepts per person.
library(plyr)
# SLOW STEP: the ddply() aggregation below is kept commented out and its
# cached result is loaded from disk instead.

#basemhfilYdesdup<-ddply(basemhfil, .(codigoPersona, sexo,tipoPersonal,entidadAbr), summarise, saltot=sum(montoDevengado))
#save(basemhfilYdesdup, file = "basemhfilYdesdup.Rdata")
# The desktop save below originally wrote `basemh` under this file name; the
# object matching the file is basemhfilYdesdup.
#save(basemhfilYdesdup, file = "D:/OD/OneDrive/FACEN_MScEstad/Dia2/basemhfilYdesdup.Rdata")
#load("D:/OD/OneDrive/FACEN_MScEstad/Dia2/basemhfilYdesdup.Rdata")
# NOTE(review): the tables recorded further down this file show groups with
# tipoPersonal "" and "COM", so the cached object appears to have been built
# without the PER/CON restriction -- consider regenerating it.
load(file = "basemhfilYdesdup.Rdata")
#FPsel_desdup
# Distribution of the total salary per person.
summary(basemhfilYdesdup[["saltot"]])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##     25000   3200000   5130000   5585739   6981491 116291160
# Ranking of the 10 highest total salaries in the public payroll, per sex.
library(data.table)
# Sort every person by total salary, highest first.
baseorder <- basemhfilYdesdup[order(basemhfilYdesdup$saltot, decreasing = TRUE), ]
# Keying by sexo groups the rows by sex while keeping the salary order
# inside each group.
baseorder <- data.table(baseorder, key = "sexo")
# Take the first 10 rows of each sex group. The code previously took 11,
# one more than the "top 10" this section announces.
baseorder <- baseorder[, head(.SD, 10L), by = sexo]
baseorder                                                  # Print the ranking
# NOTE: the output recorded below predates this fix and lists 11 rows per sex.
##     sexo codigoPersona tipoPersonal entidadAbr    saltot
##  1:    F        797425          PER        MRE  91556808
##  2:    F        759897          PER        MRE  73919115
##  3:    F       1002073          PER        MRE  65197278
##  4:    F       1301934          PER        MRE  63011664
##  5:    F       1740087          PER        UNA  58152566
##  6:    F        643504          PER        MRE  54984000
##  7:    F        866548          PER        MRE  51224469
##  8:    F       2877710          PER        MRE  50730654
##  9:    F       1263776          PER        MRE  50528113
## 10:    F       1192965          PER        MRE  49554330
## 11:    F       2292997          PER        MRE  48304061
## 12:    M       1004539          PER        MRE 116291160
## 13:    M       2321703          PER        MRE  94558734
## 14:    M        678245          PER        MRE  81993968
## 15:    M       3658821          PER        MRE  76489617
## 16:    M        854193          PER        MRE  75603000
## 17:    M        824495          PER        MRE  70228314
## 18:    M        999359          PER        MRE  69417300
## 19:    M        752011          PER        MAG  68767178
## 20:    M       1122551          PER        MRE  67602828
## 21:    M       1040348          PER        MRE  66001419
## 22:    M        993329          PER        MRE  65980800
##     sexo codigoPersona tipoPersonal entidadAbr    saltot
# Distribution of total salary for every sex x personnel-type combination.
boxplot(
  basemhfilYdesdup$saltot ~ basemhfilYdesdup$sexo + basemhfilYdesdup$tipoPersonal
)

# Mean total salary for each sex x personnel-type group.
tabsalmed <- aggregate(
  x = basemhfilYdesdup$saltot,
  by = list(basemhfilYdesdup$sexo, basemhfilYdesdup$tipoPersonal),
  FUN = mean
)
tabsalmed
##   Group.1 Group.2       x
## 1       F         1712275
## 2       M         2015482
## 3       F     COM 2433331
## 4       M     COM 1844975
## 5       F     CON 4137064
## 6       M     CON 4214396
## 7       F     PER 5822918
## 8       M     PER 6052574
# Number of rows of basemhfilYdesdup in each sex x personnel-type group.
tabsalcount <- aggregate(
  x = basemhfilYdesdup$saltot,
  by = list(basemhfilYdesdup$sexo, basemhfilYdesdup$tipoPersonal),
  FUN = length
)
tabsalcount
##   Group.1 Group.2      x
## 1       F            311
## 2       M            653
## 3       F     COM    529
## 4       M     COM   2036
## 5       F     CON  28069
## 6       M     CON  17776
## 7       F     PER 112688
## 8       M     PER 109530
# One bar per sex, stacked by personnel type.
# NOTE(review): this plot counts rows of basemhfil, while the table above is
# built from basemhfilYdesdup -- confirm which table is intended.
barplot(table(basemhfil[["tipoPersonal"]], basemhfil[["sexo"]]))

# Ranking of entities by mean total salary.
library(data.table)

# Mean total salary per entity abbreviation (columns Group.1 and x).
tabsalmedenti <- aggregate(
  x = basemhfilYdesdup$saltot,
  by = list(basemhfilYdesdup$entidadAbr),
  FUN = mean
)
tabsalmedenti
##     Group.1        x
## 1    ANEAES  5316432
## 2     ANTSV  5820687
## 3      ARRN  8467087
## 4       CAH  5020433
## 5       CGR  9397104
## 6        CM  6463278
## 7        CN  6938223
## 8       CNV  7715010
## 9   CONACOM 10014621
## 10    CONES  5220558
## 11      CSJ  5265166
## 12    DIBEN  7014857
## 13 DINACOPA  3645817
## 14   DINAPI  4475428
## 15       DP  4145913
## 16   ERSSAN  8710000
## 17   FONDEC  6196420
## 18      HCD  7129069
## 19      HCS  8615710
## 20   INCOOP  6194309
## 21   INDERT  6348662
## 22     INDI  8840491
## 23   INFONA  4769778
## 24     INTN  4481768
## 25      IPA  5820992
## 26     IPTA  5009311
## 27       JE  4186092
## 28      JEM  8543182
## 29      MAG  4645881
## 30      MDN  5344299
## 31      MDP  6077744
## 32      MEC  5910132
## 33       MH  6877361
## 34       MI  6029024
## 35      MIC  7639034
## 36       MJ  3951414
## 37       MM  4903271
## 38     MNPT 10932819
## 39     MOPC  4622488
## 40       MP  6889804
## 41      MRE 14779353
## 42   MSPYBS  5158047
## 43    MTESS  4182931
## 44       PR  6068721
## 45   SEDECO  5293580
## 46  SENACSA  6186697
## 47      UNA  5140841
## 48      UNC  4341667
## 49     UNCA  3694196
## 50   UNCANI  5522952
## 51      UNE  4649771
## 52      UNI  3644561
## 53      UNP  4555630
## 54    UNVES  2440415
## 55      VPR  3308999
# Reorder the per-entity means from highest to lowest; every entity is kept.
baseorder <- tabsalmedenti[
  order(tabsalmedenti[["x"]], decreasing = TRUE),
]
baseorder                                            # Print the sorted table
##     Group.1        x
## 41      MRE 14779353
## 38     MNPT 10932819
## 9   CONACOM 10014621
## 5       CGR  9397104
## 22     INDI  8840491
## 16   ERSSAN  8710000
## 19      HCS  8615710
## 28      JEM  8543182
## 3      ARRN  8467087
## 8       CNV  7715010
## 35      MIC  7639034
## 18      HCD  7129069
## 12    DIBEN  7014857
## 7        CN  6938223
## 40       MP  6889804
## 33       MH  6877361
## 6        CM  6463278
## 21   INDERT  6348662
## 17   FONDEC  6196420
## 20   INCOOP  6194309
## 46  SENACSA  6186697
## 31      MDP  6077744
## 44       PR  6068721
## 34       MI  6029024
## 32      MEC  5910132
## 25      IPA  5820992
## 2     ANTSV  5820687
## 50   UNCANI  5522952
## 30      MDN  5344299
## 1    ANEAES  5316432
## 45   SEDECO  5293580
## 11      CSJ  5265166
## 10    CONES  5220558
## 42   MSPYBS  5158047
## 47      UNA  5140841
## 4       CAH  5020433
## 26     IPTA  5009311
## 37       MM  4903271
## 23   INFONA  4769778
## 51      UNE  4649771
## 29      MAG  4645881
## 39     MOPC  4622488
## 53      UNP  4555630
## 24     INTN  4481768
## 14   DINAPI  4475428
## 48      UNC  4341667
## 27       JE  4186092
## 43    MTESS  4182931
## 15       DP  4145913
## 36       MJ  3951414
## 49     UNCA  3694196
## 13 DINACOPA  3645817
## 52      UNI  3644561
## 55      VPR  3308999
## 54    UNVES  2440415