Hipótesis de interés

El salario promedio en la ocupación principal antes y durante el COVID-19 es diferente (salario deflactado por el IPC).

Acceder a las bases de la EPH para los años 2019 y 2020

# Set the working directory so the relative CSV paths below resolve.
# NOTE(review): this hard-codes a per-user path; consider project-relative
# paths if the script has to run on another machine.

setwd("~/FACEN_MScEstadTPfinal")

# Load the 2019 and 2020 EPH files. Both are read as semicolon-separated
# text with a header row and a comma as the decimal mark.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.

baseEPH19 <- read.csv("EPH2019.csv", sep = ";", header = TRUE, dec = ",")
# names(baseEPH19)  # uncomment to list the column names
baseEPH20 <- read.csv("EPH2020.csv", sep = ";", header = TRUE, dec = ",")
# names(baseEPH20)  # uncomment to list the column names

Filtrar la base para las variables de interés

### P06

# Frequency of each P06 code in the 2019 base.
table(baseEPH19[["P06"]])
## 
##    1    6 
## 9131 9102
### P02 - EDAD

# Frequency of each P02 (age) value in the 2019 base.
table(baseEPH19[["P02"]])
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
## 287 295 310 326 340 366 311 344 343 325 355 331 321 354 356 369 341 362 301 322 
##  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39 
## 294 283 323 292 314 271 267 306 261 246 266 253 262 272 261 278 250 242 254 243 
##  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59 
## 256 174 204 196 194 224 160 171 169 177 204 193 167 168 203 192 194 199 139 186 
##  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79 
## 139 135 142 140 143 146 112  96 116 111  99  79 101  63  57  71  64  51  54  41 
##  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 
##  49  47  41  51  34  14  28  28  21  15  15  11   9  10  11   6   2   3   2   5 
## 100 101 106 
##   2   1   1
### e01aimde

# Quartiles, mean and range of e01aimde in the 2019 base, before filtering.
summary(baseEPH19[["e01aimde"]])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##         0         0         0   1041611   1505720 323372117
# Keep only the analysis columns and the rows that satisfy the income and
# age restrictions. The same rule applies to both survey years, so it lives
# in one helper instead of two copy-pasted subset() calls.
#
# base:        data frame containing at least the columns in `columnas`.
# ingreso_max: exclusive upper bound for e01aimde (it must also be > 0).
# edad_min, edad_max: exclusive lower/upper bounds for P02.
# Returns the filtered data frame with the original row names preserved.
filtrarBaseEPH <- function(base, ingreso_max = 50000000,
                           edad_min = 14, edad_max = 70) {
  columnas <- c("FEX", "PEAA", "P02", "AREA", "P06", "e01aimde")
  ingreso <- base[["e01aimde"]]
  edad <- base[["P02"]]
  cumple <- ingreso > 0 & ingreso < ingreso_max &
    edad > edad_min & edad < edad_max
  # which() discards rows whose condition is NA, matching what subset()
  # does with missing values.
  base[which(cumple), columnas, drop = FALSE]
}

baseEPH19fil <- filtrarBaseEPH(baseEPH19)
names(baseEPH19fil)
## [1] "FEX"      "PEAA"     "P02"      "AREA"     "P06"      "e01aimde"
baseEPH20fil <- filtrarBaseEPH(baseEPH20)
names(baseEPH20fil)
## [1] "FEX"      "PEAA"     "P02"      "AREA"     "P06"      "e01aimde"
# Tag every filtered 2019 record with its survey year so the two years can
# be told apart once the bases are stacked, then confirm the row count.
baseEPH19fil$year <- rep_len(2019, nrow(baseEPH19fil))
table(baseEPH19fil[["year"]])
## 
## 2019 
## 7767
# Tag every filtered 2020 record with its survey year so the two years can
# be told apart once the bases are stacked, then confirm the row count.
baseEPH20fil$year <- rep_len(2020, nrow(baseEPH20fil))
table(baseEPH20fil[["year"]])
## 
## 2020 
## 7292
# Stack the filtered 2019 and 2020 bases into a single data frame, then
# cross-tabulate survey year against P06 to check both years are present.
baseEPH19y20 <- rbind(baseEPH19fil, baseEPH20fil)
table(baseEPH19y20[["year"]], baseEPH19y20[["P06"]])
##       
##           1    6
##   2019 4663 3104
##   2020 4456 2836

Verificar las variables

# Inspect the column types and first values of the combined base.
utils::str(baseEPH19y20)
## 'data.frame':    15059 obs. of  7 variables:
##  $ FEX     : int  708 504 504 504 504 504 525 525 393 393 ...
##  $ PEAA    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ P02     : int  29 51 62 30 27 22 33 32 47 45 ...
##  $ AREA    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ P06     : int  1 6 1 1 1 6 1 6 1 6 ...
##  $ e01aimde: num  2007626 3011439 2810677 3011439 3955024 ...
##  $ year    : num  2019 2019 2019 2019 2019 ...
# Compare the distribution of e01aimde between the two survey years.
# The formula interface with `data =` labels the axes with the column
# names instead of the full `baseEPH19y20$...` expressions.
boxplot(e01aimde ~ year, data = baseEPH19y20)

Etiquetar las variables que lo requieran

# Check which P06 codes occur in the combined base before labelling them.
table(baseEPH19y20[["P06"]])
## 
##    1    6 
## 9119 5940
# Labelled version of P06. The codes are listed explicitly so that the
# pairing 1 -> "Hombres", 6 -> "Mujeres" does not depend on which codes
# happen to be present or on their sort order. Any other code becomes NA,
# and useNA = "ifany" makes such cases visible in the cross-tabulation.
baseEPH19y20$P06e <- factor(
  baseEPH19y20$P06,
  levels = c(1, 6),
  labels = c("Hombres", "Mujeres")
)
table(baseEPH19y20$P06e, baseEPH19y20$year, useNA = "ifany")
##          
##           2019 2020
##   Hombres 4663 4456
##   Mujeres 3104 2836

Cálculo de las estadísticas

# Quartiles, mean and range of e01aimde over both years combined.
summary(baseEPH19y20[["e01aimde"]])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      414   803752  1800000  2228306  2684355 49675325

Instalar y cargar paquetes

#install.packages("plyr")
library("plyr")
## Warning: package 'plyr' was built under R version 4.2.3
# Mean and standard deviation of e01aimde per survey year, each rounded to
# two decimals.
# NOTE(review): FEX is kept in the data but not used here, so these are
# unweighted sample statistics -- confirm that is intended if FEX is a
# survey weight.
tablapromedios <- ddply(
  baseEPH19y20,
  "year",
  summarize,
  mean = round(mean(e01aimde), 2),
  sd = round(sd(e01aimde), 2)
)

# Display the per-year mean and standard deviation table.
print(tablapromedios)
##   year    mean      sd
## 1 2019 2317649 2639534
## 2 2020 2133144 2485222