# El salario promedio en la ocupación principal antes y durante el COVID es diferente - (salario deflactado por IPC)
# Set the working directory so the relative CSV paths below resolve.
# NOTE(review): setwd() ties the script to one machine's folder layout; it is
# kept here because code outside this section may rely on it.
setwd("~/FACEN_MScEstadTPfinal")
# Load the 2019 and 2020 EPH files. Both are read as semicolon-separated,
# with a header row and a comma as the decimal mark. TRUE is spelled out
# because the T alias is an ordinary variable that can be reassigned.
# (Once loaded, names(baseEPH19) / names(baseEPH20) list the columns.)
baseEPH19 <- read.csv("EPH2019.csv", sep = ";", header = TRUE, dec = ",")
baseEPH20 <- read.csv("EPH2020.csv", sep = ";", header = TRUE, dec = ",")
### P06 - frequency of each code in the 2019 file
table(baseEPH19[["P06"]])
##
## 1 6
## 9131 9102
### P02 - age: frequency of each value in the 2019 file
table(baseEPH19[["P02"]])
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 287 295 310 326 340 366 311 344 343 325 355 331 321 354 356 369 341 362 301 322
## 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
## 294 283 323 292 314 271 267 306 261 246 266 253 262 272 261 278 250 242 254 243
## 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
## 256 174 204 196 194 224 160 171 169 177 204 193 167 168 203 192 194 199 139 186
## 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
## 139 135 142 140 143 146 112 96 116 111 99 79 101 63 57 71 64 51 54 41
## 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
## 49 47 41 51 34 14 28 28 21 15 15 11 9 10 11 6 2 3 2 5
## 100 101 106
## 2 1 1
### e01aimde - quartiles, mean and range in the 2019 file
### (presumably the main-occupation income named in the title; confirm)
summary(baseEPH19[["e01aimde"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 1041611 1505720 323372117
# Build the analysis sample for one survey file.
#
# Keeps the rows where 0 < e01aimde < income_cap and age_min < P02 < age_max
# (both bounds exclusive, so the defaults keep ages 15 to 69), and returns only
# the columns in `cols`. Rows where the condition evaluates to NA are dropped,
# matching what subset() did in the earlier copy-pasted version.
#
# base       data frame holding at least e01aimde, P02 and every name in `cols`
# income_cap exclusive upper bound on e01aimde (outlier cut-off)
# age_min    exclusive lower bound on P02
# age_max    exclusive upper bound on P02
# cols       columns to return, in this order
filter_eph_sample <- function(base,
                              income_cap = 50000000,
                              age_min = 14,
                              age_max = 70,
                              cols = c("FEX", "PEAA", "P02", "AREA", "P06",
                                       "e01aimde")) {
  # Fail loudly on a missing column instead of silently returning zero rows.
  stopifnot(
    is.data.frame(base),
    all(c(cols, "e01aimde", "P02") %in% names(base))
  )
  income <- base[["e01aimde"]]
  age <- base[["P02"]]
  keep <- income > 0 & income < income_cap & age > age_min & age < age_max
  base[keep & !is.na(keep), cols, drop = FALSE]
}

# Apply the same filter to both years so the two samples stay comparable.
baseEPH19fil <- filter_eph_sample(baseEPH19)
names(baseEPH19fil)
## [1] "FEX" "PEAA" "P02" "AREA" "P06" "e01aimde"
baseEPH20fil <- filter_eph_sample(baseEPH20)
names(baseEPH20fil)
## [1] "FEX" "PEAA" "P02" "AREA" "P06" "e01aimde"
# Tag every row with its survey year. rep() with nrow() also works when a
# filtered sample ends up with zero rows.
baseEPH19fil$year <- rep(2019, nrow(baseEPH19fil))
table(baseEPH19fil$year)
##
## 2019
## 7767
baseEPH20fil$year <- rep(2020, nrow(baseEPH20fil))
table(baseEPH20fil$year)
##
## 2020
## 7292
# Stack the filtered 2019 and 2020 samples into one data frame; both have the
# same columns, so rbind() appends the 2020 rows below the 2019 rows.
baseEPH19y20 <- rbind(baseEPH19fil, baseEPH20fil)
# Row counts by year (rows) and P06 code (columns).
table(baseEPH19y20[["year"]], baseEPH19y20[["P06"]])
##
## 1 6
## 2019 4663 3104
## 2020 4456 2836
# Check the column types of the combined data.
str(baseEPH19y20)
## 'data.frame': 15059 obs. of 7 variables:
## $ FEX : int 708 504 504 504 504 504 525 525 393 393 ...
## $ PEAA : int 1 1 1 1 1 1 1 1 1 1 ...
## $ P02 : int 29 51 62 30 27 22 33 32 47 45 ...
## $ AREA : int 1 1 1 1 1 1 1 1 1 1 ...
## $ P06 : int 1 6 1 1 1 6 1 6 1 6 ...
## $ e01aimde: num 2007626 3011439 2810677 3011439 3955024 ...
## $ year : num 2019 2019 2019 2019 2019 ...
# Distribution of e01aimde by year. The formula + data form draws the same
# boxes and labels the axes "year" and "e01aimde" instead of the full
# baseEPH19y20$... expressions.
boxplot(e01aimde ~ year, data = baseEPH19y20)
table(baseEPH19y20$P06)
##
## 1 6
## 9119 5940
# Labelled copy of P06: code 1 -> "Hombres" (men), code 6 -> "Mujeres" (women).
# The levels are given explicitly so the labels do not depend on which codes
# happen to be present; the guard stops on any code other than 1, 6 or NA
# rather than silently turning it into NA.
stopifnot(all(baseEPH19y20$P06 %in% c(1, 6, NA)))
baseEPH19y20$P06e <- factor(
  baseEPH19y20$P06,
  levels = c(1, 6),
  labels = c("Hombres", "Mujeres")
)
table(baseEPH19y20$P06e, baseEPH19y20$year)
##
## 2019 2020
## Hombres 4663 4456
## Mujeres 3104 2836
# Quartiles, mean and range of e01aimde over both years after filtering.
summary(baseEPH19y20$e01aimde)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 414 803752 1800000 2228306 2684355 49675325
# ddply() comes from plyr; run install.packages("plyr") once if it is missing.
library(plyr)
## Warning: package 'plyr' was built under R version 4.2.3
# Mean and standard deviation of e01aimde for each year, rounded to 2 decimals.
# NOTE(review): FEX is carried in the data but not used here, so these are
# unweighted statistics; presumably FEX is a survey expansion factor - confirm
# that unweighted estimates are intended.
# NOTE(review): the file title says salaries are CPI-deflated, but no deflation
# step appears in this section - confirm e01aimde is already in constant prices.
tablapromedios <- ddply(
  baseEPH19y20, "year", summarize,
  mean = round(mean(e01aimde), 2),
  sd = round(sd(e01aimde), 2)
)
tablapromedios
## year mean sd
## 1 2019 2317649 2639534
## 2 2020 2133144 2485222