# Set the default chunk option echo = TRUE for the whole document.
knitr::opts_chunk$set(echo = TRUE)

I will rebuild the dataset of people working in Sciences occupations that I created a couple of weeks ago.

# Work from the folder that holds the survey extracts.
setwd("~/Dropbox/UDLAP/Cursos/2022 Primavera/Pensiones y SS/Presentaciones")

# Read the SDEM table and keep rows with eda in [15, 97] and c_res other
# than 2. subset() discards rows where the condition is NA, just like
# indexing with which().
sdem <- read.csv("ENOEN_SDEMT121.csv")
sdem <- subset(sdem, eda >= 15 & eda <= 97)
sdem <- subset(sdem, c_res != 2)

# Cross-tabulate clase1 (rows) against sex (columns).
table(sdem[["clase1"]], sdem[["sex"]])
##    
##         1     2
##   0   173   172
##   1 92483 62143
##   2 32493 77143
# Drop the clase1 == 0 rows seen in the table above, then keep clase2 == 1.
sdem <- subset(sdem, clase1 > 0)
peao <- subset(sdem, clase2 == 1)

# Join the COE1 table on the identifier columns shared by both files.
key_cols <- c("cd_a", "ent", "con", "v_sel", "tipo", "mes_cal",
              "n_hog", "h_mud", "n_ren")
coe1 <- read.csv("ENOEN_COE1T121.csv")
peao <- merge(peao, coe1, by = key_cols)
rm(sdem, coe1, key_cols)

# Coerce p3 to numeric and flag codes in [2200, 2300) as the "ciencias"
# group (1 = in range, 0 = out of range, NA when p3 is not a number).
peao$p3 <- as.numeric(as.character(peao$p3))
peao$ciencias <- as.numeric(peao$p3 >= 2200 & peao$p3 < 2300)

# Keep only the flagged rows and release the larger table.
ciencias <- subset(peao, ciencias == 1)
rm(peao)

# Count of flagged rows by sex.
table(ciencias[["sex"]])
## 
##    1    2 
## 2089  562
# List every column of the merged table. Columns present in both inputs but
# not used as merge keys appear twice, with .x / .y suffixes.
names(ciencias)
##   [1] "cd_a"        "ent"         "con"         "v_sel"       "tipo"       
##   [6] "mes_cal"     "n_hog"       "h_mud"       "n_ren"       "r_def.x"    
##  [11] "loc"         "mun"         "est"         "est_d_tri"   "est_d_men"  
##  [16] "ageb"        "t_loc_tri"   "t_loc_men"   "upm.x"       "d_sem.x"    
##  [21] "n_pro_viv.x" "n_ent.x"     "per.x"       "c_res"       "par_c"      
##  [26] "sex"         "eda.x"       "nac_dia"     "nac_mes"     "nac_anio"   
##  [31] "l_nac_c"     "cs_p12"      "cs_p13_1"    "cs_p13_2"    "cs_p14_c"   
##  [36] "cs_p15"      "cs_p16"      "cs_p17"      "n_hij"       "e_con"      
##  [41] "cs_ad_mot"   "cs_p20_des"  "cs_ad_des"   "cs_nr_mot"   "cs_p22_des" 
##  [46] "cs_nr_ori"   "ur.x"        "zona"        "salario"     "fac_tri.x"  
##  [51] "fac_men.x"   "clase1"      "clase2"      "clase3"      "pos_ocu"    
##  [56] "seg_soc"     "rama"        "c_ocu11c"    "ing7c"       "dur9c"      
##  [61] "emple7c"     "medica5c"    "buscar5c"    "rama_est1"   "rama_est2"  
##  [66] "dur_est"     "ambito1"     "ambito2"     "tue1"        "tue2"       
##  [71] "tue3"        "busqueda"    "d_ant_lab"   "d_cexp_est"  "dur_des"    
##  [76] "sub_o"       "s_clasifi"   "remune2c"    "pre_asa"     "tip_con"    
##  [81] "dispo"       "nodispo"     "c_inac5c"    "pnea_est"    "niv_ins"    
##  [86] "eda5c"       "eda7c"       "eda12c"      "eda19c"      "hij5c"      
##  [91] "domestico"   "anios_esc"   "hrsocup"     "ingocup"     "ing_x_hrs"  
##  [96] "tpg_p8a"     "tcco"        "cp_anoc"     "imssissste"  "ma48me1sm"  
## [101] "p14apoyos"   "scian"       "t_tra"       "emp_ppal"    "tue_ppal"   
## [106] "trans_ppal"  "mh_fil2"     "mh_col"      "sec_ins"     "ca.x"       
## [111] "r_def.y"     "upm.y"       "d_sem.y"     "n_pro_viv.y" "n_ent.y"    
## [116] "per.y"       "eda.y"       "n_inf"       "p1"          "p1a1"       
## [121] "p1a2"        "p1a3"        "p1b"         "p1c"         "p1d"        
## [126] "p1e"         "p2_1"        "p2_2"        "p2_3"        "p2_4"       
## [131] "p2_9"        "p2a_dia"     "p2a_sem"     "p2a_mes"     "p2a_anio"   
## [136] "p2b_dia"     "p2b_sem"     "p2b_mes"     "p2b_anio"    "p2b"        
## [141] "p2c"         "p2d1"        "p2d2"        "p2d3"        "p2d4"       
## [146] "p2d5"        "p2d6"        "p2d7"        "p2d8"        "p2d9"       
## [151] "p2d10"       "p2d11"       "p2d99"       "p2e"         "p2f"        
## [156] "p2g1"        "p2g2"        "p2h1"        "p2h2"        "p2h3"       
## [161] "p2h4"        "p2h9"        "p3"          "p3a"         "p3b"        
## [166] "p3c1"        "p3c2"        "p3c3"        "p3c4"        "p3c9"       
## [171] "p3d"         "p3e"         "p3f1"        "p3f2"        "p3g1_1"     
## [176] "p3g1_2"      "p3g2_1"      "p3g2_2"      "p3g3_1"      "p3g3_2"     
## [181] "p3g4_1"      "p3g4_2"      "p3g9"        "p3g_tot"     "p3h"        
## [186] "p3i"         "p3j"         "p3k1"        "p3k2"        "p3l1"       
## [191] "p3l2"        "p3l3"        "p3l4"        "p3l5"        "p3l9"       
## [196] "p3m1"        "p3m2"        "p3m3"        "p3m4"        "p3m5"       
## [201] "p3m6"        "p3m7"        "p3m8"        "p3m9"        "p3n"        
## [206] "p3o"         "p3p1"        "p3p2"        "p3q"         "p3r_anio"   
## [211] "p3r_mes"     "p3r"         "p3s"         "p3t_anio"    "p3t_mes"    
## [216] "p4"          "p4_1"        "p4_2"        "p4_3"        "p4a"        
## [221] "p4a_1"       "p4b"         "p4c"         "p4d1"        "p4d2"       
## [226] "p4d3"        "p4e"         "p4f"         "p4g"         "p4h"        
## [231] "p4i"         "p4i_1"       "p5"          "p5a"         "p5b"        
## [236] "p5c_hlu"     "p5c_mlu"     "p5c_hma"     "p5c_mma"     "p5c_hmi"    
## [241] "p5c_mmi"     "p5c_hju"     "p5c_mju"     "p5c_hvi"     "p5c_mvi"    
## [246] "p5c_hsa"     "p5c_msa"     "p5c_hdo"     "p5c_mdo"     "p5c_thrs"   
## [251] "p5c_tdia"    "p5d"         "p5e1"        "p5e_hlu"     "p5e_mlu"    
## [256] "p5e_hma"     "p5e_mma"     "p5e_hmi"     "p5e_mmi"     "p5e_hju"    
## [261] "p5e_mju"     "p5e_hvi"     "p5e_mvi"     "p5e_hsa"     "p5e_msa"    
## [266] "p5e_hdo"     "p5e_mdo"     "p5e_thrs"    "p5e_tdia"    "p5f"        
## [271] "p5g1"        "p5g2"        "p5g3"        "p5g4"        "p5g5"       
## [276] "p5g6"        "p5g7"        "p5g8"        "p5g9"        "p5g10"      
## [281] "p5g11"       "p5g12"       "p5g13"       "p5g14"       "p5g15"      
## [286] "p5g99"       "p5h"         "ur.y"        "ca.y"        "fac_tri.y"  
## [291] "fac_men.y"   "ciencias"
# Keep paid employees: rows whose p3h value equals 1.
ciencias <- ciencias[which(ciencias$p3h == 1), ]

# Columns retained for the analysis, in the order the later rename expects.
v <- c("sex", "eda.x", "e_con", "pos_ocu", "seg_soc", "ing7c", "ingocup",
       "ing_x_hrs", "hrsocup", "p3r_anio", "p3r_mes", "p3r", "p3i")
ciencias <- ciencias[v]

# Keep strictly positive income only. The columns still carry their original
# names at this point (they are renamed to English labels further down), so
# the filter must use `ingocup`. `ciencias$Salary` does not exist yet: it
# evaluates to NULL and the subset would silently return zero rows.
# which() also drops rows where ingocup is NA.
ciencias <- ciencias[which(ciencias$ingocup > 0), ]

I am also selecting just a few variables for my analysis. I will rename them:

# New labels, assigned by position. This relies on the columns being in the
# order they were selected earlier (sex, eda.x, e_con, pos_ocu, seg_soc,
# ing7c, ingocup, ing_x_hrs, hrsocup, p3r_anio, p3r_mes, p3r, p3i).
english_names <- c("Gender", "Age", "MaritalSt", "Position", "SS", "IngG",
                   "Salary", "HrSalary", "TimeOcc", "StartYr", "StartMo",
                   "Current", "Union")
names(ciencias) <- english_names

# Save the result in the working directory, then clear the workspace objects.
write.csv(ciencias, file = "ciencias.csv")
rm(ciencias, english_names)