Importar base de datos

#file.choose()
bdcol <- read.csv("/Users/lizmanzano/Desktop/RETO ANALÍTICA/BDCOLRH.csv")
bdbaj <- read.csv("/Users/lizmanzano/Desktop/RETO ANALÍTICA/BaseD_Limpia RH_ Bajas .csv")

Entender base de datos

summary(bdcol)
##  numero_de_empleado nombre_completo         edad          genero         
##  Min.   :  1.00     Length:113         Min.   :18.00   Length:113        
##  1st Qu.: 31.00     Class :character   1st Qu.:26.00   Class :character  
##  Median : 63.00     Mode  :character   Median :34.00   Mode  :character  
##  Mean   : 75.86                        Mean   :36.07                     
##  3rd Qu.:127.00                        3rd Qu.:45.00                     
##  Max.   :169.00                        Max.   :73.00                     
##                                                                          
##  fecha_de_alta        antiguedad          BAJA        puesto         
##  Length:113         Min.   : 0.000   Min.   :3     Length:113        
##  Class :character   1st Qu.: 0.000   1st Qu.:3     Class :character  
##  Mode  :character   Median : 0.000   Median :3     Mode  :character  
##                     Mean   : 1.425   Mean   :3                       
##                     3rd Qu.: 2.000   3rd Qu.:3                       
##                     Max.   :12.000   Max.   :3                       
##                                      NA's   :100                     
##  departamento       mano_de_obra       salario_diario    colonia         
##  Length:113         Length:113         Min.   :144.4   Length:113        
##  Class :character   Class :character   1st Qu.:176.7   Class :character  
##  Mode  :character   Mode  :character   Median :180.7   Mode  :character  
##                                        Mean   :181.4                     
##                                        3rd Qu.:180.7                     
##                                        Max.   :441.4                     
##                                                                          
##   municipio        
##  Length:113        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
summary(bdbaj)
##     nombre               edad          genero          fecha_de_alta     
##  Length:237         Min.   : 0.00   Length:237         Length:237        
##  Class :character   1st Qu.:23.00   Class :character   Class :character  
##  Mode  :character   Median :29.00   Mode  :character   Mode  :character  
##                     Mean   :30.52                                        
##                     3rd Qu.:37.00                                        
##                     Max.   :61.00                                        
##  motivo_de_baja     dias_de_trabajo       baja           puesto_que_desempeña
##  Length:237         Min.   :   0.00   Length:237         Length:237          
##  Class :character   1st Qu.:   9.00   Class :character   Class :character    
##  Mode  :character   Median :  21.00   Mode  :character   Mode  :character    
##                     Mean   :  83.42                                          
##                     3rd Qu.:  49.00                                          
##                     Max.   :1966.00                                          
##   salario_imss     colonia           municipio            estado         
##  Min.   :144.4   Length:237         Length:237         Length:237        
##  1st Qu.:180.7   Class :character   Class :character   Class :character  
##  Median :180.7   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :178.6                                                           
##  3rd Qu.:180.7                                                           
##  Max.   :500.0                                                           
##  estado_civil      
##  Length:237        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

Cargar librerías

library(foreign)
library(dplyr)        # data manipulation 
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(forcats)      # to work with categorical variables
library(ggplot2)      # data visualization
library(janitor)      # data exploration and cleaning 
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(Hmisc)        # several useful functions for data analysis 
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(dlookr)       # summaries and visualization of missing values NAs
## 
## Attaching package: 'dlookr'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following object is masked from 'package:base':
## 
##     transform
library(corrplot)     # correlation plots
## corrplot 0.92 loaded
library(jtools)       # presentation of regression analysis 
## 
## Attaching package: 'jtools'
## The following object is masked from 'package:Hmisc':
## 
##     %nin%
library(lmtest)       # diagnostic checks - linear regression analysis 
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(car)          # diagnostic checks - linear regression analysis
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(olsrr)        # diagnostic checks - linear regression analysis 
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
library(kableExtra)   # HTML table attributes
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows

Liberar estructura del conjunto de datos

str(bdcol)
## 'data.frame':    113 obs. of  13 variables:
##  $ numero_de_empleado: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ nombre_completo   : chr  "NICOLAS MARTINEZ DE LOERA" "MARIANA DE LEON MORENO" "JOSE LUIS HERNANDEZ CERVANTES" "MARIA CAZARES MORALES" ...
##  $ edad              : int  67 43 73 32 57 38 55 26 27 37 ...
##  $ genero            : chr  "MASCULINO" "FEMENINO" "MASCULINO" "FEMENINO" ...
##  $ fecha_de_alta     : chr  "01/07/2010" "01/07/2011" "22/11/2011" "30/01/2013" ...
##  $ antiguedad        : int  12 11 11 9 8 8 7 6 5 5 ...
##  $ BAJA              : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ puesto            : chr  "Supervisor de Máquin" "Supervisor de pegado" "Externo" "SUPERVISORA" ...
##  $ departamento      : chr  "Produccion Cartón MDL" "Produccion Cartón MDL" "Externo" "Produccion Cartón MC" ...
##  $ mano_de_obra      : chr  "Indirecto" "Indirecto" "Indirecto" "Indirecto" ...
##  $ salario_diario    : num  177 177 177 337 441 ...
##  $ colonia           : chr  "UNIDAD LABORAL" "SANTA TERESITA" "VILLAS DE HUINALA" "PUEBLO NUEVO" ...
##  $ municipio         : chr  "SAN NICOLAS DE LOS G" "APODACA" "APODACA" "APODACA" ...
str(bdbaj)
## 'data.frame':    237 obs. of  13 variables:
##  $ nombre              : chr  "MARIO VALDEZ ORTIZ" "ISABEL BARRIOS MENDEZ" "MARIA ELIZABETH GOMEZ HERNANDEZ" "ALONDRA ABIGAIL ESCARCIA GOMEZ" ...
##  $ edad                : int  32 36 23 21 29 46 29 31 50 19 ...
##  $ genero              : chr  "MASCULINO" "FEMENINO" "FEMENINO" "FEMENINO" ...
##  $ fecha_de_alta       : chr  "9/3/2020" "9/11/2021" "10/11/2021" "10/11/2021" ...
##  $ motivo_de_baja      : chr  "RENUNCIA VOLUNTARIA" "RENUNCIA VOLUNTARIA" "RENUNCIA VOLUNTARIA" "RENUNCIA VOLUNTARIA" ...
##  $ dias_de_trabajo     : int  628 60 59 59 51 37 37 31 18 224 ...
##  $ baja                : chr  "27/11/2021" "8/1/2022" "8/1/2022" "8/1/2022" ...
##  $ puesto_que_desempeña: chr  "DISEÑO" "AYUDANTE GENERAL" "AYUDANTE GENERAL" "AYUDANTE GENERAL" ...
##  $ salario_imss        : num  500 152 152 152 152 ...
##  $ colonia             : chr  "SAN NICOLAS DE LOS G" "COLINAS DEL AEROPÑUERTO" "PUEBLO NUEVO" "PUEBLO NUEVO" ...
##  $ municipio           : chr  "SAN NICOLAS DE LOS G" "PESQUERIA" "APODACA" "APODACA" ...
##  $ estado              : chr  "NUEVO LEÓN" "NUEVO LEÓN" "NUEVO LEÓN" "NUEVO LEÓN" ...
##  $ estado_civil        : chr  "SOLTERO" "UNIÓN LIBRE" "CASADO" "SOLTERO" ...

Seleccionar columnas y variables

bdcol1<-bdcol 
bdbaj1<-bdbaj 
#bdcol1<-bdcol %>% select(-one_of('numero_de_empleado','fecha_de_alta' ,'BAJA', 'edad'))

Renombrar variables

summary(bdcol1)
##  numero_de_empleado nombre_completo         edad          genero         
##  Min.   :  1.00     Length:113         Min.   :18.00   Length:113        
##  1st Qu.: 31.00     Class :character   1st Qu.:26.00   Class :character  
##  Median : 63.00     Mode  :character   Median :34.00   Mode  :character  
##  Mean   : 75.86                        Mean   :36.07                     
##  3rd Qu.:127.00                        3rd Qu.:45.00                     
##  Max.   :169.00                        Max.   :73.00                     
##                                                                          
##  fecha_de_alta        antiguedad          BAJA        puesto         
##  Length:113         Min.   : 0.000   Min.   :3     Length:113        
##  Class :character   1st Qu.: 0.000   1st Qu.:3     Class :character  
##  Mode  :character   Median : 0.000   Median :3     Mode  :character  
##                     Mean   : 1.425   Mean   :3                       
##                     3rd Qu.: 2.000   3rd Qu.:3                       
##                     Max.   :12.000   Max.   :3                       
##                                      NA's   :100                     
##  departamento       mano_de_obra       salario_diario    colonia         
##  Length:113         Length:113         Min.   :144.4   Length:113        
##  Class :character   Class :character   1st Qu.:176.7   Class :character  
##  Mode  :character   Mode  :character   Median :180.7   Mode  :character  
##                                        Mean   :181.4                     
##                                        3rd Qu.:180.7                     
##                                        Max.   :441.4                     
##                                                                          
##   municipio        
##  Length:113        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
names(bdcol1)<-c('Nom_Comp', 'Gen',  'Ant', 'Puesto', 'Dep', 'MDO', 'SalDiario', 'Col', 'Mun')
names(bdbaj1)<-c('Nom', 'Edad', 'Gen', 'Fecha_alta', 'MB', 'Días_trab', 'Baja', 'PuestDes', 'Sal_IMSS', 'Col', 'Mun', 'Estado', 'EstCiv')

Cambiar fecha a formato fecha

bdcol1$Fecha_alta<-as.Date(bdcol$fecha_de_alta,format="%y/%m/%d")
bdbaj1$Fecha_alta<-as.Date(bdbaj1$Fecha_alta,format="%y/%m/%d")
bdbaj1$Baja<-as.Date(bdbaj1$Baja,format="%y/%m/%d") 

Calcular la variable “años” en años para entender más características de colaboradores

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
edad<-trunc((bdcol1$Fecha_alta %--% bdcol1$Fecha_alta) / years(1))

Crear intervalo de tiempo

bdcol1$edad<-edad

Eliminar NA’s y sustituir con 0’s

sum(is.na(bdcol1))
## [1] 100
sum(is.na(bdbaj1))
## [1] 0
bdcol1[is.na(bdcol1)]<-0
bdbaj1[is.na(bdbaj1)]<-0


summary(bdcol1)
##     Nom_Comp          Gen                 Ant           Puesto         
##  Min.   :  1.00   Length:113         Min.   :18.00   Length:113        
##  1st Qu.: 31.00   Class :character   1st Qu.:26.00   Class :character  
##  Median : 63.00   Mode  :character   Median :34.00   Mode  :character  
##  Mean   : 75.86                      Mean   :36.07                     
##  3rd Qu.:127.00                      3rd Qu.:45.00                     
##  Max.   :169.00                      Max.   :73.00                     
##      Dep                 MDO           SalDiario          Col           
##  Length:113         Min.   : 0.000   Min.   :0.0000   Length:113        
##  Class :character   1st Qu.: 0.000   1st Qu.:0.0000   Class :character  
##  Mode  :character   Median : 0.000   Median :0.0000   Mode  :character  
##                     Mean   : 1.425   Mean   :0.3451                     
##                     3rd Qu.: 2.000   3rd Qu.:0.0000                     
##                     Max.   :12.000   Max.   :3.0000                     
##      Mun                 NA                  NA             NA           
##  Length:113         Length:113         Min.   :144.4   Length:113        
##  Class :character   Class :character   1st Qu.:176.7   Class :character  
##  Mode  :character   Mode  :character   Median :180.7   Mode  :character  
##                                        Mean   :181.4                     
##                                        3rd Qu.:180.7                     
##                                        Max.   :441.4                     
##       NA              Fecha_alta              edad  
##  Length:113         Min.   :2001-06-20   Min.   :0  
##  Class :character   1st Qu.:2006-07-20   1st Qu.:0  
##  Mode  :character   Median :2014-04-20   Median :0  
##                     Mean   :2014-06-16   Mean   :0  
##                     3rd Qu.:2022-11-20   3rd Qu.:0  
##                     Max.   :2030-07-20   Max.   :0
summary(bdbaj1)
##      Nom                 Edad           Gen              Fecha_alta        
##  Length:237         Min.   : 0.00   Length:237         Min.   :2001-02-20  
##  Class :character   1st Qu.:23.00   Class :character   1st Qu.:2010-06-20  
##  Mode  :character   Median :29.00   Mode  :character   Median :2015-06-20  
##                     Mean   :30.52                      Mean   :2015-11-07  
##                     3rd Qu.:37.00                      3rd Qu.:2021-12-20  
##                     Max.   :61.00                      Max.   :2031-05-20  
##       MB              Días_trab            Baja              PuestDes        
##  Length:237         Min.   :   0.00   Min.   :2001-02-20   Length:237        
##  Class :character   1st Qu.:   9.00   1st Qu.:2011-04-20   Class :character  
##  Mode  :character   Median :  21.00   Median :2017-08-20   Mode  :character  
##                     Mean   :  83.42   Mean   :2017-08-09                     
##                     3rd Qu.:  49.00   3rd Qu.:2025-04-20                     
##                     Max.   :1966.00   Max.   :2031-01-20                     
##     Sal_IMSS         Col                Mun               Estado         
##  Min.   :144.4   Length:237         Length:237         Length:237        
##  1st Qu.:180.7   Class :character   Class :character   Class :character  
##  Median :180.7   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :178.6                                                           
##  3rd Qu.:180.7                                                           
##  Max.   :500.0                                                           
##     EstCiv         
##  Length:237        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
bdcol1 <- na.omit(bdcol1)
bdbaj1 <- na.omit(bdbaj1)


summary(bdcol1)
##     Nom_Comp          Gen                 Ant           Puesto         
##  Min.   :  1.00   Length:113         Min.   :18.00   Length:113        
##  1st Qu.: 31.00   Class :character   1st Qu.:26.00   Class :character  
##  Median : 63.00   Mode  :character   Median :34.00   Mode  :character  
##  Mean   : 75.86                      Mean   :36.07                     
##  3rd Qu.:127.00                      3rd Qu.:45.00                     
##  Max.   :169.00                      Max.   :73.00                     
##      Dep                 MDO           SalDiario          Col           
##  Length:113         Min.   : 0.000   Min.   :0.0000   Length:113        
##  Class :character   1st Qu.: 0.000   1st Qu.:0.0000   Class :character  
##  Mode  :character   Median : 0.000   Median :0.0000   Mode  :character  
##                     Mean   : 1.425   Mean   :0.3451                     
##                     3rd Qu.: 2.000   3rd Qu.:0.0000                     
##                     Max.   :12.000   Max.   :3.0000                     
##      Mun                 NA                  NA             NA           
##  Length:113         Length:113         Min.   :144.4   Length:113        
##  Class :character   Class :character   1st Qu.:176.7   Class :character  
##  Mode  :character   Mode  :character   Median :180.7   Mode  :character  
##                                        Mean   :181.4                     
##                                        3rd Qu.:180.7                     
##                                        Max.   :441.4                     
##       NA              Fecha_alta              edad  
##  Length:113         Min.   :2001-06-20   Min.   :0  
##  Class :character   1st Qu.:2006-07-20   1st Qu.:0  
##  Mode  :character   Median :2014-04-20   Median :0  
##                     Mean   :2014-06-16   Mean   :0  
##                     3rd Qu.:2022-11-20   3rd Qu.:0  
##                     Max.   :2030-07-20   Max.   :0
summary(bdbaj1)  
##      Nom                 Edad           Gen              Fecha_alta        
##  Length:237         Min.   : 0.00   Length:237         Min.   :2001-02-20  
##  Class :character   1st Qu.:23.00   Class :character   1st Qu.:2010-06-20  
##  Mode  :character   Median :29.00   Mode  :character   Median :2015-06-20  
##                     Mean   :30.52                      Mean   :2015-11-07  
##                     3rd Qu.:37.00                      3rd Qu.:2021-12-20  
##                     Max.   :61.00                      Max.   :2031-05-20  
##       MB              Días_trab            Baja              PuestDes        
##  Length:237         Min.   :   0.00   Min.   :2001-02-20   Length:237        
##  Class :character   1st Qu.:   9.00   1st Qu.:2011-04-20   Class :character  
##  Mode  :character   Median :  21.00   Median :2017-08-20   Mode  :character  
##                     Mean   :  83.42   Mean   :2017-08-09                     
##                     3rd Qu.:  49.00   3rd Qu.:2025-04-20                     
##                     Max.   :1966.00   Max.   :2031-01-20                     
##     Sal_IMSS         Col                Mun               Estado         
##  Min.   :144.4   Length:237         Length:237         Length:237        
##  1st Qu.:180.7   Class :character   Class :character   Class :character  
##  Median :180.7   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :178.6                                                           
##  3rd Qu.:180.7                                                           
##  Max.   :500.0                                                           
##     EstCiv         
##  Length:237        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
str(bdcol1)
## 'data.frame':    113 obs. of  15 variables:
##  $ Nom_Comp  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Gen       : chr  "NICOLAS MARTINEZ DE LOERA" "MARIANA DE LEON MORENO" "JOSE LUIS HERNANDEZ CERVANTES" "MARIA CAZARES MORALES" ...
##  $ Ant       : int  67 43 73 32 57 38 55 26 27 37 ...
##  $ Puesto    : chr  "MASCULINO" "FEMENINO" "MASCULINO" "FEMENINO" ...
##  $ Dep       : chr  "01/07/2010" "01/07/2011" "22/11/2011" "30/01/2013" ...
##  $ MDO       : int  12 11 11 9 8 8 7 6 5 5 ...
##  $ SalDiario : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Col       : chr  "Supervisor de Máquin" "Supervisor de pegado" "Externo" "SUPERVISORA" ...
##  $ Mun       : chr  "Produccion Cartón MDL" "Produccion Cartón MDL" "Externo" "Produccion Cartón MC" ...
##  $ NA        : chr  "Indirecto" "Indirecto" "Indirecto" "Indirecto" ...
##  $ NA        : num  177 177 177 337 441 ...
##  $ NA        : chr  "UNIDAD LABORAL" "SANTA TERESITA" "VILLAS DE HUINALA" "PUEBLO NUEVO" ...
##  $ NA        : chr  "SAN NICOLAS DE LOS G" "APODACA" "APODACA" "APODACA" ...
##  $ Fecha_alta: Date, format: "2001-07-20" "2001-07-20" ...
##  $ edad      : num  0 0 0 0 0 0 0 0 0 0 ...
str(bdbaj1)
## 'data.frame':    237 obs. of  13 variables:
##  $ Nom       : chr  "MARIO VALDEZ ORTIZ" "ISABEL BARRIOS MENDEZ" "MARIA ELIZABETH GOMEZ HERNANDEZ" "ALONDRA ABIGAIL ESCARCIA GOMEZ" ...
##  $ Edad      : int  32 36 23 21 29 46 29 31 50 19 ...
##  $ Gen       : chr  "MASCULINO" "FEMENINO" "FEMENINO" "FEMENINO" ...
##  $ Fecha_alta: Date, format: "2009-03-20" "2009-11-20" ...
##  $ MB        : chr  "RENUNCIA VOLUNTARIA" "RENUNCIA VOLUNTARIA" "RENUNCIA VOLUNTARIA" "RENUNCIA VOLUNTARIA" ...
##  $ Días_trab : int  628 60 59 59 51 37 37 31 18 224 ...
##  $ Baja      : Date, format: "2027-11-20" "2008-01-20" ...
##  $ PuestDes  : chr  "DISEÑO" "AYUDANTE GENERAL" "AYUDANTE GENERAL" "AYUDANTE GENERAL" ...
##  $ Sal_IMSS  : num  500 152 152 152 152 ...
##  $ Col       : chr  "SAN NICOLAS DE LOS G" "COLINAS DEL AEROPÑUERTO" "PUEBLO NUEVO" "PUEBLO NUEVO" ...
##  $ Mun       : chr  "SAN NICOLAS DE LOS G" "PESQUERIA" "APODACA" "APODACA" ...
##  $ Estado    : chr  "NUEVO LEÓN" "NUEVO LEÓN" "NUEVO LEÓN" "NUEVO LEÓN" ...
##  $ EstCiv    : chr  "SOLTERO" "UNIÓN LIBRE" "CASADO" "SOLTERO" ...

Convertir variables

bdbaj1$Gen<-as.factor(bdbaj1$Gen)
bdcol1$Gen<-as.factor(bdcol1$Gen)

bdbaj1$PuestDes<-as.factor(bdbaj1$PuestDes)
bdcol1$Puesto<-as.factor(bdcol1$Puesto)

bdcol1$Dep<-as.factor(bdcol1$Dep)

bdbaj1$Mun<-as.factor(bdbaj1$Mun)
bdcol1$Mun<-as.factor(bdcol1$Mun)

bdbaj1$Estado<-as.factor(bdbaj1$Estado)

bdbaj1$EstCiv<-as.factor(bdbaj1$EstCiv)

Verificar estructura

str(bdbaj1)
## 'data.frame':    237 obs. of  13 variables:
##  $ Nom       : chr  "MARIO VALDEZ ORTIZ" "ISABEL BARRIOS MENDEZ" "MARIA ELIZABETH GOMEZ HERNANDEZ" "ALONDRA ABIGAIL ESCARCIA GOMEZ" ...
##  $ Edad      : int  32 36 23 21 29 46 29 31 50 19 ...
##  $ Gen       : Factor w/ 2 levels "FEMENINO","MASCULINO": 2 1 1 1 1 1 1 2 2 2 ...
##  $ Fecha_alta: Date, format: "2009-03-20" "2009-11-20" ...
##  $ MB        : chr  "RENUNCIA VOLUNTARIA" "RENUNCIA VOLUNTARIA" "RENUNCIA VOLUNTARIA" "RENUNCIA VOLUNTARIA" ...
##  $ Días_trab : int  628 60 59 59 51 37 37 31 18 224 ...
##  $ Baja      : Date, format: "2027-11-20" "2008-01-20" ...
##  $ PuestDes  : Factor w/ 31 levels "ANALISTA DE NOMINAS /AUX DE R.H.",..: 15 9 9 9 9 9 9 9 9 4 ...
##  $ Sal_IMSS  : num  500 152 152 152 152 ...
##  $ Col       : chr  "SAN NICOLAS DE LOS G" "COLINAS DEL AEROPÑUERTO" "PUEBLO NUEVO" "PUEBLO NUEVO" ...
##  $ Mun       : Factor w/ 13 levels "APODACA","CADEREYTA",..: 10 7 1 1 1 1 1 5 4 1 ...
##  $ Estado    : Factor w/ 3 levels "COAHUILA","NUEVO LEÓN",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ EstCiv    : Factor w/ 5 levels "CASADO","DIVORCIADO",..: 3 5 1 3 3 3 5 5 3 3 ...
str(bdcol1)
## 'data.frame':    113 obs. of  15 variables:
##  $ Nom_Comp  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Gen       : Factor w/ 113 levels "ADELAIDA MENDOZA NAVARRO",..: 92 78 55 71 111 42 11 96 43 110 ...
##  $ Ant       : int  67 43 73 32 57 38 55 26 27 37 ...
##  $ Puesto    : Factor w/ 2 levels "FEMENINO","MASCULINO": 2 1 2 1 1 2 1 2 2 1 ...
##  $ Dep       : Factor w/ 93 levels "01/06/2022","01/07/2010",..: 2 3 69 91 18 10 25 75 40 63 ...
##  $ MDO       : int  12 11 11 9 8 8 7 6 5 5 ...
##  $ SalDiario : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Col       : chr  "Supervisor de Máquin" "Supervisor de pegado" "Externo" "SUPERVISORA" ...
##  $ Mun       : Factor w/ 22 levels "","Ay.flexo",..: 18 18 13 17 8 4 8 19 4 10 ...
##  $ NA        : chr  "Indirecto" "Indirecto" "Indirecto" "Indirecto" ...
##  $ NA        : num  177 177 177 337 441 ...
##  $ NA        : chr  "UNIDAD LABORAL" "SANTA TERESITA" "VILLAS DE HUINALA" "PUEBLO NUEVO" ...
##  $ NA        : chr  "SAN NICOLAS DE LOS G" "APODACA" "APODACA" "APODACA" ...
##  $ Fecha_alta: Date, format: "2001-07-20" "2001-07-20" ...
##  $ edad      : num  0 0 0 0 0 0 0 0 0 0 ...
summary(bdbaj1)
##      Nom                 Edad              Gen        Fecha_alta        
##  Length:237         Min.   : 0.00   FEMENINO :140   Min.   :2001-02-20  
##  Class :character   1st Qu.:23.00   MASCULINO: 97   1st Qu.:2010-06-20  
##  Mode  :character   Median :29.00                   Median :2015-06-20  
##                     Mean   :30.52                   Mean   :2015-11-07  
##                     3rd Qu.:37.00                   3rd Qu.:2021-12-20  
##                     Max.   :61.00                   Max.   :2031-05-20  
##                                                                         
##       MB              Días_trab            Baja           
##  Length:237         Min.   :   0.00   Min.   :2001-02-20  
##  Class :character   1st Qu.:   9.00   1st Qu.:2011-04-20  
##  Mode  :character   Median :  21.00   Median :2017-08-20  
##                     Mean   :  83.42   Mean   :2017-08-09  
##                     3rd Qu.:  49.00   3rd Qu.:2025-04-20  
##                     Max.   :1966.00   Max.   :2031-01-20  
##                                                           
##                   PuestDes      Sal_IMSS         Col           
##  AYUDANTE GENERAL     :173   Min.   :144.4   Length:237        
##  SOLDADOR             : 11   1st Qu.:180.7   Class :character  
##  COSTURERA            : 10   Median :180.7   Mode  :character  
##  MONTACARGUISTA       :  5   Mean   :178.6                     
##  AY. GENERAL          :  4   3rd Qu.:180.7                     
##  AUXILIAR DE EMBARQUES:  3   Max.   :500.0                     
##  (Other)              : 31                                     
##                        Mun             Estado            EstCiv   
##  APODACA                 :162   COAHUILA  :  9   CASADO     : 64  
##  PESQUERIA               : 32   NUEVO LEÓN:227   DIVORCIADO :  3  
##  JUAREZ                  : 15   SALTILLO  :  1   SOLTERO    :110  
##  GUADALUPE               : 10                    Unión libre:  1  
##  RAMOS ARIZPE            :  8                    UNIÓN LIBRE: 59  
##  SAN NICOLAS DE LOS GARZA:  3                                     
##  (Other)                 :  7
summary(bdcol1)
##     Nom_Comp                               Gen           Ant       
##  Min.   :  1.00   ADELAIDA MENDOZA NAVARRO   :  1   Min.   :18.00  
##  1st Qu.: 31.00   ADRIANA BADILLO LOZANO     :  1   1st Qu.:26.00  
##  Median : 63.00   ADRIANA IRENE ZAPATA GARCIA:  1   Median :34.00  
##  Mean   : 75.86   ADRIANA PADILLO CASTILLO   :  1   Mean   :36.07  
##  3rd Qu.:127.00   ALFREDO HERNANDEZ PASCUAL  :  1   3rd Qu.:45.00  
##  Max.   :169.00   ALMA DELIA LARA CAMPOS     :  1   Max.   :73.00  
##                   (Other)                    :107                  
##        Puesto           Dep          MDO           SalDiario     
##  FEMENINO :61   14/06/2022: 4   Min.   : 0.000   Min.   :0.0000  
##  MASCULINO:52   03/08/2022: 3   1st Qu.: 0.000   1st Qu.:0.0000  
##                 23/08/2022: 3   Median : 0.000   Median :0.0000  
##                 01/06/2022: 2   Mean   : 1.425   Mean   :0.3451  
##                 02/08/2022: 2   3rd Qu.: 2.000   3rd Qu.:0.0000  
##                 03/11/2020: 2   Max.   :12.000   Max.   :3.0000  
##                 (Other)   :97                                    
##      Col                               Mun          NA           
##  Length:113                              :40   Length:113        
##  Class :character   Producción Retorn    :10   Class :character  
##  Mode  :character   Costura              : 7   Mode  :character  
##                     Produccion Cartón MDL: 7                     
##                     Stabilus             : 7                     
##                     Cedis                : 6                     
##                     (Other)              :36                     
##        NA             NA                 NA              Fecha_alta        
##  Min.   :144.4   Length:113         Length:113         Min.   :2001-06-20  
##  1st Qu.:176.7   Class :character   Class :character   1st Qu.:2006-07-20  
##  Median :180.7   Mode  :character   Mode  :character   Median :2014-04-20  
##  Mean   :181.4                                         Mean   :2014-06-16  
##  3rd Qu.:180.7                                         3rd Qu.:2022-11-20  
##  Max.   :441.4                                         Max.   :2030-07-20  
##                                                                            
##       edad  
##  Min.   :0  
##  1st Qu.:0  
##  Median :0  
##  Mean   :0  
##  3rd Qu.:0  
##  Max.   :0  
## 

Verificar visualzación de datos

tapply(bdbaj1$Sal_IMSS,
       list(bdbaj1$Gen,bdbaj1$EstCiv), mean)
##             CASADO DIVORCIADO  SOLTERO Unión libre UNIÓN LIBRE
## FEMENINO  176.6727     180.68 178.5836          NA    175.7823
## MASCULINO 180.2840     180.68 182.6171      176.72    176.5513

Reemplazar “outlier” de salario diario con la mediana

bdbaj1$Sal_IMSS<-replace(bdbaj1$Sal_IMSS,bdbaj1$Sal_IMSS>1000000,181)

Gráficos

tapply(bdbaj1$Sal_IMSS,
       list(bdbaj1$Gen,bdbaj1$EstCiv), mean)
##             CASADO DIVORCIADO  SOLTERO Unión libre UNIÓN LIBRE
## FEMENINO  176.6727     180.68 178.5836          NA    175.7823
## MASCULINO 180.2840     180.68 182.6171      176.72    176.5513
hist(bdbaj1$Edad, freq=TRUE, col='Darkblue', main="Histograma Edad",xlab="Edad en Años")

Dentro de este histograma se analizó la edad y la frecuencia que tiene. En este caso se puede observar que la edad que se ve mas presente dentro de la empresa es la de 20-30 años.

ggplot(bdbaj1, aes(Gen,Días_trab,fill=Gen)) +                                    
  geom_bar(stat = "identity") +
  scale_fill_brewer(palette = "Set3") + ggtitle(" Días trabajadospor Genero")

En este gráfico se puede observar como los hombres cuentan con más días trabajados que las mujeres al momento de ser dados de baja de la empresa.

ggplot(bdbaj1, aes(x=Gen, y=Sal_IMSS, fill=Gen)) + 
  geom_bar(stat="identity") + 
  facet_grid(~EstCiv) + scale_fill_brewer(palette = "Set3")

Dentro de este gráfico se puede observar como en todos los rubros las mujeres ganan mas de su salario del IMSS que los hombres.