# Contexto: La clave del éxito en cualquier organización es atraer y retener a los mejores talentos.

Education 1 ‘Below College’ 2 ‘College’ 3 ‘Bachelor’ 4 ‘Master’ 5 ‘Doctor’

EnvironmentSatisfaction 1 ‘Low’ 2 ‘Medium’ 3 ‘High’ 4 ‘Very High’

JobInvolvement 1 ‘Low’ 2 ‘Medium’ 3 ‘High’ 4 ‘Very High’

JobSatisfaction 1 ‘Low’ 2 ‘Medium’ 3 ‘High’ 4 ‘Very High’

PerformanceRating 1 ‘Low’ 2 ‘Good’ 3 ‘Excellent’ 4 ‘Outstanding’

RelationshipSatisfaction 1 ‘Low’ 2 ‘Medium’ 3 ‘High’ 4 ‘Very High’

WorkLifeBalance 1 ‘Bad’ 2 ‘Good’ 3 ‘Better’ 4 ‘Best’

# Set the working directory that holds the HR datasets.
# NOTE(review): this absolute path is machine-specific; it is kept because later
# steps may rely on the working directory — consider a project-relative path.
setwd("G:/TRABAJO/DOCENCIA/KONRAND LORENZ/HR_datasets")
# Load the attrition dataset: semicolon-separated fields, "." as decimal mark.
# fileEncoding = "UTF-8-BOM" strips the byte-order mark at the start of the
# file; without it the first header is read as "ï..CC" instead of "CC".
emp_att <- read.csv2(
  "HR-Employee-Attrition.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  fileEncoding = "UTF-8-BOM"
)
# Preview the first five rows.
head(emp_att, 5)
##   ï..CC Age    BusinessTravel DailyRate           Department DistanceFromHome
## 1  7001  41     Travel_Rarely      1102                Sales               10
## 2  7002  49 Travel_Frequently       279 Research_Development               11
## 3  7003  26     Travel_Rarely      1050 Research_Development                5
## 4  7004  33 Travel_Frequently      1392 Research_Development               10
## 5  7005  27     Travel_Rarely       591 Research_Development                9
##   Education EducationField EnvironmentSatisfaction Gender HourlyRate
## 1         2  Life_Sciences                       2 Female         48
## 2         1  Life_Sciences                       3   Male         68
## 3         2          Other                       4   Male         96
## 4         4  Life_Sciences                       4 Female         33
## 5         1        Medical                       1   Male         35
##   JobInvolvement JobLevel               JobRole JobSatisfaction MaritalStatus
## 1              3        2       Sales_Executive               4        Single
## 2              2        2    Research_Scientist               2       Married
## 3              2        1 Laboratory_Technician               3        Single
## 4              3        1    Research_Scientist               3       Married
## 5              3        1 Laboratory_Technician               2       Married
##   MonthlyIncome MonthlyRate NumCompaniesWorked OverTime PercentSalaryHike
## 1          5993       19479                  8        1                11
## 2          5130       24907                  1        0                23
## 3          2090        2396                  6        1                15
## 4          2909       23159                  1        1                11
## 5          3468       16632                  9        0                12
##   PerformanceRating RelationshipSatisfaction StandardHours StockOptionLevel
## 1                 3                        1            80                0
## 2                 4                        4            80                1
## 3                 3                        2            80                0
## 4                 3                        3            80                0
## 5                 3                        4            80                1
##   TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## 1                 8                     0               1              6
## 2                10                     3               3             10
## 3                 7                     3               3              0
## 4                 8                     3               3              8
## 5                 6                     3               3              2
##   YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager Attrition
## 1                  4                       0                    5         1
## 2                  7                       1                    7         0
## 3                  0                       0                    0         1
## 4                  7                       3                    0         0
## 5                  2                       2                    2         0
#Cargar librerias
library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 4.1.2
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.1.2
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(FactoInvestigate)
## Warning: package 'FactoInvestigate' was built under R version 4.1.2
library(ggdendro)
## Warning: package 'ggdendro' was built under R version 4.1.2
library(ggplot2)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
library(fastDummies)
library(readxl)
# Count missing values per column. vapply() pins the result to one integer per
# column, whereas sapply() silently returns a list for a zero-column input.
vapply(emp_att, function(column) sum(is.na(column)), FUN.VALUE = integer(1))
##                    ï..CC                      Age           BusinessTravel 
##                        0                        0                        0 
##                DailyRate               Department         DistanceFromHome 
##                        0                        0                        0 
##                Education           EducationField  EnvironmentSatisfaction 
##                        0                        0                        0 
##                   Gender               HourlyRate           JobInvolvement 
##                        0                        0                        0 
##                 JobLevel                  JobRole          JobSatisfaction 
##                        0                        0                        0 
##            MaritalStatus            MonthlyIncome              MonthlyRate 
##                        0                        0                        0 
##       NumCompaniesWorked                 OverTime        PercentSalaryHike 
##                        0                        0                        0 
##        PerformanceRating RelationshipSatisfaction            StandardHours 
##                        0                        0                        0 
##         StockOptionLevel        TotalWorkingYears    TrainingTimesLastYear 
##                        0                        0                        0 
##          WorkLifeBalance           YearsAtCompany       YearsInCurrentRole 
##                        0                        0                        0 
##  YearsSinceLastPromotion     YearsWithCurrManager                Attrition 
##                        0                        0                        0
# Initial exploration of the continuous variables: one histogram per variable
# on a 3 x 3 plotting grid.
par(mfrow = c(3, 3))
invisible(Map(
  function(column, title) {
    # xlab reproduces the default label hist() derives from `emp_att$<column>`.
    hist(emp_att[[column]], main = title, xlab = paste0("emp_att$", column))
  },
  c("Age", "DistanceFromHome", "MonthlyIncome",
    "YearsAtCompany", "TotalWorkingYears", "YearsInCurrentRole"),
  c("Edad", "Distancia hasta la casa", "Ingreso Mensual",
    "Anos en la compania", "Anos Trabajados", "Anos en el actual rol")
))

# Quartiles, mean and range of employee age.
summary(emp_att[["Age"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   27.00   30.50   33.79   39.00   60.00
# Quartiles, mean and range of total working years.
summary(emp_att[["TotalWorkingYears"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    6.00   10.00   11.28   15.00   40.00
# Side-by-side boxplots of age and total working years.
par(mfrow = c(1, 2))
invisible(Map(
  function(column, title) boxplot(emp_att[[column]], main = title),
  c("Age", "TotalWorkingYears"),
  c("Edad", "Anos Trabajados")
))

# Frequency counts for the qualitative (categorical) variables.
# Number of employees in each BusinessTravel category.
q1 <- sqldf('SELECT BusinessTravel, COUNT(BusinessTravel) as CONTEO
          FROM emp_att
          GROUP BY BusinessTravel
         ')
print(q1)
##      BusinessTravel CONTEO
## 1        Non_Travel    150
## 2 Travel_Frequently    277
## 3     Travel_Rarely   1043
# Number of employees at each JobInvolvement level.
q2 <- sqldf('SELECT JobInvolvement, COUNT(JobInvolvement) as CONTEO
          FROM emp_att
          GROUP BY JobInvolvement
         ')
print(q2)
##   JobInvolvement CONTEO
## 1              1     83
## 2              2    375
## 3              3    868
## 4              4    144
#JobInvolvement
#1 'Low' 2 'Medium' 3 'High' 4 'Very High'

Se deben convertir las variables cualitativas en variables dummy.

# One-hot encode the categorical columns; dummy_cols() appends one 0/1
# indicator column per category level and keeps the original columns.
emp_att <- dummy_cols(
  emp_att,
  select_columns = c(
    "BusinessTravel", "Department", "EducationField",
    "Gender", "JobRole", "MaritalStatus"
  )
)
# List the column names, including the newly added indicators.
names(emp_att)
##  [1] "ï..CC"                             "Age"                              
##  [3] "BusinessTravel"                    "DailyRate"                        
##  [5] "Department"                        "DistanceFromHome"                 
##  [7] "Education"                         "EducationField"                   
##  [9] "EnvironmentSatisfaction"           "Gender"                           
## [11] "HourlyRate"                        "JobInvolvement"                   
## [13] "JobLevel"                          "JobRole"                          
## [15] "JobSatisfaction"                   "MaritalStatus"                    
## [17] "MonthlyIncome"                     "MonthlyRate"                      
## [19] "NumCompaniesWorked"                "OverTime"                         
## [21] "PercentSalaryHike"                 "PerformanceRating"                
## [23] "RelationshipSatisfaction"          "StandardHours"                    
## [25] "StockOptionLevel"                  "TotalWorkingYears"                
## [27] "TrainingTimesLastYear"             "WorkLifeBalance"                  
## [29] "YearsAtCompany"                    "YearsInCurrentRole"               
## [31] "YearsSinceLastPromotion"           "YearsWithCurrManager"             
## [33] "Attrition"                         "BusinessTravel_Non_Travel"        
## [35] "BusinessTravel_Travel_Frequently"  "BusinessTravel_Travel_Rarely"     
## [37] "Department_Human_Resources"        "Department_Research_Development"  
## [39] "Department_Sales"                  "EducationField_Human_Resources"   
## [41] "EducationField_Life_Sciences"      "EducationField_Marketing"         
## [43] "EducationField_Medical"            "EducationField_Other"             
## [45] "EducationField_Technical_Degree"   "Gender_Female"                    
## [47] "Gender_Male"                       "JobRole_Healthcare_Representative"
## [49] "JobRole_Human_Resources"           "JobRole_Laboratory_Technician"    
## [51] "JobRole_Manager"                   "JobRole_Manufacturing_Director"   
## [53] "JobRole_Research_Director"         "JobRole_Research_Scientist"       
## [55] "JobRole_Sales_Executive"           "JobRole_Sales_Representative"     
## [57] "MaritalStatus_Divorced"            "MaritalStatus_Married"            
## [59] "MaritalStatus_Single"
# Preview the first six rows, now including the indicator columns.
head(emp_att, n = 6L)
##   ï..CC Age    BusinessTravel DailyRate           Department DistanceFromHome
## 1  7001  41     Travel_Rarely      1102                Sales               10
## 2  7002  49 Travel_Frequently       279 Research_Development               11
## 3  7003  26     Travel_Rarely      1050 Research_Development                5
## 4  7004  33 Travel_Frequently      1392 Research_Development               10
## 5  7005  27     Travel_Rarely       591 Research_Development                9
## 6  7006  28 Travel_Frequently      1003 Research_Development                5
##   Education EducationField EnvironmentSatisfaction Gender HourlyRate
## 1         2  Life_Sciences                       2 Female         48
## 2         1  Life_Sciences                       3   Male         68
## 3         2          Other                       4   Male         96
## 4         4  Life_Sciences                       4 Female         33
## 5         1        Medical                       1   Male         35
## 6         2  Life_Sciences                       4   Male        144
##   JobInvolvement JobLevel               JobRole JobSatisfaction MaritalStatus
## 1              3        2       Sales_Executive               4        Single
## 2              2        2    Research_Scientist               2       Married
## 3              2        1 Laboratory_Technician               3        Single
## 4              3        1    Research_Scientist               3       Married
## 5              3        1 Laboratory_Technician               2       Married
## 6              3        1 Laboratory_Technician               4        Single
##   MonthlyIncome MonthlyRate NumCompaniesWorked OverTime PercentSalaryHike
## 1          5993       19479                  8        1                11
## 2          5130       24907                  1        0                23
## 3          2090        2396                  6        1                15
## 4          2909       23159                  1        1                11
## 5          3468       16632                  9        0                12
## 6          3068       11864                  0        0                13
##   PerformanceRating RelationshipSatisfaction StandardHours StockOptionLevel
## 1                 3                        1            80                0
## 2                 4                        4            80                1
## 3                 3                        2            80                0
## 4                 3                        3            80                0
## 5                 3                        4            80                1
## 6                 3                        3            80                0
##   TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## 1                 8                     0               1              6
## 2                10                     3               3             10
## 3                 7                     3               3              0
## 4                 8                     3               3              8
## 5                 6                     3               3              2
## 6                 8                     2               2              7
##   YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager Attrition
## 1                  4                       0                    5         1
## 2                  7                       1                    7         0
## 3                  0                       0                    0         1
## 4                  7                       3                    0         0
## 5                  2                       2                    2         0
## 6                  7                       3                    6         0
##   BusinessTravel_Non_Travel BusinessTravel_Travel_Frequently
## 1                         0                                0
## 2                         0                                1
## 3                         0                                0
## 4                         0                                1
## 5                         0                                0
## 6                         0                                1
##   BusinessTravel_Travel_Rarely Department_Human_Resources
## 1                            1                          0
## 2                            0                          0
## 3                            1                          0
## 4                            0                          0
## 5                            1                          0
## 6                            0                          0
##   Department_Research_Development Department_Sales
## 1                               0                1
## 2                               1                0
## 3                               1                0
## 4                               1                0
## 5                               1                0
## 6                               1                0
##   EducationField_Human_Resources EducationField_Life_Sciences
## 1                              0                            1
## 2                              0                            1
## 3                              0                            0
## 4                              0                            1
## 5                              0                            0
## 6                              0                            1
##   EducationField_Marketing EducationField_Medical EducationField_Other
## 1                        0                      0                    0
## 2                        0                      0                    0
## 3                        0                      0                    1
## 4                        0                      0                    0
## 5                        0                      1                    0
## 6                        0                      0                    0
##   EducationField_Technical_Degree Gender_Female Gender_Male
## 1                               0             1           0
## 2                               0             0           1
## 3                               0             0           1
## 4                               0             1           0
## 5                               0             0           1
## 6                               0             0           1
##   JobRole_Healthcare_Representative JobRole_Human_Resources
## 1                                 0                       0
## 2                                 0                       0
## 3                                 0                       0
## 4                                 0                       0
## 5                                 0                       0
## 6                                 0                       0
##   JobRole_Laboratory_Technician JobRole_Manager JobRole_Manufacturing_Director
## 1                             0               0                              0
## 2                             0               0                              0
## 3                             1               0                              0
## 4                             0               0                              0
## 5                             1               0                              0
## 6                             1               0                              0
##   JobRole_Research_Director JobRole_Research_Scientist JobRole_Sales_Executive
## 1                         0                          0                       1
## 2                         0                          1                       0
## 3                         0                          0                       0
## 4                         0                          1                       0
## 5                         0                          0                       0
## 6                         0                          0                       0
##   JobRole_Sales_Representative MaritalStatus_Divorced MaritalStatus_Married
## 1                            0                      0                     0
## 2                            0                      0                     1
## 3                            0                      0                     0
## 4                            0                      0                     1
## 5                            0                      0                     1
## 6                            0                      0                     0
##   MaritalStatus_Single
## 1                    1
## 2                    0
## 3                    1
## 4                    0
## 5                    0
## 6                    1

Consolidamos los datos para el análisis de clasificación.

# Build the analysis table: numeric and ordinal columns plus the dummy
# indicators, leaving out Attrition (kept aside for the later classification).
# One indicator per categorical variable is left out of the list
# (e.g. Gender_Male, Department_Sales).
# NOTE(review): RelationshipSatisfaction and StandardHours are not selected
# either — confirm that omitting RelationshipSatisfaction is intentional.

data_hr <- sqldf('SELECT Age,DailyRate,DistanceFromHome,Education,EnvironmentSatisfaction,
                      HourlyRate,JobInvolvement,JobLevel,JobSatisfaction,MonthlyIncome,
                      MonthlyRate,NumCompaniesWorked,OverTime,PercentSalaryHike,
                      PerformanceRating,StockOptionLevel,TotalWorkingYears,
                      TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,
                      YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,
                      BusinessTravel_Travel_Frequently,BusinessTravel_Travel_Rarely,
                      Department_Human_Resources,Department_Research_Development,
                      EducationField_Human_Resources,EducationField_Life_Sciences,
                      EducationField_Marketing,EducationField_Medical,EducationField_Other,
                      Gender_Female,JobRole_Healthcare_Representative,JobRole_Human_Resources,
                      JobRole_Laboratory_Technician,JobRole_Manager,JobRole_Manufacturing_Director,
                      JobRole_Research_Director,JobRole_Research_Scientist,JobRole_Sales_Representative,
                      MaritalStatus_Married,MaritalStatus_Single
              FROM emp_att
              ')
# Preview the first three rows of the analysis table.
head(data_hr, n = 3L)
##   Age DailyRate DistanceFromHome Education EnvironmentSatisfaction HourlyRate
## 1  41      1102               10         2                       2         48
## 2  49       279               11         1                       3         68
## 3  26      1050                5         2                       4         96
##   JobInvolvement JobLevel JobSatisfaction MonthlyIncome MonthlyRate
## 1              3        2               4          5993       19479
## 2              2        2               2          5130       24907
## 3              2        1               3          2090        2396
##   NumCompaniesWorked OverTime PercentSalaryHike PerformanceRating
## 1                  8        1                11                 3
## 2                  1        0                23                 4
## 3                  6        1                15                 3
##   StockOptionLevel TotalWorkingYears TrainingTimesLastYear WorkLifeBalance
## 1                0                 8                     0               1
## 2                1                10                     3               3
## 3                0                 7                     3               3
##   YearsAtCompany YearsInCurrentRole YearsSinceLastPromotion
## 1              6                  4                       0
## 2             10                  7                       1
## 3              0                  0                       0
##   YearsWithCurrManager BusinessTravel_Travel_Frequently
## 1                    5                                0
## 2                    7                                1
## 3                    0                                0
##   BusinessTravel_Travel_Rarely Department_Human_Resources
## 1                            1                          0
## 2                            0                          0
## 3                            1                          0
##   Department_Research_Development EducationField_Human_Resources
## 1                               0                              0
## 2                               1                              0
## 3                               1                              0
##   EducationField_Life_Sciences EducationField_Marketing EducationField_Medical
## 1                            1                        0                      0
## 2                            1                        0                      0
## 3                            0                        0                      0
##   EducationField_Other Gender_Female JobRole_Healthcare_Representative
## 1                    0             1                                 0
## 2                    0             0                                 0
## 3                    1             0                                 0
##   JobRole_Human_Resources JobRole_Laboratory_Technician JobRole_Manager
## 1                       0                             0               0
## 2                       0                             0               0
## 3                       0                             1               0
##   JobRole_Manufacturing_Director JobRole_Research_Director
## 1                              0                         0
## 2                              0                         0
## 3                              0                         0
##   JobRole_Research_Scientist JobRole_Sales_Representative MaritalStatus_Married
## 1                          0                            0                     0
## 2                          1                            0                     1
## 3                          0                            0                     0
##   MaritalStatus_Single
## 1                    1
## 2                    0
## 3                    1
# Per-column descriptive statistics of the analysis table.
# NOTE(review): the recorded output reports a minimum HourlyRate of -5;
# a negative rate looks like a data-entry error — verify before modelling.
print(summary(data_hr))
##       Age          DailyRate      DistanceFromHome   Education    
##  Min.   :18.00   Min.   : 102.0   Min.   : 1.000   Min.   :1.000  
##  1st Qu.:27.00   1st Qu.: 697.0   1st Qu.: 5.000   1st Qu.:2.000  
##  Median :30.50   Median : 974.5   Median : 7.000   Median :3.000  
##  Mean   :33.79   Mean   : 900.0   Mean   : 7.274   Mean   :2.913  
##  3rd Qu.:39.00   3rd Qu.:1119.8   3rd Qu.: 9.000   3rd Qu.:4.000  
##  Max.   :60.00   Max.   :1496.0   Max.   :29.000   Max.   :5.000  
##  EnvironmentSatisfaction   HourlyRate     JobInvolvement    JobLevel    
##  Min.   :1.000           Min.   : -5.00   Min.   :1.00   Min.   :1.000  
##  1st Qu.:2.000           1st Qu.: 47.00   1st Qu.:2.00   1st Qu.:1.000  
##  Median :3.000           Median : 69.00   Median :3.00   Median :2.000  
##  Mean   :2.722           Mean   : 77.05   Mean   :2.73   Mean   :2.064  
##  3rd Qu.:4.000           3rd Qu.:106.00   3rd Qu.:3.00   3rd Qu.:3.000  
##  Max.   :4.000           Max.   :184.00   Max.   :4.00   Max.   :5.000  
##  JobSatisfaction MonthlyIncome    MonthlyRate    NumCompaniesWorked
##  Min.   :1.000   Min.   : 1009   Min.   : 2094   Min.   :0.000     
##  1st Qu.:2.000   1st Qu.: 2911   1st Qu.: 8047   1st Qu.:1.000     
##  Median :3.000   Median : 4919   Median :14236   Median :2.000     
##  Mean   :2.729   Mean   : 6503   Mean   :14313   Mean   :2.693     
##  3rd Qu.:4.000   3rd Qu.: 8379   3rd Qu.:20462   3rd Qu.:4.000     
##  Max.   :4.000   Max.   :19999   Max.   :26999   Max.   :9.000     
##     OverTime     PercentSalaryHike PerformanceRating StockOptionLevel
##  Min.   :0.000   Min.   :11.00     Min.   :3.000     Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:12.00     1st Qu.:3.000     1st Qu.:0.0000  
##  Median :0.000   Median :14.00     Median :3.000     Median :1.0000  
##  Mean   :0.283   Mean   :15.21     Mean   :3.154     Mean   :0.7939  
##  3rd Qu.:1.000   3rd Qu.:18.00     3rd Qu.:3.000     3rd Qu.:1.0000  
##  Max.   :1.000   Max.   :25.00     Max.   :4.000     Max.   :3.0000  
##  TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany  
##  Min.   : 0.00     Min.   :0.000         Min.   :1.000   Min.   : 0.000  
##  1st Qu.: 6.00     1st Qu.:2.000         1st Qu.:2.000   1st Qu.: 3.000  
##  Median :10.00     Median :3.000         Median :3.000   Median : 5.000  
##  Mean   :11.28     Mean   :2.799         Mean   :2.761   Mean   : 7.008  
##  3rd Qu.:15.00     3rd Qu.:3.000         3rd Qu.:3.000   3rd Qu.: 9.000  
##  Max.   :40.00     Max.   :6.000         Max.   :4.000   Max.   :40.000  
##  YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager
##  Min.   : 0.000     Min.   : 0.000          Min.   : 0.000      
##  1st Qu.: 2.000     1st Qu.: 0.000          1st Qu.: 2.000      
##  Median : 3.000     Median : 1.000          Median : 3.000      
##  Mean   : 4.229     Mean   : 2.188          Mean   : 4.123      
##  3rd Qu.: 7.000     3rd Qu.: 3.000          3rd Qu.: 7.000      
##  Max.   :18.000     Max.   :15.000          Max.   :17.000      
##  BusinessTravel_Travel_Frequently BusinessTravel_Travel_Rarely
##  Min.   :0.0000                   Min.   :0.0000              
##  1st Qu.:0.0000                   1st Qu.:0.0000              
##  Median :0.0000                   Median :1.0000              
##  Mean   :0.1884                   Mean   :0.7095              
##  3rd Qu.:0.0000                   3rd Qu.:1.0000              
##  Max.   :1.0000                   Max.   :1.0000              
##  Department_Human_Resources Department_Research_Development
##  Min.   :0.00000            Min.   :0.0000                 
##  1st Qu.:0.00000            1st Qu.:0.0000                 
##  Median :0.00000            Median :1.0000                 
##  Mean   :0.04286            Mean   :0.6537                 
##  3rd Qu.:0.00000            3rd Qu.:1.0000                 
##  Max.   :1.00000            Max.   :1.0000                 
##  EducationField_Human_Resources EducationField_Life_Sciences
##  Min.   :0.00000                Min.   :0.0000              
##  1st Qu.:0.00000                1st Qu.:0.0000              
##  Median :0.00000                Median :0.0000              
##  Mean   :0.01837                Mean   :0.4122              
##  3rd Qu.:0.00000                3rd Qu.:1.0000              
##  Max.   :1.00000                Max.   :1.0000              
##  EducationField_Marketing EducationField_Medical EducationField_Other
##  Min.   :0.0000           Min.   :0.0000         Min.   :0.00000     
##  1st Qu.:0.0000           1st Qu.:0.0000         1st Qu.:0.00000     
##  Median :0.0000           Median :0.0000         Median :0.00000     
##  Mean   :0.1082           Mean   :0.3156         Mean   :0.05578     
##  3rd Qu.:0.0000           3rd Qu.:1.0000         3rd Qu.:0.00000     
##  Max.   :1.0000           Max.   :1.0000         Max.   :1.00000     
##  Gender_Female JobRole_Healthcare_Representative JobRole_Human_Resources
##  Min.   :0.0   Min.   :0.00000                   Min.   :0.00000        
##  1st Qu.:0.0   1st Qu.:0.00000                   1st Qu.:0.00000        
##  Median :0.0   Median :0.00000                   Median :0.00000        
##  Mean   :0.4   Mean   :0.08912                   Mean   :0.03537        
##  3rd Qu.:1.0   3rd Qu.:0.00000                   3rd Qu.:0.00000        
##  Max.   :1.0   Max.   :1.00000                   Max.   :1.00000        
##  JobRole_Laboratory_Technician JobRole_Manager   JobRole_Manufacturing_Director
##  Min.   :0.0000                Min.   :0.00000   Min.   :0.00000               
##  1st Qu.:0.0000                1st Qu.:0.00000   1st Qu.:0.00000               
##  Median :0.0000                Median :0.00000   Median :0.00000               
##  Mean   :0.1762                Mean   :0.06939   Mean   :0.09864               
##  3rd Qu.:0.0000                3rd Qu.:0.00000   3rd Qu.:0.00000               
##  Max.   :1.0000                Max.   :1.00000   Max.   :1.00000               
##  JobRole_Research_Director JobRole_Research_Scientist
##  Min.   :0.00000           Min.   :0.0000            
##  1st Qu.:0.00000           1st Qu.:0.0000            
##  Median :0.00000           Median :0.0000            
##  Mean   :0.05442           Mean   :0.1986            
##  3rd Qu.:0.00000           3rd Qu.:0.0000            
##  Max.   :1.00000           Max.   :1.0000            
##  JobRole_Sales_Representative MaritalStatus_Married MaritalStatus_Single
##  Min.   :0.00000              Min.   :0.0000        Min.   :0.0000      
##  1st Qu.:0.00000              1st Qu.:0.0000        1st Qu.:0.0000      
##  Median :0.00000              Median :0.0000        Median :0.0000      
##  Mean   :0.05646              Mean   :0.4578        Mean   :0.3197      
##  3rd Qu.:0.00000              3rd Qu.:1.0000        3rd Qu.:1.0000      
##  Max.   :1.00000              Max.   :1.0000        Max.   :1.0000
# Possible groupings: heat map of the pairwise Euclidean distances between
# employees. Columns are standardised first so that large-magnitude variables
# (e.g. MonthlyRate, MonthlyIncome) do not dominate the distances; this matches
# the scaled PCA below.
distMatrix <- as.matrix(dist(scale(data_hr)))
heatmap(distMatrix)

# Apply PCA on the standardised variables.
# prcomp() names this argument `scale.` (with a trailing dot). It is spelled
# out in full here instead of relying on partial argument matching of `scale`,
# which only works by accident of how R matches argument names.
pca.data_hr <- prcomp(data_hr, scale. = TRUE)
# Standard deviation and (cumulative) proportion of variance per component.
summary(pca.data_hr)
## Importance of components:
##                           PC1     PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.3264 1.65469 1.51599 1.43853 1.37953 1.34907 1.33021
## Proportion of Variance 0.1259 0.06367 0.05345 0.04812 0.04426 0.04233 0.04115
## Cumulative Proportion  0.1259 0.18954 0.24299 0.29111 0.33537 0.37770 0.41885
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     1.30128 1.26286 1.14405 1.10643 1.09445 1.07601 1.06268
## Proportion of Variance 0.03938 0.03709 0.03044 0.02847 0.02786 0.02693 0.02626
## Cumulative Proportion  0.45823 0.49531 0.52575 0.55422 0.58208 0.60900 0.63527
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     1.04894 1.01966 1.00288 0.99856 0.98456 0.97833 0.95793
## Proportion of Variance 0.02559 0.02418 0.02339 0.02319 0.02254 0.02226 0.02134
## Cumulative Proportion  0.66085 0.68503 0.70842 0.73161 0.75416 0.77641 0.79776
##                           PC22   PC23    PC24    PC25    PC26    PC27    PC28
## Standard deviation     0.93791 0.9110 0.90507 0.87834 0.86035 0.85092 0.74455
## Proportion of Variance 0.02046 0.0193 0.01905 0.01794 0.01721 0.01684 0.01289
## Cumulative Proportion  0.81821 0.8375 0.85656 0.87451 0.89172 0.90856 0.92145
##                           PC29    PC30    PC31    PC32    PC33   PC34    PC35
## Standard deviation     0.71390 0.68924 0.61988 0.56632 0.52346 0.5078 0.47467
## Proportion of Variance 0.01185 0.01105 0.00894 0.00746 0.00637 0.0060 0.00524
## Cumulative Proportion  0.93330 0.94435 0.95329 0.96074 0.96712 0.9731 0.97835
##                          PC36    PC37    PC38    PC39   PC40    PC41    PC42
## Standard deviation     0.4635 0.44607 0.38566 0.35510 0.3346 0.28047 0.18629
## Proportion of Variance 0.0050 0.00463 0.00346 0.00293 0.0026 0.00183 0.00081
## Cumulative Proportion  0.9833 0.98798 0.99144 0.99437 0.9970 0.99880 0.99961
##                           PC43
## Standard deviation     0.12985
## Proportion of Variance 0.00039
## Cumulative Proportion  1.00000
# Second PCA fit via PCA(): unit-variance scaling, ncp = 5 components kept in
# the result object, and no automatic plotting (plots are drawn explicitly
# afterwards).
res <- PCA(
  X = data_hr,
  scale.unit = TRUE,
  ncp = 5,
  graph = FALSE
)
# List the components available inside the result object.
print(res)
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 1470 individuals, described by 43 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
# Variance explained by each component (scree plot), with the percentage
# printed on every bar and the y axis fixed to the 0-100 range.
fviz_screeplot(
  res,
  addlabels = TRUE,
  ylim = c(0, 100)
)

# Individuals plot, coloured by each individual's contribution.
# The diverging gradient runs white -> blue -> red, centred at 0.5.
fviz_pca_ind(res, col.ind = "contrib") +
  scale_color_gradient2(
    low = "white",
    mid = "blue",
    high = "red",
    midpoint = 0.5
  )

# Variables plot, coloured by each variable's contribution.
# The diverging gradient runs white -> blue -> red, centred at 20.
fviz_pca_var(res, col.var = "contrib") +
  scale_color_gradient2(
    low = "white",
    mid = "blue",
    high = "red",
    midpoint = 20
  )

# Biplot: individuals and variables drawn on the same PCA axes.
# Zoom with coord_cartesian() rather than xlim()/ylim(). The latter set scale
# limits, so every observation outside [-3, 3] is turned into a missing value
# before drawing: the previous version silently dropped 286 individuals and
# 13 variables ("Removed ... rows containing missing values" warnings).
# coord_cartesian() only changes the visible window and keeps all the data.
fviz_pca_biplot(res) +
  coord_cartesian(xlim = c(-3, 3), ylim = c(-3, 3))

# Hierarchical (agglomerative) clustering.
# - Variables are standardised before computing Euclidean distances so that
#   large-valued columns (e.g. MonthlyRate) do not dominate the
#   dissimilarities.
# - "ward.D2" replaces "ward.D": according to the hclust() documentation,
#   "ward.D" does not implement Ward's clustering criterion when it is given
#   plain (unsquared) distances, whereas "ward.D2" squares the
#   dissimilarities before updating clusters and does implement it.
dendrogram <- hclust(dist(scale(data_hr), method = "euclidean"),
                     method = "ward.D2")
ggdendrogram(dendrogram, rotate = FALSE, labels = TRUE, theme_dendro = TRUE) +
  labs(title = "Dendrograma")

# Choosing the number of clusters for k-means: elbow method.
# Computes the total within-cluster sum of squares (WCSS) for k = 1..20 and
# plots it against k.
set.seed(1234)
k_values <- seq_len(20)
# Standardise so every variable contributes comparably to the distances.
data_hr_scaled <- scale(data_hr)
# vapply() builds the result as a fixed-type numeric vector instead of growing
# it inside a loop. nstart > 1 keeps the best of several random
# initialisations for each k, so the curve is not distorted by one unlucky
# start; iter.max is raised above the default of 10 to give larger k room to
# converge. tot.withinss is the sum of the per-cluster withinss values.
wcss <- vapply(
  k_values,
  function(k) {
    fit <- kmeans(data_hr_scaled, centers = k, nstart = 10, iter.max = 50)
    fit$tot.withinss
  },
  numeric(1)
)
elbow_df <- data.frame(k = k_values, wcss = wcss)
ggplot(elbow_df, aes(x = k, y = wcss)) +
  geom_point(color = "blue") +
  geom_line(color = "blue") +
  ggtitle("Metodo del Codo") +
  xlab("Cantidad de Centroides k") +
  ylab("WCSS")

# K-means with k = 2 (best of 25 random starts).
# Clustering is run on standardised variables: on the raw data the Euclidean
# distance is dominated by the largest-valued columns. In the unscaled run
# whose output is printed below, the largest gap between the two clusters is
# on MonthlyRate (cluster means 8217 vs 20594).
# NOTE(review): the printed tables below come from that unscaled run and must
# be regenerated after this change.
set.seed(740)
km.res <- kmeans(scale(data_hr), centers = 2, nstart = 25)
# Mean of every variable per cluster, reported in the original units.
aggregate(data_hr, by = list(cluster = km.res$cluster), mean)
##   cluster      Age DailyRate DistanceFromHome Education EnvironmentSatisfaction
## 1       1 30.42493 1002.6836         6.214477  2.942359                2.675603
## 2       2 37.26519  794.1533         8.366022  2.882597                2.769337
##   HourlyRate JobInvolvement JobLevel JobSatisfaction MonthlyIncome MonthlyRate
## 1  104.21314       2.735925 2.012064        2.723861      6282.029    8217.247
## 2   49.06215       2.723757 2.117403        2.733425      6730.546   20594.193
##   NumCompaniesWorked  OverTime PercentSalaryHike PerformanceRating
## 1           2.691689 0.2707775          15.33646          3.167560
## 2           2.694751 0.2955801          15.07873          3.139503
##   StockOptionLevel TotalWorkingYears TrainingTimesLastYear WorkLifeBalance
## 1        0.8203753          11.01340              2.764075        2.769437
## 2        0.7665746          11.55387              2.835635        2.752762
##   YearsAtCompany YearsInCurrentRole YearsSinceLastPromotion
## 1       7.040214           4.313673                2.151475
## 2       6.975138           4.142265                2.225138
##   YearsWithCurrManager BusinessTravel_Travel_Frequently
## 1             4.254692                        0.1916890
## 2             3.987569                        0.1850829
##   BusinessTravel_Travel_Rarely Department_Human_Resources
## 1                    0.7104558                 0.04691689
## 2                    0.7085635                 0.03867403
##   Department_Research_Development EducationField_Human_Resources
## 1                       0.6528150                     0.01742627
## 2                       0.6546961                     0.01933702
##   EducationField_Life_Sciences EducationField_Marketing EducationField_Medical
## 1                    0.3954424                0.1112601              0.3203753
## 2                    0.4295580                0.1049724              0.3107735
##   EducationField_Other Gender_Female JobRole_Healthcare_Representative
## 1           0.06568365     0.3806971                        0.08847185
## 2           0.04558011     0.4198895                        0.08977901
##   JobRole_Human_Resources JobRole_Laboratory_Technician JobRole_Manager
## 1              0.04021448                     0.1863271      0.05898123
## 2              0.03038674                     0.1657459      0.08011050
##   JobRole_Manufacturing_Director JobRole_Research_Director
## 1                     0.09651475                0.04557641
## 2                     0.10082873                0.06353591
##   JobRole_Research_Scientist JobRole_Sales_Representative MaritalStatus_Married
## 1                  0.2064343                   0.05495979             0.4758713
## 2                  0.1906077                   0.05801105             0.4392265
##   MaritalStatus_Single
## 1            0.3016086
## 2            0.3383978
# Plot the k-means clusters: cluster centres shown, "norm" ellipses, segments
# from each centre to its points (star plot), repelled labels, small points.
# The legend is hidden and a black-and-white theme is applied.
fviz_cluster(
  object = km.res,
  data = data_hr,
  show.clust.cent = TRUE,
  ellipse.type = "norm",
  star.plot = TRUE,
  repel = TRUE,
  pointsize = 0.5,
  outlier.color = "darkred"
) +
  labs(title = "Resultados clustering K-means") +
  theme_bw() +
  theme(legend.position = "none")
## Warning: ggrepel: 1438 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Append the k-means cluster label and the attrition flag (taken from the
# original emp_att table) to the modelling data, then preview the first rows.
data_final <- cbind(
  data_hr,
  cluster = km.res$cluster,
  attrition = emp_att$Attrition
)
head(data_final, n = 10)
##    Age DailyRate DistanceFromHome Education EnvironmentSatisfaction HourlyRate
## 1   41      1102               10         2                       2         48
## 2   49       279               11         1                       3         68
## 3   26      1050                5         2                       4         96
## 4   33      1392               10         4                       4         33
## 5   27       591                9         1                       1         35
## 6   28      1003                5         2                       4        144
## 7   29      1086                5         3                       3        147
## 8   26      1058                5         1                       4        128
## 9   27       944                7         3                       4         75
## 10  36      1299                9         3                       3         45
##    JobInvolvement JobLevel JobSatisfaction MonthlyIncome MonthlyRate
## 1               3        2               4          5993       19479
## 2               2        2               2          5130       24907
## 3               2        1               3          2090        2396
## 4               3        1               3          2909       23159
## 5               3        1               2          3468       16632
## 6               3        1               4          3068       11864
## 7               4        1               1          2670        9964
## 8               3        1               3          2693       13335
## 9               2        3               3          9526        8787
## 10              3        2               3          5237       16577
##    NumCompaniesWorked OverTime PercentSalaryHike PerformanceRating
## 1                   8        1                11                 3
## 2                   1        0                23                 4
## 3                   6        1                15                 3
## 4                   1        1                11                 3
## 5                   9        0                12                 3
## 6                   0        0                13                 3
## 7                   4        1                20                 4
## 8                   1        0                22                 4
## 9                   0        0                21                 4
## 10                  6        0                13                 3
##    StockOptionLevel TotalWorkingYears TrainingTimesLastYear WorkLifeBalance
## 1                 0                 8                     0               1
## 2                 1                10                     3               3
## 3                 0                 7                     3               3
## 4                 0                 8                     3               3
## 5                 1                 6                     3               3
## 6                 0                 8                     2               2
## 7                 3                12                     3               2
## 8                 1                 1                     2               3
## 9                 0                10                     2               3
## 10                2                17                     3               2
##    YearsAtCompany YearsInCurrentRole YearsSinceLastPromotion
## 1               6                  4                       0
## 2              10                  7                       1
## 3               0                  0                       0
## 4               8                  7                       3
## 5               2                  2                       2
## 6               7                  7                       3
## 7               1                  0                       0
## 8               1                  0                       0
## 9               9                  7                       1
## 10              7                  7                       7
##    YearsWithCurrManager BusinessTravel_Travel_Frequently
## 1                     5                                0
## 2                     7                                1
## 3                     0                                0
## 4                     0                                1
## 5                     2                                0
## 6                     6                                1
## 7                     0                                0
## 8                     0                                0
## 9                     8                                1
## 10                    7                                0
##    BusinessTravel_Travel_Rarely Department_Human_Resources
## 1                             1                          0
## 2                             0                          0
## 3                             1                          0
## 4                             0                          0
## 5                             1                          0
## 6                             0                          0
## 7                             1                          0
## 8                             1                          0
## 9                             0                          0
## 10                            1                          0
##    Department_Research_Development EducationField_Human_Resources
## 1                                0                              0
## 2                                1                              0
## 3                                1                              0
## 4                                1                              0
## 5                                1                              0
## 6                                1                              0
## 7                                1                              0
## 8                                1                              0
## 9                                1                              0
## 10                               1                              0
##    EducationField_Life_Sciences EducationField_Marketing EducationField_Medical
## 1                             1                        0                      0
## 2                             1                        0                      0
## 3                             0                        0                      0
## 4                             1                        0                      0
## 5                             0                        0                      1
## 6                             1                        0                      0
## 7                             0                        0                      1
## 8                             1                        0                      0
## 9                             1                        0                      0
## 10                            0                        0                      1
##    EducationField_Other Gender_Female JobRole_Healthcare_Representative
## 1                     0             1                                 0
## 2                     0             0                                 0
## 3                     1             0                                 0
## 4                     0             1                                 0
## 5                     0             0                                 0
## 6                     0             0                                 0
## 7                     0             1                                 0
## 8                     0             0                                 0
## 9                     0             0                                 0
## 10                    0             0                                 1
##    JobRole_Human_Resources JobRole_Laboratory_Technician JobRole_Manager
## 1                        0                             0               0
## 2                        0                             0               0
## 3                        0                             1               0
## 4                        0                             0               0
## 5                        0                             1               0
## 6                        0                             1               0
## 7                        0                             1               0
## 8                        0                             1               0
## 9                        0                             0               0
## 10                       0                             0               0
##    JobRole_Manufacturing_Director JobRole_Research_Director
## 1                               0                         0
## 2                               0                         0
## 3                               0                         0
## 4                               0                         0
## 5                               0                         0
## 6                               0                         0
## 7                               0                         0
## 8                               0                         0
## 9                               1                         0
## 10                              0                         0
##    JobRole_Research_Scientist JobRole_Sales_Representative
## 1                           0                            0
## 2                           1                            0
## 3                           0                            0
## 4                           1                            0
## 5                           0                            0
## 6                           0                            0
## 7                           0                            0
## 8                           0                            0
## 9                           0                            0
## 10                          0                            0
##    MaritalStatus_Married MaritalStatus_Single cluster attrition
## 1                      0                    1       2         1
## 2                      1                    0       2         0
## 3                      0                    1       1         1
## 4                      1                    0       2         0
## 5                      1                    0       2         0
## 6                      0                    1       1         0
## 7                      1                    0       1         0
## 8                      0                    0       1         0
## 9                      0                    1       1         0
## 10                     1                    0       2         0
# Bonus track: export the data set, including the cluster and attrition
# columns, to CSV. Row names are written as well (write.csv default).
write.csv(
  x = data_final,
  file = "G:/TRABAJO/DOCENCIA/KONRAND LORENZ/HR_datasets/data_hrx2.csv"
)