https://github.com/MariaGamero/TFM-ReasonsforEmployeeTurnover
## Loading required package: ggplot2
# Load the cleaned HR data set; text columns are kept as character vectors
# (not converted to factors) so they can be recoded explicitly later.
hr_data <- read.csv(
  file = "data/HR_data_cleaned.csv",
  stringsAsFactors = FALSE
)
# Print the compact structure (column names, types, first values).
str(object = hr_data)
## 'data.frame': 11991 obs. of 10 variables:
## $ satisfaction_level : num 0.38 0.8 0.11 0.72 0.37 0.41 0.1 0.92 0.89 0.42 ...
## $ last_evaluation : num 0.53 0.86 0.88 0.87 0.52 0.5 0.77 0.85 1 0.53 ...
## $ number_project : int 2 5 7 5 2 2 6 5 5 2 ...
## $ average_montly_hours : int 157 262 272 223 159 153 247 259 224 142 ...
## $ time_spend_company : int 3 6 4 5 3 3 4 5 5 3 ...
## $ work_accident : int 0 0 0 0 0 0 0 0 0 0 ...
## $ left : int 1 1 1 1 1 1 1 1 1 1 ...
## $ promotion_last_5years: int 0 0 0 0 0 0 0 0 0 0 ...
## $ department : chr "sales" "sales" "sales" "sales" ...
## $ salary : chr "low" "medium" "medium" "low" ...
# Show the first six rows of the data set.
head(x = hr_data, n = 6L)
## satisfaction_level last_evaluation number_project average_montly_hours
## 1 0.38 0.53 2 157
## 2 0.80 0.86 5 262
## 3 0.11 0.88 7 272
## 4 0.72 0.87 5 223
## 5 0.37 0.52 2 159
## 6 0.41 0.50 2 153
## time_spend_company work_accident left promotion_last_5years department
## 1 3 0 1 0 sales
## 2 6 0 1 0 sales
## 3 4 0 1 0 sales
## 4 5 0 1 0 sales
## 5 3 0 1 0 sales
## 6 3 0 1 0 sales
## salary
## 1 low
## 2 medium
## 3 medium
## 4 low
## 5 low
## 6 low
# Print per-column summary statistics for the data set.
summary(object = hr_data)
## satisfaction_level last_evaluation number_project average_montly_hours
## Min. :0.0900 Min. :0.3600 Min. :2.000 Min. : 96.0
## 1st Qu.:0.4800 1st Qu.:0.5700 1st Qu.:3.000 1st Qu.:157.0
## Median :0.6600 Median :0.7200 Median :4.000 Median :200.0
## Mean :0.6297 Mean :0.7167 Mean :3.803 Mean :200.5
## 3rd Qu.:0.8200 3rd Qu.:0.8600 3rd Qu.:5.000 3rd Qu.:243.0
## Max. :1.0000 Max. :1.0000 Max. :7.000 Max. :310.0
## time_spend_company work_accident left promotion_last_5years
## Min. : 2.000 Min. :0.0000 Min. :0.000 Min. :0.00000
## 1st Qu.: 3.000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.00000
## Median : 3.000 Median :0.0000 Median :0.000 Median :0.00000
## Mean : 3.365 Mean :0.1543 Mean :0.166 Mean :0.01693
## 3rd Qu.: 4.000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.00000
## Max. :10.000 Max. :1.0000 Max. :1.000 Max. :1.00000
## department salary
## Length:11991 Length:11991
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Recode the 0/1 indicator columns as labelled factors.
# factor() coerces its input itself, so no separate as.factor() pass is
# needed; level 0 maps to the first label and level 1 to the second.
hr_data$left <- factor(
  hr_data$left,
  levels = c(0, 1),
  labels = c("People who stay", "People who left")
)
hr_data$promotion_last_5years <- factor(
  hr_data$promotion_last_5years,
  levels = c(0, 1),
  labels = c("Not Promoted", "Promoted")
)
hr_data$work_accident <- factor(
  hr_data$work_accident,
  levels = c(0, 1),
  labels = c("No Accident", "Accident")
)
Conclusions:
Conclusions:
Conclusions:
Conclusions:
Conclusions:
Conclusions:
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Conclusions: