This is an R Markdown document about human resources (people) analytics in an organization.
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the HR analytics CSV from disk into a data frame, then print a compact
# column-by-column preview (name, type, first values) of what was loaded.
peopledataset <- utils::read.csv(
  file = "/Users/maheshgurumoorthi/Dropbox/People Analytics Dataset/hr_analytics.csv"
)
dplyr::glimpse(peopledataset)
## Rows: 14,999
## Columns: 10
## $ satisfaction_level <dbl> 0.38, 0.80, 0.11, 0.72, 0.37, 0.41, 0.10, 0.92, …
## $ last_evaluation <dbl> 0.53, 0.86, 0.88, 0.87, 0.52, 0.50, 0.77, 0.85, …
## $ number_project <int> 2, 5, 7, 5, 2, 2, 6, 5, 5, 2, 2, 6, 4, 2, 2, 2, …
## $ average_montly_hours <int> 157, 262, 272, 223, 159, 153, 247, 259, 224, 142…
## $ time_spend_company <int> 3, 6, 4, 5, 3, 3, 4, 5, 5, 3, 3, 4, 5, 3, 3, 3, …
## $ Work_accident <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ left <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ promotion_last_5years <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ sales <chr> "sales", "sales", "sales", "sales", "sales", "sa…
## $ salary <chr> "low", "medium", "medium", "low", "low", "low", …
# Draw a reproducible 20% random subsample of the rows of `peopledataset`.
set.seed(1000) # fixed seed so the same rows are selected on every run

# sample.int() draws row indices directly, avoiding the `1:nrow(...)` idiom.
# floor() makes the sample size an explicit whole number instead of relying on
# the implicit truncation of a fractional size such as 0.2 * 14999 = 2999.8.
subsIndex <- sample.int(
  nrow(peopledataset),
  size = floor(0.2 * nrow(peopledataset))
)
subs <- peopledataset[subsIndex, ]

# NOTE(review): attach() puts the columns of `subs` on the search path so they
# can be referenced by bare name. It is kept here because later code may rely
# on that, but it is fragile (attached copies do not track changes to `subs`
# and can be masked by other objects). Prefer `subs$col`, `with(subs, ...)`,
# or the `data = subs` argument of modelling/plotting functions.
attach(subs)
library(ggplot2)
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.