# 1. Load Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# 2. Load Dataset
# Read the employee CSV into a data frame.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists -- confirm before sharing.
employee <- read.csv(file = "E:/archive (9)/Employee.csv")
# 3. Quick View
# Print the first six rows.
head(employee, n = 6L)
## Education JoiningYear City PaymentTier Age Gender EverBenched
## 1 Bachelors 2017 Bangalore 3 34 Male No
## 2 Bachelors 2013 Pune 1 28 Female No
## 3 Bachelors 2014 New Delhi 3 38 Female No
## 4 Masters 2016 Bangalore 3 27 Male No
## 5 Masters 2017 Pune 3 24 Male Yes
## 6 Bachelors 2016 Bangalore 3 22 Male No
## ExperienceInCurrentDomain LeaveOrNot
## 1 0 0
## 2 3 1
## 3 2 0
## 4 5 1
## 5 2 1
## 6 0 0
# Compact listing of every column with its type and first few values.
str(employee)
## 'data.frame': 4653 obs. of 9 variables:
## $ Education : chr "Bachelors" "Bachelors" "Bachelors" "Masters" ...
## $ JoiningYear : int 2017 2013 2014 2016 2017 2016 2015 2016 2016 2017 ...
## $ City : chr "Bangalore" "Pune" "New Delhi" "Bangalore" ...
## $ PaymentTier : int 3 1 3 3 3 3 3 3 3 2 ...
## $ Age : int 34 28 38 27 24 22 38 34 23 37 ...
## $ Gender : chr "Male" "Female" "Female" "Male" ...
## $ EverBenched : chr "No" "No" "No" "No" ...
## $ ExperienceInCurrentDomain: int 0 3 2 5 2 0 0 2 1 2 ...
## $ LeaveOrNot : int 0 1 0 1 1 0 0 1 0 0 ...
# Per-column summary: quartiles and mean for numeric columns, length and
# class for character columns.
summary(employee)
## Education JoiningYear City PaymentTier
## Length:4653 Min. :2012 Length:4653 Min. :1.000
## Class :character 1st Qu.:2013 Class :character 1st Qu.:3.000
## Mode :character Median :2015 Mode :character Median :3.000
## Mean :2015 Mean :2.698
## 3rd Qu.:2017 3rd Qu.:3.000
## Max. :2018 Max. :3.000
## Age Gender EverBenched
## Min. :22.00 Length:4653 Length:4653
## 1st Qu.:26.00 Class :character Class :character
## Median :28.00 Mode :character Mode :character
## Mean :29.39
## 3rd Qu.:32.00
## Max. :41.00
## ExperienceInCurrentDomain LeaveOrNot
## Min. :0.000 Min. :0.0000
## 1st Qu.:2.000 1st Qu.:0.0000
## Median :3.000 Median :0.0000
## Mean :2.906 Mean :0.3439
## 3rd Qu.:4.000 3rd Qu.:1.0000
## Max. :7.000 Max. :1.0000
# 4. Convert to Factors
# Turn the Gender, City and EverBenched columns into factors in one
# column-wise assignment.
employee[c("Gender", "City", "EverBenched")] <-
  lapply(employee[c("Gender", "City", "EverBenched")], as.factor)
# 5. Missing Values
# Count the NA entries in each column; the result is a named numeric vector
# with one element per column.
vapply(employee, function(column) sum(is.na(column)), numeric(1))
## Education JoiningYear City
## 0 0 0
## PaymentTier Age Gender
## 0 0 0
## EverBenched ExperienceInCurrentDomain LeaveOrNot
## 0 0 0
# 6. Create Performance Score
# Additive score per employee:
#   5 points per unit of ExperienceInCurrentDomain,
#   10 points per PaymentTier level,
#   20 points when EverBenched is "No",
#   20 points when LeaveOrNot is 0.
# The logical comparisons are multiplied by 20, so TRUE contributes 20 and
# FALSE contributes 0.
employee$performance_score <- with(
  employee,
  5 * ExperienceInCurrentDomain +
    10 * PaymentTier +
    20 * (EverBenched == "No") +
    20 * (LeaveOrNot == 0)
)
# 7. High Performers
# Keep only the rows whose score is strictly above 80.
# NOTE(review): this cutoff (> 80) differs from the "High" category cutoff
# (>= 75) used in section 10 -- confirm that is intentional.
high_perf <- filter(employee, performance_score > 80)
# 8. Top 10 Employees
# Sort by score, highest first, and keep the first ten rows.
top10 <- head(
  arrange(employee, desc(performance_score)),
  10
)
# 9. Ranking
# Sort by score, highest first, then number the rows 1..n in that order.
# Rows with equal scores still receive distinct consecutive ranks.
ranked <- mutate(
  arrange(employee, desc(performance_score)),
  rank = seq_len(n())
)
# 10. Performance Categories
# Bucket scores into "Low" (< 50), "Medium" (50 up to but excluding 75) and
# "High" (>= 75). right = FALSE makes each interval closed on the left, which
# reproduces the >= comparisons of the original nested ifelse().
# The result is a factor whose levels are listed as Low, Medium, High, so
# tables and plots show the categories by magnitude rather than in the
# alphabetical order a character column would get.
employee$category <- cut(
  employee$performance_score,
  breaks = c(-Inf, 50, 75, Inf),
  labels = c("Low", "Medium", "High"),
  right = FALSE
)
# 11. Avg Performance by Gender
# Mean performance_score within each Gender level; prints one row per level.
summarise(
  group_by(employee, Gender),
  avg_score = mean(performance_score)
)
## # A tibble: 2 × 2
## Gender avg_score
## <fct> <dbl>
## 1 Female 68.5
## 2 Male 75.3
# 12. Performance Distribution
# Histogram of performance_score with bins 5 points wide.
ggplot(data = employee, mapping = aes(performance_score)) +
  geom_histogram(binwidth = 5)

# 13. Experience vs Performance
# Scatter plot of score against ExperienceInCurrentDomain.
# NOTE(review): both variables take few distinct values, so many rows are
# likely drawn on top of each other -- consider geom_count() or jitter.
ggplot(
  data = employee,
  mapping = aes(ExperienceInCurrentDomain, performance_score)
) +
  geom_point()

# 14. Age vs Performance
# Scatter plot of score against Age.
# NOTE(review): Age is integer-valued and the score is discrete, so many rows
# likely overlap exactly -- consider geom_count() or jitter.
ggplot(data = employee, mapping = aes(Age, performance_score)) +
  geom_point()

# 15. Salary vs Performance
# PaymentTier only takes the values 1 to 3, so plain points from thousands of
# rows land on a handful of positions and hide how many employees each one
# represents. geom_count() draws one point per (tier, score) position sized
# by the number of rows there, and the tier is shown on a discrete axis.
ggplot(employee, aes(x = factor(PaymentTier), y = performance_score)) +
  geom_count() +
  labs(x = "PaymentTier")

# 16. Top 10 Visualization
# Horizontal bar chart with one bar per top-10 employee.
# Age is not unique per employee: using it as the bar identity makes
# employees of the same age share a bar, and their scores get stacked into a
# single summed bar. Each row therefore gets a unique label built from its
# rank, with the age kept for reference.
top10_plot_data <- top10 %>%
  arrange(desc(performance_score)) %>%
  mutate(
    employee_label = paste0("#", row_number(), " (age ", Age, ")"),
    # Reverse the level order so rank 1 ends up at the top after coord_flip().
    employee_label = factor(employee_label, levels = rev(employee_label))
  )
ggplot(top10_plot_data, aes(x = employee_label, y = performance_score)) +
  geom_col() +
  coord_flip() +
  labs(x = "Top 10 employees")

# 17. Category Count Plot
# Bar chart of the number of employees in each performance category, with
# each bar filled by its category.
ggplot(data = employee, mapping = aes(category, fill = category)) +
  geom_bar()
