# 1. Load Libraries

library(dplyr)      
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)    
# 2. Load Dataset

# Read the employee records from the CSV export into a data frame.
# The location is a hard-coded absolute path on the E: drive.
employee <- read.csv(file = "E:/archive (9)/Employee.csv")
# 3. Quick View

# Print the first six rows as a sanity check on the import.
head(employee, n = 6L)
##   Education JoiningYear      City PaymentTier Age Gender EverBenched
## 1 Bachelors        2017 Bangalore           3  34   Male          No
## 2 Bachelors        2013      Pune           1  28 Female          No
## 3 Bachelors        2014 New Delhi           3  38 Female          No
## 4   Masters        2016 Bangalore           3  27   Male          No
## 5   Masters        2017      Pune           3  24   Male         Yes
## 6 Bachelors        2016 Bangalore           3  22   Male          No
##   ExperienceInCurrentDomain LeaveOrNot
## 1                         0          0
## 2                         3          1
## 3                         2          0
## 4                         5          1
## 5                         2          1
## 6                         0          0
# Show each column's type together with a few sample values.
str(object = employee)
## 'data.frame':    4653 obs. of  9 variables:
##  $ Education                : chr  "Bachelors" "Bachelors" "Bachelors" "Masters" ...
##  $ JoiningYear              : int  2017 2013 2014 2016 2017 2016 2015 2016 2016 2017 ...
##  $ City                     : chr  "Bangalore" "Pune" "New Delhi" "Bangalore" ...
##  $ PaymentTier              : int  3 1 3 3 3 3 3 3 3 2 ...
##  $ Age                      : int  34 28 38 27 24 22 38 34 23 37 ...
##  $ Gender                   : chr  "Male" "Female" "Female" "Male" ...
##  $ EverBenched              : chr  "No" "No" "No" "No" ...
##  $ ExperienceInCurrentDomain: int  0 3 2 5 2 0 0 2 1 2 ...
##  $ LeaveOrNot               : int  0 1 0 1 1 0 0 1 0 0 ...
# Per-column summary: quantiles for numeric columns, length/class for text.
summary(object = employee)
##   Education          JoiningYear       City            PaymentTier   
##  Length:4653        Min.   :2012   Length:4653        Min.   :1.000  
##  Class :character   1st Qu.:2013   Class :character   1st Qu.:3.000  
##  Mode  :character   Median :2015   Mode  :character   Median :3.000  
##                     Mean   :2015                      Mean   :2.698  
##                     3rd Qu.:2017                      3rd Qu.:3.000  
##                     Max.   :2018                      Max.   :3.000  
##       Age           Gender          EverBenched       
##  Min.   :22.00   Length:4653        Length:4653       
##  1st Qu.:26.00   Class :character   Class :character  
##  Median :28.00   Mode  :character   Mode  :character  
##  Mean   :29.39                                        
##  3rd Qu.:32.00                                        
##  Max.   :41.00                                        
##  ExperienceInCurrentDomain   LeaveOrNot    
##  Min.   :0.000             Min.   :0.0000  
##  1st Qu.:2.000             1st Qu.:0.0000  
##  Median :3.000             Median :0.0000  
##  Mean   :2.906             Mean   :0.3439  
##  3rd Qu.:4.000             3rd Qu.:1.0000  
##  Max.   :7.000             Max.   :1.0000
# 4. Convert to Factors

# Recode the Gender, City and EverBenched columns as factors in one pass;
# all other columns keep their imported types.
employee[c("Gender", "City", "EverBenched")] <- lapply(
  employee[c("Gender", "City", "EverBenched")],
  as.factor
)
# 5. Missing Values

# Count the NA entries in every column (named vector, one entry per column).
employee %>%
  is.na() %>%
  colSums()
##                 Education               JoiningYear                      City 
##                         0                         0                         0 
##               PaymentTier                       Age                    Gender 
##                         0                         0                         0 
##               EverBenched ExperienceInCurrentDomain                LeaveOrNot 
##                         0                         0                         0
# 6. Create Performance Score

# Derive a composite performance score for every employee:
#   5 points per unit of ExperienceInCurrentDomain
#   10 points per PaymentTier level
#   20 points when EverBenched is "No"
#   20 points when LeaveOrNot is 0
# A logical multiplied by 20 yields 20 for TRUE and 0 for FALSE.
employee$performance_score <- with(
  employee,
  5 * ExperienceInCurrentDomain +
    10 * PaymentTier +
    20 * (EverBenched == "No") +
    20 * (LeaveOrNot == 0)
)
# 7. High Performers
# Keep only the employees whose score is strictly greater than 80.
# NOTE(review): this cut-off (> 80) differs from the >= 75 threshold used for
# the "High" category later in this script -- confirm that is intentional.
high_perf <- filter(employee, performance_score > 80)
# 8. Top 10 Employees

# Sort by score, highest first, and keep the first ten rows.
top10 <- head(
  arrange(employee, desc(performance_score)),
  n = 10
)
# 9. Ranking

# Order all employees by descending score and number the rows 1..n.
# Employees with equal scores still receive distinct, consecutive ranks
# according to their position after sorting.
ranked <- mutate(
  arrange(employee, desc(performance_score)),
  rank = seq_len(n())
)
# 10. Performance Categories

# Bucket the score into three performance categories:
#   Low    : score <  50
#   Medium : 50 <= score < 75
#   High   : score >= 75
# right = FALSE makes every interval closed on the left, so the boundary
# values 50 and 75 land in the higher bucket. The result is a factor with
# levels in the order Low, Medium, High, so plots and tables follow that
# order instead of sorting the labels alphabetically (High, Low, Medium).
employee$category <- cut(
  employee$performance_score,
  breaks = c(-Inf, 50, 75, Inf),
  labels = c("Low", "Medium", "High"),
  right = FALSE
)
# 11. Avg Performance by Gender

# Mean performance score within each Gender level (one row per level).
summarise(
  group_by(employee, Gender),
  avg_score = mean(performance_score)
)
## # A tibble: 2 × 2
##   Gender avg_score
##   <fct>      <dbl>
## 1 Female      68.5
## 2 Male        75.3
# 12. Performance Distribution

# Histogram of the performance score using bins that are 5 points wide.
ggplot(data = employee) +
  geom_histogram(mapping = aes(x = performance_score), binwidth = 5)

# 13. Experience vs Performance

# Scatter plot: experience in the current domain (x) against score (y).
ggplot(data = employee) +
  geom_point(mapping = aes(x = ExperienceInCurrentDomain, y = performance_score))

# 14. Age vs Performance

# Scatter plot: employee age (x) against score (y).
ggplot(data = employee) +
  geom_point(mapping = aes(x = Age, y = performance_score))

# 15. Payment Tier vs Performance

# Scatter plot: payment tier (x) against score (y).
ggplot(data = employee) +
  geom_point(mapping = aes(x = PaymentTier, y = performance_score))

# 16. Top 10 Visualization

# Horizontal bar chart with one bar per top-10 employee.
#
# Age is not a unique identifier: using it as the bar key places employees of
# the same age at the same position, where their bars are stacked and the
# visible bar length becomes the sum of their scores. Each row therefore gets
# its own label built from its position in top10 (already sorted by
# descending score) plus the age for context.
top10 %>%
  mutate(
    employee_label = paste0("#", row_number(), " (age ", Age, ")"),
    # Reverse the level order so that, after coord_flip(), position #1 is
    # drawn at the top of the chart.
    employee_label = factor(employee_label, levels = rev(employee_label))
  ) %>%
  ggplot(aes(x = employee_label, y = performance_score)) +
  geom_col() +
  coord_flip() +
  labs(x = "Employee (top-10 position, age)")

# 17. Category Count Plot

# Bar chart of the number of employees in each performance category,
# with the bars coloured by category.
ggplot(data = employee) +
  geom_bar(mapping = aes(x = category, fill = category))