R Markdown

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)

# Read the employee dataset into a data frame
# (relative path, resolved against the current working directory)
emp_data <- read.csv(file = "HR_comma_sep.csv")

Exploratory Plots: Satisfaction, Evaluation, and Attrition

# Create a histogram for satisfaction level.
# Bins start at 0 and are closed on the left ([0.4, 0.5), [0.5, 0.6), ...)
# so that a bin edge sits exactly on the 0.5 threshold named in the title.
# With only `binwidth` given, ggplot2 centres bins on multiples of the
# width, which would make one bar straddle 0.45-0.55 and blur the
# "< 0.5" reading.
ggplot(emp_data, aes(x = satisfaction_level)) +
  geom_histogram(
    binwidth = 0.1,
    boundary = 0,
    closed = "left",
    fill = "blue",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    title = "Verify few employees are dissatisfied (< 0.5)",
    x = "Satisfaction Level",
    y = "Frequency"
  ) +
  theme_minimal()

# Distribution of the last-evaluation score, drawn as a histogram
# with 0.1-wide bins on a minimal theme
ggplot(data = emp_data, mapping = aes(x = last_evaluation)) +
  geom_histogram(
    color = "black",
    fill = "red",
    alpha = 0.7,
    binwidth = 0.1
  ) +
  theme_minimal() +
  ggtitle("Employee Performance: Last Evaluation Distribution") +
  xlab("Last Evaluation") +
  ylab("Frequency")

# 2. Create a bar plot for the average satisfaction by employment status
#    with meaningful labels.
# `left` is recoded to readable labels: 0 -> "Stayed", any other value -> "Left".
# Missing satisfaction values are ignored when averaging.
avg_satisfaction_by_status <- emp_data %>%
  mutate(emp_status = if_else(left == 0, "Stayed", "Left")) %>%
  group_by(emp_status) %>%
  summarise(
    avg_satisfaction = mean(satisfaction_level, na.rm = TRUE),
    .groups = "drop"
  )

# One bar per status, so geom_col() (bar height = y value) is sufficient.
# NOTE(review): the "about 50%" figure in the title is hard-coded, not
# computed; re-check it against avg_satisfaction_by_status if the data change.
ggplot(
  avg_satisfaction_by_status,
  aes(x = emp_status, y = avg_satisfaction, fill = emp_status)
) +
  geom_col() +
  labs(
    title = "Employees who stay are, on average,\nabout 50% more satisfied",
    x = "Employment Status",
    y = "Average Satisfaction"
  ) +
  scale_fill_manual(values = c("Stayed" = "blue", "Left" = "red")) +
  theme_minimal()