library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Load the HR dataset from its raw GitHub URL; every block below reads `hr`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
  1. Histogram of satisfaction level
# Histogram of employee satisfaction scores in 0.05-wide bins.
# `boundary = 0` puts the bin edges on multiples of the bin width
# (..., 0.45, 0.50, 0.55, ...). By default ggplot2 centres the bins on those
# multiples instead, so one bin would straddle 0.5 and the counts on either
# side of that threshold could not be read off the plot.
ggplot(hr, aes(x = satisfaction_level)) +
  geom_histogram(
    binwidth = 0.05,
    boundary = 0,
    fill = "skyblue",
    color = "black"
  ) +
  labs(
    title = "Satisfaction Levels: Majority in Lower Range",
    x = "Satisfaction Level",
    y = "Frequency"
  ) +
  theme_minimal()

# Observation: The histogram shows that most employees have a satisfaction level below 0.5, suggesting general dissatisfaction.
  2. Box plot of last evaluation scores
# Single box plot of the last evaluation score over all rows of `hr`.
# Only `y` is mapped, so positions along the x axis carry no information.
# The x tick labels and vertical grid lines are blanked so that arbitrary
# position values are not displayed under the box.
ggplot(hr, aes(y = last_evaluation)) +
  geom_boxplot(fill = "orange", color = "black") +
  labs(
    title = "Evaluation Scores: Presence of High Outliers",
    x = "Evaluation",
    y = "Scores"
  ) +
  theme_minimal() +
  # Must come after theme_minimal(): a complete theme resets earlier tweaks.
  theme(
    axis.text.x = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

# Observation: The box plot shows scores extending above the upper quartile, indicating that some employees have exceptionally high evaluation scores, possibly reflecting top performers.
  3. Comparative box plot of average monthly hours by department
# One box per department showing the spread of average monthly hours; each
# box is filled with its own colour from the Brewer "Set3" palette.
hr %>%
  ggplot(aes(x = Department, y = average_montly_hours, fill = Department)) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Workload Differences: Higher Monthly Hours in Certain Departments",
    x = "Department",
    y = "Average Monthly Hours"
  ) +
  theme_minimal() +
  # Tilt the department names; kept after theme_minimal() so it is not reset.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Observation: Departments like IT and sales show wider ranges and higher median monthly hours, possibly indicating heavier workloads or overtime compared to other departments.
  4. Pie chart of attrition by salary level (including data preparation)
# Count the employees who left (left == 1) per salary band and express each
# count as a percentage of all leavers.
# NOTE: `percentage` is the composition of the leavers, not an attrition rate
# within each band; it does not account for how many employees each band has.
attrition_salary <- hr %>%
  filter(left == 1) %>%
  count(salary) %>%
  mutate(percentage = n / sum(n) * 100)

# Pie chart: a single stacked bar wrapped onto polar coordinates.
ggplot(attrition_salary, aes(x = "", y = percentage, fill = salary)) +
  geom_col(width = 1) +
  # Print each slice's share at the middle of the slice so the values can be
  # read directly instead of being estimated from the angle.
  geom_text(
    aes(label = sprintf("%.1f%%", percentage)),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar(theta = "y") +
  labs(
    title = "Attrition by Salary Level: Higher Attrition at Low Salaries",
    fill = "Salary Level",
    # Drop the default axis titles, which are meaningless on a pie.
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  # Remove the polar tick labels and grid; the slice labels replace them.
  theme(
    axis.text = element_blank(),
    panel.grid = element_blank()
  ) +
  scale_fill_brewer(palette = "Pastel1")

# Observation: The pie chart indicates that employees with low salaries account for the largest share of departures, suggesting that salary may impact retention.
  5. Average satisfaction by department
# Mean satisfaction score per department. The summary table reuses the name
# `satisfaction_level` and has one row per department.
satisfaction_level <- summarise(
  group_by(hr, Department),
  satisfaction_level = mean(satisfaction_level)
)

# Bar chart of the per-department means, one Brewer "Paired" colour each.
satisfaction_level %>%
  ggplot(aes(x = Department, y = satisfaction_level, fill = Department)) +
  geom_col() +
  scale_fill_brewer(palette = "Paired") +
  labs(
    title = "Departmental Satisfaction: Variability Across Departments",
    x = "Department",
    y = "Average Satisfaction Level"
  ) +
  theme_minimal() +
  # Tilt the department names; kept after theme_minimal() so it is not reset.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Observation: Departments such as HR and management have relatively higher average satisfaction levels, possibly due to better work conditions or engagement practices.