library(readr)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the HR dataset (a CSV hosted on GitHub) into `hr`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Histogram of employee satisfaction scores. Title and axis label are set
# explicitly, so the defaults derived from the argument expression are unused.
hist(
  hr[["satisfaction_level"]],
  col = "lightblue",
  border = "black",
  xlab = "Satisfaction Level",
  main = "Distribution of Satisfaction Levels: Skewed Towards Lower Values"
)
# Analysis: The distribution is skewed left, implying that the majority of employees are satisfied.
# Single box plot summarising the spread of last-evaluation scores.
boxplot(
  hr[["last_evaluation"]],
  col = "lightgreen",
  border = "darkgreen",
  ylab = "Evaluation Score",
  main = "Distribution of Last Evaluation Scores: Presence of Outliers"
)
# Analysis: The median evaluation score is about 0.7, with the bulk of the scores falling between roughly 0.6 and 0.85. The most extreme values lie as far out as about 0.4 and 1.0.
# Side-by-side box plots of average monthly hours, one box per department.
# The formula refers to the column by its bare name because `data = hr`
# already tells boxplot() where to look it up; writing `hr$...` inside the
# formula bypassed the `data` argument for the response variable.
# The column really is spelled `average_montly_hours` in the dataset.
# Only three fill colours are supplied, so they repeat in order when there
# are more than three departments (the colours carry no meaning).
boxplot(
  average_montly_hours ~ Department,
  data = hr,
  main = "Comparative Box Plot of Average Monthly Hours: Variations Across Departments",
  xlab = "Department",
  ylab = "Average Monthly Hours",
  col = c("lightblue", "lightgreen", "lightcoral"),
  border = "darkblue"
)
# Analysis: Most departments average roughly the same number of hours, and each has its own outliers at around the same values.
# Cross-tabulate the `salary` column against the `left` column, then flatten
# the contingency table into long form: one row per salary/left combination
# with its count.
attrition_salary <- table(hr[["salary"]], hr[["left"]])
attrition_df <- setNames(
  as.data.frame(attrition_salary),
  c("Salary", "Left", "Frequency")
)

# Pie chart: a single full-width stacked bar wrapped onto polar coordinates,
# with one slice per salary/left combination.
ggplot(
  data = attrition_df,
  mapping = aes(x = "", y = Frequency, fill = interaction(Salary, Left))
) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  scale_fill_manual(
    values = c(
      "lightblue", "lightcoral", "lightgreen",
      "lightsalmon", "lightyellow", "lightpink"
    )
  ) +
  labs(
    title = "Relationship Between Salary and Employee Attrition",
    fill = "Salary & Attrition Status"
  ) +
  theme_void()
# Analysis: The pie chart shows that very few employees leave, regardless of how much they are paid. However, those who are highly paid are less likely to leave the company.
# Mean satisfaction level per department; missing values are dropped before
# averaging.
avg_satisfaction <- summarise(
  group_by(hr, Department),
  Average_Satisfaction = mean(satisfaction_level, na.rm = TRUE)
)

# Bar chart with one bar per department; bar height is the department mean.
ggplot(
  data = avg_satisfaction,
  mapping = aes(
    x = Department,
    y = Average_Satisfaction,
    fill = Department
  )
) +
  geom_col() +
  # Optional: colour palette for the per-department fill.
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Average Satisfaction Levels by Department: Key Differences Observed",
    x = "Department",
    y = "Average Satisfaction Level"
  ) +
  theme_minimal()
# Analysis: Employees in Accounting are, on average, less satisfied than those in other departments. Satisfaction in HR is also comparatively low.