library(readr)
library(dplyr) # for group_by/summarise
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Read the HR CSV directly from GitHub into `hr`.
# Column types are guessed by readr and reported in a message on load.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Create a histogram of the satisfaction_level variable.
The title should reflect a key takeaway from the distribution.
# Histogram of satisfaction_level across all rows of `hr`.
# The data frame is piped in as plot_ly()'s first argument (`data`).
hr |>
  plot_ly(x = ~satisfaction_level, type = "histogram") |>
  layout(
    title = list(text = "Employee Satisfaction: Many Employees Score Higher"),
    xaxis = list(title = "Satisfaction Level"),
    yaxis = list(title = "Count of Employees")
  )
Create a box plot of the last_evaluation variable. The
title should highlight an important insight about the evaluation
scores.
# Single box plot of last_evaluation.
# boxpoints = "outliers" asks plotly to draw only the outlying points
# as individual markers.
hr |>
  plot_ly(y = ~last_evaluation, type = "box", boxpoints = "outliers") |>
  layout(
    title = "Last Evaluation Scores: High Performance With Some Outliers",
    yaxis = list(title = "Evaluation Score (0–1 Scale)")
  )
Create a comparative box plot of average_montly_hours
grouped by department.
# Side-by-side box plots: one box of monthly hours per department.
# Column names must match the CSV exactly: `Department` is capitalised and
# `average_montly_hours` keeps the spelling used in the file.
hr |>
  plot_ly(x = ~Department, y = ~average_montly_hours, type = "box") |>
  layout(
    title = "Monthly Work Hours by Department — Some Teams Work Significantly Longer",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Average Monthly Hours")
  )
Show the frequency of employees who left (left == 1) by
salary category.
# Keep only the rows where left == 1, then tally them per salary level.
left_data <- hr[hr[["left"]] == 1, ]
salary_counts <- table(left_data[["salary"]])

# Pie chart: one slice per salary level, sized by its count in `left_data`.
# No `data` argument is given; labels and values are passed as plain vectors.
plot_ly(
  labels = names(salary_counts),
  values = as.numeric(salary_counts),
  type = "pie"
) |>
  layout(
    title = "Attrition by Salary Level: Lower Salaries Account for Most Turnover",
    showlegend = TRUE
  )
Display the average satisfaction_level for each
department.
# Mean satisfaction_level per department (NAs ignored), ordered by
# department name.
dept_satisfaction <- hr |>
  group_by(Department) |>
  summarise(avg_satisfaction = mean(satisfaction_level, na.rm = TRUE)) |>
  arrange(Department)

# Bar chart with one bar per department; bargap sets the spacing between bars.
dept_satisfaction |>
  plot_ly(x = ~Department, y = ~avg_satisfaction, type = "bar") |>
  layout(
    title = "Average Satisfaction by Department: Noticeable Differences in Morale",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Average Satisfaction Level"),
    bargap = 0.2
  )