library(readr)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Load the dataset
# Read the comma-separated HR dataset from its GitHub URL into `hr`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Histogram: Employee Satisfaction
# Interactive histogram of how `satisfaction_level` is distributed in `hr`.
hr %>%
  plot_ly(x = ~satisfaction_level, type = "histogram") %>%
  layout(
    title = "Most employees are satisfied (satisfaction > .5)",
    xaxis = list(title = "Satisfaction Level"),
    yaxis = list(title = "Number of Employees")
  )
Over half of the employees have a satisfaction level above .5
There is also a sizable group of employees with a low satisfaction level
# Box plot of `last_evaluation` across all rows of `hr`.
# Only a y aesthetic is mapped, so the x axis carries no information:
# its title is dropped (x = NULL) and its tick labels and tick marks are
# hidden so the reader is not shown meaningless numeric positions.
ggplot(hr, aes(y = last_evaluation)) +
  geom_boxplot(fill = "tomato", color = "black") +
  labs(
    title = "Evaluation Scores Cluster Around the Median with Outliers",
    x = NULL,
    y = "Last Evaluation Score"
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

The Max value was 1 and the Min was .3
Most departments have similar average monthly hours; all medians
are around 195-205
The Max hours were around 310, and the Min hours were around 96
There was no relationship between monthly hours and department
# Mean `satisfaction_level` per `Department`, one row per department.
avg_satisfaction <- hr %>%
  group_by(Department) %>%
  summarise(avg_satisfaction = mean(satisfaction_level))

# Interactive bar chart comparing the per-department means.
avg_satisfaction %>%
  plot_ly(x = ~factor(Department), y = ~avg_satisfaction, type = "bar") %>%
  layout(
    title = "Average Satisfaction is Mostly the same across Departments",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Average Satisfaction")
  )
Management had the highest average satisfaction
The Average Satisfaction level was between .58 and .62