library(readr)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Load the dataset

# Read the HR dataset straight from its raw GitHub URL into a tibble.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Histogram: Employee Satisfaction

# Interactive histogram of satisfaction_level over every row of hr.
hr %>%
  plot_ly(x = ~satisfaction_level, type = "histogram") %>%
  layout(
    title = "Most employees are satisfied (satisfaction > .5)",
    xaxis = list(title = "Satisfaction Level"),
    yaxis = list(title = "Number of Employees")
  )

Over half of the employees have a satisfaction level above .5

There is also a sizable group of employees with a low satisfaction level

# Single box plot of last_evaluation for the whole dataset (no grouping).
hr %>%
  ggplot(aes(y = last_evaluation)) +
  geom_boxplot(fill = "tomato", color = "black") +
  ggtitle("Evaluation Scores Cluster Around the Median with Outliers") +
  xlab("") + # blank x label: there is only one box, so no category to name
  ylab("Last Evaluation Score")

The Max value was 1 and the Min was .3

The median was .7, meaning employees generally did well on their evaluations

# Box plot of monthly hours, one box per department.
# Note: `average_montly_hours` is the column's actual (misspelled) name in the
# dataset, so it must stay as-is; only the displayed axis title is corrected.
plot_ly(
  hr,
  x = ~as.factor(Department),
  y = ~average_montly_hours,
  type = "box"
) %>%
  layout(
    title = "Most Departments have similar Average Monthly Hours around 200",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Average Monthly Hours")
  )

Most departments have similar average monthly hours; all medians are around 195-205

The Max hours were around 310, and the Min hours were around 96

There was no relationship between monthly hours and department

  # Mean satisfaction_level per Department (one row per department).
  avg_satisfaction <- hr %>%
    group_by(Department) %>%
    summarise(avg_satisfaction = mean(satisfaction_level))

# Bar chart of the per-department means computed above.
avg_satisfaction %>%
  plot_ly(
    x = ~factor(Department),
    y = ~avg_satisfaction,
    type = "bar"
  ) %>%
  layout(
    title = "Average Satisfaction is Mostly the same across Departments",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Average Satisfaction")
  )

Management had the highest average satisfaction

The Average Satisfaction level was between .58 and .62