library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Read the HR CSV straight from GitHub into `hr`
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

1. Create a histogram of the satisfaction_level variable. The title should reflect a key takeaway from the distribution.

# Histogram of satisfaction_level: one bar per bin, bar height = employee count
hr %>%
  plot_ly(x = ~satisfaction_level, type = "histogram") %>%
  layout(
    title = "Most employees are satisfied (satisfaction > .5)",
    xaxis = list(title = "Satisfaction Level"),
    yaxis = list(title = "Number of Employees")
  )

a. Most employees are satisfied (satisfaction > .5)

b. About 6% of employees are extremely dissatisfied (satisfaction <= .11)

2. Create a box plot of the last_evaluation variable. The title should highlight an important insight about the evaluation scores.

# Horizontal box plot of last_evaluation (only `x` is mapped, so a single box
# is drawn along the x-axis).
# A box plot summarises the distribution and does not count anything, so the
# y-axis gets no title and its placeholder trace label is hidden instead of
# being labelled as a count.
plot_ly(hr, x = ~last_evaluation, type = "box") %>%
  layout(title = "Most Employees Scored Well on Last Evaluation (Last Evaluation Score > .5)",
         xaxis = list(title = "Last Evaluation Score"),
         yaxis = list(title = "", showticklabels = FALSE))

a. The distribution is slightly skewed toward higher evaluation scores

b. About 50% of employees scored between 0.56 and 0.87

3. Create a comparative box plot of average_montly_hours grouped by department. The title should emphasize a significant difference or pattern among departments.

# One box per Department, showing the spread of average_montly_hours
# (column name is spelled "montly" in the dataset)
hr %>%
  plot_ly(x = ~Department, y = ~average_montly_hours, type = "box") %>%
  layout(
    title = "Average Monthly Hours Show Similarity Across Departments.",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Average Monthly Hours ")
  )

a. The average monthly hours between departments seem consistent

b. All departments have roughly the same minimum and maximum number of monthly hours

4. Create a pie chart showing the frequency of employee attrition (left) for each salary category. The title should point out the relationship between salary and attrition.

# Tally rows for every salary / left combination (count column is `n`)
attrition_counts <- count(hr, salary, left)

# Pie chart restricted to rows with left == 1: each slice is one salary
# category's share of those rows. The percentage in the title is hard-coded,
# so it must be updated by hand if the data change.
attrition_counts %>%
  filter(left == 1) %>%
  plot_ly(labels = ~salary, values = ~n, type = "pie") %>%
  layout(title = "Low Salaries Drive 60.8% of Employee Attrition")

a. A majority of the employees who left (60.8%) had a low salary

b. Only a very small share of the employees who left (2.3%) had a high salary

5. Create a bar plot displaying the average satisfaction_level for each department. The title should highlight a key observation about departmental satisfaction.

# Mean satisfaction_level per Department (one summary row per department)
avg_satisfaction <- summarise(
  group_by(hr, Department),
  avg_satisfaction = mean(satisfaction_level)
)

# Bar chart of the per-department means computed above
avg_satisfaction %>%
  plot_ly(x = ~Department, y = ~avg_satisfaction, type = "bar") %>%
  layout(
    title = "Most Departments Are Satisfied (satisfaction > .5)",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Average Satisfaction Level")
  )

a. Average satisfaction is nearly the same across departments, with the accounting department slightly lower (0.58)

b. The highest average satisfaction was found in the Management department (0.62)