library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Read the HR dataset from its raw GitHub location into `hr`.
hr_data_url <- "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
hr <- read_csv(file = hr_data_url)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Histogram: Distribution of Employee Satisfaction
Create a histogram of the satisfaction_level variable.
The title should reflect a key takeaway from the distribution.
# Histogram of the satisfaction_level column of `hr`, with labelled axes.
plot_ly(
  data = hr,
  type = "histogram",
  x = ~satisfaction_level
) %>%
  layout(
    title = "About 50% of Employees are Satisfied (satisfaction > .7)",
    xaxis = list(title = "Satisfaction Level"),
    yaxis = list(title = "Count of Employees")
  )
Insightful Comment:
The histogram reveals that nearly half of the employees have a satisfaction level above 0.7, indicating a generally positive sentiment within the organization.
However, the distribution also shows a significant number of employees with lower satisfaction scores, suggesting areas for improvement in employee engagement and support.
Box Plot: Last Evaluation Scores
Create a box plot of the last_evaluation variable.
The title should highlight an important insight about the evaluation scores.
# Single box plot summarising the last_evaluation column of `hr`.
plot_ly(data = hr, type = "box", y = ~last_evaluation, name = "Evaluation Scores") %>%
  layout(
    title = "Wide Variation in Employee Evaluation Scores with Some High Performers",
    # The x axis carries only the trace name, so its title is left blank.
    xaxis = list(title = ""),
    yaxis = list(title = "Last Evaluation Score")
  )
Insightful Comment:
The box plot demonstrates a wide range of evaluation scores among employees, highlighting the presence of high performers.
This variance suggests differing levels of performance and indicates potential areas for targeted training and development initiatives.
Comparative Box Plot: Monthly Hours by Department
Create a comparative box plot of average_montly_hours grouped by department.
The title should emphasize a significant difference or pattern among departments.
# One box per department for average_montly_hours (the column name is
# misspelled in the dataset itself, so it is used as-is).
plot_ly(
  data = hr,
  type = "box",
  x = ~as.factor(Department),  # the column is "Department", capital D
  y = ~average_montly_hours
) %>%
  layout(
    title = "Significant Variation in Monthly Hours by Department: Highlighting Workload Differences",
    # Tilt the department labels so they do not overlap.
    xaxis = list(tickangle = -45, title = "Department"),
    yaxis = list(title = "Average Monthly Hours")
  )
Insightful Comment:
The comparative box plot reveals considerable variation in average monthly working hours across departments.
Departments with higher average hours may face increased workloads, which could impact employee satisfaction and retention if not managed effectively.
Pie Chart of Frequencies: Attrition by Salary Level
Create a pie chart showing the frequency of employee attrition (left) for each salary category.
The title should point out the relationship between salary and attrition.
# Head-count for every (salary, left) combination, with a readable label for
# the attrition flag. `.groups = "drop"` returns an ungrouped result and
# silences the "grouped output" message summarise() would otherwise emit.
attrition_summary <- hr %>%
  group_by(salary, left) %>%
  summarise(count = n(), .groups = "drop") %>%
  mutate(attrition_status = ifelse(left == 1, "Attrition", "Retention"))

# The pie must show attrition only. Feeding it every row of attrition_summary
# would add the retained and departed counts together per salary label, so the
# slices would show overall headcount per salary band instead of attrition.
leavers_by_salary <- attrition_summary %>%
  dplyr::filter(left == 1)

plot_ly(
  data = leavers_by_salary,
  labels = ~salary,
  values = ~count,
  type = "pie",
  textinfo = "label+percent",
  # One colour per slice (one slice per salary band present in the data).
  marker = list(colors = c("#FF9999", "#66B3FF", "#99FF99"))
) %>%
  layout(
    title = "Relationship Between Salary Level and Employee Attrition",
    showlegend = TRUE,
    # Legend entries are salary bands, so label them as such.
    legend = list(title = list(text = "Salary Level"))
    # No centred annotation: on a solid (non-donut) pie it would be drawn on
    # top of the slices and hide their labels.
  )
Insightful Comment:
The pie chart illustrates that salary level has a significant impact on employee attrition, with noticeable differences in retention rates across salary categories.
This insight suggests that competitive compensation may play a critical role in retaining talent within the organization.
Bar Plot of Averages: Average Satisfaction by Department
Create a bar plot displaying the average satisfaction_level for each department.
The title should highlight a key observation about departmental satisfaction.
# Mean satisfaction_level per department, highest first.
avg_satisfaction <- hr %>%
  group_by(Department) %>%
  summarise(average_satisfaction = mean(satisfaction_level, na.rm = TRUE)) %>%
  arrange(desc(average_satisfaction))

plot_ly(
  data = avg_satisfaction,
  x = ~Department,
  y = ~average_satisfaction,
  type = "bar",
  marker = list(color = "#69b3a2")
) %>%
  layout(
    title = "Key Observation: Average Satisfaction Levels Vary Significantly Across Departments",
    # plotly orders character categories alphabetically regardless of row
    # order, so the arrange() above does not reach the chart on its own.
    # Ask the axis to rank bars by value so the plot matches the sorted table.
    xaxis = list(title = "Department", categoryorder = "total descending"),
    yaxis = list(title = "Average Satisfaction Level")
  )
Insightful Comment:
The bar plot highlights significant variations in average satisfaction levels across different departments.
These differences may reflect the effectiveness of management practices, team dynamics, or departmental culture, warranting further investigation to enhance overall employee satisfaction.