Assignment 6

library(readr)
library(dplyr)   # for group_by/summarise

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(plotly)

## Loading required package: ggplot2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Read the HR CSV from GitHub into `hr`, the data frame all later chunks use.
# No column types are supplied, so read_csv() guesses them from the file.
hr_csv_url <- "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
hr <- read_csv(file = hr_csv_url)

## Rows: 14999 Columns: 10

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Question 1 — Histogram: Distribution of Employee Satisfaction

Create a histogram of the satisfaction_level variable. The title should reflect a key takeaway from the distribution.

# Q1: histogram of satisfaction_level over every row of hr. No bin arguments
# are passed, so the binning is left to plotly's defaults.
hr |>
  plot_ly(x = ~satisfaction_level, type = "histogram") |>
  layout(
    title = list(text = "Employee Satisfaction: Many Employees Score Higher"),
    xaxis = list(title = "Satisfaction Level"),
    yaxis = list(title = "Count of Employees")
  )

Question 2 — Box Plot: Last Evaluation Scores

Create a box plot of the last_evaluation variable. The title should highlight an important insight about the evaluation scores.

# Q2: a single box summarising last_evaluation for the whole data set.
# boxpoints = "outliers" asks plotly to draw individual markers only for the
# points it classifies as outliers.
evaluation_box <- plot_ly(
  hr,
  y = ~last_evaluation,
  type = "box",
  boxpoints = "outliers"
)

# The value of layout() is auto-printed here, which renders the figure.
layout(
  evaluation_box,
  title = "Last Evaluation Scores: High Performance With Some Outliers",
  yaxis = list(title = "Evaluation Score (0–1 Scale)")
)

Question 3 — Comparative Box Plot: Monthly Hours by Department

Create a comparative box plot of average_montly_hours (the dataset's own spelling of the column) grouped by Department.

# Q3: one box per department showing the spread of monthly hours.
# Column names must match the file exactly: `Department` is capitalised and
# `average_montly_hours` keeps the dataset's own spelling.
hours_by_dept_box <- plot_ly(
  hr,
  x = ~Department,
  y = ~average_montly_hours,
  type = "box"
)

# The value of layout() is auto-printed here, which renders the figure.
layout(
  hours_by_dept_box,
  title = "Monthly Work Hours by Department — Some Teams Work Significantly Longer",
  xaxis = list(title = "Department"),
  yaxis = list(title = "Average Monthly Hours")
)

Question 4 — Pie Chart: Attrition by Salary Level

Show the frequency of employees who left (left == 1) by salary category.

# Q4: pie chart of how many leavers fall in each salary category.
# Keep only the rows where left equals 1, then tally them per salary value.
left_data <- hr[hr[["left"]] == 1, ]
salary_counts <- table(left_data[["salary"]])

# The table's names become the slice labels and its counts the slice sizes.
attrition_pie <- plot_ly(
  labels = names(salary_counts),
  values = as.numeric(salary_counts),
  type = "pie"
)

# The value of layout() is auto-printed here, which renders the figure.
layout(
  attrition_pie,
  title = "Attrition by Salary Level: Lower Salaries Account for Most Turnover",
  showlegend = TRUE
)

Question 5 — Bar Plot: Average Satisfaction by Department

Display the average satisfaction_level for each department.

# Q5: bar chart of the mean satisfaction_level within each department.
# Step 1: group the rows by Department.
hr_by_department <- group_by(hr, Department)

# Step 2: collapse each group to its mean satisfaction, ignoring NA values.
department_means <- summarise(
  hr_by_department,
  avg_satisfaction = mean(satisfaction_level, na.rm = TRUE)
)

# Step 3: sort the summary rows by department name.
dept_satisfaction <- arrange(department_means, Department)

# One bar per department; bargap sets the spacing between adjacent bars.
dept_satisfaction |>
  plot_ly(x = ~Department, y = ~avg_satisfaction, type = "bar") |>
  layout(
    title = "Average Satisfaction by Department: Noticeable Differences in Morale",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Average Satisfaction Level"),
    bargap = 0.2
  )