library(readr)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the HR dataset directly from its raw GitHub URL; every later chunk
# works from this `hr` table.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Q1 Histogram: Distribution of Employee Satisfaction

# Base-graphics histogram of the `satisfaction_level` column.
hist(
  x = hr[["satisfaction_level"]],
  col = "lightblue",
  border = "black",
  xlab = "Satisfaction Level",
  main = "Distribution of Satisfaction Levels: Skewed Towards Lower Values"
)

#Analysis The graph is skewed left, implying that a majority of employees are satisfied.

Q2 Box Plot: Last Evaluation Scores

# Single box plot summarising the spread of the `last_evaluation` column.
boxplot(
  x = hr[["last_evaluation"]],
  col = "lightgreen",
  border = "darkgreen",
  ylab = "Evaluation Score",
  main = "Distribution of Last Evaluation Scores: Presence of Outliers"
)

#Analysis The median evaluation score is about 0.7, with the majority of scores falling between 0.6 and 0.85. The outlying values extend as far as 0.4 at the low end and 1.0 at the high end.

Q3 Comparative Box Plot: Monthly Hours by Department

# Side-by-side box plots of average monthly hours, one box per department.
#
# The formula uses bare column names because `data = hr` already supplies the
# scope in which they are looked up (`average_montly_hours` is the column's
# spelling in the dataset). The fill palette is generated with one colour per
# department, so no two boxes end up sharing a recycled colour.
boxplot(
  average_montly_hours ~ Department,
  data = hr,
  main = "Comparative Box Plot of Average Monthly Hours: Variations Across Departments",
  ylab = "Average Monthly Hours",
  xlab = "Department",
  col = hcl.colors(length(unique(hr$Department)), palette = "Pastel 1"),
  border = "darkblue"
)

#Analysis Most departments average roughly the same number of monthly hours, and each has its own outliers at similar values.

Q4 Pie Chart of Frequencies: Attrition by Salary Level

# Cross-tabulate salary level against the `left` column, then flatten the
# contingency table into one row per (salary, left) combination with its count.
attrition_salary <- table(hr$salary, hr$left)
attrition_df <- setNames(
  as.data.frame(attrition_salary),
  c("Salary", "Left", "Frequency")
)

# A single stacked bar wrapped into polar coordinates yields a pie chart whose
# slices are sized by the counts; each slice is one salary/left combination.
ggplot(
  data = attrition_df,
  mapping = aes(x = "", y = Frequency, fill = interaction(Salary, Left))
) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  scale_fill_manual(
    values = c(
      "lightblue", "lightcoral", "lightgreen",
      "lightsalmon", "lightyellow", "lightpink"
    )
  ) +
  theme_void() +
  labs(
    title = "Relationship Between Salary and Employee Attrition",
    fill = "Salary & Attrition Status"
  )

#Analysis The pie chart shows that very few employees leave, regardless of how much they are paid. However, those who are paid highly are less likely to leave the company.

Q5 Bar Plot of Averages: Average Satisfaction by Department

# Mean satisfaction level for each department (missing values ignored).
avg_satisfaction <- summarise(
  group_by(hr, Department),
  Average_Satisfaction = mean(satisfaction_level, na.rm = TRUE)
)

# One bar per department, with bar height equal to the department's mean
# satisfaction and fill colour taken from the Brewer "Set3" palette.
ggplot(
  data = avg_satisfaction,
  mapping = aes(x = Department, y = Average_Satisfaction, fill = Department)
) +
  geom_col() +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal() +
  labs(
    title = "Average Satisfaction Levels by Department: Key Differences Observed",
    x = "Department",
    y = "Average Satisfaction Level"
  )

#Analysis Employees in Accounting are on average less satisfied than those in other departments. HR satisfaction is also lower in comparison.